summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTao Huang <huangtao@rock-chips.com>2018-01-26 18:59:12 +0800
committerTao Huang <huangtao@rock-chips.com>2018-01-26 19:26:47 +0800
commit640193f76baa240dcd0721127a1fa3c56eb6d014 (patch)
treeb5137dfdf582fc3d1fa26535dd22d443d947bd1f
parent6802789fb99360f9ef281b372a31fbcd22f0e745 (diff)
parentc210bc406de6a7993b8d7f30c28436be80de7694 (diff)
Merge branch 'linux-linaro-lsk-v4.4-android' of git://git.linaro.org/kernel/linux-linaro-stable.git
* linux-linaro-lsk-v4.4-android: (733 commits) LSK-ANDROID: memcg: Remove wrong ->attach callback LSK-ANDROID: arm64: mm: Fix __create_pgd_mapping() call ANDROID: sdcardfs: Move default_normal to superblock blkdev: Refactoring block io latency histogram codes FROMLIST: arm64: kpti: Fix the interaction between ASID switching and software PAN FROMLIST: arm64: Move post_ttbr_update_workaround to C code FROMLIST: arm64: mm: Rename post_ttbr0_update_workaround sched: EAS: Initialize push_task as NULL to avoid direct reference on out_unlock path fscrypt: updates on 4.15-rc4 ANDROID: uid_sys_stats: fix the comment BACKPORT: tee: indicate privileged dev in gen_caps BACKPORT: tee: optee: sync with new naming of interrupts BACKPORT: tee: tee_shm: Constify dma_buf_ops structures. BACKPORT: tee: optee: interruptible RPC sleep BACKPORT: tee: optee: add const to tee_driver_ops and tee_desc structures BACKPORT: tee.txt: standardize document format BACKPORT: tee: add forward declaration for struct device BACKPORT: tee: optee: fix uninitialized symbol 'parg' BACKPORT: tee: add ARM_SMCCC dependency BACKPORT: selinux: nlmsgtab: add SOCK_DESTROY to the netlink mapping tables ... Conflicts: arch/arm64/kernel/vdso.c drivers/usb/host/xhci-plat.c include/drm/drmP.h include/linux/kasan.h kernel/time/timekeeping.c mm/kasan/kasan.c security/selinux/nlmsgtab.c Also add this commit: 0bcdc0987cce ("time: Fix ktime_get_raw() incorrect base accumulation")
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu16
-rw-r--r--Documentation/ABI/testing/sysfs-fs-f2fs43
-rw-r--r--Documentation/kasan.txt5
-rw-r--r--Documentation/kcov.txt111
-rw-r--r--Documentation/kernel-parameters.txt19
-rw-r--r--Documentation/tee.txt53
-rw-r--r--Documentation/x86/pti.txt186
-rw-r--r--MAINTAINERS14
-rw-r--r--Makefile16
-rw-r--r--arch/alpha/include/asm/mmu_context.h1
-rw-r--r--arch/arc/include/asm/uaccess.h5
-rw-r--r--arch/arm/boot/dts/am335x-evmsk.dts1
-rw-r--r--arch/arm/boot/dts/dra7.dtsi2
-rw-r--r--arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts2
-rw-r--r--arch/arm/include/asm/assembler.h18
-rw-r--r--arch/arm/include/asm/exception.h2
-rw-r--r--arch/arm/include/asm/kvm_arm.h4
-rw-r--r--arch/arm/include/asm/mmu_context.h2
-rw-r--r--arch/arm/include/asm/traps.h7
-rw-r--r--arch/arm/kernel/entry-header.S6
-rw-r--r--arch/arm/kernel/vmlinux.lds.S1
-rw-r--r--arch/arm/kvm/handle_exit.c19
-rw-r--r--arch/arm/kvm/mmio.c6
-rw-r--r--arch/arm/mach-omap1/dma.c16
-rw-r--r--arch/arm/mach-omap2/gpmc-onenand.c10
-rw-r--r--arch/arm/mach-omap2/omap_hwmod_3xxx_data.c25
-rw-r--r--arch/arm/mm/dma-mapping.c20
-rw-r--r--arch/arm/probes/kprobes/core.c24
-rw-r--r--arch/arm/probes/kprobes/test-core.c11
-rw-r--r--arch/arm64/Kconfig12
-rw-r--r--arch/arm64/include/asm/assembler.h13
-rw-r--r--arch/arm64/include/asm/cpufeature.h3
-rw-r--r--arch/arm64/include/asm/efi.h12
-rw-r--r--arch/arm64/include/asm/exception.h2
-rw-r--r--arch/arm64/include/asm/fixmap.h5
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h12
-rw-r--r--arch/arm64/include/asm/kvm_arm.h3
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h2
-rw-r--r--arch/arm64/include/asm/memory.h6
-rw-r--r--arch/arm64/include/asm/mmu.h12
-rw-r--r--arch/arm64/include/asm/mmu_context.h20
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h2
-rw-r--r--arch/arm64/include/asm/pgtable.h20
-rw-r--r--arch/arm64/include/asm/proc-fns.h6
-rw-r--r--arch/arm64/include/asm/tlbflush.h50
-rw-r--r--arch/arm64/include/asm/traps.h7
-rw-r--r--arch/arm64/include/asm/uaccess.h62
-rw-r--r--arch/arm64/kernel/acpi_parking_protocol.c3
-rw-r--r--arch/arm64/kernel/asm-offsets.c5
-rw-r--r--arch/arm64/kernel/cpufeature.c40
-rw-r--r--arch/arm64/kernel/entry.S217
-rw-r--r--arch/arm64/kernel/insn.c2
-rw-r--r--arch/arm64/kernel/process.c23
-rw-r--r--arch/arm64/kernel/psci.c4
-rw-r--r--arch/arm64/kernel/setup.c21
-rw-r--r--arch/arm64/kernel/sleep.S2
-rw-r--r--arch/arm64/kernel/smp_spin_table.c3
-rw-r--r--arch/arm64/kernel/vdso.c14
-rw-r--r--arch/arm64/kernel/vdso/gettimeofday.S2
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S22
-rw-r--r--arch/arm64/kvm/handle_exit.c19
-rw-r--r--arch/arm64/lib/clear_user.S4
-rw-r--r--arch/arm64/lib/copy_from_user.S4
-rw-r--r--arch/arm64/lib/copy_in_user.S4
-rw-r--r--arch/arm64/lib/copy_to_user.S4
-rw-r--r--arch/arm64/mm/cache.S4
-rw-r--r--arch/arm64/mm/context.c34
-rw-r--r--arch/arm64/mm/init.c11
-rw-r--r--arch/arm64/mm/kasan_init.c25
-rw-r--r--arch/arm64/mm/mmu.c66
-rw-r--r--arch/arm64/mm/proc.S15
-rw-r--r--arch/arm64/xen/hypercall.S4
-rw-r--r--arch/blackfin/Kconfig7
-rw-r--r--arch/blackfin/Kconfig.debug1
-rw-r--r--arch/blackfin/kernel/vmlinux.lds.S1
-rw-r--r--arch/c6x/kernel/vmlinux.lds.S1
-rw-r--r--arch/metag/kernel/vmlinux.lds.S1
-rw-r--r--arch/microblaze/kernel/vmlinux.lds.S1
-rw-r--r--arch/mips/kernel/process.c12
-rw-r--r--arch/mips/kernel/ptrace.c147
-rw-r--r--arch/mips/kernel/vmlinux.lds.S1
-rw-r--r--arch/mips/math-emu/cp1emu.c28
-rw-r--r--arch/nios2/kernel/vmlinux.lds.S1
-rw-r--r--arch/openrisc/include/asm/uaccess.h2
-rw-r--r--arch/openrisc/kernel/vmlinux.lds.S1
-rw-r--r--arch/parisc/include/asm/ldcw.h2
-rw-r--r--arch/parisc/kernel/entry.S13
-rw-r--r--arch/parisc/kernel/pacache.S9
-rw-r--r--arch/parisc/kernel/vmlinux.lds.S1
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S1
-rw-r--r--arch/powerpc/perf/core-book3s.c8
-rw-r--r--arch/powerpc/perf/hv-24x7.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-async.c6
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c3
-rw-r--r--arch/powerpc/platforms/powernv/setup.c2
-rw-r--r--arch/powerpc/sysdev/axonram.c5
-rw-r--r--arch/powerpc/sysdev/ipic.c4
-rw-r--r--arch/s390/include/asm/asm-prototypes.h8
-rw-r--r--arch/s390/include/asm/pci_insn.h2
-rw-r--r--arch/s390/include/asm/runtime_instr.h4
-rw-r--r--arch/s390/include/asm/switch_to.h19
-rw-r--r--arch/s390/kernel/compat_linux.c1
-rw-r--r--arch/s390/kernel/process.c2
-rw-r--r--arch/s390/kernel/runtime_instr.c30
-rw-r--r--arch/s390/kernel/syscalls.S6
-rw-r--r--arch/s390/kernel/vmlinux.lds.S1
-rw-r--r--arch/s390/pci/pci.c5
-rw-r--r--arch/s390/pci/pci_insn.c6
-rw-r--r--arch/sh/kernel/vmlinux.lds.S1
-rw-r--r--arch/sparc/kernel/vmlinux.lds.S1
-rw-r--r--arch/sparc/mm/init_64.c9
-rw-r--r--arch/tile/kernel/vmlinux.lds.S1
-rw-r--r--arch/x86/Kconfig4
-rw-r--r--arch/x86/boot/Makefile7
-rw-r--r--arch/x86/boot/compressed/Makefile3
-rw-r--r--arch/x86/boot/compressed/misc.h1
-rw-r--r--arch/x86/crypto/salsa20_glue.c7
-rw-r--r--arch/x86/entry/entry_64.S170
-rw-r--r--arch/x86/entry/entry_64_compat.S7
-rw-r--r--arch/x86/entry/vdso/Makefile3
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c99
-rw-r--r--arch/x86/entry/vdso/vdso-layout.lds.S3
-rw-r--r--arch/x86/entry/vdso/vdso2c.c3
-rw-r--r--arch/x86/entry/vdso/vma.c14
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c5
-rw-r--r--arch/x86/include/asm/alternative.h4
-rw-r--r--arch/x86/include/asm/cmdline.h2
-rw-r--r--arch/x86/include/asm/cpufeature.h9
-rw-r--r--arch/x86/include/asm/desc.h2
-rw-r--r--arch/x86/include/asm/disabled-features.h4
-rw-r--r--arch/x86/include/asm/hardirq.h6
-rw-r--r--arch/x86/include/asm/hw_irq.h2
-rw-r--r--arch/x86/include/asm/kaiser.h151
-rw-r--r--arch/x86/include/asm/mmu.h6
-rw-r--r--arch/x86/include/asm/mmu_context.h103
-rw-r--r--arch/x86/include/asm/pgtable.h28
-rw-r--r--arch/x86/include/asm/pgtable_64.h25
-rw-r--r--arch/x86/include/asm/pgtable_types.h29
-rw-r--r--arch/x86/include/asm/processor.h6
-rw-r--r--arch/x86/include/asm/pvclock.h9
-rw-r--r--arch/x86/include/asm/syscalls.h2
-rw-r--r--arch/x86/include/asm/tlbflush.h232
-rw-r--r--arch/x86/include/asm/vdso.h1
-rw-r--r--arch/x86/include/asm/vsyscall.h2
-rw-r--r--arch/x86/include/uapi/asm/processor-flags.h3
-rw-r--r--arch/x86/kernel/Makefile18
-rw-r--r--arch/x86/kernel/acpi/boot.c61
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S9
-rw-r--r--arch/x86/kernel/alternative.c7
-rw-r--r--arch/x86/kernel/apic/Makefile4
-rw-r--r--arch/x86/kernel/cpu/Makefile8
-rw-r--r--arch/x86/kernel/cpu/bugs.c63
-rw-r--r--arch/x86/kernel/cpu/bugs_64.c33
-rw-r--r--arch/x86/kernel/cpu/common.c115
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c4
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c14
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c57
-rw-r--r--arch/x86/kernel/espfix_64.c10
-rw-r--r--arch/x86/kernel/head_64.S35
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/irq.c3
-rw-r--r--arch/x86/kernel/irqinit.c2
-rw-r--r--arch/x86/kernel/kprobes/core.c5
-rw-r--r--arch/x86/kernel/kprobes/ftrace.c23
-rw-r--r--arch/x86/kernel/kvmclock.c5
-rw-r--r--arch/x86/kernel/ldt.c41
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c2
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--arch/x86/kernel/reboot.c4
-rw-r--r--arch/x86/kernel/setup.c7
-rw-r--r--arch/x86/kernel/smpboot.c9
-rw-r--r--arch/x86/kernel/tracepoint.c2
-rw-r--r--arch/x86/kernel/vm86_32.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S1
-rw-r--r--arch/x86/kvm/emulate.c32
-rw-r--r--arch/x86/kvm/svm.c21
-rw-r--r--arch/x86/kvm/vmx.c68
-rw-r--r--arch/x86/kvm/x86.c20
-rw-r--r--arch/x86/lib/Makefile3
-rw-r--r--arch/x86/lib/cmdline.c105
-rw-r--r--arch/x86/mm/Makefile7
-rw-r--r--arch/x86/mm/init.c6
-rw-r--r--arch/x86/mm/init_64.c10
-rw-r--r--arch/x86/mm/kaiser.c484
-rw-r--r--arch/x86/mm/kasan_init_64.c17
-rw-r--r--arch/x86/mm/pageattr.c63
-rw-r--r--arch/x86/mm/pat.c5
-rw-r--r--arch/x86/mm/pgtable.c16
-rw-r--r--arch/x86/mm/tlb.c198
-rw-r--r--arch/x86/pci/broadcom_bus.c2
-rw-r--r--arch/x86/platform/efi/efi-bgrt.c39
-rw-r--r--arch/x86/realmode/init.c4
-rw-r--r--arch/x86/realmode/rm/Makefile3
-rw-r--r--arch/x86/realmode/rm/trampoline_64.S3
-rw-r--r--arch/x86/um/ldt.c7
-rw-r--r--arch/x86/xen/enlighten.c6
-rw-r--r--block/bio.c14
-rw-r--r--block/blk-core.c97
-rw-r--r--crypto/algapi.c12
-rw-r--r--crypto/api.c13
-rw-r--r--crypto/asymmetric_keys/x509_cert_parser.c2
-rw-r--r--crypto/chacha20poly1305.c6
-rw-r--r--crypto/hmac.c6
-rw-r--r--crypto/mcryptd.c23
-rw-r--r--crypto/pcrypt.c19
-rw-r--r--crypto/salsa20_generic.c7
-rw-r--r--crypto/shash.c5
-rw-r--r--crypto/tcrypt.c6
-rw-r--r--drivers/acpi/apei/erst.c2
-rw-r--r--drivers/android/binder.c9
-rw-r--r--drivers/ata/libata-sff.c1
-rw-r--r--drivers/atm/horizon.c2
-rw-r--r--drivers/base/Kconfig3
-rw-r--r--drivers/base/cpu.c48
-rw-r--r--drivers/base/isa.c10
-rw-r--r--drivers/block/rbd.c2
-rw-r--r--drivers/block/zram/zram_drv.c2
-rw-r--r--drivers/bus/arm-ccn.c1
-rw-r--r--drivers/bus/sunxi-rsb.c1
-rw-r--r--drivers/char/hw_random/core.c6
-rw-r--r--drivers/char/ipmi/ipmi_si_intf.c44
-rw-r--r--drivers/char/mem.c6
-rw-r--r--drivers/clk/imx/clk-imx6q.c2
-rw-r--r--drivers/clk/mediatek/clk-mtk.h1
-rw-r--r--drivers/clk/mediatek/clk-pll.c5
-rw-r--r--drivers/clk/tegra/clk-tegra30.c2
-rw-r--r--drivers/cpuidle/cpuidle-powernv.c18
-rw-r--r--drivers/cpuidle/cpuidle.c1
-rw-r--r--drivers/cpuidle/sysfs.c12
-rw-r--r--drivers/crypto/amcc/crypto4xx_core.h10
-rw-r--r--drivers/crypto/n2_core.c3
-rw-r--r--drivers/crypto/s5p-sss.c5
-rw-r--r--drivers/dma/dmaengine.c2
-rw-r--r--drivers/dma/dmatest.c55
-rw-r--r--drivers/dma/pl330.c19
-rw-r--r--drivers/dma/ti-dma-crossbar.c8
-rw-r--r--drivers/edac/i5000_edac.c8
-rw-r--r--drivers/edac/i5400_edac.c9
-rw-r--r--drivers/edac/sb_edac.c1
-rw-r--r--drivers/firmware/efi/efi.c36
-rw-r--r--drivers/firmware/efi/esrt.c17
-rw-r--r--drivers/firmware/efi/libstub/Makefile3
-rw-r--r--drivers/firmware/efi/runtime-map.c10
-rw-r--r--drivers/gpio/gpio-altera.c26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c38
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c5
-rw-r--r--drivers/gpu/drm/armada/Makefile2
-rw-r--r--drivers/gpu/drm/exynos/exynos5433_drm_decon.c2
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_gem.c9
-rw-r--r--drivers/gpu/drm/i915/intel_i2c.c4
-rw-r--r--drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c3
-rw-r--r--drivers/gpu/drm/panel/panel-simple.c1
-rw-r--r--drivers/gpu/drm/radeon/atombios_dp.c38
-rw-r--r--drivers/gpu/drm/radeon/radeon_fb.c1
-rw-r--r--drivers/gpu/drm/radeon/si_dpm.c10
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c2
-rw-r--r--drivers/hid/Kconfig4
-rw-r--r--drivers/hid/hid-chicony.c1
-rw-r--r--drivers/hid/hid-core.c2
-rw-r--r--drivers/hid/hid-ids.h2
-rw-r--r--drivers/hid/hid-xinmo.c1
-rw-r--r--drivers/hwmon/asus_atk0110.c3
-rw-r--r--drivers/hwtracing/intel_th/pci.c5
-rw-r--r--drivers/i2c/busses/i2c-riic.c6
-rw-r--r--drivers/infiniband/core/cma.c11
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c6
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c2
-rw-r--r--drivers/infiniband/hw/mlx5/main.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c7
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h2
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c8
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c3
-rw-r--r--drivers/input/mouse/elantech.c2
-rw-r--r--drivers/input/serio/i8042-x86ia64io.h7
-rw-r--r--drivers/iommu/arm-smmu-v3.c8
-rw-r--r--drivers/iommu/intel-iommu.c8
-rw-r--r--drivers/irqchip/irq-crossbar.c8
-rw-r--r--drivers/isdn/capi/kcapi.c1
-rw-r--r--drivers/md/bcache/alloc.c2
-rw-r--r--drivers/md/bcache/extents.c2
-rw-r--r--drivers/md/bcache/journal.c2
-rw-r--r--drivers/md/bcache/request.c15
-rw-r--r--drivers/md/bcache/super.c6
-rw-r--r--drivers/md/dm-bufio.c7
-rw-r--r--drivers/md/md-cluster.c1
-rw-r--r--drivers/md/raid5.c5
-rw-r--r--drivers/media/usb/dvb-usb/dibusb-common.c16
-rw-r--r--drivers/media/usb/usbvision/usbvision-video.c7
-rw-r--r--drivers/memory/omap-gpmc.c4
-rw-r--r--drivers/mfd/cros_ec_spi.c1
-rw-r--r--drivers/mfd/twl4030-audio.c9
-rw-r--r--drivers/mfd/twl6040.c12
-rw-r--r--drivers/misc/cxl/pci.c13
-rw-r--r--drivers/misc/eeprom/at24.c6
-rw-r--r--drivers/misc/uid_sys_stats.c2
-rw-r--r--drivers/mmc/core/bus.c3
-rw-r--r--drivers/mmc/core/core.c22
-rw-r--r--drivers/mmc/host/mtk-sd.c4
-rw-r--r--drivers/mtd/nand/nand_base.c9
-rw-r--r--drivers/net/appletalk/ipddp.c2
-rw-r--r--drivers/net/can/ti_hecc.c3
-rw-r--r--drivers/net/can/usb/ems_usb.c2
-rw-r--r--drivers/net/can/usb/esd_usb2.c2
-rw-r--r--drivers/net/can/usb/gs_usb.c2
-rw-r--r--drivers/net/can/usb/kvaser_usb.c13
-rw-r--r--drivers/net/can/usb/usb_8dev.c2
-rw-r--r--drivers/net/ethernet/broadcom/bcmsysport.c23
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c20
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c8
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c23
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c13
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c95
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.h10
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c4
-rw-r--r--drivers/net/ethernet/brocade/bna/bfa_ioc.c10
-rw-r--r--drivers/net/ethernet/brocade/bna/bnad_debugfs.c2
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c23
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.c11
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_iov.c3
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c16
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_common.c4
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c2
-rw-r--r--drivers/net/ethernet/marvell/mvmdio.c3
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/cmd.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h4
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c8
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c29
-rw-r--r--drivers/net/ethernet/sfc/ef10.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c6
-rw-r--r--drivers/net/fjes/fjes_main.c2
-rw-r--r--drivers/net/ipvlan/ipvlan_core.c2
-rw-r--r--drivers/net/irda/vlsi_ir.c8
-rw-r--r--drivers/net/macvlan.c2
-rw-r--r--drivers/net/phy/at803x.c2
-rw-r--r--drivers/net/phy/micrel.c1
-rw-r--r--drivers/net/phy/spi_ks8995.c1
-rw-r--r--drivers/net/ppp/ppp_generic.c1
-rw-r--r--drivers/net/usb/cx82310_eth.c7
-rw-r--r--drivers/net/usb/lan78xx.c9
-rw-r--r--drivers/net/usb/qmi_wwan.c5
-rw-r--r--drivers/net/usb/r8152.c151
-rw-r--r--drivers/net/usb/smsc75xx.c8
-rw-r--r--drivers/net/usb/sr9700.c9
-rw-r--r--drivers/net/wimax/i2400m/usb.c3
-rw-r--r--drivers/net/wireless/ath/ath9k/tx99.c5
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c5
-rw-r--r--drivers/net/xen-netfront.c28
-rw-r--r--drivers/parisc/lba_pci.c33
-rw-r--r--drivers/pci/iov.c3
-rw-r--r--drivers/pci/pci-driver.c7
-rw-r--r--drivers/pci/pci.c4
-rw-r--r--drivers/pci/pcie/aer/aerdrv_core.c9
-rw-r--r--drivers/pci/pcie/pme.c5
-rw-r--r--drivers/pci/remove.c2
-rw-r--r--drivers/pinctrl/Kconfig3
-rw-r--r--drivers/pinctrl/pinctrl-st.c30
-rw-r--r--drivers/rtc/interface.c2
-rw-r--r--drivers/rtc/rtc-pcf8563.c2
-rw-r--r--drivers/rtc/rtc-pl031.c14
-rw-r--r--drivers/s390/net/qeth_l3_main.c15
-rw-r--r--drivers/scsi/bfa/bfad_debugfs.c5
-rw-r--r--drivers/scsi/cxgbi/cxgb4i/cxgb4i.c1
-rw-r--r--drivers/scsi/hpsa.c57
-rw-r--r--drivers/scsi/hpsa.h1
-rw-r--r--drivers/scsi/hpsa_cmd.h2
-rw-r--r--drivers/scsi/lpfc/lpfc_els.c17
-rw-r--r--drivers/scsi/lpfc/lpfc_hbadisc.c3
-rw-r--r--drivers/scsi/lpfc/lpfc_hw4.h2
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_scsih.c5
-rw-r--r--drivers/scsi/scsi_devinfo.c2
-rw-r--r--drivers/scsi/sd.c12
-rw-r--r--drivers/scsi/storvsc_drv.c27
-rw-r--r--drivers/scsi/ufs/ufshcd.c23
-rw-r--r--drivers/scsi/ufs/ufshcd.h3
-rw-r--r--drivers/spi/Kconfig1
-rw-r--r--drivers/spi/spi-sh-msiof.c2
-rw-r--r--drivers/spi/spi-xilinx.c11
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_mmap.c4
-rw-r--r--drivers/staging/vt6655/device_main.c3
-rw-r--r--drivers/target/iscsi/iscsi_target.c23
-rw-r--r--drivers/target/iscsi/iscsi_target_configfs.c3
-rw-r--r--drivers/target/target_core_alua.c33
-rw-r--r--drivers/target/target_core_file.c4
-rw-r--r--drivers/target/target_core_pr.c4
-rw-r--r--drivers/target/target_core_tmr.c9
-rw-r--r--drivers/target/target_core_transport.c2
-rw-r--r--drivers/tee/Kconfig1
-rw-r--r--drivers/tee/optee/core.c19
-rw-r--r--drivers/tee/optee/optee_smc.h12
-rw-r--r--drivers/tee/optee/rpc.c15
-rw-r--r--drivers/tee/tee_core.c5
-rw-r--r--drivers/tee/tee_shm.c2
-rw-r--r--drivers/thermal/hisi_thermal.c5
-rw-r--r--drivers/thermal/step_wise.c11
-rw-r--r--drivers/tty/n_tty.c4
-rw-r--r--drivers/tty/serial/8250/8250_fintek.c2
-rw-r--r--drivers/tty/serial/8250/8250_pci.c3
-rw-r--r--drivers/tty/serial/8250/8250_port.c5
-rw-r--r--drivers/tty/sysrq.c15
-rw-r--r--drivers/usb/core/config.c35
-rw-r--r--drivers/usb/core/devio.c56
-rw-r--r--drivers/usb/core/hub.c9
-rw-r--r--drivers/usb/core/quirks.c9
-rw-r--r--drivers/usb/gadget/configfs.c1
-rw-r--r--drivers/usb/gadget/function/f_fs.c5
-rw-r--r--drivers/usb/gadget/function/f_uvc.c8
-rw-r--r--drivers/usb/gadget/legacy/inode.c4
-rw-r--r--drivers/usb/gadget/udc/pch_udc.c1
-rw-r--r--drivers/usb/host/ehci-dbg.c2
-rw-r--r--drivers/usb/host/xhci-mem.c23
-rw-r--r--drivers/usb/host/xhci-pci.c3
-rw-r--r--drivers/usb/host/xhci-plat.c2
-rw-r--r--drivers/usb/misc/usb3503.c2
-rw-r--r--drivers/usb/mon/mon_bin.c8
-rw-r--r--drivers/usb/musb/da8xx.c10
-rw-r--r--drivers/usb/musb/ux500.c7
-rw-r--r--drivers/usb/phy/phy-isp1301.c7
-rw-r--r--drivers/usb/phy/phy-tahvo.c3
-rw-r--r--drivers/usb/serial/cp210x.c2
-rw-r--r--drivers/usb/serial/ftdi_sio.c1
-rw-r--r--drivers/usb/serial/ftdi_sio_ids.h6
-rw-r--r--drivers/usb/serial/option.c20
-rw-r--r--drivers/usb/serial/qcserial.c3
-rw-r--r--drivers/usb/storage/uas-detect.h4
-rw-r--r--drivers/usb/storage/unusual_devs.h7
-rw-r--r--drivers/usb/storage/unusual_uas.h14
-rw-r--r--drivers/usb/usbip/stub_main.c5
-rw-r--r--drivers/usb/usbip/stub_rx.c7
-rw-r--r--drivers/usb/usbip/stub_tx.c11
-rw-r--r--drivers/usb/usbip/usbip_common.c17
-rw-r--r--drivers/usb/usbip/vhci_hcd.c10
-rw-r--r--drivers/usb/usbip/vhci_rx.c23
-rw-r--r--drivers/usb/usbip/vhci_tx.c3
-rw-r--r--drivers/video/backlight/pwm_bl.c7
-rw-r--r--drivers/video/fbdev/au1200fb.c7
-rw-r--r--drivers/video/fbdev/controlfb.h2
-rw-r--r--drivers/video/fbdev/udlfb.c10
-rw-r--r--drivers/virtio/virtio.c2
-rw-r--r--fs/afs/callback.c7
-rw-r--r--fs/afs/cmservice.c3
-rw-r--r--fs/afs/file.c1
-rw-r--r--fs/afs/fsclient.c22
-rw-r--r--fs/afs/inode.c11
-rw-r--r--fs/afs/internal.h12
-rw-r--r--fs/afs/security.c7
-rw-r--r--fs/afs/server.c6
-rw-r--r--fs/afs/vlocation.c16
-rw-r--r--fs/afs/write.c32
-rw-r--r--fs/autofs4/waitq.c1
-rw-r--r--fs/btrfs/extent-tree.c14
-rw-r--r--fs/btrfs/inode.c14
-rw-r--r--fs/ceph/mds_client.c42
-rw-r--r--fs/crypto/Makefile2
-rw-r--r--fs/crypto/crypto.c37
-rw-r--r--fs/crypto/fname.c39
-rw-r--r--fs/crypto/fscrypt_private.h12
-rw-r--r--fs/crypto/hooks.c112
-rw-r--r--fs/crypto/keyinfo.c28
-rw-r--r--fs/crypto/policy.c6
-rw-r--r--fs/ext4/extents.c1
-rw-r--r--fs/ext4/inode.c5
-rw-r--r--fs/ext4/namei.c4
-rw-r--r--fs/f2fs/acl.c3
-rw-r--r--fs/f2fs/checkpoint.c64
-rw-r--r--fs/f2fs/data.c38
-rw-r--r--fs/f2fs/debug.c31
-rw-r--r--fs/f2fs/dir.c32
-rw-r--r--fs/f2fs/f2fs.h232
-rw-r--r--fs/f2fs/file.c120
-rw-r--r--fs/f2fs/gc.c37
-rw-r--r--fs/f2fs/inline.c1
-rw-r--r--fs/f2fs/inode.c31
-rw-r--r--fs/f2fs/namei.c101
-rw-r--r--fs/f2fs/node.c410
-rw-r--r--fs/f2fs/node.h16
-rw-r--r--fs/f2fs/recovery.c8
-rw-r--r--fs/f2fs/segment.c512
-rw-r--r--fs/f2fs/segment.h39
-rw-r--r--fs/f2fs/shrinker.c2
-rw-r--r--fs/f2fs/super.c226
-rw-r--r--fs/f2fs/sysfs.c53
-rw-r--r--fs/f2fs/xattr.c174
-rw-r--r--fs/fs-writeback.c35
-rw-r--r--fs/gfs2/file.c4
-rw-r--r--fs/locks.c16
-rw-r--r--fs/nfs/dir.c5
-rw-r--r--fs/nfs/nfs4client.c4
-rw-r--r--fs/nfs/nfs4proc.c3
-rw-r--r--fs/nfs/nfs4state.c1
-rw-r--r--fs/nfsd/auth.c3
-rw-r--r--fs/nfsd/nfs4state.c114
-rw-r--r--fs/nfsd/nfssvc.c30
-rw-r--r--fs/ocfs2/file.c9
-rw-r--r--fs/proc/proc_tty.c3
-rw-r--r--fs/sdcardfs/file.c2
-rw-r--r--fs/sdcardfs/inode.c7
-rw-r--r--fs/sdcardfs/main.c7
-rw-r--r--fs/sdcardfs/sdcardfs.h9
-rw-r--r--fs/sdcardfs/super.c2
-rw-r--r--fs/udf/super.c2
-rw-r--r--fs/userfaultfd.c2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c2
-rw-r--r--fs/xfs/xfs_log_recover.c2
-rw-r--r--include/asm-generic/sections.h4
-rw-r--r--include/asm-generic/vmlinux.lds.h17
-rw-r--r--include/crypto/internal/hash.h8
-rw-r--r--include/crypto/mcryptd.h1
-rw-r--r--include/drm/drmP.h23
-rw-r--r--include/linux/blkdev.h39
-rw-r--r--include/linux/bpf.h2
-rw-r--r--include/linux/buffer_head.h4
-rw-r--r--include/linux/cpu.h7
-rw-r--r--include/linux/cred.h1
-rw-r--r--include/linux/crypto.h40
-rw-r--r--include/linux/f2fs_fs.h10
-rw-r--r--include/linux/filter.h3
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/fscache.h2
-rw-r--r--include/linux/fscrypt.h290
-rw-r--r--include/linux/fscrypt_common.h138
-rw-r--r--include/linux/fscrypt_notsupp.h39
-rw-r--r--include/linux/fscrypt_supp.h17
-rw-r--r--include/linux/ftrace.h11
-rw-r--r--include/linux/genalloc.h3
-rw-r--r--include/linux/interrupt.h8
-rw-r--r--include/linux/ipv6.h3
-rw-r--r--include/linux/kaiser.h52
-rw-r--r--include/linux/kasan-checks.h12
-rw-r--r--include/linux/kasan.h71
-rw-r--r--include/linux/kcov.h33
-rw-r--r--include/linux/mlx4/device.h1
-rw-r--r--include/linux/mm.h4
-rw-r--r--include/linux/mman.h3
-rw-r--r--include/linux/mmc/host.h3
-rw-r--r--include/linux/mmu_context.h7
-rw-r--r--include/linux/mmu_notifier.h13
-rw-r--r--include/linux/mmzone.h3
-rw-r--r--include/linux/netlink.h2
-rw-r--r--include/linux/omap-gpmc.h5
-rw-r--r--include/linux/percpu-defs.h32
-rw-r--r--include/linux/phy.h11
-rw-r--r--include/linux/sched.h11
-rw-r--r--include/linux/sh_eth.h1
-rw-r--r--include/linux/slab.h10
-rw-r--r--include/linux/slab_def.h18
-rw-r--r--include/linux/slub_def.h16
-rw-r--r--include/linux/stackdepot.h32
-rw-r--r--include/linux/sysfs.h6
-rw-r--r--include/linux/tee_drv.h1
-rw-r--r--include/linux/timekeeper_internal.h4
-rw-r--r--include/linux/usb.h1
-rw-r--r--include/linux/vm_event_item.h2
-rw-r--r--include/net/genetlink.h2
-rw-r--r--include/net/ip.h2
-rw-r--r--include/scsi/libsas.h2
-rw-r--r--include/target/target_core_base.h3
-rw-r--r--include/trace/events/f2fs.h116
-rw-r--r--include/trace/events/kvm.h7
-rw-r--r--include/uapi/linux/bcache.h2
-rw-r--r--include/uapi/linux/kcov.h34
-rw-r--r--include/uapi/linux/tee.h1
-rw-r--r--include/uapi/linux/usb/ch9.h19
-rw-r--r--init/initramfs.c5
-rw-r--r--init/main.c2
-rw-r--r--kernel/Makefile12
-rw-r--r--kernel/acct.c2
-rw-r--r--kernel/audit.c10
-rw-r--r--kernel/bpf/arraymap.c37
-rw-r--r--kernel/bpf/core.c71
-rw-r--r--kernel/bpf/syscall.c54
-rw-r--r--kernel/bpf/verifier.c217
-rw-r--r--kernel/debug/kdb/kdb_io.c2
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c9
-rw-r--r--kernel/futex.c8
-rw-r--r--kernel/groups.c5
-rw-r--r--kernel/jump_label.c2
-rw-r--r--kernel/kcov.c431
-rw-r--r--kernel/locking/Makefile3
-rw-r--r--kernel/locking/mutex.c5
-rw-r--r--kernel/module.c11
-rw-r--r--kernel/rcu/Makefile4
-rw-r--r--kernel/sched/Makefile4
-rw-r--r--kernel/sched/core.c4
-rw-r--r--kernel/sched/cpufreq_schedutil.c11
-rw-r--r--kernel/sched/deadline.c62
-rw-r--r--kernel/sched/fair.c3
-rw-r--r--kernel/sched/rt.c8
-rw-r--r--kernel/signal.c18
-rw-r--r--kernel/softirq.c2
-rw-r--r--kernel/time/tick-sched.c19
-rw-r--r--kernel/time/timekeeping.c43
-rw-r--r--kernel/trace/ring_buffer.c6
-rw-r--r--kernel/trace/trace.c42
-rw-r--r--kernel/trace/trace_functions_graph.c1
-rw-r--r--kernel/uid16.c1
-rw-r--r--kernel/workqueue.c1
-rw-r--r--lib/Kconfig4
-rw-r--r--lib/Kconfig.debug21
-rw-r--r--lib/Kconfig.kasan5
-rw-r--r--lib/Makefile15
-rw-r--r--lib/asn1_decoder.c49
-rw-r--r--lib/dynamic_debug.c4
-rw-r--r--lib/genalloc.c10
-rw-r--r--lib/stackdepot.c287
-rw-r--r--lib/test_kasan.c138
-rw-r--r--mm/Makefile16
-rw-r--r--mm/compaction.c50
-rw-r--r--mm/dmapool.c10
-rw-r--r--mm/huge_memory.c99
-rw-r--r--mm/kasan/Makefile3
-rw-r--r--mm/kasan/kasan.c342
-rw-r--r--mm/kasan/kasan.h51
-rw-r--r--mm/kasan/quarantine.c328
-rw-r--r--mm/kasan/report.c262
-rw-r--r--mm/kmemcheck.c3
-rw-r--r--mm/kmemleak.c18
-rw-r--r--mm/madvise.c4
-rw-r--r--mm/memblock.c3
-rw-r--r--mm/memcontrol.c1
-rw-r--r--mm/memory_hotplug.c3
-rw-r--r--mm/mempolicy.c4
-rw-r--r--mm/mempool.c24
-rw-r--r--mm/mmap.c3
-rw-r--r--mm/mmu_context.c2
-rw-r--r--mm/oom_kill.c3
-rw-r--r--mm/page-writeback.c11
-rw-r--r--mm/page_alloc.c37
-rw-r--r--mm/percpu.c4
-rw-r--r--mm/rmap.c28
-rw-r--r--mm/slab.c239
-rw-r--r--mm/slab.h2
-rw-r--r--mm/slab_common.c20
-rw-r--r--mm/slub.c97
-rw-r--r--mm/sparse-vmemmap.c8
-rw-r--r--mm/sparse.c8
-rw-r--r--mm/swapfile.c3
-rw-r--r--mm/vmalloc.c4
-rw-r--r--mm/vmstat.c5
-rw-r--r--mm/zswap.c24
-rw-r--r--net/8021q/vlan.c7
-rw-r--r--net/bluetooth/l2cap_core.c20
-rw-r--r--net/bridge/br_netfilter_hooks.c12
-rw-r--r--net/bridge/br_netlink.c11
-rw-r--r--net/core/dev.c1
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/skbuff.c6
-rw-r--r--net/core/sock_diag.c2
-rw-r--r--net/core/sysctl_net_core.c6
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/fib_frontend.c9
-rw-r--r--net/ipv4/igmp.c44
-rw-r--r--net/ipv4/ip_fragment.c25
-rw-r--r--net/ipv4/ip_tunnel.c4
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c5
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c19
-rw-r--r--net/ipv4/raw.c15
-rw-r--r--net/ipv4/route.c14
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_vegas.c2
-rw-r--r--net/ipv6/af_inet6.c11
-rw-r--r--net/ipv6/ip6_output.c16
-rw-r--r--net/ipv6/ip6_tunnel.c9
-rw-r--r--net/ipv6/ip6_vti.c10
-rw-r--r--net/ipv6/ipv6_sockglue.c1
-rw-r--r--net/ipv6/mcast.c25
-rw-r--r--net/ipv6/sit.c1
-rw-r--r--net/ipv6/tcp_ipv6.c2
-rw-r--r--net/l2tp/l2tp_core.c2
-rw-r--r--net/l2tp/l2tp_netlink.c2
-rw-r--r--net/mac80211/debugfs.c7
-rw-r--r--net/mac80211/mesh.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c4
-rw-r--r--net/netfilter/nfnetlink_cthelper.c267
-rw-r--r--net/netfilter/nfnetlink_queue.c9
-rw-r--r--net/netlink/af_netlink.c48
-rw-r--r--net/netlink/genetlink.c16
-rw-r--r--net/packet/af_packet.c37
-rw-r--r--net/packet/internal.h1
-rw-r--r--net/rds/rdma.c6
-rw-r--r--net/sched/sch_dsmark.c10
-rw-r--r--net/sctp/debug.c2
-rw-r--r--net/sctp/socket.c42
-rw-r--r--net/socket.c1
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c1
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c1
-rw-r--r--net/sunrpc/sched.c3
-rw-r--r--net/sunrpc/svcauth_unix.c2
-rw-r--r--net/tipc/server.c5
-rw-r--r--net/xfrm/xfrm_policy.c3
-rw-r--r--net/xfrm/xfrm_state.c7
-rw-r--r--net/xfrm/xfrm_user.c25
-rw-r--r--scripts/Makefile.lib6
-rw-r--r--scripts/genksyms/genksyms.c6
-rw-r--r--scripts/module-common.lds2
-rw-r--r--scripts/package/Makefile5
-rw-r--r--security/Kconfig10
-rw-r--r--security/integrity/ima/ima_main.c4
-rw-r--r--security/keys/request_key.c48
-rw-r--r--security/selinux/nlmsgtab.c8
-rw-r--r--sound/core/oss/pcm_oss.c41
-rw-r--r--sound/core/oss/pcm_plugin.c14
-rw-r--r--sound/core/pcm.c2
-rw-r--r--sound/core/pcm_lib.c4
-rw-r--r--sound/core/rawmidi.c15
-rw-r--r--sound/core/seq/seq_timer.c2
-rw-r--r--sound/drivers/aloop.c98
-rw-r--r--sound/hda/hdac_i915.c6
-rw-r--r--sound/pci/hda/hda_intel.c6
-rw-r--r--sound/pci/hda/patch_conexant.c11
-rw-r--r--sound/pci/hda/patch_realtek.c19
-rw-r--r--sound/soc/codecs/twl4030.c4
-rw-r--r--sound/soc/fsl/fsl_ssi.c18
-rw-r--r--sound/usb/mixer.c26
-rw-r--r--tools/hv/hv_kvp_daemon.c70
-rw-r--r--tools/perf/tests/attr.c2
-rw-r--r--tools/perf/util/symbol.c2
-rw-r--r--tools/testing/selftests/powerpc/harness.c6
-rw-r--r--tools/testing/selftests/x86/Makefile3
-rw-r--r--tools/testing/selftests/x86/ldt_gdt.c17
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c500
-rw-r--r--tools/usb/usbip/src/utils.c9
-rw-r--r--virt/kvm/arm/arch_timer.c3
-rw-r--r--virt/kvm/kvm_main.c2
730 files changed, 11966 insertions, 4053 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index b683e8ee69ec..ea6a043f5beb 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -271,3 +271,19 @@ Description: Parameters for the CPU cache attributes
- WriteBack: data is written only to the cache line and
the modified cache line is written to main
memory only when it is replaced
+
+What: /sys/devices/system/cpu/vulnerabilities
+ /sys/devices/system/cpu/vulnerabilities/meltdown
+ /sys/devices/system/cpu/vulnerabilities/spectre_v1
+ /sys/devices/system/cpu/vulnerabilities/spectre_v2
+Date: January 2018
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Information about CPU vulnerabilities
+
+ The files are named after the code names of CPU
+ vulnerabilities. The output of those files reflects the
+ state of the CPUs in the system. Possible output values:
+
+ "Not affected" CPU is not affected by the vulnerability
+ "Vulnerable" CPU is affected and no mitigation in effect
+ "Mitigation: $M" CPU is affected and mitigation $M is in effect
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 500c60403653..2baed1151eac 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -51,6 +51,18 @@ Description:
Controls the dirty page count condition for the in-place-update
policies.
+What: /sys/fs/f2fs/<disk>/min_hot_blocks
+Date: March 2017
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+ Controls the dirty page count condition for redefining hot data.
+
+What: /sys/fs/f2fs/<disk>/min_ssr_sections
+Date: October 2017
+Contact: "Chao Yu" <yuchao0@huawei.com>
+Description:
+ Controls the fee section threshold to trigger SSR allocation.
+
What: /sys/fs/f2fs/<disk>/max_small_discards
Date: November 2013
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
@@ -96,6 +108,18 @@ Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Controls the checkpoint timing.
+What: /sys/fs/f2fs/<disk>/idle_interval
+Date: January 2016
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+ Controls the idle timing.
+
+What: /sys/fs/f2fs/<disk>/iostat_enable
+Date: August 2017
+Contact: "Chao Yu" <yuchao0@huawei.com>
+Description:
+ Controls to enable/disable IO stat.
+
What: /sys/fs/f2fs/<disk>/ra_nid_pages
Date: October 2015
Contact: "Chao Yu" <chao2.yu@samsung.com>
@@ -116,6 +140,12 @@ Contact: "Shuoran Liu" <liushuoran@huawei.com>
Description:
Shows total written kbytes issued to disk.
+What: /sys/fs/f2fs/<disk>/feature
+Date: July 2017
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+ Shows all enabled features in current device.
+
What: /sys/fs/f2fs/<disk>/inject_rate
Date: May 2016
Contact: "Sheng Yong" <shengyong1@huawei.com>
@@ -132,7 +162,18 @@ What: /sys/fs/f2fs/<disk>/reserved_blocks
Date: June 2017
Contact: "Chao Yu" <yuchao0@huawei.com>
Description:
- Controls current reserved blocks in system.
+ Controls target reserved blocks in system, the threshold
+ is soft, it could exceed current available user space.
+
+What: /sys/fs/f2fs/<disk>/current_reserved_blocks
+Date: October 2017
+Contact: "Yunlong Song" <yunlong.song@huawei.com>
+Contact: "Chao Yu" <yuchao0@huawei.com>
+Description:
+ Shows current reserved blocks in system, it may be temporarily
+ smaller than target_reserved_blocks, but will gradually
+ increase to target_reserved_blocks when more free blocks are
+ freed by user later.
What: /sys/fs/f2fs/<disk>/gc_urgent
Date: August 2017
diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt
index aa1e0c91e368..7dd95b35cd7c 100644
--- a/Documentation/kasan.txt
+++ b/Documentation/kasan.txt
@@ -12,8 +12,7 @@ KASAN uses compile-time instrumentation for checking every memory access,
therefore you will need a GCC version 4.9.2 or later. GCC 5.0 or later is
required for detection of out-of-bounds accesses to stack or global variables.
-Currently KASAN is supported only for x86_64 architecture and requires the
-kernel to be built with the SLUB allocator.
+Currently KASAN is supported only for x86_64 architecture.
1. Usage
========
@@ -27,7 +26,7 @@ inline are compiler instrumentation types. The former produces smaller binary
the latter is 1.1 - 2 times faster. Inline instrumentation requires a GCC
version 5.0 or later.
-Currently KASAN works only with the SLUB memory allocator.
+KASAN works with both SLUB and SLAB memory allocators.
For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.
To disable instrumentation for specific files or directories, add a line
diff --git a/Documentation/kcov.txt b/Documentation/kcov.txt
new file mode 100644
index 000000000000..779ff4ab1c1d
--- /dev/null
+++ b/Documentation/kcov.txt
@@ -0,0 +1,111 @@
+kcov: code coverage for fuzzing
+===============================
+
+kcov exposes kernel code coverage information in a form suitable for coverage-
+guided fuzzing (randomized testing). Coverage data of a running kernel is
+exported via the "kcov" debugfs file. Coverage collection is enabled on a task
+basis, and thus it can capture precise coverage of a single system call.
+
+Note that kcov does not aim to collect as much coverage as possible. It aims
+to collect more or less stable coverage that is function of syscall inputs.
+To achieve this goal it does not collect coverage in soft/hard interrupts
+and instrumentation of some inherently non-deterministic parts of kernel is
+disbled (e.g. scheduler, locking).
+
+Usage:
+======
+
+Configure kernel with:
+
+ CONFIG_KCOV=y
+
+CONFIG_KCOV requires gcc built on revision 231296 or later.
+Profiling data will only become accessible once debugfs has been mounted:
+
+ mount -t debugfs none /sys/kernel/debug
+
+The following program demonstrates kcov usage from within a test program:
+
+#include <stdio.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#define KCOV_INIT_TRACE _IOR('c', 1, unsigned long)
+#define KCOV_ENABLE _IO('c', 100)
+#define KCOV_DISABLE _IO('c', 101)
+#define COVER_SIZE (64<<10)
+
+int main(int argc, char **argv)
+{
+ int fd;
+ unsigned long *cover, n, i;
+
+ /* A single fd descriptor allows coverage collection on a single
+ * thread.
+ */
+ fd = open("/sys/kernel/debug/kcov", O_RDWR);
+ if (fd == -1)
+ perror("open"), exit(1);
+ /* Setup trace mode and trace size. */
+ if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE))
+ perror("ioctl"), exit(1);
+ /* Mmap buffer shared between kernel- and user-space. */
+ cover = (unsigned long*)mmap(NULL, COVER_SIZE * sizeof(unsigned long),
+ PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if ((void*)cover == MAP_FAILED)
+ perror("mmap"), exit(1);
+ /* Enable coverage collection on the current thread. */
+ if (ioctl(fd, KCOV_ENABLE, 0))
+ perror("ioctl"), exit(1);
+ /* Reset coverage from the tail of the ioctl() call. */
+ __atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED);
+ /* That's the target syscal call. */
+ read(-1, NULL, 0);
+ /* Read number of PCs collected. */
+ n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
+ for (i = 0; i < n; i++)
+ printf("0x%lx\n", cover[i + 1]);
+ /* Disable coverage collection for the current thread. After this call
+ * coverage can be enabled for a different thread.
+ */
+ if (ioctl(fd, KCOV_DISABLE, 0))
+ perror("ioctl"), exit(1);
+ /* Free resources. */
+ if (munmap(cover, COVER_SIZE * sizeof(unsigned long)))
+ perror("munmap"), exit(1);
+ if (close(fd))
+ perror("close"), exit(1);
+ return 0;
+}
+
+After piping through addr2line output of the program looks as follows:
+
+SyS_read
+fs/read_write.c:562
+__fdget_pos
+fs/file.c:774
+__fget_light
+fs/file.c:746
+__fget_light
+fs/file.c:750
+__fget_light
+fs/file.c:760
+__fdget_pos
+fs/file.c:784
+SyS_read
+fs/read_write.c:562
+
+If a program needs to collect coverage from several threads (independently),
+it needs to open /sys/kernel/debug/kcov in each thread separately.
+
+The interface is fine-grained to allow efficient forking of test processes.
+That is, a parent process opens /sys/kernel/debug/kcov, enables trace mode,
+mmaps coverage buffer and then forks child processes in a loop. Child processes
+only need to enable coverage (disable happens automatically on thread end).
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 21321b9acfb0..79af1716bf4d 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2525,6 +2525,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nointroute [IA-64]
+ noinvpcid [X86] Disable the INVPCID cpu feature.
+
nojitter [IA-64] Disables jitter checking for ITC timers.
no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
@@ -2559,6 +2561,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nopat [X86] Disable PAT (page attribute table extension of
pagetables) support.
+ nopcid [X86-64] Disable the PCID cpu feature.
+
norandmaps Don't use address space randomization. Equivalent to
echo 0 > /proc/sys/kernel/randomize_va_space
@@ -3056,6 +3060,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
pt. [PARIDE]
See Documentation/blockdev/paride.txt.
+ pti= [X86_64] Control Page Table Isolation of user and
+ kernel address spaces. Disabling this feature
+ removes hardening, but improves performance of
+ system calls and interrupts.
+
+ on - unconditionally enable
+ off - unconditionally disable
+ auto - kernel detects whether your CPU model is
+ vulnerable to issues that PTI mitigates
+
+ Not specifying this option is equivalent to pti=auto.
+
+ nopti [X86_64]
+ Equivalent to pti=off
+
pty.legacy_count=
[KNL] Number of legacy pty's. Overwrites compiled-in
default number.
diff --git a/Documentation/tee.txt b/Documentation/tee.txt
index 718599357596..56ea85ffebf2 100644
--- a/Documentation/tee.txt
+++ b/Documentation/tee.txt
@@ -1,4 +1,7 @@
+=============
TEE subsystem
+=============
+
This document describes the TEE subsystem in Linux.
A TEE (Trusted Execution Environment) is a trusted OS running in some
@@ -80,27 +83,27 @@ The GlobalPlatform TEE Client API [5] is implemented on top of the generic
TEE API.
Picture of the relationship between the different components in the
-OP-TEE architecture.
-
- User space Kernel Secure world
- ~~~~~~~~~~ ~~~~~~ ~~~~~~~~~~~~
- +--------+ +-------------+
- | Client | | Trusted |
- +--------+ | Application |
- /\ +-------------+
- || +----------+ /\
- || |tee- | ||
- || |supplicant| \/
- || +----------+ +-------------+
- \/ /\ | TEE Internal|
- +-------+ || | API |
- + TEE | || +--------+--------+ +-------------+
- | Client| || | TEE | OP-TEE | | OP-TEE |
- | API | \/ | subsys | driver | | Trusted OS |
- +-------+----------------+----+-------+----+-----------+-------------+
- | Generic TEE API | | OP-TEE MSG |
- | IOCTL (TEE_IOC_*) | | SMCCC (OPTEE_SMC_CALL_*) |
- +-----------------------------+ +------------------------------+
+OP-TEE architecture::
+
+ User space Kernel Secure world
+ ~~~~~~~~~~ ~~~~~~ ~~~~~~~~~~~~
+ +--------+ +-------------+
+ | Client | | Trusted |
+ +--------+ | Application |
+ /\ +-------------+
+ || +----------+ /\
+ || |tee- | ||
+ || |supplicant| \/
+ || +----------+ +-------------+
+ \/ /\ | TEE Internal|
+ +-------+ || | API |
+ + TEE | || +--------+--------+ +-------------+
+ | Client| || | TEE | OP-TEE | | OP-TEE |
+ | API | \/ | subsys | driver | | Trusted OS |
+ +-------+----------------+----+-------+----+-----------+-------------+
+ | Generic TEE API | | OP-TEE MSG |
+ | IOCTL (TEE_IOC_*) | | SMCCC (OPTEE_SMC_CALL_*) |
+ +-----------------------------+ +------------------------------+
RPC (Remote Procedure Call) are requests from secure world to kernel driver
or tee-supplicant. An RPC is identified by a special range of SMCCC return
@@ -109,10 +112,16 @@ kernel are handled by the kernel driver. Other RPC messages will be forwarded to
tee-supplicant without further involvement of the driver, except switching
shared memory buffer representation.
-References:
+References
+==========
+
[1] https://github.com/OP-TEE/optee_os
+
[2] http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html
+
[3] drivers/tee/optee/optee_smc.h
+
[4] drivers/tee/optee/optee_msg.h
+
[5] http://www.globalplatform.org/specificationsdevice.asp look for
"TEE Client API Specification v1.0" and click download.
diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
new file mode 100644
index 000000000000..d11eff61fc9a
--- /dev/null
+++ b/Documentation/x86/pti.txt
@@ -0,0 +1,186 @@
+Overview
+========
+
+Page Table Isolation (pti, previously known as KAISER[1]) is a
+countermeasure against attacks on the shared user/kernel address
+space such as the "Meltdown" approach[2].
+
+To mitigate this class of attacks, we create an independent set of
+page tables for use only when running userspace applications. When
+the kernel is entered via syscalls, interrupts or exceptions, the
+page tables are switched to the full "kernel" copy. When the system
+switches back to user mode, the user copy is used again.
+
+The userspace page tables contain only a minimal amount of kernel
+data: only what is needed to enter/exit the kernel such as the
+entry/exit functions themselves and the interrupt descriptor table
+(IDT). There are a few strictly unnecessary things that get mapped
+such as the first C function when entering an interrupt (see
+comments in pti.c).
+
+This approach helps to ensure that side-channel attacks leveraging
+the paging structures do not function when PTI is enabled. It can be
+enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
+Once enabled at compile-time, it can be disabled at boot with the
+'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
+
+Page Table Management
+=====================
+
+When PTI is enabled, the kernel manages two sets of page tables.
+The first set is very similar to the single set which is present in
+kernels without PTI. This includes a complete mapping of userspace
+that the kernel can use for things like copy_to_user().
+
+Although _complete_, the user portion of the kernel page tables is
+crippled by setting the NX bit in the top level. This ensures
+that any missed kernel->user CR3 switch will immediately crash
+userspace upon executing its first instruction.
+
+The userspace page tables map only the kernel data needed to enter
+and exit the kernel. This data is entirely contained in the 'struct
+cpu_entry_area' structure which is placed in the fixmap which gives
+each CPU's copy of the area a compile-time-fixed virtual address.
+
+For new userspace mappings, the kernel makes the entries in its
+page tables like normal. The only difference is when the kernel
+makes entries in the top (PGD) level. In addition to setting the
+entry in the main kernel PGD, a copy of the entry is made in the
+userspace page tables' PGD.
+
+This sharing at the PGD level also inherently shares all the lower
+layers of the page tables. This leaves a single, shared set of
+userspace page tables to manage. One PTE to lock, one set of
+accessed bits, dirty bits, etc...
+
+Overhead
+========
+
+Protection against side-channel attacks is important. But,
+this protection comes at a cost:
+
+1. Increased Memory Use
+ a. Each process now needs an order-1 PGD instead of order-0.
+ (Consumes an additional 4k per process).
+ b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
+ aligned so that it can be mapped by setting a single PMD
+ entry. This consumes nearly 2MB of RAM once the kernel
+ is decompressed, but no space in the kernel image itself.
+
+2. Runtime Cost
+ a. CR3 manipulation to switch between the page table copies
+ must be done at interrupt, syscall, and exception entry
+ and exit (it can be skipped when the kernel is interrupted,
+ though.) Moves to CR3 are on the order of a hundred
+ cycles, and are required at every entry and exit.
+ b. A "trampoline" must be used for SYSCALL entry. This
+ trampoline depends on a smaller set of resources than the
+ non-PTI SYSCALL entry code, so requires mapping fewer
+ things into the userspace page tables. The downside is
+ that stacks must be switched at entry time.
+ d. Global pages are disabled for all kernel structures not
+ mapped into both kernel and userspace page tables. This
+ feature of the MMU allows different processes to share TLB
+ entries mapping the kernel. Losing the feature means more
+ TLB misses after a context switch. The actual loss of
+ performance is very small, however, never exceeding 1%.
+ d. Process Context IDentifiers (PCID) is a CPU feature that
+ allows us to skip flushing the entire TLB when switching page
+ tables by setting a special bit in CR3 when the page tables
+ are changed. This makes switching the page tables (at context
+ switch, or kernel entry/exit) cheaper. But, on systems with
+ PCID support, the context switch code must flush both the user
+ and kernel entries out of the TLB. The user PCID TLB flush is
+ deferred until the exit to userspace, minimizing the cost.
+ See intel.com/sdm for the gory PCID/INVPCID details.
+ e. The userspace page tables must be populated for each new
+ process. Even without PTI, the shared kernel mappings
+ are created by copying top-level (PGD) entries into each
+ new process. But, with PTI, there are now *two* kernel
+ mappings: one in the kernel page tables that maps everything
+ and one for the entry/exit structures. At fork(), we need to
+ copy both.
+ f. In addition to the fork()-time copying, there must also
+ be an update to the userspace PGD any time a set_pgd() is done
+ on a PGD used to map userspace. This ensures that the kernel
+ and userspace copies always map the same userspace
+ memory.
+ g. On systems without PCID support, each CR3 write flushes
+ the entire TLB. That means that each syscall, interrupt
+ or exception flushes the TLB.
+ h. INVPCID is a TLB-flushing instruction which allows flushing
+ of TLB entries for non-current PCIDs. Some systems support
+ PCIDs, but do not support INVPCID. On these systems, addresses
+ can only be flushed from the TLB for the current PCID. When
+ flushing a kernel address, we need to flush all PCIDs, so a
+ single kernel address flush will require a TLB-flushing CR3
+ write upon the next use of every PCID.
+
+Possible Future Work
+====================
+1. We can be more careful about not actually writing to CR3
+ unless its value is actually changed.
+2. Allow PTI to be enabled/disabled at runtime in addition to the
+ boot-time switching.
+
+Testing
+========
+
+To test stability of PTI, the following test procedure is recommended,
+ideally doing all of these in parallel:
+
+1. Set CONFIG_DEBUG_ENTRY=y
+2. Run several copies of all of the tools/testing/selftests/x86/ tests
+ (excluding MPX and protection_keys) in a loop on multiple CPUs for
+ several minutes. These tests frequently uncover corner cases in the
+ kernel entry code. In general, old kernels might cause these tests
+ themselves to crash, but they should never crash the kernel.
+3. Run the 'perf' tool in a mode (top or record) that generates many
+ frequent performance monitoring non-maskable interrupts (see "NMI"
+ in /proc/interrupts). This exercises the NMI entry/exit code which
+ is known to trigger bugs in code paths that did not expect to be
+ interrupted, including nested NMIs. Using "-c" boosts the rate of
+ NMIs, and using two -c with separate counters encourages nested NMIs
+ and less deterministic behavior.
+
+ while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
+
+4. Launch a KVM virtual machine.
+5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
+ This has been a lightly-tested code path and needs extra scrutiny.
+
+Debugging
+=========
+
+Bugs in PTI cause a few different signatures of crashes
+that are worth noting here.
+
+ * Failures of the selftests/x86 code. Usually a bug in one of the
+ more obscure corners of entry_64.S
+ * Crashes in early boot, especially around CPU bringup. Bugs
+ in the trampoline code or mappings cause these.
+ * Crashes at the first interrupt. Caused by bugs in entry_64.S,
+ like screwing up a page table switch. Also caused by
+ incorrectly mapping the IRQ handler entry code.
+ * Crashes at the first NMI. The NMI code is separate from main
+ interrupt handlers and can have bugs that do not affect
+ normal interrupts. Also caused by incorrectly mapping NMI
+ code. NMIs that interrupt the entry code must be very
+ careful and can be the cause of crashes that show up when
+ running perf.
+ * Kernel crashes at the first exit to userspace. entry_64.S
+ bugs, or failing to map some of the exit code.
+ * Crashes at first interrupt that interrupts userspace. The paths
+ in entry_64.S that return to userspace are sometimes separate
+ from the ones that return to the kernel.
+ * Double faults: overflowing the kernel stack because of page
+ faults upon page faults. Caused by touching non-pti-mapped
+ data in the entry code, or forgetting to switch to kernel
+ CR3 before calling into C functions which are not pti-mapped.
+ * Userspace segfaults early in boot, sometimes manifesting
+ as mount(8) failing to mount the rootfs. These have
+ tended to be TLB invalidation issues. Usually invalidating
+ the wrong PCID, or otherwise missing an invalidation.
+
+1. https://gruss.cc/files/kaiser.pdf
+2. https://meltdownattack.com/meltdown.pdf
diff --git a/MAINTAINERS b/MAINTAINERS
index e86ddedc813c..b88e249026a1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5986,6 +5986,20 @@ S: Maintained
F: Documentation/hwmon/k8temp
F: drivers/hwmon/k8temp.c
+KASAN
+M: Andrey Ryabinin <aryabinin@virtuozzo.com>
+R: Alexander Potapenko <glider@google.com>
+R: Dmitry Vyukov <dvyukov@google.com>
+L: kasan-dev@googlegroups.com
+S: Maintained
+F: arch/*/include/asm/kasan.h
+F: arch/*/mm/kasan_init*
+F: Documentation/kasan.txt
+F: include/linux/kasan*.h
+F: lib/test_kasan.c
+F: mm/kasan/
+F: scripts/Makefile.kasan
+
KCONFIG
M: "Yann E. MORIN" <yann.morin.1998@free.fr>
L: linux-kbuild@vger.kernel.org
diff --git a/Makefile b/Makefile
index a785aeed4674..fdc4239287d4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 4
-SUBLEVEL = 103
+SUBLEVEL = 112
EXTRAVERSION =
NAME = Blurry Fish Butt
@@ -386,6 +386,7 @@ LDFLAGS_MODULE =
CFLAGS_KERNEL =
AFLAGS_KERNEL =
CFLAGS_GCOV = -fprofile-arcs -ftest-coverage -fno-tree-loop-im
+CFLAGS_KCOV = -fsanitize-coverage=trace-pc
# Use USERINCLUDE when you must reference the UAPI directories only.
@@ -433,7 +434,7 @@ export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE
export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
-export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KASAN
+export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KCOV CFLAGS_KASAN
export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE
export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE
export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL
@@ -715,6 +716,14 @@ endif
endif
KBUILD_CFLAGS += $(stackp-flag)
+ifdef CONFIG_KCOV
+ ifeq ($(call cc-option, $(CFLAGS_KCOV)),)
+ $(warning Cannot use CONFIG_KCOV: \
+ -fsanitize-coverage=trace-pc is not supported by compiler)
+ CFLAGS_KCOV =
+ endif
+endif
+
ifeq ($(cc-name),clang)
ifneq ($(CROSS_COMPILE),)
CLANG_TRIPLE ?= $(CROSS_COMPILE)
@@ -817,6 +826,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
# disable invalid "can't wrap" optimizations for signed / pointers
KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
+# Make sure -fstack-check isn't enabled (like gentoo apparently did)
+KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,)
+
# conserve stack if available
KBUILD_CFLAGS += $(call cc-option,-fconserve-stack)
diff --git a/arch/alpha/include/asm/mmu_context.h b/arch/alpha/include/asm/mmu_context.h
index 4c51c05333c6..4cafffa80e2c 100644
--- a/arch/alpha/include/asm/mmu_context.h
+++ b/arch/alpha/include/asm/mmu_context.h
@@ -7,6 +7,7 @@
* Copyright (C) 1996, Linus Torvalds
*/
+#include <linux/sched.h>
#include <asm/machvec.h>
#include <asm/compiler.h>
#include <asm-generic/mm_hooks.h>
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index d4d8df706efa..57387b567f34 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -673,6 +673,7 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count)
return 0;
__asm__ __volatile__(
+ " mov lp_count, %5 \n"
" lp 3f \n"
"1: ldb.ab %3, [%2, 1] \n"
" breq.d %3, 0, 3f \n"
@@ -689,8 +690,8 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count)
" .word 1b, 4b \n"
" .previous \n"
: "+r"(res), "+r"(dst), "+r"(src), "=r"(val)
- : "g"(-EFAULT), "l"(count)
- : "memory");
+ : "g"(-EFAULT), "r"(count)
+ : "lp_count", "lp_start", "lp_end", "memory");
return res;
}
diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts
index 89442e98a837..3af570517903 100644
--- a/arch/arm/boot/dts/am335x-evmsk.dts
+++ b/arch/arm/boot/dts/am335x-evmsk.dts
@@ -668,6 +668,7 @@
ti,non-removable;
bus-width = <4>;
cap-power-off-card;
+ keep-power-in-suspend;
pinctrl-names = "default";
pinctrl-0 = <&mmc2_pins>;
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index c2a03c740e79..02bd6312d1d9 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -227,6 +227,7 @@
device_type = "pci";
ranges = <0x81000000 0 0 0x03000 0 0x00010000
0x82000000 0 0x20013000 0x13000 0 0xffed000>;
+ bus-range = <0x00 0xff>;
#interrupt-cells = <1>;
num-lanes = <1>;
ti,hwmods = "pcie1";
@@ -262,6 +263,7 @@
device_type = "pci";
ranges = <0x81000000 0 0 0x03000 0 0x00010000
0x82000000 0 0x30013000 0x13000 0 0xffed000>;
+ bus-range = <0x00 0xff>;
#interrupt-cells = <1>;
num-lanes = <1>;
ti,hwmods = "pcie2";
diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
index 5b0430041ec6..fec92cd36ae3 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
@@ -88,7 +88,7 @@
interrupts-extended = <&intc 83 &omap3_pmx_core 0x11a>;
pinctrl-names = "default";
pinctrl-0 = <&mmc1_pins &mmc1_cd>;
- cd-gpios = <&gpio4 31 IRQ_TYPE_LEVEL_LOW>; /* gpio127 */
+ cd-gpios = <&gpio4 31 GPIO_ACTIVE_LOW>; /* gpio127 */
vmmc-supply = <&vmmc1>;
bus-width = <4>;
cap-power-off-card;
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index b2bc8e11471d..2c16d9e7c03c 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -512,4 +512,22 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
#endif
.endm
+ .macro bug, msg, line
+#ifdef CONFIG_THUMB2_KERNEL
+1: .inst 0xde02
+#else
+1: .inst 0xe7f001f2
+#endif
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+ .pushsection .rodata.str, "aMS", %progbits, 1
+2: .asciz "\msg"
+ .popsection
+ .pushsection __bug_table, "aw"
+ .align 2
+ .word 1b, 2b
+ .hword \line
+ .popsection
+#endif
+ .endm
+
#endif /* __ASM_ASSEMBLER_H__ */
diff --git a/arch/arm/include/asm/exception.h b/arch/arm/include/asm/exception.h
index 5abaf5bbd985..bf1991263d2d 100644
--- a/arch/arm/include/asm/exception.h
+++ b/arch/arm/include/asm/exception.h
@@ -7,7 +7,7 @@
#ifndef __ASM_ARM_EXCEPTION_H
#define __ASM_ARM_EXCEPTION_H
-#include <linux/ftrace.h>
+#include <linux/interrupt.h>
#define __exception __attribute__((section(".exception.text")))
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index e22089fb44dc..48e6ee2d4f5c 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -161,8 +161,7 @@
#else
#define VTTBR_X (5 - KVM_T0SZ)
#endif
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT _AC(48, ULL)
#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
@@ -209,6 +208,7 @@
#define HSR_EC_IABT_HYP (0x21)
#define HSR_EC_DABT (0x24)
#define HSR_EC_DABT_HYP (0x25)
+#define HSR_EC_MAX (0x3f)
#define HSR_WFI_IS_WFE (_AC(1, UL) << 0)
diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
index 9b32f76bb0dd..10f662498eb7 100644
--- a/arch/arm/include/asm/mmu_context.h
+++ b/arch/arm/include/asm/mmu_context.h
@@ -61,6 +61,7 @@ static inline void check_and_switch_context(struct mm_struct *mm,
cpu_switch_mm(mm->pgd, mm);
}
+#ifndef MODULE
#define finish_arch_post_lock_switch \
finish_arch_post_lock_switch
static inline void finish_arch_post_lock_switch(void)
@@ -82,6 +83,7 @@ static inline void finish_arch_post_lock_switch(void)
preempt_enable_no_resched();
}
}
+#endif /* !MODULE */
#endif /* CONFIG_MMU */
diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h
index f555bb3664dc..683d9230984a 100644
--- a/arch/arm/include/asm/traps.h
+++ b/arch/arm/include/asm/traps.h
@@ -18,7 +18,6 @@ struct undef_hook {
void register_undef_hook(struct undef_hook *hook);
void unregister_undef_hook(struct undef_hook *hook);
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static inline int __in_irqentry_text(unsigned long ptr)
{
extern char __irqentry_text_start[];
@@ -27,12 +26,6 @@ static inline int __in_irqentry_text(unsigned long ptr)
return ptr >= (unsigned long)&__irqentry_text_start &&
ptr < (unsigned long)&__irqentry_text_end;
}
-#else
-static inline int __in_irqentry_text(unsigned long ptr)
-{
- return 0;
-}
-#endif
static inline int in_exception_text(unsigned long ptr)
{
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 0d22ad206d52..6d243e830516 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -295,6 +295,8 @@
mov r2, sp
ldr r1, [r2, #\offset + S_PSR] @ get calling cpsr
ldr lr, [r2, #\offset + S_PC]! @ get pc
+ tst r1, #PSR_I_BIT | 0x0f
+ bne 1f
msr spsr_cxsf, r1 @ save in spsr_svc
#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
@ We must avoid clrex due to Cortex-A15 erratum #830321
@@ -309,6 +311,7 @@
@ after ldm {}^
add sp, sp, #\offset + S_FRAME_SIZE
movs pc, lr @ return & move spsr_svc into cpsr
+1: bug "Returning to usermode but unexpected PSR bits set?", \@
#elif defined(CONFIG_CPU_V7M)
@ V7M restore.
@ Note that we don't need to do clrex here as clearing the local
@@ -324,6 +327,8 @@
ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr
ldr lr, [sp, #\offset + S_PC] @ get pc
add sp, sp, #\offset + S_SP
+ tst r1, #PSR_I_BIT | 0x0f
+ bne 1f
msr spsr_cxsf, r1 @ save in spsr_svc
@ We must avoid clrex due to Cortex-A15 erratum #830321
@@ -336,6 +341,7 @@
.endif
add sp, sp, #S_FRAME_SIZE - S_SP
movs pc, lr @ return & move spsr_svc into cpsr
+1: bug "Returning to usermode but unexpected PSR bits set?", \@
#endif /* !CONFIG_THUMB2_KERNEL */
.endm
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index be2ab6d3b91f..b2e234468cb5 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -105,6 +105,7 @@ SECTIONS
*(.exception.text)
__exception_text_end = .;
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
TEXT_TEXT
SCHED_TEXT
LOCK_TEXT
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 95f12b2ccdcb..f36b5b1acd1f 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -100,7 +100,19 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 1;
}
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+ kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n",
+ hsr);
+
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
static exit_handle_fn arm_exit_handlers[] = {
+ [0 ... HSR_EC_MAX] = kvm_handle_unknown_ec,
[HSR_EC_WFI] = kvm_handle_wfx,
[HSR_EC_CP15_32] = kvm_handle_cp15_32,
[HSR_EC_CP15_64] = kvm_handle_cp15_64,
@@ -122,13 +134,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
{
u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
- if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
- !arm_exit_handlers[hsr_ec]) {
- kvm_err("Unknown exception class: hsr: %#08x\n",
- (unsigned int)kvm_vcpu_get_hsr(vcpu));
- BUG();
- }
-
return arm_exit_handlers[hsr_ec];
}
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 3a10c9f1d0a4..387ee2a11e36 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -113,7 +113,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
}
trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
- data);
+ &data);
data = vcpu_data_host_to_guest(vcpu, data, len);
vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
}
@@ -189,14 +189,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
len);
- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
mmio_write_buf(data_buf, len, data);
ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
data_buf);
} else {
trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
- fault_ipa, 0);
+ fault_ipa, NULL);
ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
data_buf);
diff --git a/arch/arm/mach-omap1/dma.c b/arch/arm/mach-omap1/dma.c
index 7b02ed218a42..0c120b2ea2f9 100644
--- a/arch/arm/mach-omap1/dma.c
+++ b/arch/arm/mach-omap1/dma.c
@@ -31,7 +31,6 @@
#include "soc.h"
#define OMAP1_DMA_BASE (0xfffed800)
-#define OMAP1_LOGICAL_DMA_CH_COUNT 17
static u32 enable_1510_mode;
@@ -311,8 +310,6 @@ static int __init omap1_system_dma_init(void)
goto exit_iounmap;
}
- d->lch_count = OMAP1_LOGICAL_DMA_CH_COUNT;
-
/* Valid attributes for omap1 plus processors */
if (cpu_is_omap15xx())
d->dev_caps = ENABLE_1510_MODE;
@@ -329,13 +326,14 @@ static int __init omap1_system_dma_init(void)
d->dev_caps |= CLEAR_CSR_ON_READ;
d->dev_caps |= IS_WORD_16;
- if (cpu_is_omap15xx())
- d->chan_count = 9;
- else if (cpu_is_omap16xx() || cpu_is_omap7xx()) {
- if (!(d->dev_caps & ENABLE_1510_MODE))
- d->chan_count = 16;
+ /* available logical channels */
+ if (cpu_is_omap15xx()) {
+ d->lch_count = 9;
+ } else {
+ if (d->dev_caps & ENABLE_1510_MODE)
+ d->lch_count = 9;
else
- d->chan_count = 9;
+ d->lch_count = 16;
}
p = dma_plat_info;
diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c
index 8633c703546a..2944af820558 100644
--- a/arch/arm/mach-omap2/gpmc-onenand.c
+++ b/arch/arm/mach-omap2/gpmc-onenand.c
@@ -367,7 +367,7 @@ static int gpmc_onenand_setup(void __iomem *onenand_base, int *freq_ptr)
return ret;
}
-void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)
+int gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)
{
int err;
struct device *dev = &gpmc_onenand_device.dev;
@@ -393,15 +393,17 @@ void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)
if (err < 0) {
dev_err(dev, "Cannot request GPMC CS %d, error %d\n",
gpmc_onenand_data->cs, err);
- return;
+ return err;
}
gpmc_onenand_resource.end = gpmc_onenand_resource.start +
ONENAND_IO_SIZE - 1;
- if (platform_device_register(&gpmc_onenand_device) < 0) {
+ err = platform_device_register(&gpmc_onenand_device);
+ if (err) {
dev_err(dev, "Unable to register OneNAND device\n");
gpmc_cs_free(gpmc_onenand_data->cs);
- return;
}
+
+ return err;
}
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index 131f8967589b..13e22a4a5a20 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -3885,16 +3885,20 @@ static struct omap_hwmod_ocp_if *omap3xxx_dss_hwmod_ocp_ifs[] __initdata = {
* Return: 0 if device named @dev_name is not likely to be accessible,
* or 1 if it is likely to be accessible.
*/
-static int __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus,
- const char *dev_name)
+static bool __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus,
+ const char *dev_name)
{
+ struct device_node *node;
+ bool available;
+
if (!bus)
- return (omap_type() == OMAP2_DEVICE_TYPE_GP) ? 1 : 0;
+ return omap_type() == OMAP2_DEVICE_TYPE_GP;
- if (of_device_is_available(of_find_node_by_name(bus, dev_name)))
- return 1;
+ node = of_get_child_by_name(bus, dev_name);
+ available = of_device_is_available(node);
+ of_node_put(node);
- return 0;
+ return available;
}
int __init omap3xxx_hwmod_init(void)
@@ -3963,15 +3967,20 @@ int __init omap3xxx_hwmod_init(void)
if (h_sham && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "sham")) {
r = omap_hwmod_register_links(h_sham);
- if (r < 0)
+ if (r < 0) {
+ of_node_put(bus);
return r;
+ }
}
if (h_aes && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "aes")) {
r = omap_hwmod_register_links(h_aes);
- if (r < 0)
+ if (r < 0) {
+ of_node_put(bus);
return r;
+ }
}
+ of_node_put(bus);
/*
* Register hwmod links specific to certain ES levels of a
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 5d497f3c5924..d539dee3c78d 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -753,13 +753,31 @@ static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_add
__arm_dma_free(dev, size, cpu_addr, handle, attrs, true);
}
+/*
+ * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
+ * that the intention is to allow exporting memory allocated via the
+ * coherent DMA APIs through the dma_buf API, which only accepts a
+ * scattertable. This presents a couple of problems:
+ * 1. Not all memory allocated via the coherent DMA APIs is backed by
+ * a struct page
+ * 2. Passing coherent DMA memory into the streaming APIs is not allowed
+ * as we will try to flush the memory through a different alias to that
+ * actually being used (and the flushes are redundant.)
+ */
int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t handle, size_t size,
struct dma_attrs *attrs)
{
- struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
+ unsigned long pfn = dma_to_pfn(dev, handle);
+ struct page *page;
int ret;
+ /* If the PFN is not valid, we do not have a struct page */
+ if (!pfn_valid(pfn))
+ return -ENXIO;
+
+ page = pfn_to_page(pfn);
+
ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
if (unlikely(ret))
return ret;
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index a4ec240ee7ba..3eb018fa1a1f 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -433,6 +433,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
struct hlist_node *tmp;
unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+ kprobe_opcode_t *correct_ret_addr = NULL;
INIT_HLIST_HEAD(&empty_rp);
kretprobe_hash_lock(current, &head, &flags);
@@ -455,14 +456,34 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
/* another task is sharing our hash bucket */
continue;
+ orig_ret_address = (unsigned long)ri->ret_addr;
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+ correct_ret_addr = ri->ret_addr;
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
if (ri->rp && ri->rp->handler) {
__this_cpu_write(current_kprobe, &ri->rp->kp);
get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
+ ri->ret_addr = correct_ret_addr;
ri->rp->handler(ri, regs);
__this_cpu_write(current_kprobe, NULL);
}
- orig_ret_address = (unsigned long)ri->ret_addr;
recycle_rp_inst(ri, &empty_rp);
if (orig_ret_address != trampoline_address)
@@ -474,7 +495,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
break;
}
- kretprobe_assert(ri, orig_ret_address, trampoline_address);
kretprobe_hash_unlock(current, &flags);
hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c
index 9775de22e2ff..a48354de1aa1 100644
--- a/arch/arm/probes/kprobes/test-core.c
+++ b/arch/arm/probes/kprobes/test-core.c
@@ -976,7 +976,10 @@ static void coverage_end(void)
void __naked __kprobes_test_case_start(void)
{
__asm__ __volatile__ (
- "stmdb sp!, {r4-r11} \n\t"
+ "mov r2, sp \n\t"
+ "bic r3, r2, #7 \n\t"
+ "mov sp, r3 \n\t"
+ "stmdb sp!, {r2-r11} \n\t"
"sub sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
"bic r0, lr, #1 @ r0 = inline data \n\t"
"mov r1, sp \n\t"
@@ -996,7 +999,8 @@ void __naked __kprobes_test_case_end_32(void)
"movne pc, r0 \n\t"
"mov r0, r4 \n\t"
"add sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
- "ldmia sp!, {r4-r11} \n\t"
+ "ldmia sp!, {r2-r11} \n\t"
+ "mov sp, r2 \n\t"
"mov pc, r0 \n\t"
);
}
@@ -1012,7 +1016,8 @@ void __naked __kprobes_test_case_end_16(void)
"bxne r0 \n\t"
"mov r0, r4 \n\t"
"add sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
- "ldmia sp!, {r4-r11} \n\t"
+ "ldmia sp!, {r2-r11} \n\t"
+ "mov sp, r2 \n\t"
"bx r0 \n\t"
);
}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 45f77ff5ad14..194b529b984b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -687,6 +687,18 @@ config FORCE_MAX_ZONEORDER
However for 4K, we choose a higher default value, 11 as opposed to 10, giving us
4M allocations matching the default size used by generic code.
+config UNMAP_KERNEL_AT_EL0
+ bool "Unmap kernel when running in userspace (aka \"KAISER\")" if EXPERT
+ default y
+ help
+ Speculation attacks against some high-performance processors can
+ be used to bypass MMU permission checks and leak kernel data to
+ userspace. This can be defended against by unmapping the kernel
+ when running in userspace, mapping it back in on exception entry
+ via a trampoline page in the vector table.
+
+ If unsure, say Y.
+
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
depends on COMPAT
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index e450bb6d21bd..5f6c8345c0e6 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -398,17 +398,4 @@ alternative_endif
mrs \rd, sp_el0
.endm
-/*
- * Errata workaround post TTBR0_EL1 update.
- */
- .macro post_ttbr0_update_workaround
-#ifdef CONFIG_CAVIUM_ERRATUM_27456
-alternative_if ARM64_WORKAROUND_CAVIUM_27456
- ic iallu
- dsb nsh
- isb
-alternative_else_nop_endif
-#endif
- .endm
-
#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 8e1f826caf99..4980a9789e1b 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -36,7 +36,8 @@
#define ARM64_WORKAROUND_CAVIUM_27456 11
#define ARM64_HAS_VIRT_HOST_EXTN 12
-#define ARM64_NCAPS 13
+#define ARM64_UNMAP_KERNEL_AT_EL0 23
+#define ARM64_NCAPS 24
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 932f5a56d1a6..48e317e4a32d 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -76,12 +76,14 @@ static inline void efi_set_pgd(struct mm_struct *mm)
if (mm != current->active_mm) {
/*
* Update the current thread's saved ttbr0 since it is
- * restored as part of a return from exception. Set
- * the hardware TTBR0_EL1 using cpu_switch_mm()
- * directly to enable potential errata workarounds.
+ * restored as part of a return from exception. Enable
+ * access to the valid TTBR0_EL1 and invoke the errata
+ * workaround directly since there is no return from
+ * exception when invoking the EFI run-time services.
*/
update_saved_ttbr0(current, mm);
- cpu_switch_mm(mm->pgd, mm);
+ uaccess_ttbr0_enable();
+ post_ttbr_update_workaround();
} else {
/*
* Defer the switch to the current thread's TTBR0_EL1
@@ -89,7 +91,7 @@ static inline void efi_set_pgd(struct mm_struct *mm)
* thread's saved ttbr0 corresponding to its active_mm
* (if different from init_mm).
*/
- cpu_set_reserved_ttbr0();
+ uaccess_ttbr0_disable();
if (current->active_mm != &init_mm)
update_saved_ttbr0(current, current->active_mm);
}
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 6cb7e1a6bc02..0c2eec490abf 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -18,7 +18,7 @@
#ifndef __ASM_EXCEPTION_H
#define __ASM_EXCEPTION_H
-#include <linux/ftrace.h>
+#include <linux/interrupt.h>
#define __exception __attribute__((section(".exception.text")))
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 1a617d46fce9..03a1e908b8e9 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -50,6 +50,11 @@ enum fixed_addresses {
FIX_EARLYCON_MEM_BASE,
FIX_TEXT_POKE0,
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ FIX_ENTRY_TRAMP_DATA,
+ FIX_ENTRY_TRAMP_TEXT,
+#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT))
+#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
__end_of_permanent_fixed_addresses,
/*
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 7803343e5881..77a27af01371 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -78,8 +78,16 @@
/*
* Initial memory map attributes.
*/
-#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+#define _SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define _SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#define SWAPPER_PTE_FLAGS (_SWAPPER_PTE_FLAGS | PTE_NG)
+#define SWAPPER_PMD_FLAGS (_SWAPPER_PMD_FLAGS | PMD_SECT_NG)
+#else
+#define SWAPPER_PTE_FLAGS _SWAPPER_PTE_FLAGS
+#define SWAPPER_PMD_FLAGS _SWAPPER_PMD_FLAGS
+#endif
#if ARM64_SWAPPER_USES_SECTION_MAPS
#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 8b709f53f874..6a889e943f4e 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -154,8 +154,7 @@
#define VTTBR_X (37 - VTCR_EL2_T0SZ_40B)
#endif
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT (UL(48))
#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 320dc9c7e4f4..c6aae0b85cef 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -266,7 +266,7 @@ static inline void __kvm_flush_dcache_pud(pud_t pud)
kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
}
-#define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x))
+#define kvm_virt_to_phys(x) __pa_symbol(x)
void kvm_set_way_flush(struct kvm_vcpu *vcpu);
void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index ae11e8fdbfd2..279978e1a070 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -148,6 +148,11 @@ extern u64 kimage_vaddr;
/* the offset between the kernel virtual and physical mappings */
extern u64 kimage_voffset;
+static inline unsigned long kaslr_offset(void)
+{
+ return kimage_vaddr - KIMAGE_VADDR;
+}
+
/*
* Allow all memory at the discovery stage. We will clip it later.
*/
@@ -188,6 +193,7 @@ static inline void *phys_to_virt(phys_addr_t x)
#define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x)))
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
#define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys(x))
+#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x))
/*
* virt_to_page(k) convert a _valid_ virtual address to struct page *
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 5472251c8e6c..f7448b6afe58 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -16,6 +16,11 @@
#ifndef __ASM_MMU_H
#define __ASM_MMU_H
+#define USER_ASID_FLAG (UL(1) << 48)
+#define TTBR_ASID_MASK (UL(0xffff) << 48)
+
+#ifndef __ASSEMBLY__
+
typedef struct {
atomic64_t id;
void *vdso;
@@ -28,6 +33,12 @@ typedef struct {
*/
#define ASID(mm) ((mm)->context.id.counter & 0xffff)
+static inline bool arm64_kernel_unmapped_at_el0(void)
+{
+ return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) &&
+ cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0);
+}
+
extern void paging_init(void);
extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
extern void init_mem_pgprot(void);
@@ -36,4 +47,5 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
pgprot_t prot, bool allow_block_mappings);
extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
+#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index e53d30c6f779..8848c49ac206 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -50,7 +50,7 @@ static inline void contextidr_thread_switch(struct task_struct *next)
*/
static inline void cpu_set_reserved_ttbr0(void)
{
- unsigned long ttbr = virt_to_phys(empty_zero_page);
+ unsigned long ttbr = __pa_symbol(empty_zero_page);
asm(
" msr ttbr0_el1, %0 // set TTBR0\n"
@@ -59,6 +59,13 @@ static inline void cpu_set_reserved_ttbr0(void)
: "r" (ttbr));
}
+static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
+{
+ BUG_ON(pgd == swapper_pg_dir);
+ cpu_set_reserved_ttbr0();
+ cpu_do_switch_mm(virt_to_phys(pgd),mm);
+}
+
/*
* TCR.T0SZ value to use when the ID map is active. Usually equals
* TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
@@ -124,7 +131,7 @@ static inline void cpu_install_idmap(void)
local_flush_tlb_all();
cpu_set_idmap_tcr_t0sz();
- cpu_switch_mm(idmap_pg_dir, &init_mm);
+ cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
}
/*
@@ -139,7 +146,7 @@ static inline void cpu_replace_ttbr1(pgd_t *pgd)
phys_addr_t pgd_phys = virt_to_phys(pgd);
- replace_phys = (void *)virt_to_phys(idmap_cpu_replace_ttbr1);
+ replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
cpu_install_idmap();
replace_phys(pgd_phys);
@@ -179,9 +186,10 @@ static inline void update_saved_ttbr0(struct task_struct *tsk,
struct mm_struct *mm)
{
if (system_uses_ttbr0_pan()) {
+ u64 ttbr;
BUG_ON(mm->pgd == swapper_pg_dir);
- task_thread_info(tsk)->ttbr0 =
- virt_to_phys(mm->pgd) | ASID(mm) << 48;
+ ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48;
+ WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr);
}
}
#else
@@ -228,4 +236,6 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
#define deactivate_mm(tsk,mm) do { } while (0)
#define activate_mm(prev,next) switch_mm(prev, next, current)
+void post_ttbr_update_workaround(void);
+
#endif
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 9786f770088d..d7890c0f2d3d 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -224,6 +224,8 @@
#define TCR_TG1_16K (UL(1) << 30)
#define TCR_TG1_4K (UL(2) << 30)
#define TCR_TG1_64K (UL(3) << 30)
+
+#define TCR_A1 (UL(1) << 22)
#define TCR_ASID16 (UL(1) << 36)
#define TCR_TBI0 (UL(1) << 37)
#define TCR_HA (UL(1) << 39)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 49fec68238c7..b157df1b7fe8 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -61,8 +61,16 @@ extern void __pmd_error(const char *file, int line, unsigned long val);
extern void __pud_error(const char *file, int line, unsigned long val);
extern void __pgd_error(const char *file, int line, unsigned long val);
-#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#define PROT_DEFAULT (_PROT_DEFAULT | PTE_NG)
+#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_SECT_NG)
+#else
+#define PROT_DEFAULT _PROT_DEFAULT
+#define PROT_SECT_DEFAULT _PROT_SECT_DEFAULT
+#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
@@ -75,6 +83,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
+#define _HYP_PAGE_DEFAULT (_PAGE_DEFAULT & ~PTE_NG)
#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
@@ -82,13 +91,13 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
-#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP)
+#define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP)
#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
-#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
+#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
@@ -119,7 +128,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
* for zero-mapped memory areas etc..
*/
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page)
+#define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page))
#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte))
@@ -716,6 +725,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
+extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
/*
* Encode and decode a swap entry:
diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
index 14ad6e4e87d1..16cef2e8449e 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -35,12 +35,6 @@ extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
#include <asm/memory.h>
-#define cpu_switch_mm(pgd,mm) \
-do { \
- BUG_ON(pgd == swapper_pg_dir); \
- cpu_do_switch_mm(virt_to_phys(pgd),mm); \
-} while (0)
-
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* __ASM_PROCFNS_H */
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index b460ae28e346..ad6bd8b26ada 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -23,6 +23,30 @@
#include <linux/sched.h>
#include <asm/cputype.h>
+#include <asm/mmu.h>
+
+/*
+ * Raw TLBI operations.
+ *
+ * Where necessary, use the __tlbi() macro to avoid asm()
+ * boilerplate. Drivers and most kernel code should use the TLB
+ * management routines in preference to the macro below.
+ *
+ * The macro can be used as __tlbi(op) or __tlbi(op, arg), depending
+ * on whether a particular TLBI operation takes an argument or
+ * not. The macros handles invoking the asm with or without the
+ * register argument as appropriate.
+ */
+#define __TLBI_0(op, arg) asm ("tlbi " #op)
+#define __TLBI_1(op, arg) asm ("tlbi " #op ", %0" : : "r" (arg))
+#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)
+
+#define __tlbi(op, ...) __TLBI_N(op, ##__VA_ARGS__, 1, 0)
+
+#define __tlbi_user(op, arg) do { \
+ if (arm64_kernel_unmapped_at_el0()) \
+ __tlbi(op, (arg) | USER_ASID_FLAG); \
+} while (0)
/*
* TLB Management
@@ -66,7 +90,7 @@
static inline void local_flush_tlb_all(void)
{
dsb(nshst);
- asm("tlbi vmalle1");
+ __tlbi(vmalle1);
dsb(nsh);
isb();
}
@@ -74,7 +98,7 @@ static inline void local_flush_tlb_all(void)
static inline void flush_tlb_all(void)
{
dsb(ishst);
- asm("tlbi vmalle1is");
+ __tlbi(vmalle1is);
dsb(ish);
isb();
}
@@ -84,7 +108,8 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
unsigned long asid = ASID(mm) << 48;
dsb(ishst);
- asm("tlbi aside1is, %0" : : "r" (asid));
+ __tlbi(aside1is, asid);
+ __tlbi_user(aside1is, asid);
dsb(ish);
}
@@ -94,7 +119,8 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long addr = uaddr >> 12 | (ASID(vma->vm_mm) << 48);
dsb(ishst);
- asm("tlbi vale1is, %0" : : "r" (addr));
+ __tlbi(vale1is, addr);
+ __tlbi_user(vale1is, addr);
dsb(ish);
}
@@ -121,10 +147,13 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
dsb(ishst);
for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) {
- if (last_level)
- asm("tlbi vale1is, %0" : : "r"(addr));
- else
- asm("tlbi vae1is, %0" : : "r"(addr));
+ if (last_level) {
+ __tlbi(vale1is, addr);
+ __tlbi_user(vale1is, addr);
+ } else {
+ __tlbi(vae1is, addr);
+ __tlbi_user(vae1is, addr);
+ }
}
dsb(ish);
}
@@ -149,7 +178,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
dsb(ishst);
for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
- asm("tlbi vaae1is, %0" : : "r"(addr));
+ __tlbi(vaae1is, addr);
dsb(ish);
isb();
}
@@ -163,7 +192,8 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm,
{
unsigned long addr = uaddr >> 12 | (ASID(mm) << 48);
- asm("tlbi vae1is, %0" : : "r" (addr));
+ __tlbi(vae1is, addr);
+ __tlbi_user(vae1is, addr);
dsb(ish);
}
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index ddee92bdb584..76e22376dcf6 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -35,18 +35,11 @@ struct undef_hook {
void register_undef_hook(struct undef_hook *hook);
void unregister_undef_hook(struct undef_hook *hook);
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static inline int __in_irqentry_text(unsigned long ptr)
{
return ptr >= (unsigned long)&__irqentry_text_start &&
ptr < (unsigned long)&__irqentry_text_end;
}
-#else
-static inline int __in_irqentry_text(unsigned long ptr)
-{
- return 0;
-}
-#endif
static inline int in_exception_text(unsigned long ptr)
{
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 064cef9ae2d1..d39d8bde42d7 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -20,6 +20,7 @@
#include <asm/alternative.h>
#include <asm/kernel-pgtable.h>
+#include <asm/mmu.h>
#include <asm/sysreg.h>
#ifndef __ASSEMBLY__
@@ -142,17 +143,23 @@ static inline void set_fs(mm_segment_t fs)
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
static inline void __uaccess_ttbr0_disable(void)
{
- unsigned long ttbr;
+ unsigned long flags, ttbr;
+ local_irq_save(flags);
+ ttbr = read_sysreg(ttbr1_el1);
+ ttbr &= ~TTBR_ASID_MASK;
/* reserved_ttbr0 placed at the end of swapper_pg_dir */
- ttbr = read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE;
- write_sysreg(ttbr, ttbr0_el1);
+ write_sysreg(ttbr + SWAPPER_DIR_SIZE, ttbr0_el1);
+ isb();
+ /* Set reserved ASID */
+ write_sysreg(ttbr, ttbr1_el1);
isb();
+ local_irq_restore(flags);
}
static inline void __uaccess_ttbr0_enable(void)
{
- unsigned long flags;
+ unsigned long flags, ttbr0, ttbr1;
/*
* Disable interrupts to avoid preemption between reading the 'ttbr0'
@@ -160,7 +167,17 @@ static inline void __uaccess_ttbr0_enable(void)
* roll-over and an update of 'ttbr0'.
*/
local_irq_save(flags);
- write_sysreg(current_thread_info()->ttbr0, ttbr0_el1);
+ ttbr0 = READ_ONCE(current_thread_info()->ttbr0);
+
+ /* Restore active ASID */
+ ttbr1 = read_sysreg(ttbr1_el1);
+ ttbr1 &= ~TTBR_ASID_MASK; /* safety measure */
+ ttbr1 |= ttbr0 & TTBR_ASID_MASK;
+ write_sysreg(ttbr1, ttbr1_el1);
+ isb();
+
+ /* Restore user page table */
+ write_sysreg(ttbr0, ttbr0_el1);
isb();
local_irq_restore(flags);
}
@@ -439,51 +456,62 @@ extern __must_check long strnlen_user(const char __user *str, long n);
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
.macro __uaccess_ttbr0_disable, tmp1
mrs \tmp1, ttbr1_el1 // swapper_pg_dir
+ bic \tmp1, \tmp1, #TTBR_ASID_MASK
add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir
msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1
isb
+ sub \tmp1, \tmp1, #SWAPPER_DIR_SIZE
+ msr ttbr1_el1, \tmp1 // set reserved ASID
+ isb
.endm
- .macro __uaccess_ttbr0_enable, tmp1
+ .macro __uaccess_ttbr0_enable, tmp1, tmp2
get_thread_info \tmp1
ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1
+ mrs \tmp2, ttbr1_el1
+ extr \tmp2, \tmp2, \tmp1, #48
+ ror \tmp2, \tmp2, #16
+ msr ttbr1_el1, \tmp2 // set the active ASID
+ isb
msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1
isb
.endm
- .macro uaccess_ttbr0_disable, tmp1
+ .macro uaccess_ttbr0_disable, tmp1, tmp2
alternative_if_not ARM64_HAS_PAN
+ save_and_disable_irq \tmp2 // avoid preemption
__uaccess_ttbr0_disable \tmp1
+ restore_irq \tmp2
alternative_else_nop_endif
.endm
- .macro uaccess_ttbr0_enable, tmp1, tmp2
+ .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3
alternative_if_not ARM64_HAS_PAN
- save_and_disable_irq \tmp2 // avoid preemption
- __uaccess_ttbr0_enable \tmp1
- restore_irq \tmp2
+ save_and_disable_irq \tmp3 // avoid preemption
+ __uaccess_ttbr0_enable \tmp1, \tmp2
+ restore_irq \tmp3
alternative_else_nop_endif
.endm
#else
- .macro uaccess_ttbr0_disable, tmp1
+ .macro uaccess_ttbr0_disable, tmp1, tmp2
.endm
- .macro uaccess_ttbr0_enable, tmp1, tmp2
+ .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3
.endm
#endif
/*
* These macros are no-ops when UAO is present.
*/
- .macro uaccess_disable_not_uao, tmp1
- uaccess_ttbr0_disable \tmp1
+ .macro uaccess_disable_not_uao, tmp1, tmp2
+ uaccess_ttbr0_disable \tmp1, \tmp2
alternative_if ARM64_ALT_PAN_NOT_UAO
SET_PSTATE_PAN(1)
alternative_else_nop_endif
.endm
- .macro uaccess_enable_not_uao, tmp1, tmp2
- uaccess_ttbr0_enable \tmp1, \tmp2
+ .macro uaccess_enable_not_uao, tmp1, tmp2, tmp3
+ uaccess_ttbr0_enable \tmp1, \tmp2, \tmp3
alternative_if ARM64_ALT_PAN_NOT_UAO
SET_PSTATE_PAN(0)
alternative_else_nop_endif
diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c
index 4b1e5a7a98da..89c96bd1aab9 100644
--- a/arch/arm64/kernel/acpi_parking_protocol.c
+++ b/arch/arm64/kernel/acpi_parking_protocol.c
@@ -17,6 +17,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/acpi.h>
+#include <linux/mm.h>
#include <linux/types.h>
#include <asm/cpu_ops.h>
@@ -102,7 +103,7 @@ static int acpi_parking_protocol_cpu_boot(unsigned int cpu)
* that read this address need to convert this address to the
* Boot-Loader's endianness before jumping.
*/
- writeq_relaxed(__pa(secondary_entry), &mailbox->entry_point);
+ writeq_relaxed(__pa_symbol(secondary_entry), &mailbox->entry_point);
writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id);
arch_send_wakeup_ipi_mask(cpumask_of(cpu));
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 67ebe708e30c..686999146c16 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -23,6 +23,7 @@
#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
#include <linux/suspend.h>
+#include <asm/fixmap.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/smp_plat.h>
@@ -161,5 +162,9 @@ int main(void)
DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address));
DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next));
+ BLANK();
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ DEFINE(TRAMP_VALIAS, TRAMP_VALIAS);
+#endif
return 0;
}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 53fab76d3c39..9bc9847072b6 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -23,6 +23,7 @@
#include <linux/sort.h>
#include <linux/stop_machine.h>
#include <linux/types.h>
+#include <linux/mm.h>
#include <asm/cpu.h>
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
@@ -655,6 +656,39 @@ static bool runs_at_el2(const struct arm64_cpu_capabilities *entry)
return is_kernel_in_hyp_mode();
}
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
+
+static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry)
+{
+ /* Forced on command line? */
+ if (__kpti_forced) {
+ pr_info_once("kernel page table isolation forced %s by command line option\n",
+ __kpti_forced > 0 ? "ON" : "OFF");
+ return __kpti_forced > 0;
+ }
+
+ /* Useful for KASLR robustness */
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+ return true;
+
+ return false;
+}
+
+static int __init parse_kpti(char *str)
+{
+ bool enabled;
+ int ret = strtobool(str, &enabled);
+
+ if (ret)
+ return ret;
+
+ __kpti_forced = enabled ? 1 : -1;
+ return 0;
+}
+__setup("kpti=", parse_kpti);
+#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
+
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "GIC system register CPU interface",
@@ -712,6 +746,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_VIRT_HOST_EXTN,
.matches = runs_at_el2,
},
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ {
+ .capability = ARM64_UNMAP_KERNEL_AT_EL0,
+ .matches = unmap_kernel_at_el0,
+ },
+#endif
{},
};
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index fb4608e10f1d..e60e461be4e7 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -29,6 +29,7 @@
#include <asm/esr.h>
#include <asm/irq.h>
#include <asm/memory.h>
+#include <asm/mmu.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
#include <asm/uaccess.h>
@@ -70,8 +71,31 @@
#define BAD_FIQ 2
#define BAD_ERROR 3
- .macro kernel_entry, el, regsize = 64
+ .macro kernel_ventry, el, label, regsize = 64
+ .align 7
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+alternative_if ARM64_UNMAP_KERNEL_AT_EL0
+ .if \el == 0
+ .if \regsize == 64
+ mrs x30, tpidrro_el0
+ msr tpidrro_el0, xzr
+ .else
+ mov x30, xzr
+ .endif
+ .endif
+alternative_else_nop_endif
+#endif
+
sub sp, sp, #S_FRAME_SIZE
+ b el\()\el\()_\label
+ .endm
+
+ .macro tramp_alias, dst, sym
+ mov_q \dst, TRAMP_VALIAS
+ add \dst, \dst, #(\sym - .entry.tramp.text)
+ .endm
+
+ .macro kernel_entry, el, regsize = 64
.if \regsize == 32
mov w0, w0 // zero upper 32 bits of x0
.endif
@@ -140,7 +164,7 @@ alternative_else_nop_endif
.if \el != 0
mrs x21, ttbr0_el1
- tst x21, #0xffff << 48 // Check for the reserved ASID
+ tst x21, #TTBR_ASID_MASK // Check for the reserved ASID
orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR
b.eq 1f // TTBR0 access already disabled
and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR
@@ -205,7 +229,7 @@ alternative_else_nop_endif
tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
.endif
- __uaccess_ttbr0_enable x0
+ __uaccess_ttbr0_enable x0, x1
.if \el == 0
/*
@@ -214,7 +238,7 @@ alternative_else_nop_endif
* Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
* corruption).
*/
- post_ttbr0_update_workaround
+ bl post_ttbr_update_workaround
.endif
1:
.if \el != 0
@@ -226,24 +250,20 @@ alternative_else_nop_endif
.if \el == 0
ldr x23, [sp, #S_SP] // load return stack pointer
msr sp_el0, x23
+ tst x22, #PSR_MODE32_BIT // native task?
+ b.eq 3f
+
#ifdef CONFIG_ARM64_ERRATUM_845719
-alternative_if_not ARM64_WORKAROUND_845719
- nop
- nop
-#ifdef CONFIG_PID_IN_CONTEXTIDR
- nop
-#endif
-alternative_else
- tbz x22, #4, 1f
+alternative_if ARM64_WORKAROUND_845719
#ifdef CONFIG_PID_IN_CONTEXTIDR
mrs x29, contextidr_el1
msr contextidr_el1, x29
#else
msr contextidr_el1, xzr
#endif
-1:
-alternative_endif
+alternative_else_nop_endif
#endif
+3:
.endif
msr elr_el1, x21 // set up the return data
@@ -265,7 +285,21 @@ alternative_endif
ldp x28, x29, [sp, #16 * 14]
ldr lr, [sp, #S_LR]
add sp, sp, #S_FRAME_SIZE // restore sp
- eret // return to kernel
+
+ .if \el == 0
+alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ bne 4f
+ msr far_el1, x30
+ tramp_alias x30, tramp_exit_native
+ br x30
+4:
+ tramp_alias x30, tramp_exit_compat
+ br x30
+#endif
+ .else
+ eret
+ .endif
.endm
.macro irq_stack_entry
@@ -343,31 +377,31 @@ tsk .req x28 // current thread_info
.align 11
ENTRY(vectors)
- ventry el1_sync_invalid // Synchronous EL1t
- ventry el1_irq_invalid // IRQ EL1t
- ventry el1_fiq_invalid // FIQ EL1t
- ventry el1_error_invalid // Error EL1t
+ kernel_ventry 1, sync_invalid // Synchronous EL1t
+ kernel_ventry 1, irq_invalid // IRQ EL1t
+ kernel_ventry 1, fiq_invalid // FIQ EL1t
+ kernel_ventry 1, error_invalid // Error EL1t
- ventry el1_sync // Synchronous EL1h
- ventry el1_irq // IRQ EL1h
- ventry el1_fiq_invalid // FIQ EL1h
- ventry el1_error_invalid // Error EL1h
+ kernel_ventry 1, sync // Synchronous EL1h
+ kernel_ventry 1, irq // IRQ EL1h
+ kernel_ventry 1, fiq_invalid // FIQ EL1h
+ kernel_ventry 1, error_invalid // Error EL1h
- ventry el0_sync // Synchronous 64-bit EL0
- ventry el0_irq // IRQ 64-bit EL0
- ventry el0_fiq_invalid // FIQ 64-bit EL0
- ventry el0_error_invalid // Error 64-bit EL0
+ kernel_ventry 0, sync // Synchronous 64-bit EL0
+ kernel_ventry 0, irq // IRQ 64-bit EL0
+ kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0
+ kernel_ventry 0, error_invalid // Error 64-bit EL0
#ifdef CONFIG_COMPAT
- ventry el0_sync_compat // Synchronous 32-bit EL0
- ventry el0_irq_compat // IRQ 32-bit EL0
- ventry el0_fiq_invalid_compat // FIQ 32-bit EL0
- ventry el0_error_invalid_compat // Error 32-bit EL0
+ kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0
+ kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0
+ kernel_ventry 0, fiq_invalid_compat, 32 // FIQ 32-bit EL0
+ kernel_ventry 0, error_invalid_compat, 32 // Error 32-bit EL0
#else
- ventry el0_sync_invalid // Synchronous 32-bit EL0
- ventry el0_irq_invalid // IRQ 32-bit EL0
- ventry el0_fiq_invalid // FIQ 32-bit EL0
- ventry el0_error_invalid // Error 32-bit EL0
+ kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0
+ kernel_ventry 0, irq_invalid, 32 // IRQ 32-bit EL0
+ kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0
+ kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0
#endif
END(vectors)
@@ -920,6 +954,119 @@ __ni_sys_trace:
.popsection // .entry.text
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+/*
+ * Exception vectors trampoline.
+ */
+ .pushsection ".entry.tramp.text", "ax"
+
+ .macro tramp_map_kernel, tmp
+ mrs \tmp, ttbr1_el1
+ sub \tmp, \tmp, #(SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
+ bic \tmp, \tmp, #USER_ASID_FLAG
+ msr ttbr1_el1, \tmp
+#ifdef CONFIG_ARCH_MSM8996
+ /* ASID already in \tmp[63:48] */
+ movk \tmp, #:abs_g2_nc:(TRAMP_VALIAS >> 12)
+ movk \tmp, #:abs_g1_nc:(TRAMP_VALIAS >> 12)
+ /* 2MB boundary containing the vectors, so we nobble the walk cache */
+ movk \tmp, #:abs_g0_nc:((TRAMP_VALIAS & ~(SZ_2M - 1)) >> 12)
+ isb
+ tlbi vae1, \tmp
+ dsb nsh
+#endif /* CONFIG_ARCH_MSM8996 */
+ .endm
+
+ .macro tramp_unmap_kernel, tmp
+ mrs \tmp, ttbr1_el1
+ add \tmp, \tmp, #(SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
+ orr \tmp, \tmp, #USER_ASID_FLAG
+ msr ttbr1_el1, \tmp
+ /*
+ * We avoid running the post_ttbr_update_workaround here because the
+ * user and kernel ASIDs don't have conflicting mappings, so any
+ * "blessing" as described in:
+ *
+ * http://lkml.kernel.org/r/56BB848A.6060603@caviumnetworks.com
+ *
+ * will not hurt correctness. Whilst this may partially defeat the
+ * point of using split ASIDs in the first place, it avoids
+ * the hit of invalidating the entire I-cache on every return to
+ * userspace.
+ */
+ .endm
+
+ .macro tramp_ventry, regsize = 64
+ .align 7
+1:
+ .if \regsize == 64
+ msr tpidrro_el0, x30 // Restored in kernel_ventry
+ .endif
+ bl 2f
+ b .
+2:
+ tramp_map_kernel x30
+#ifdef CONFIG_RANDOMIZE_BASE
+ adr x30, tramp_vectors + PAGE_SIZE
+#ifndef CONFIG_ARCH_MSM8996
+ isb
+#endif
+ ldr x30, [x30]
+#else
+ ldr x30, =vectors
+#endif
+ prfm plil1strm, [x30, #(1b - tramp_vectors)]
+ msr vbar_el1, x30
+ add x30, x30, #(1b - tramp_vectors)
+ isb
+ ret
+ .endm
+
+ .macro tramp_exit, regsize = 64
+ adr x30, tramp_vectors
+ msr vbar_el1, x30
+ tramp_unmap_kernel x30
+ .if \regsize == 64
+ mrs x30, far_el1
+ .endif
+ eret
+ .endm
+
+ .align 11
+ENTRY(tramp_vectors)
+ .space 0x400
+
+ tramp_ventry
+ tramp_ventry
+ tramp_ventry
+ tramp_ventry
+
+ tramp_ventry 32
+ tramp_ventry 32
+ tramp_ventry 32
+ tramp_ventry 32
+END(tramp_vectors)
+
+ENTRY(tramp_exit_native)
+ tramp_exit
+END(tramp_exit_native)
+
+ENTRY(tramp_exit_compat)
+ tramp_exit 32
+END(tramp_exit_compat)
+
+ .ltorg
+ .popsection // .entry.tramp.text
+#ifdef CONFIG_RANDOMIZE_BASE
+ .pushsection ".rodata", "a"
+ .align PAGE_SHIFT
+ .globl __entry_tramp_data_start
+__entry_tramp_data_start:
+ .quad vectors
+ .popsection // .rodata
+#endif /* CONFIG_RANDOMIZE_BASE */
+#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
+
/*
* Special system call wrappers.
*/
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 750f422f3f2c..3c0897ca8079 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -97,7 +97,7 @@ static void __kprobes *patch_map(void *addr, int fixmap)
if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX))
page = vmalloc_to_page(addr);
else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA))
- page = virt_to_page(addr);
+ page = phys_to_page(__pa_symbol(addr));
else
return addr;
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index fc20a5aa9a73..cb73cf4cd496 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -324,6 +324,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
+ /*
+ * In case p was allocated the same task_struct pointer as some
+ * other recently-exited task, make sure p is disassociated from
+ * any cpu that may have run that now-exited task recently.
+ * Otherwise we could erroneously skip reloading the FPSIMD
+ * registers for p.
+ */
+ fpsimd_flush_task_state(p);
+
if (likely(!(p->flags & PF_KTHREAD))) {
*childregs = *current_pt_regs();
childregs->regs[0] = 0;
@@ -369,19 +378,17 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
static void tls_thread_switch(struct task_struct *next)
{
- unsigned long tpidr, tpidrro;
+ unsigned long tpidr;
asm("mrs %0, tpidr_el0" : "=r" (tpidr));
*task_user_tls(current) = tpidr;
- tpidr = *task_user_tls(next);
- tpidrro = is_compat_thread(task_thread_info(next)) ?
- next->thread.tp_value : 0;
+ if (is_compat_thread(task_thread_info(next)))
+ write_sysreg(next->thread.tp_value, tpidrro_el0);
+ else if (!arm64_kernel_unmapped_at_el0())
+ write_sysreg(0, tpidrro_el0);
- asm(
- " msr tpidr_el0, %0\n"
- " msr tpidrro_el0, %1"
- : : "r" (tpidr), "r" (tpidrro));
+ write_sysreg(*task_user_tls(next), tpidr_el0);
}
/* Restore the UAO state depending on next's addr_limit */
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 42816bebb1e0..665ae9b29e9a 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -19,6 +19,7 @@
#include <linux/of.h>
#include <linux/smp.h>
#include <linux/delay.h>
+#include <linux/mm.h>
#include <linux/psci.h>
#include <uapi/linux/psci.h>
@@ -45,7 +46,8 @@ static int __init cpu_psci_cpu_prepare(unsigned int cpu)
static int cpu_psci_cpu_boot(unsigned int cpu)
{
- int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry));
+ int err = psci_ops.cpu_on(cpu_logical_map(cpu),
+ __pa_symbol(secondary_entry));
if (err)
pr_err("failed to boot CPU%d (%d)\n", cpu, err);
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index f1fae3cbc4ca..e29a62deeb6c 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -43,6 +43,7 @@
#include <linux/of_platform.h>
#include <linux/efi.h>
#include <linux/psci.h>
+#include <linux/mm.h>
#include <asm/acpi.h>
#include <asm/fixmap.h>
@@ -199,10 +200,10 @@ static void __init request_standard_resources(void)
struct memblock_region *region;
struct resource *res;
- kernel_code.start = virt_to_phys(_text);
- kernel_code.end = virt_to_phys(__init_begin - 1);
- kernel_data.start = virt_to_phys(_sdata);
- kernel_data.end = virt_to_phys(_end - 1);
+ kernel_code.start = __pa_symbol(_text);
+ kernel_code.end = __pa_symbol(__init_begin - 1);
+ kernel_data.start = __pa_symbol(_sdata);
+ kernel_data.end = __pa_symbol(_end - 1);
for_each_memblock(memory, region) {
res = alloc_bootmem_low(sizeof(*res));
@@ -358,9 +359,9 @@ void __init setup_arch(char **cmdline_p)
* thread.
*/
#ifdef CONFIG_THREAD_INFO_IN_TASK
- init_task.thread_info.ttbr0 = virt_to_phys(empty_zero_page);
+ init_task.thread_info.ttbr0 = __pa_symbol(empty_zero_page);
#else
- init_thread_info.ttbr0 = virt_to_phys(empty_zero_page);
+ init_thread_info.ttbr0 = __pa_symbol(empty_zero_page);
#endif
#endif
@@ -412,11 +413,11 @@ subsys_initcall(topology_init);
static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
void *p)
{
- u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR;
+ const unsigned long offset = kaslr_offset();
- if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) {
- pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n",
- kaslr_offset, KIMAGE_VADDR);
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && offset > 0) {
+ pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n",
+ offset, KIMAGE_VADDR);
} else {
pr_emerg("Kernel Offset: disabled\n");
}
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 86cbf10946a6..34218337e7f0 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -132,7 +132,7 @@ ENTRY(_cpu_resume)
#ifdef CONFIG_KASAN
mov x0, sp
- bl kasan_unpoison_remaining_stack
+ bl kasan_unpoison_task_stack_below
#endif
ldp x19, x20, [x29, #16]
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index 4b4b0ad6ebde..303d571702ea 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -21,6 +21,7 @@
#include <linux/of.h>
#include <linux/smp.h>
#include <linux/types.h>
+#include <linux/mm.h>
#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
@@ -97,7 +98,7 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
* boot-loader's endianess before jumping. This is mandated by
* the boot protocol.
*/
- writeq_relaxed(__pa(secondary_holding_pen), release_addr);
+ writeq_relaxed(__pa_symbol(secondary_holding_pen), release_addr);
__flush_dcache_area((__force void *)release_addr,
sizeof(*release_addr));
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 5ebbd8600b0a..46fa4de29fb1 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -114,6 +114,7 @@ static struct vm_special_mapping vdso_spec[2];
static int __init vdso_init(void)
{
int i;
+ unsigned long pfn;
if (memcmp(&vdso_start, "\177ELF", 4)) {
pr_err("vDSO is not a valid ELF object!\n");
@@ -131,11 +132,14 @@ static int __init vdso_init(void)
return -ENOMEM;
/* Grab the vDSO data page. */
- vdso_pagelist[0] = virt_to_page(vdso_data);
+ vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
+
/* Grab the vDSO code pages. */
+ pfn = sym_to_pfn(&vdso_start);
+
for (i = 0; i < vdso_pages; i++)
- vdso_pagelist[i + 1] = virt_to_page(&vdso_start + i * PAGE_SIZE);
+ vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
/* Populate the special mapping structures */
vdso_spec[0] = (struct vm_special_mapping) {
@@ -214,10 +218,8 @@ void update_vsyscall(struct timekeeper *tk)
if (!use_syscall) {
/* tkr_mono.cycle_last == tkr_raw.cycle_last */
vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
- vdso_data->raw_time_sec = tk->raw_time.tv_sec;
- vdso_data->raw_time_nsec = (tk->raw_time.tv_nsec <<
- tk->tkr_raw.shift) +
- tk->tkr_raw.xtime_nsec;
+ vdso_data->raw_time_sec = tk->raw_sec;
+ vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec;
vdso_data->xtime_clock_sec = tk->xtime_sec;
vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
vdso_data->cs_mono_mult = tk->tkr_mono.mult;
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index 76320e920965..c39872a7b03c 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -309,7 +309,7 @@ ENTRY(__kernel_clock_getres)
b.ne 4f
ldr x2, 6f
2:
- cbz w1, 3f
+ cbz x1, 3f
stp xzr, x2, [x1]
3: /* res == NULL. */
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index f2d34d4c0a6b..7af24e552aaa 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -56,6 +56,17 @@ jiffies = jiffies_64;
#define HIBERNATE_TEXT
#endif
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#define TRAMP_TEXT \
+ . = ALIGN(PAGE_SIZE); \
+ VMLINUX_SYMBOL(__entry_tramp_text_start) = .; \
+ *(.entry.tramp.text) \
+ . = ALIGN(PAGE_SIZE); \
+ VMLINUX_SYMBOL(__entry_tramp_text_end) = .;
+#else
+#define TRAMP_TEXT
+#endif
+
/*
* The size of the PE/COFF section that covers the kernel image, which
* runs from stext to _edata, must be a round multiple of the PE/COFF
@@ -118,6 +129,7 @@ SECTIONS
*(.exception.text)
__exception_text_end = .;
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
ENTRY_TEXT
TEXT_TEXT
SCHED_TEXT
@@ -126,6 +138,7 @@ SECTIONS
HYPERVISOR_TEXT
IDMAP_TEXT
HIBERNATE_TEXT
+ TRAMP_TEXT
*(.fixup)
*(.gnu.warning)
. = ALIGN(16);
@@ -218,6 +231,11 @@ SECTIONS
. += RESERVED_TTBR0_SIZE;
#endif
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ tramp_pg_dir = .;
+ . += PAGE_SIZE;
+#endif
+
_end = .;
STABS_DEBUG
@@ -225,6 +243,10 @@ SECTIONS
HEAD_SYMBOLS
}
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE,
+ "Entry trampoline text too big")
+#endif
/*
* The HYP init code and ID map text can't be longer than a page each,
* and should not cross a page boundary.
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 25006a7a5316..e47f9bc71079 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -122,7 +122,19 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
return ret;
}
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+ kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n",
+ hsr, esr_get_class_string(hsr));
+
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
static exit_handle_fn arm_exit_handlers[] = {
+ [0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec,
[ESR_ELx_EC_WFx] = kvm_handle_wfx,
[ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32,
[ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64,
@@ -148,13 +160,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
u32 hsr = kvm_vcpu_get_hsr(vcpu);
u8 hsr_ec = hsr >> ESR_ELx_EC_SHIFT;
- if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
- !arm_exit_handlers[hsr_ec]) {
- kvm_err("Unknown exception class: hsr: %#08x -- %s\n",
- hsr, esr_get_class_string(hsr));
- BUG();
- }
-
return arm_exit_handlers[hsr_ec];
}
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index d7150e30438a..07c7ad97ee28 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -30,7 +30,7 @@
* Alignment fixed up by hardware.
*/
ENTRY(__clear_user)
- uaccess_enable_not_uao x2, x3
+ uaccess_enable_not_uao x2, x3, x4
mov x2, x1 // save the size for fixup return
subs x1, x1, #8
b.mi 2f
@@ -50,7 +50,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
b.mi 5f
uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
5: mov x0, #0
- uaccess_disable_not_uao x2
+ uaccess_disable_not_uao x2, x3
ret
ENDPROC(__clear_user)
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 90154f3f7f2a..683adc358be7 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -64,10 +64,10 @@
end .req x5
ENTRY(__arch_copy_from_user)
- uaccess_enable_not_uao x3, x4
+ uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
- uaccess_disable_not_uao x3
+ uaccess_disable_not_uao x3, x4
mov x0, #0 // Nothing to copy
ret
ENDPROC(__arch_copy_from_user)
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index 718b1c4e2f85..e8bfaf19f778 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -65,10 +65,10 @@
end .req x5
ENTRY(__copy_in_user)
- uaccess_enable_not_uao x3, x4
+ uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
- uaccess_disable_not_uao x3
+ uaccess_disable_not_uao x3, x4
mov x0, #0
ret
ENDPROC(__copy_in_user)
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index e99e31c9acac..f6cfcc0441de 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -63,10 +63,10 @@
end .req x5
ENTRY(__arch_copy_to_user)
- uaccess_enable_not_uao x3, x4
+ uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
- uaccess_disable_not_uao x3
+ uaccess_disable_not_uao x3, x4
mov x0, #0
ret
ENDPROC(__arch_copy_to_user)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 3be2cda5dbda..e5091d9cceb6 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -49,7 +49,7 @@ ENTRY(flush_icache_range)
* - end - virtual end address of region
*/
ENTRY(__flush_cache_user_range)
- uaccess_ttbr0_enable x2, x3
+ uaccess_ttbr0_enable x2, x3, x4
dcache_line_size x2, x3
sub x3, x2, #1
bic x4, x0, x3
@@ -72,7 +72,7 @@ USER(9f, ic ivau, x4 ) // invalidate I line PoU
isb
mov x0, #0
1:
- uaccess_ttbr0_disable x1
+ uaccess_ttbr0_disable x1, x2
ret
9:
mov x0, #-EFAULT
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 25128089c386..b3d4a7153617 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -38,7 +38,16 @@ static cpumask_t tlb_flush_pending;
#define ASID_MASK (~GENMASK(asid_bits - 1, 0))
#define ASID_FIRST_VERSION (1UL << asid_bits)
-#define NUM_USER_ASIDS ASID_FIRST_VERSION
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#define NUM_USER_ASIDS (ASID_FIRST_VERSION >> 1)
+#define asid2idx(asid) (((asid) & ~ASID_MASK) >> 1)
+#define idx2asid(idx) (((idx) << 1) & ~ASID_MASK)
+#else
+#define NUM_USER_ASIDS (ASID_FIRST_VERSION)
+#define asid2idx(asid) ((asid) & ~ASID_MASK)
+#define idx2asid(idx) asid2idx(idx)
+#endif
static void flush_context(unsigned int cpu)
{
@@ -65,7 +74,7 @@ static void flush_context(unsigned int cpu)
*/
if (asid == 0)
asid = per_cpu(reserved_asids, i);
- __set_bit(asid & ~ASID_MASK, asid_map);
+ __set_bit(asid2idx(asid), asid_map);
per_cpu(reserved_asids, i) = asid;
}
@@ -120,16 +129,16 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
* We had a valid ASID in a previous life, so try to re-use
* it if possible.
*/
- asid &= ~ASID_MASK;
- if (!__test_and_set_bit(asid, asid_map))
+ if (!__test_and_set_bit(asid2idx(asid), asid_map))
return newasid;
}
/*
* Allocate a free ASID. If we can't find one, take a note of the
- * currently active ASIDs and mark the TLBs as requiring flushes.
- * We always count from ASID #1, as we use ASID #0 when setting a
- * reserved TTBR0 for the init_mm.
+ * currently active ASIDs and mark the TLBs as requiring flushes. We
+ * always count from ASID #2 (index 1), as we use ASID #0 when setting
+ * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
+ * pairs.
*/
asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
if (asid != NUM_USER_ASIDS)
@@ -146,7 +155,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
set_asid:
__set_bit(asid, asid_map);
cur_idx = asid;
- return asid | generation;
+ return idx2asid(asid) | generation;
}
void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
@@ -190,6 +199,15 @@ switch_mm_fastpath:
cpu_switch_mm(mm->pgd, mm);
}
+/* Errata workaround post TTBRx_EL1 update. */
+asmlinkage void post_ttbr_update_workaround(void)
+{
+ asm(ALTERNATIVE("nop; nop; nop",
+ "ic iallu; dsb nsh; isb",
+ ARM64_WORKAROUND_CAVIUM_27456,
+ CONFIG_CAVIUM_ERRATUM_27456));
+}
+
static int asids_init(void)
{
int fld = cpuid_feature_extract_field(read_cpuid(SYS_ID_AA64MMFR0_EL1), 4);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 74ac8a90ba3f..5ab1553e434b 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -37,6 +37,7 @@
#include <linux/swiotlb.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
+#include <linux/mm.h>
#include <asm/boot.h>
#include <asm/fixmap.h>
@@ -364,8 +365,8 @@ void __init arm64_memblock_init(void)
* linear mapping. Take care not to clip the kernel which may be
* high in memory.
*/
- memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa(_end)),
- ULLONG_MAX);
+ memblock_remove(max_t(u64, memstart_addr + linear_region_size,
+ __pa_symbol(_end)), ULLONG_MAX);
if (memstart_addr + linear_region_size < memblock_end_of_DRAM()) {
/* ensure that memstart_addr remains sufficiently aligned */
memstart_addr = round_up(memblock_end_of_DRAM() - linear_region_size,
@@ -380,7 +381,7 @@ void __init arm64_memblock_init(void)
*/
if (memory_limit != (phys_addr_t)ULLONG_MAX) {
memblock_enforce_memory_limit(memory_limit);
- memblock_add(__pa(_text), (u64)(_end - _text));
+ memblock_add(__pa_symbol(_text), (u64)(_end - _text));
}
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
@@ -404,7 +405,7 @@ void __init arm64_memblock_init(void)
* Register the kernel text, kernel data, initrd, and initial
* pagetables with memblock.
*/
- memblock_reserve(__pa(_text), _end - _text);
+ memblock_reserve(__pa_symbol(_text), _end - _text);
#ifdef CONFIG_BLK_DEV_INITRD
if (initrd_start) {
memblock_reserve(initrd_start, initrd_end - initrd_start);
@@ -427,6 +428,7 @@ void __init arm64_memblock_init(void)
reserve_elfcorehdr();
+ high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
dma_contiguous_reserve(arm64_dma_phys_limit);
memblock_allow_resize();
@@ -451,7 +453,6 @@ void __init bootmem_init(void)
sparse_init();
zone_sizes_init(min, max);
- high_memory = __va((max << PAGE_SHIFT) - 1) + 1;
max_pfn = max_low_pfn = max;
}
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 757009daa9ed..03588d136f93 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -15,6 +15,7 @@
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/start_kernel.h>
+#include <linux/mm.h>
#include <asm/mmu_context.h>
#include <asm/kernel-pgtable.h>
@@ -26,6 +27,13 @@
static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
+/*
+ * The p*d_populate functions call virt_to_phys implicitly so they can't be used
+ * directly on kernel symbols (bm_p*d). All the early functions are called too
+ * early to use lm_alias so __p*d_populate functions must be used to populate
+ * with the physical address from __pa_symbol.
+ */
+
static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
unsigned long end)
{
@@ -33,12 +41,13 @@ static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
unsigned long next;
if (pmd_none(*pmd))
- pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
+ __pmd_populate(pmd, __pa_symbol(kasan_zero_pte),
+ PMD_TYPE_TABLE);
pte = pte_offset_kimg(pmd, addr);
do {
next = addr + PAGE_SIZE;
- set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page),
+ set_pte(pte, pfn_pte(sym_to_pfn(kasan_zero_page),
PAGE_KERNEL));
} while (pte++, addr = next, addr != end && pte_none(*pte));
}
@@ -51,7 +60,8 @@ static void __init kasan_early_pmd_populate(pud_t *pud,
unsigned long next;
if (pud_none(*pud))
- pud_populate(&init_mm, pud, kasan_zero_pmd);
+ __pud_populate(pud, __pa_symbol(kasan_zero_pmd),
+ PMD_TYPE_TABLE);
pmd = pmd_offset_kimg(pud, addr);
do {
@@ -68,7 +78,8 @@ static void __init kasan_early_pud_populate(pgd_t *pgd,
unsigned long next;
if (pgd_none(*pgd))
- pgd_populate(&init_mm, pgd, kasan_zero_pud);
+ __pgd_populate(pgd, __pa_symbol(kasan_zero_pud),
+ PUD_TYPE_TABLE);
pud = pud_offset_kimg(pgd, addr);
do {
@@ -148,7 +159,7 @@ void __init kasan_init(void)
*/
memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
dsb(ishst);
- cpu_replace_ttbr1(tmp_pg_dir);
+ cpu_replace_ttbr1(lm_alias(tmp_pg_dir));
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
@@ -199,10 +210,10 @@ void __init kasan_init(void)
*/
for (i = 0; i < PTRS_PER_PTE; i++)
set_pte(&kasan_zero_pte[i],
- pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
+ pfn_pte(sym_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
memset(kasan_zero_page, 0, PAGE_SIZE);
- cpu_replace_ttbr1(swapper_pg_dir);
+ cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
/* At this point kasan is fully initialized. Enable error messages */
init_task.kasan_depth = 0;
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 2d2b6643a31e..63f6631178eb 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -31,6 +31,7 @@
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
+#include <linux/mm.h>
#include <asm/barrier.h>
#include <asm/cputype.h>
@@ -380,8 +381,8 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start,
static void __init map_mem(pgd_t *pgd)
{
- unsigned long kernel_start = __pa(_text);
- unsigned long kernel_end = __pa(__init_begin);
+ unsigned long kernel_start = __pa_symbol(_text);
+ unsigned long kernel_end = __pa_symbol(__init_begin);
struct memblock_region *reg;
/*
@@ -441,14 +442,15 @@ void mark_rodata_ro(void)
unsigned long section_size;
section_size = (unsigned long)_etext - (unsigned long)_text;
- create_mapping_late(__pa(_text), (unsigned long)_text,
+ create_mapping_late(__pa_symbol(_text), (unsigned long)_text,
section_size, PAGE_KERNEL_ROX);
/*
* mark .rodata as read only. Use __init_begin rather than __end_rodata
* to cover NOTES and EXCEPTION_TABLE.
*/
section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
- create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata,
+ create_mapping_late(__pa_symbol(__start_rodata),
+ (unsigned long)__start_rodata,
section_size, PAGE_KERNEL_RO);
}
@@ -465,7 +467,7 @@ void fixup_init(void)
static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
pgprot_t prot, struct vm_struct *vma)
{
- phys_addr_t pa_start = __pa(va_start);
+ phys_addr_t pa_start = __pa_symbol(va_start);
unsigned long size = va_end - va_start;
BUG_ON(!PAGE_ALIGNED(pa_start));
@@ -483,6 +485,37 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
vm_area_add_early(vma);
}
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+static int __init map_entry_trampoline(void)
+{
+ extern char __entry_tramp_text_start[];
+
+ pgprot_t prot = PAGE_KERNEL_EXEC;
+ phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);
+
+ /* The trampoline is always mapped and can therefore be global */
+ pgprot_val(prot) &= ~PTE_NG;
+
+ /* Map only the text into the trampoline page table */
+ memset(tramp_pg_dir, 0, PGD_SIZE);
+ __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE,
+ prot, late_pgtable_alloc, 0);
+
+ /* Map both the text and data into the kernel page table */
+ __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot);
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+ extern char __entry_tramp_data_start[];
+
+ __set_fixmap(FIX_ENTRY_TRAMP_DATA,
+ __pa_symbol(__entry_tramp_data_start),
+ PAGE_KERNEL_RO);
+ }
+
+ return 0;
+}
+core_initcall(map_entry_trampoline);
+#endif
+
/*
* Create fine-grained mappings for the kernel.
*/
@@ -513,7 +546,7 @@ static void __init map_kernel(pgd_t *pgd)
*/
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START),
- __pud(__pa(bm_pmd) | PUD_TYPE_TABLE));
+ __pud(__pa_symbol(bm_pmd) | PUD_TYPE_TABLE));
pud_clear_fixmap();
} else {
BUG();
@@ -544,7 +577,7 @@ void __init paging_init(void)
*/
cpu_replace_ttbr1(__va(pgd_phys));
memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
- cpu_replace_ttbr1(swapper_pg_dir);
+ cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
pgd_clear_fixmap();
memblock_free(pgd_phys, PAGE_SIZE);
@@ -553,7 +586,7 @@ void __init paging_init(void)
* We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
* allocated with it.
*/
- memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE,
+ memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE,
SWAPPER_DIR_SIZE - PAGE_SIZE);
bootmem_init();
@@ -666,6 +699,12 @@ static inline pte_t * fixmap_pte(unsigned long addr)
return &bm_pte[pte_index(addr)];
}
+/*
+ * The p*d_populate functions call virt_to_phys implicitly so they can't be used
+ * directly on kernel symbols (bm_p*d). This function is called too early to use
+ * lm_alias so __p*d_populate functions must be used to populate with the
+ * physical address from __pa_symbol.
+ */
void __init early_fixmap_init(void)
{
pgd_t *pgd;
@@ -675,7 +714,7 @@ void __init early_fixmap_init(void)
pgd = pgd_offset_k(addr);
if (CONFIG_PGTABLE_LEVELS > 3 &&
- !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa(bm_pud))) {
+ !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa_symbol(bm_pud))) {
/*
* We only end up here if the kernel mapping and the fixmap
* share the top level pgd entry, which should only happen on
@@ -684,12 +723,15 @@ void __init early_fixmap_init(void)
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
pud = pud_offset_kimg(pgd, addr);
} else {
- pgd_populate(&init_mm, pgd, bm_pud);
+ if (pgd_none(*pgd))
+ __pgd_populate(pgd, __pa_symbol(bm_pud),
+ PUD_TYPE_TABLE);
pud = fixmap_pud(addr);
}
- pud_populate(&init_mm, pud, bm_pmd);
+ if (pud_none(*pud))
+ __pud_populate(pud, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
pmd = fixmap_pmd(addr);
- pmd_populate_kernel(&init_mm, pmd, bm_pte);
+ __pmd_populate(pmd, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
/*
* The boot-ioremap range spans multiple pmds, for which
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 32682be978e0..19834ff5360c 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -138,12 +138,17 @@ ENDPROC(cpu_do_resume)
* - pgd_phys - physical address of new TTB
*/
ENTRY(cpu_do_switch_mm)
+ mrs x2, ttbr1_el1
mmid x1, x1 // get mm->context.id
- bfi x0, x1, #48, #16 // set the ASID
- msr ttbr0_el1, x0 // set TTBR0
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ bfi x0, x1, #48, #16 // set the ASID field in TTBR0
+#endif
+ bfi x2, x1, #48, #16 // set the ASID
+ msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set)
isb
- post_ttbr0_update_workaround
- ret
+ msr ttbr0_el1, x0 // now update TTBR0
+ isb
+ b post_ttbr_update_workaround // Back to C code...
ENDPROC(cpu_do_switch_mm)
.pushsection ".idmap.text", "ax"
@@ -224,7 +229,7 @@ ENTRY(__cpu_setup)
* both user and kernel.
*/
ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
- TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0
+ TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 | TCR_A1
tcr_set_idmap_t0sz x10, x9
/*
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S
index b96db5dafec4..a396beb7829b 100644
--- a/arch/arm64/xen/hypercall.S
+++ b/arch/arm64/xen/hypercall.S
@@ -98,12 +98,12 @@ ENTRY(privcmd_call)
* need the explicit uaccess_enable/disable if the TTBR0 PAN emulation
* is enabled (it implies that hardware UAO and PAN disabled).
*/
- uaccess_ttbr0_enable x6, x7
+ uaccess_ttbr0_enable x6, x7, x8
hvc XEN_IMM
/*
* Disable userspace access from kernel once the hyp call completed.
*/
- uaccess_ttbr0_disable x6
+ uaccess_ttbr0_disable x6, x7
ret
ENDPROC(privcmd_call);
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index af76634f8d98..934573cc1134 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -318,11 +318,14 @@ config BF53x
config GPIO_ADI
def_bool y
+ depends on !PINCTRL
depends on (BF51x || BF52x || BF53x || BF538 || BF539 || BF561)
-config PINCTRL
+config PINCTRL_BLACKFIN_ADI2
def_bool y
- depends on BF54x || BF60x
+ depends on (BF54x || BF60x)
+ select PINCTRL
+ select PINCTRL_ADI2
config MEM_MT48LC64M4A2FB_7E
bool
diff --git a/arch/blackfin/Kconfig.debug b/arch/blackfin/Kconfig.debug
index f3337ee03621..a93cf06a4d6f 100644
--- a/arch/blackfin/Kconfig.debug
+++ b/arch/blackfin/Kconfig.debug
@@ -17,6 +17,7 @@ config DEBUG_VERBOSE
config DEBUG_MMRS
tristate "Generate Blackfin MMR tree"
+ depends on !PINCTRL
select DEBUG_FS
help
Create a tree of Blackfin MMRs via the debugfs tree. If
diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S
index c9eec84aa258..d920b959ff3a 100644
--- a/arch/blackfin/kernel/vmlinux.lds.S
+++ b/arch/blackfin/kernel/vmlinux.lds.S
@@ -35,6 +35,7 @@ SECTIONS
#endif
LOCK_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
KPROBES_TEXT
#ifdef CONFIG_ROMKERNEL
__sinittext = .;
diff --git a/arch/c6x/kernel/vmlinux.lds.S b/arch/c6x/kernel/vmlinux.lds.S
index 5a6e141d1641..50bc10f97bcb 100644
--- a/arch/c6x/kernel/vmlinux.lds.S
+++ b/arch/c6x/kernel/vmlinux.lds.S
@@ -72,6 +72,7 @@ SECTIONS
SCHED_TEXT
LOCK_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
KPROBES_TEXT
*(.fixup)
*(.gnu.warning)
diff --git a/arch/metag/kernel/vmlinux.lds.S b/arch/metag/kernel/vmlinux.lds.S
index e12055e88bfe..150ace92c7ad 100644
--- a/arch/metag/kernel/vmlinux.lds.S
+++ b/arch/metag/kernel/vmlinux.lds.S
@@ -24,6 +24,7 @@ SECTIONS
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
*(.text.*)
*(.gnu.warning)
}
diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S
index be9488d69734..0a47f0410554 100644
--- a/arch/microblaze/kernel/vmlinux.lds.S
+++ b/arch/microblaze/kernel/vmlinux.lds.S
@@ -36,6 +36,7 @@ SECTIONS {
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
. = ALIGN (4) ;
_etext = . ;
}
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 163b3449a8de..fcbc4e57d765 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -664,6 +664,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
unsigned long switch_count;
struct task_struct *t;
+ /* If nothing to change, return right away, successfully. */
+ if (value == mips_get_process_fp_mode(task))
+ return 0;
+
+ /* Only accept a mode change if 64-bit FP enabled for o32. */
+ if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
+ return -EOPNOTSUPP;
+
+ /* And only for o32 tasks. */
+ if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS))
+ return -EOPNOTSUPP;
+
/* Check the value is valid */
if (value & ~known_bits)
return -EOPNOTSUPP;
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index a3f38e6b7ea1..c3d2d2c05fdb 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -439,63 +439,160 @@ static int gpr64_set(struct task_struct *target,
#endif /* CONFIG_64BIT */
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer,
+ * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots
+ * correspond 1:1 to buffer slots. Only general registers are copied.
+ */
+static int fpr_get_fpa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ void **kbuf, void __user **ubuf)
+{
+ return user_regset_copyout(pos, count, kbuf, ubuf,
+ &target->thread.fpu,
+ 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
+}
+
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer,
+ * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's
+ * general register slots are copied to buffer slots. Only general
+ * registers are copied.
+ */
+static int fpr_get_msa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ void **kbuf, void __user **ubuf)
+{
+ unsigned int i;
+ u64 fpr_val;
+ int err;
+
+ BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
+ err = user_regset_copyout(pos, count, kbuf, ubuf,
+ &fpr_val, i * sizeof(elf_fpreg_t),
+ (i + 1) * sizeof(elf_fpreg_t));
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer.
+ * Choose the appropriate helper for general registers, and then copy
+ * the FCSR register separately.
+ */
static int fpr_get(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- unsigned i;
+ const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
int err;
- u64 fpr_val;
- /* XXX fcr31 */
+ if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
+ err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf);
+ else
+ err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf);
+ if (err)
+ return err;
- if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu,
- 0, sizeof(elf_fpregset_t));
+ err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu.fcr31,
+ fcr31_pos, fcr31_pos + sizeof(u32));
- for (i = 0; i < NUM_FPU_REGS; i++) {
- fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
- err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &fpr_val, i * sizeof(elf_fpreg_t),
- (i + 1) * sizeof(elf_fpreg_t));
+ return err;
+}
+
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context,
+ * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP
+ * context's general register slots. Only general registers are copied.
+ */
+static int fpr_set_fpa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ const void **kbuf, const void __user **ubuf)
+{
+ return user_regset_copyin(pos, count, kbuf, ubuf,
+ &target->thread.fpu,
+ 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
+}
+
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context,
+ * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64
+ * bits only of FP context's general register slots. Only general
+ * registers are copied.
+ */
+static int fpr_set_msa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ const void **kbuf, const void __user **ubuf)
+{
+ unsigned int i;
+ u64 fpr_val;
+ int err;
+
+ BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
+ for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) {
+ err = user_regset_copyin(pos, count, kbuf, ubuf,
+ &fpr_val, i * sizeof(elf_fpreg_t),
+ (i + 1) * sizeof(elf_fpreg_t));
if (err)
return err;
+ set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
}
return 0;
}
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context.
+ * Choose the appropriate helper for general registers, and then copy
+ * the FCSR register separately.
+ *
+ * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
+ * which is supposed to have been guaranteed by the kernel before
+ * calling us, e.g. in `ptrace_regset'. We enforce that requirement,
+ * so that we can safely avoid preinitializing temporaries for
+ * partial register writes.
+ */
static int fpr_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- unsigned i;
+ const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
+ u32 fcr31;
int err;
- u64 fpr_val;
- /* XXX fcr31 */
+ BUG_ON(count % sizeof(elf_fpreg_t));
+
+ if (pos + count > sizeof(elf_fpregset_t))
+ return -EIO;
init_fp_ctx(target);
- if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu,
- 0, sizeof(elf_fpregset_t));
+ if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
+ err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf);
+ else
+ err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf);
+ if (err)
+ return err;
- BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
- for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) {
+ if (count > 0) {
err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &fpr_val, i * sizeof(elf_fpreg_t),
- (i + 1) * sizeof(elf_fpreg_t));
+ &fcr31,
+ fcr31_pos, fcr31_pos + sizeof(u32));
if (err)
return err;
- set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
+
+ ptrace_setfcr31(target, fcr31);
}
- return 0;
+ return err;
}
enum mips_regset {
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index 2026203c41e2..261b2ce579bb 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -58,6 +58,7 @@ SECTIONS
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
*(.text.*)
*(.fixup)
*(.gnu.warning)
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index dd058aa8a3b5..89d05de8040a 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -1777,7 +1777,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
SPFROMREG(fs, MIPSInst_FS(ir));
SPFROMREG(fd, MIPSInst_FD(ir));
rv.s = ieee754sp_maddf(fd, fs, ft);
- break;
+ goto copcsr;
}
case fmsubf_op: {
@@ -1790,7 +1790,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
SPFROMREG(fs, MIPSInst_FS(ir));
SPFROMREG(fd, MIPSInst_FD(ir));
rv.s = ieee754sp_msubf(fd, fs, ft);
- break;
+ goto copcsr;
}
case frint_op: {
@@ -1814,7 +1814,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
SPFROMREG(fs, MIPSInst_FS(ir));
rv.w = ieee754sp_2008class(fs);
rfmt = w_fmt;
- break;
+ goto copcsr;
}
case fmin_op: {
@@ -1826,7 +1826,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fmin(fs, ft);
- break;
+ goto copcsr;
}
case fmina_op: {
@@ -1838,7 +1838,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fmina(fs, ft);
- break;
+ goto copcsr;
}
case fmax_op: {
@@ -1850,7 +1850,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fmax(fs, ft);
- break;
+ goto copcsr;
}
case fmaxa_op: {
@@ -1862,7 +1862,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fmaxa(fs, ft);
- break;
+ goto copcsr;
}
case fabs_op:
@@ -2095,7 +2095,7 @@ copcsr:
DPFROMREG(fs, MIPSInst_FS(ir));
DPFROMREG(fd, MIPSInst_FD(ir));
rv.d = ieee754dp_maddf(fd, fs, ft);
- break;
+ goto copcsr;
}
case fmsubf_op: {
@@ -2108,7 +2108,7 @@ copcsr:
DPFROMREG(fs, MIPSInst_FS(ir));
DPFROMREG(fd, MIPSInst_FD(ir));
rv.d = ieee754dp_msubf(fd, fs, ft);
- break;
+ goto copcsr;
}
case frint_op: {
@@ -2132,7 +2132,7 @@ copcsr:
DPFROMREG(fs, MIPSInst_FS(ir));
rv.w = ieee754dp_2008class(fs);
rfmt = w_fmt;
- break;
+ goto copcsr;
}
case fmin_op: {
@@ -2144,7 +2144,7 @@ copcsr:
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fmin(fs, ft);
- break;
+ goto copcsr;
}
case fmina_op: {
@@ -2156,7 +2156,7 @@ copcsr:
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fmina(fs, ft);
- break;
+ goto copcsr;
}
case fmax_op: {
@@ -2168,7 +2168,7 @@ copcsr:
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fmax(fs, ft);
- break;
+ goto copcsr;
}
case fmaxa_op: {
@@ -2180,7 +2180,7 @@ copcsr:
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fmaxa(fs, ft);
- break;
+ goto copcsr;
}
case fabs_op:
diff --git a/arch/nios2/kernel/vmlinux.lds.S b/arch/nios2/kernel/vmlinux.lds.S
index 326fab40a9de..e23e89539967 100644
--- a/arch/nios2/kernel/vmlinux.lds.S
+++ b/arch/nios2/kernel/vmlinux.lds.S
@@ -39,6 +39,7 @@ SECTIONS
SCHED_TEXT
LOCK_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
KPROBES_TEXT
} =0
_etext = .;
diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h
index 5cc6b4f1b795..1a836afb636d 100644
--- a/arch/openrisc/include/asm/uaccess.h
+++ b/arch/openrisc/include/asm/uaccess.h
@@ -215,7 +215,7 @@ do { \
case 1: __get_user_asm(x, ptr, retval, "l.lbz"); break; \
case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break; \
case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break; \
- case 8: __get_user_asm2(x, ptr, retval); \
+ case 8: __get_user_asm2(x, ptr, retval); break; \
default: (x) = __get_user_bad(); \
} \
} while (0)
diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S
index 3a08b55609b6..341fc086bc17 100644
--- a/arch/openrisc/kernel/vmlinux.lds.S
+++ b/arch/openrisc/kernel/vmlinux.lds.S
@@ -52,6 +52,7 @@ SECTIONS
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
*(.fixup)
*(.text.__*)
_etext = .;
diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h
index 8121aa6db2ff..51bb6b8eade6 100644
--- a/arch/parisc/include/asm/ldcw.h
+++ b/arch/parisc/include/asm/ldcw.h
@@ -11,6 +11,7 @@
for the semaphore. */
#define __PA_LDCW_ALIGNMENT 16
+#define __PA_LDCW_ALIGN_ORDER 4
#define __ldcw_align(a) ({ \
unsigned long __ret = (unsigned long) &(a)->lock[0]; \
__ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \
@@ -28,6 +29,7 @@
ldcd). */
#define __PA_LDCW_ALIGNMENT 4
+#define __PA_LDCW_ALIGN_ORDER 2
#define __ldcw_align(a) (&(a)->slock)
#define __LDCW "ldcw,co"
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 623496c11756..5dc831955de5 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -35,6 +35,7 @@
#include <asm/pgtable.h>
#include <asm/signal.h>
#include <asm/unistd.h>
+#include <asm/ldcw.h>
#include <asm/thread_info.h>
#include <linux/linkage.h>
@@ -46,6 +47,14 @@
#endif
.import pa_tlb_lock,data
+ .macro load_pa_tlb_lock reg
+#if __PA_LDCW_ALIGNMENT > 4
+ load32 PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg
+ depi 0,31,__PA_LDCW_ALIGN_ORDER, \reg
+#else
+ load32 PA(pa_tlb_lock), \reg
+#endif
+ .endm
/* space_to_prot macro creates a prot id from a space id */
@@ -457,7 +466,7 @@
.macro tlb_lock spc,ptp,pte,tmp,tmp1,fault
#ifdef CONFIG_SMP
cmpib,COND(=),n 0,\spc,2f
- load32 PA(pa_tlb_lock),\tmp
+ load_pa_tlb_lock \tmp
1: LDCW 0(\tmp),\tmp1
cmpib,COND(=) 0,\tmp1,1b
nop
@@ -480,7 +489,7 @@
/* Release pa_tlb_lock lock. */
.macro tlb_unlock1 spc,tmp
#ifdef CONFIG_SMP
- load32 PA(pa_tlb_lock),\tmp
+ load_pa_tlb_lock \tmp
tlb_unlock0 \spc,\tmp
#endif
.endm
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index a4761b772406..16073f472118 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -36,6 +36,7 @@
#include <asm/assembly.h>
#include <asm/pgtable.h>
#include <asm/cache.h>
+#include <asm/ldcw.h>
#include <linux/linkage.h>
.text
@@ -333,8 +334,12 @@ ENDPROC(flush_data_cache_local)
.macro tlb_lock la,flags,tmp
#ifdef CONFIG_SMP
- ldil L%pa_tlb_lock,%r1
- ldo R%pa_tlb_lock(%r1),\la
+#if __PA_LDCW_ALIGNMENT > 4
+ load32 pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la
+ depi 0,31,__PA_LDCW_ALIGN_ORDER, \la
+#else
+ load32 pa_tlb_lock, \la
+#endif
rsm PSW_SM_I,\flags
1: LDCW 0(\la),\tmp
cmpib,<>,n 0,\tmp,3f
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 60771df10fde..75304af9f742 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -72,6 +72,7 @@ SECTIONS
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
*(.text.do_softirq)
*(.text.sys_exit)
*(.text.do_sigaltstack)
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index d41fd0af8980..2dd91f79de05 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -55,6 +55,7 @@ SECTIONS
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
#ifdef CONFIG_PPC32
*(.got1)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index d1e65ce545b3..b2ab164a8094 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -401,8 +401,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
int ret;
__u64 target;
- if (is_kernel_addr(addr))
- return branch_target((unsigned int *)addr);
+ if (is_kernel_addr(addr)) {
+ if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
+ return 0;
+
+ return branch_target(&instr);
+ }
/* Userspace: need copy instruction here then translate it */
pagefault_disable();
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 9f9dfda9ed2c..e8ca0fad2e69 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -514,7 +514,7 @@ static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
{
if (s1 < s2)
return 1;
- if (s2 > s1)
+ if (s1 > s2)
return -1;
return memcmp(d1, d2, s1);
diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c
index bdc8c0c71d15..4c00b37b09bc 100644
--- a/arch/powerpc/platforms/powernv/opal-async.c
+++ b/arch/powerpc/platforms/powernv/opal-async.c
@@ -39,18 +39,18 @@ int __opal_async_get_token(void)
int token;
spin_lock_irqsave(&opal_async_comp_lock, flags);
- token = find_first_bit(opal_async_complete_map, opal_max_async_tokens);
+ token = find_first_zero_bit(opal_async_token_map, opal_max_async_tokens);
if (token >= opal_max_async_tokens) {
token = -EBUSY;
goto out;
}
- if (__test_and_set_bit(token, opal_async_token_map)) {
+ if (!__test_and_clear_bit(token, opal_async_complete_map)) {
token = -EBUSY;
goto out;
}
- __clear_bit(token, opal_async_complete_map);
+ __set_bit(token, opal_async_token_map);
out:
spin_unlock_irqrestore(&opal_async_comp_lock, flags);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index e40d0714679e..ecb7f3220355 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2270,6 +2270,9 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
level_shift = entries_shift + 3;
level_shift = max_t(unsigned, level_shift, PAGE_SHIFT);
+ if ((level_shift - 3) * levels + page_shift >= 60)
+ return -EINVAL;
+
/* Allocate TCE table */
addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
levels, tce_table_size, &offset, &total_allocated);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index a9a8fa37a555..f48afc06ba14 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -295,7 +295,7 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu)
{
unsigned long ret_freq;
- ret_freq = cpufreq_quick_get(cpu) * 1000ul;
+ ret_freq = cpufreq_get(cpu) * 1000ul;
/*
* If the backend cpufreq driver does not exist,
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 7a399b4d60a0..566e8fc341f3 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -276,7 +276,9 @@ failed:
if (bank->disk->major > 0)
unregister_blkdev(bank->disk->major,
bank->disk->disk_name);
- del_gendisk(bank->disk);
+ if (bank->disk->flags & GENHD_FL_UP)
+ del_gendisk(bank->disk);
+ put_disk(bank->disk);
}
device->dev.platform_data = NULL;
if (bank->io_addr != 0)
@@ -301,6 +303,7 @@ axon_ram_remove(struct platform_device *device)
device_remove_file(&device->dev, &dev_attr_ecc);
free_irq(bank->irq_id, device);
del_gendisk(bank->disk);
+ put_disk(bank->disk);
iounmap((void __iomem *) bank->io_addr);
kfree(bank);
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index f76ee39cb337..800a591695c0 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -845,12 +845,12 @@ void ipic_disable_mcp(enum ipic_mcp_irq mcp_irq)
u32 ipic_get_mcp_status(void)
{
- return ipic_read(primary_ipic->regs, IPIC_SERMR);
+ return ipic_read(primary_ipic->regs, IPIC_SERSR);
}
void ipic_clear_mcp_status(u32 mask)
{
- ipic_write(primary_ipic->regs, IPIC_SERMR, mask);
+ ipic_write(primary_ipic->regs, IPIC_SERSR, mask);
}
/* Return an interrupt vector or NO_IRQ if no interrupt is pending. */
diff --git a/arch/s390/include/asm/asm-prototypes.h b/arch/s390/include/asm/asm-prototypes.h
deleted file mode 100644
index 2c3413b0ca52..000000000000
--- a/arch/s390/include/asm/asm-prototypes.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _ASM_S390_PROTOTYPES_H
-
-#include <linux/kvm_host.h>
-#include <linux/ftrace.h>
-#include <asm/fpu/api.h>
-#include <asm-generic/asm-prototypes.h>
-
-#endif /* _ASM_S390_PROTOTYPES_H */
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
index 649eb62c52b3..9e02cb7955c1 100644
--- a/arch/s390/include/asm/pci_insn.h
+++ b/arch/s390/include/asm/pci_insn.h
@@ -81,6 +81,6 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range);
int zpci_load(u64 *data, u64 req, u64 offset);
int zpci_store(u64 data, u64 req, u64 offset);
int zpci_store_block(const u64 *data, u64 req, u64 offset);
-void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc);
+int zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc);
#endif
diff --git a/arch/s390/include/asm/runtime_instr.h b/arch/s390/include/asm/runtime_instr.h
index 402ad6df4897..c54a9310d814 100644
--- a/arch/s390/include/asm/runtime_instr.h
+++ b/arch/s390/include/asm/runtime_instr.h
@@ -85,6 +85,8 @@ static inline void restore_ri_cb(struct runtime_instr_cb *cb_next,
load_runtime_instr_cb(&runtime_instr_empty_cb);
}
-void exit_thread_runtime_instr(void);
+struct task_struct;
+
+void runtime_instr_release(struct task_struct *tsk);
#endif /* _RUNTIME_INSTR_H */
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index dde6b52359c5..ff2fbdafe689 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -29,17 +29,16 @@ static inline void restore_access_regs(unsigned int *acrs)
}
#define switch_to(prev,next,last) do { \
- if (prev->mm) { \
- save_fpu_regs(); \
- save_access_regs(&prev->thread.acrs[0]); \
- save_ri_cb(prev->thread.ri_cb); \
- } \
+ /* save_fpu_regs() sets the CIF_FPU flag, which enforces \
+ * a restore of the floating point / vector registers as \
+ * soon as the next task returns to user space \
+ */ \
+ save_fpu_regs(); \
+ save_access_regs(&prev->thread.acrs[0]); \
+ save_ri_cb(prev->thread.ri_cb); \
update_cr_regs(next); \
- if (next->mm) { \
- set_cpu_flag(CIF_FPU); \
- restore_access_regs(&next->thread.acrs[0]); \
- restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
- } \
+ restore_access_regs(&next->thread.acrs[0]); \
+ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
prev = __switch_to(prev,next); \
} while (0)
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 437e61159279..0176ebc97bfd 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -263,6 +263,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
return retval;
}
+ groups_sort(group_info);
retval = set_current_groups(group_info);
put_group_info(group_info);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index efa035a31b98..7bc4e4c5d5b8 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -72,7 +72,6 @@ extern void kernel_thread_starter(void);
*/
void exit_thread(void)
{
- exit_thread_runtime_instr();
}
void flush_thread(void)
@@ -87,6 +86,7 @@ void arch_release_task_struct(struct task_struct *tsk)
{
/* Free either the floating-point or the vector register save area */
kfree(tsk->thread.fpu.regs);
+ runtime_instr_release(tsk);
}
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
index 70cdb03d4acd..fd03a7569e10 100644
--- a/arch/s390/kernel/runtime_instr.c
+++ b/arch/s390/kernel/runtime_instr.c
@@ -18,11 +18,24 @@
/* empty control block to disable RI by loading it */
struct runtime_instr_cb runtime_instr_empty_cb;
+void runtime_instr_release(struct task_struct *tsk)
+{
+ kfree(tsk->thread.ri_cb);
+}
+
static void disable_runtime_instr(void)
{
- struct pt_regs *regs = task_pt_regs(current);
+ struct task_struct *task = current;
+ struct pt_regs *regs;
+ if (!task->thread.ri_cb)
+ return;
+ regs = task_pt_regs(task);
+ preempt_disable();
load_runtime_instr_cb(&runtime_instr_empty_cb);
+ kfree(task->thread.ri_cb);
+ task->thread.ri_cb = NULL;
+ preempt_enable();
/*
* Make sure the RI bit is deleted from the PSW. If the user did not
@@ -43,19 +56,6 @@ static void init_runtime_instr_cb(struct runtime_instr_cb *cb)
cb->valid = 1;
}
-void exit_thread_runtime_instr(void)
-{
- struct task_struct *task = current;
-
- preempt_disable();
- if (!task->thread.ri_cb)
- return;
- disable_runtime_instr();
- kfree(task->thread.ri_cb);
- task->thread.ri_cb = NULL;
- preempt_enable();
-}
-
SYSCALL_DEFINE1(s390_runtime_instr, int, command)
{
struct runtime_instr_cb *cb;
@@ -64,7 +64,7 @@ SYSCALL_DEFINE1(s390_runtime_instr, int, command)
return -EOPNOTSUPP;
if (command == S390_RUNTIME_INSTR_STOP) {
- exit_thread_runtime_instr();
+ disable_runtime_instr();
return 0;
}
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 5378c3ea1b98..a1eeaa0db8b7 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -369,10 +369,10 @@ SYSCALL(sys_recvmmsg,compat_sys_recvmmsg)
SYSCALL(sys_sendmmsg,compat_sys_sendmmsg)
SYSCALL(sys_socket,sys_socket)
SYSCALL(sys_socketpair,compat_sys_socketpair) /* 360 */
-SYSCALL(sys_bind,sys_bind)
-SYSCALL(sys_connect,sys_connect)
+SYSCALL(sys_bind,compat_sys_bind)
+SYSCALL(sys_connect,compat_sys_connect)
SYSCALL(sys_listen,sys_listen)
-SYSCALL(sys_accept4,sys_accept4)
+SYSCALL(sys_accept4,compat_sys_accept4)
SYSCALL(sys_getsockopt,compat_sys_getsockopt) /* 365 */
SYSCALL(sys_setsockopt,compat_sys_setsockopt)
SYSCALL(sys_getsockname,compat_sys_getsockname)
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 445657fe658c..0f41a8286378 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -28,6 +28,7 @@ SECTIONS
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
} :text = 0x0700
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index f2f6720a3331..ef0499b76c50 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -359,7 +359,8 @@ static void zpci_irq_handler(struct airq_struct *airq)
/* End of second scan with interrupts on. */
break;
/* First scan complete, reenable interrupts. */
- zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC))
+ break;
si = 0;
continue;
}
@@ -921,7 +922,7 @@ static int __init pci_base_init(void)
if (!s390_pci_probe)
return 0;
- if (!test_facility(69) || !test_facility(71) || !test_facility(72))
+ if (!test_facility(69) || !test_facility(71))
return 0;
rc = zpci_debug_init();
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 10ca15dcab11..bc065392f7ab 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -7,6 +7,7 @@
#include <linux/export.h>
#include <linux/errno.h>
#include <linux/delay.h>
+#include <asm/facility.h>
#include <asm/pci_insn.h>
#include <asm/pci_debug.h>
#include <asm/processor.h>
@@ -91,11 +92,14 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
}
/* Set Interruption Controls */
-void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
+int zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
{
+ if (!test_facility(72))
+ return -EIO;
asm volatile (
" .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n"
: : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused));
+ return 0;
}
/* PCI Load */
diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S
index db88cbf9eafd..235a4101999f 100644
--- a/arch/sh/kernel/vmlinux.lds.S
+++ b/arch/sh/kernel/vmlinux.lds.S
@@ -39,6 +39,7 @@ SECTIONS
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
_etext = .; /* End of text section */
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 4a41d412dd3d..7d02b1fef025 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -52,6 +52,7 @@ SECTIONS
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
*(.gnu.warning)
} = 0
_etext = .;
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 384aba109d7c..c2f376ce443b 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2402,10 +2402,17 @@ void __init mem_init(void)
{
high_memory = __va(last_valid_pfn << PAGE_SHIFT);
- register_page_bootmem_info();
free_all_bootmem();
/*
+ * Must be done after boot memory is put on freelist, because here we
+ * might set fields in deferred struct pages that have not yet been
+ * initialized, and free_all_bootmem() initializes all the reserved
+ * deferred pages for us.
+ */
+ register_page_bootmem_info();
+
+ /*
* Set up the zero page, mark it reserved, so that page count
* is not manipulated when freeing the page from user ptes.
*/
diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S
index 0e059a0101ea..378f5d8d1ec8 100644
--- a/arch/tile/kernel/vmlinux.lds.S
+++ b/arch/tile/kernel/vmlinux.lds.S
@@ -45,6 +45,7 @@ SECTIONS
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
__fix_text_end = .; /* tile-cpack won't rearrange before this */
ALIGN_FUNCTION();
*(.hottext*)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 23b2767e423d..09332bd6e05a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -27,6 +27,7 @@ config X86
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_GCOV_PROFILE_ALL
+ select ARCH_HAS_KCOV if X86_64
select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_MMIO_FLUSH
select ARCH_HAS_SG_CHAIN
@@ -42,7 +43,7 @@ config X86
select ARCH_USE_CMPXCHG_LOCKREF if X86_64
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
- select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP
+ select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_IPC_PARSE_VERSION if X86_32
@@ -64,6 +65,7 @@ config X86
select GENERIC_CLOCKEVENTS_MIN_ADJUST
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
+ select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
select GENERIC_FIND_FIRST_BIT
select GENERIC_IOMAP
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index c0cc2a6be0bf..707adf8b6a9f 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -11,6 +11,13 @@
KASAN_SANITIZE := n
+# Kernel does not boot with kcov instrumentation here.
+# One of the problems observed was insertion of __sanitizer_cov_trace_pc()
+# callback into middle of per-cpu data enabling code. Thus the callback observed
+# inconsistent state and crashed. We are interested mostly in syscall coverage,
+# so boot code is not interesting anyway.
+KCOV_INSTRUMENT := n
+
# If you want to preset the SVGA mode, uncomment the next line and
# set SVGA_MODE to whatever number you want.
# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index efa6073ffa7e..6f7af9ac16e2 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -18,6 +18,9 @@
KASAN_SANITIZE := n
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT := n
+
targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 3783dc3e10b3..4abb284a5b9c 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -9,6 +9,7 @@
*/
#undef CONFIG_PARAVIRT
#undef CONFIG_PARAVIRT_SPINLOCKS
+#undef CONFIG_PAGE_TABLE_ISOLATION
#undef CONFIG_KASAN
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
index 399a29d067d6..cb91a64a99e7 100644
--- a/arch/x86/crypto/salsa20_glue.c
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -59,13 +59,6 @@ static int encrypt(struct blkcipher_desc *desc,
salsa20_ivsetup(ctx, walk.iv);
- if (likely(walk.nbytes == nbytes))
- {
- salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
- walk.dst.virt.addr, nbytes);
- return blkcipher_walk_done(desc, &walk, 0);
- }
-
while (walk.nbytes >= 64) {
salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
walk.dst.virt.addr,
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index cc0f2f5da19b..db42a6766995 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -35,6 +35,7 @@
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/pgtable_types.h>
+#include <asm/kaiser.h>
#include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
@@ -135,6 +136,7 @@ ENTRY(entry_SYSCALL_64)
* it is too small to ever cause noticeable irq latency.
*/
SWAPGS_UNSAFE_STACK
+ SWITCH_KERNEL_CR3_NO_STACK
/*
* A hypervisor implementation might want to use a label
* after the swapgs, so that it can do the swapgs
@@ -207,9 +209,17 @@ entry_SYSCALL_64_fastpath:
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */
- RESTORE_C_REGS_EXCEPT_RCX_R11
movq RIP(%rsp), %rcx
movq EFLAGS(%rsp), %r11
+ RESTORE_C_REGS_EXCEPT_RCX_R11
+ /*
+ * This opens a window where we have a user CR3, but are
+ * running in the kernel. This makes using the CS
+ * register useless for telling whether or not we need to
+ * switch CR3 in NMIs. Normal interrupts are OK because
+ * they are off here.
+ */
+ SWITCH_USER_CR3
movq RSP(%rsp), %rsp
/*
* 64-bit SYSRET restores rip from rcx,
@@ -347,10 +357,26 @@ GLOBAL(int_ret_from_sys_call)
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
RESTORE_C_REGS_EXCEPT_RCX_R11
+ /*
+ * This opens a window where we have a user CR3, but are
+ * running in the kernel. This makes using the CS
+ * register useless for telling whether or not we need to
+ * switch CR3 in NMIs. Normal interrupts are OK because
+ * they are off here.
+ */
+ SWITCH_USER_CR3
movq RSP(%rsp), %rsp
USERGS_SYSRET64
opportunistic_sysret_failed:
+ /*
+ * This opens a window where we have a user CR3, but are
+ * running in the kernel. This makes using the CS
+ * register useless for telling whether or not we need to
+ * switch CR3 in NMIs. Normal interrupts are OK because
+ * they are off here.
+ */
+ SWITCH_USER_CR3
SWAPGS
jmp restore_c_regs_and_iret
END(entry_SYSCALL_64)
@@ -509,6 +535,7 @@ END(irq_entries_start)
* tracking that we're in kernel mode.
*/
SWAPGS
+ SWITCH_KERNEL_CR3
/*
* We need to tell lockdep that IRQs are off. We can't do this until
@@ -568,6 +595,7 @@ GLOBAL(retint_user)
mov %rsp,%rdi
call prepare_exit_to_usermode
TRACE_IRQS_IRETQ
+ SWITCH_USER_CR3
SWAPGS
jmp restore_regs_and_iret
@@ -625,6 +653,7 @@ native_irq_return_ldt:
pushq %rax
pushq %rdi
SWAPGS
+ SWITCH_KERNEL_CR3
movq PER_CPU_VAR(espfix_waddr), %rdi
movq %rax, (0*8)(%rdi) /* RAX */
movq (2*8)(%rsp), %rax /* RIP */
@@ -640,6 +669,7 @@ native_irq_return_ldt:
andl $0xffff0000, %eax
popq %rdi
orq PER_CPU_VAR(espfix_stack), %rax
+ SWITCH_USER_CR3
SWAPGS
movq %rax, %rsp
popq %rax
@@ -672,9 +702,15 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym)
.endm
#endif
+/* Make sure APIC interrupt handlers end up in the irqentry section: */
+#define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
+#define POP_SECTION_IRQENTRY .popsection
+
.macro apicinterrupt num sym do_sym
+PUSH_SECTION_IRQENTRY
apicinterrupt3 \num \sym \do_sym
trace_apicinterrupt \num \sym
+POP_SECTION_IRQENTRY
.endm
#ifdef CONFIG_SMP
@@ -995,7 +1031,11 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vec
/*
* Save all registers in pt_regs, and switch gs if needed.
* Use slow, but surefire "are we in kernel?" check.
- * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
+ *
+ * Return: ebx=0: needs swapgs but not SWITCH_USER_CR3 in paranoid_exit
+ * ebx=1: needs neither swapgs nor SWITCH_USER_CR3 in paranoid_exit
+ * ebx=2: needs both swapgs and SWITCH_USER_CR3 in paranoid_exit
+ * ebx=3: needs SWITCH_USER_CR3 but not swapgs in paranoid_exit
*/
ENTRY(paranoid_entry)
cld
@@ -1008,7 +1048,26 @@ ENTRY(paranoid_entry)
js 1f /* negative -> in kernel */
SWAPGS
xorl %ebx, %ebx
-1: ret
+1:
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * We might have come in between a swapgs and a SWITCH_KERNEL_CR3
+ * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit.
+ * Do a conditional SWITCH_KERNEL_CR3: this could safely be done
+ * unconditionally, but we need to find out whether the reverse
+ * should be done on return (conveyed to paranoid_exit in %ebx).
+ */
+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ testl $KAISER_SHADOW_PGD_OFFSET, %eax
+ jz 2f
+ orl $2, %ebx
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+ /* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */
+ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ movq %rax, %cr3
+2:
+#endif
+ ret
END(paranoid_entry)
/*
@@ -1021,19 +1080,26 @@ END(paranoid_entry)
* be complicated. Fortunately, we there's no good reason
* to try to handle preemption here.
*
- * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
+ * On entry: ebx=0: needs swapgs but not SWITCH_USER_CR3
+ * ebx=1: needs neither swapgs nor SWITCH_USER_CR3
+ * ebx=2: needs both swapgs and SWITCH_USER_CR3
+ * ebx=3: needs SWITCH_USER_CR3 but not swapgs
*/
ENTRY(paranoid_exit)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF_DEBUG
- testl %ebx, %ebx /* swapgs needed? */
+ TRACE_IRQS_IRETQ_DEBUG
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */
+ testl $2, %ebx /* SWITCH_USER_CR3 needed? */
+ jz paranoid_exit_no_switch
+ SWITCH_USER_CR3
+paranoid_exit_no_switch:
+#endif
+ testl $1, %ebx /* swapgs needed? */
jnz paranoid_exit_no_swapgs
- TRACE_IRQS_IRETQ
SWAPGS_UNSAFE_STACK
- jmp paranoid_exit_restore
paranoid_exit_no_swapgs:
- TRACE_IRQS_IRETQ_DEBUG
-paranoid_exit_restore:
RESTORE_EXTRA_REGS
RESTORE_C_REGS
REMOVE_PT_GPREGS_FROM_STACK 8
@@ -1048,6 +1114,13 @@ ENTRY(error_entry)
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
+ /*
+ * error_entry() always returns with a kernel gsbase and
+ * CR3. We must also have a kernel CR3/gsbase before
+ * calling TRACE_IRQS_*. Just unconditionally switch to
+ * the kernel CR3 here.
+ */
+ SWITCH_KERNEL_CR3
xorl %ebx, %ebx
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
@@ -1210,6 +1283,10 @@ ENTRY(nmi)
*/
SWAPGS_UNSAFE_STACK
+ /*
+ * percpu variables are mapped with user CR3, so no need
+ * to switch CR3 here.
+ */
cld
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -1243,12 +1320,34 @@ ENTRY(nmi)
movq %rsp, %rdi
movq $-1, %rsi
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+ movq %rax, %cr3
+2:
+#endif
call do_nmi
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * Unconditionally restore CR3. I know we return to
+ * kernel code that needs user CR3, but do we ever return
+ * to "user mode" where we need the kernel CR3?
+ */
+ ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER
+#endif
+
/*
* Return back to user mode. We must *not* do the normal exit
- * work, because we don't want to enable interrupts. Fortunately,
- * do_nmi doesn't modify pt_regs.
+ * work, because we don't want to enable interrupts. Do not
+ * switch to user CR3: we might be going back to kernel code
+ * that had a user CR3 set.
*/
SWAPGS
jmp restore_c_regs_and_iret
@@ -1445,22 +1544,55 @@ end_repeat_nmi:
ALLOC_PT_GPREGS_ON_STACK
/*
- * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
- * as we should not be calling schedule in NMI context.
- * Even with normal interrupts enabled. An NMI should not be
- * setting NEED_RESCHED or anything that normal interrupts and
- * exceptions might do.
+ * Use the same approach as paranoid_entry to handle SWAPGS, but
+ * without CR3 handling since we do that differently in NMIs. No
+ * need to use paranoid_exit as we should not be calling schedule
+ * in NMI context. Even with normal interrupts enabled. An NMI
+ * should not be setting NEED_RESCHED or anything that normal
+ * interrupts and exceptions might do.
*/
- call paranoid_entry
-
- /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+ cld
+ SAVE_C_REGS
+ SAVE_EXTRA_REGS
+ movl $1, %ebx
+ movl $MSR_GS_BASE, %ecx
+ rdmsr
+ testl %edx, %edx
+ js 1f /* negative -> in kernel */
+ SWAPGS
+ xorl %ebx, %ebx
+1:
movq %rsp, %rdi
movq $-1, %rsi
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+ movq %rax, %cr3
+2:
+#endif
+
+ /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
call do_nmi
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * Unconditionally restore CR3. We might be returning to
+ * kernel code that needs user CR3, like just just before
+ * a sysret.
+ */
+ ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER
+#endif
+
testl %ebx, %ebx /* swapgs needed? */
jnz nmi_restore
nmi_swapgs:
+ /* We fixed up CR3 above, so no need to switch it here */
SWAPGS_UNSAFE_STACK
nmi_restore:
RESTORE_EXTRA_REGS
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 15cfebaa7688..d03bf0e28b8b 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -13,6 +13,8 @@
#include <asm/irqflags.h>
#include <asm/asm.h>
#include <asm/smap.h>
+#include <asm/pgtable_types.h>
+#include <asm/kaiser.h>
#include <linux/linkage.h>
#include <linux/err.h>
@@ -50,6 +52,7 @@ ENDPROC(native_usergs_sysret32)
ENTRY(entry_SYSENTER_compat)
/* Interrupts are off on entry. */
SWAPGS_UNSAFE_STACK
+ SWITCH_KERNEL_CR3_NO_STACK
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
/*
@@ -161,6 +164,7 @@ ENDPROC(entry_SYSENTER_compat)
ENTRY(entry_SYSCALL_compat)
/* Interrupts are off on entry. */
SWAPGS_UNSAFE_STACK
+ SWITCH_KERNEL_CR3_NO_STACK
/* Stash user ESP and switch to the kernel stack. */
movl %esp, %r8d
@@ -208,6 +212,7 @@ ENTRY(entry_SYSCALL_compat)
/* Opportunistic SYSRET */
sysret32_from_system_call:
TRACE_IRQS_ON /* User mode traces as IRQs on. */
+ SWITCH_USER_CR3
movq RBX(%rsp), %rbx /* pt_regs->rbx */
movq RBP(%rsp), %rbp /* pt_regs->rbp */
movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
@@ -269,6 +274,7 @@ ENTRY(entry_INT80_compat)
PARAVIRT_ADJUST_EXCEPTION_FRAME
ASM_CLAC /* Do this early to minimize exposure */
SWAPGS
+ SWITCH_KERNEL_CR3_NO_STACK
/*
* User tracing code (ptrace or signal handlers) might assume that
@@ -311,6 +317,7 @@ ENTRY(entry_INT80_compat)
/* Go back to user mode. */
TRACE_IRQS_ON
+ SWITCH_USER_CR3
SWAPGS
jmp restore_regs_and_iret
END(entry_INT80_compat)
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 7af017a8958f..fddeb1f4dcd2 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -5,6 +5,9 @@
KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT := n
+
VDSO64-$(CONFIG_X86_64) := y
VDSOX32-$(CONFIG_X86_X32_ABI) := y
VDSO32-$(CONFIG_X86_32) := y
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index ca94fa649251..5dd363d54348 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -36,6 +36,11 @@ static notrace cycle_t vread_hpet(void)
}
#endif
+#ifdef CONFIG_PARAVIRT_CLOCK
+extern u8 pvclock_page
+ __attribute__((visibility("hidden")));
+#endif
+
#ifndef BUILD_VDSO32
#include <linux/kernel.h>
@@ -62,63 +67,65 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
#ifdef CONFIG_PARAVIRT_CLOCK
-static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
+static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
{
- const struct pvclock_vsyscall_time_info *pvti_base;
- int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
- int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
-
- BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
-
- pvti_base = (struct pvclock_vsyscall_time_info *)
- __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
-
- return &pvti_base[offset];
+ return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
}
static notrace cycle_t vread_pvclock(int *mode)
{
- const struct pvclock_vsyscall_time_info *pvti;
+ const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
cycle_t ret;
- u64 last;
- u32 version;
- u8 flags;
- unsigned cpu, cpu1;
-
+ u64 tsc, pvti_tsc;
+ u64 last, delta, pvti_system_time;
+ u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;
/*
- * Note: hypervisor must guarantee that:
- * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
- * 2. that per-CPU pvclock time info is updated if the
- * underlying CPU changes.
- * 3. that version is increased whenever underlying CPU
- * changes.
+ * Note: The kernel and hypervisor must guarantee that cpu ID
+ * number maps 1:1 to per-CPU pvclock time info.
+ *
+ * Because the hypervisor is entirely unaware of guest userspace
+ * preemption, it cannot guarantee that per-CPU pvclock time
+ * info is updated if the underlying CPU changes or that that
+ * version is increased whenever underlying CPU changes.
*
+ * On KVM, we are guaranteed that pvti updates for any vCPU are
+ * atomic as seen by *all* vCPUs. This is an even stronger
+ * guarantee than we get with a normal seqlock.
+ *
+ * On Xen, we don't appear to have that guarantee, but Xen still
+ * supplies a valid seqlock using the version field.
+
+ * We only do pvclock vdso timing at all if
+ * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
+ * mean that all vCPUs have matching pvti and that the TSC is
+ * synced, so we can just look at vCPU 0's pvti.
*/
- do {
- cpu = __getcpu() & VGETCPU_CPU_MASK;
- /* TODO: We can put vcpu id into higher bits of pvti.version.
- * This will save a couple of cycles by getting rid of
- * __getcpu() calls (Gleb).
- */
-
- pvti = get_pvti(cpu);
-
- version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
-
- /*
- * Test we're still on the cpu as well as the version.
- * We could have been migrated just after the first
- * vgetcpu but before fetching the version, so we
- * wouldn't notice a version change.
- */
- cpu1 = __getcpu() & VGETCPU_CPU_MASK;
- } while (unlikely(cpu != cpu1 ||
- (pvti->pvti.version & 1) ||
- pvti->pvti.version != version));
-
- if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
+
+ if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
*mode = VCLOCK_NONE;
+ return 0;
+ }
+
+ do {
+ version = pvti->version;
+
+ /* This is also a read barrier, so we'll read version first. */
+ tsc = rdtsc_ordered();
+
+ pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
+ pvti_tsc_shift = pvti->tsc_shift;
+ pvti_system_time = pvti->system_time;
+ pvti_tsc = pvti->tsc_timestamp;
+
+ /* Make sure that the version double-check is last. */
+ smp_rmb();
+ } while (unlikely((version & 1) || version != pvti->version));
+
+ delta = tsc - pvti_tsc;
+ ret = pvti_system_time +
+ pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
+ pvti_tsc_shift);
/* refer to tsc.c read_tsc() comment for rationale */
last = gtod->cycle_last;
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index de2c921025f5..4158acc17df0 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -25,7 +25,7 @@ SECTIONS
* segment.
*/
- vvar_start = . - 2 * PAGE_SIZE;
+ vvar_start = . - 3 * PAGE_SIZE;
vvar_page = vvar_start;
/* Place all vvars at the offsets in asm/vvar.h. */
@@ -36,6 +36,7 @@ SECTIONS
#undef EMIT_VVAR
hpet_page = vvar_start + PAGE_SIZE;
+ pvclock_page = vvar_start + 2 * PAGE_SIZE;
. = SIZEOF_HEADERS;
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 785d9922b106..491020b2826d 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -73,6 +73,7 @@ enum {
sym_vvar_start,
sym_vvar_page,
sym_hpet_page,
+ sym_pvclock_page,
sym_VDSO_FAKE_SECTION_TABLE_START,
sym_VDSO_FAKE_SECTION_TABLE_END,
};
@@ -80,6 +81,7 @@ enum {
const int special_pages[] = {
sym_vvar_page,
sym_hpet_page,
+ sym_pvclock_page,
};
struct vdso_sym {
@@ -91,6 +93,7 @@ struct vdso_sym required_syms[] = {
[sym_vvar_start] = {"vvar_start", true},
[sym_vvar_page] = {"vvar_page", true},
[sym_hpet_page] = {"hpet_page", true},
+ [sym_pvclock_page] = {"pvclock_page", true},
[sym_VDSO_FAKE_SECTION_TABLE_START] = {
"VDSO_FAKE_SECTION_TABLE_START", false
},
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 64df47148160..b8f69e264ac4 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -12,6 +12,7 @@
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
+#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
@@ -100,6 +101,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
.name = "[vvar]",
.pages = no_pages,
};
+ struct pvclock_vsyscall_time_info *pvti;
if (calculate_addr) {
addr = vdso_addr(current->mm->start_stack,
@@ -169,6 +171,18 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
}
#endif
+ pvti = pvclock_pvti_cpu0_va();
+ if (pvti && image->sym_pvclock_page) {
+ ret = remap_pfn_range(vma,
+ text_start + image->sym_pvclock_page,
+ __pa(pvti) >> PAGE_SHIFT,
+ PAGE_SIZE,
+ PAGE_READONLY);
+
+ if (ret)
+ goto up_fail;
+ }
+
up_fail:
if (ret)
current->mm->context.vdso = NULL;
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 174c2549939d..112178b401a1 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -66,6 +66,11 @@ static int __init vsyscall_setup(char *str)
}
early_param("vsyscall", vsyscall_setup);
+bool vsyscall_enabled(void)
+{
+ return vsyscall_mode != NONE;
+}
+
static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
const char *message)
{
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 09936e9c8154..d1cf17173b1b 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -138,7 +138,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
- ".popsection"
+ ".popsection\n"
#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
OLDINSTR_2(oldinstr, 1, 2) \
@@ -149,7 +149,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
- ".popsection"
+ ".popsection\n"
/*
* This must be included *after* the definition of ALTERNATIVE due to
diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h
index e01f7f7ccb0c..84ae170bc3d0 100644
--- a/arch/x86/include/asm/cmdline.h
+++ b/arch/x86/include/asm/cmdline.h
@@ -2,5 +2,7 @@
#define _ASM_X86_CMDLINE_H
int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
+int cmdline_find_option(const char *cmdline_ptr, const char *option,
+ char *buffer, int bufsize);
#endif /* _ASM_X86_CMDLINE_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index f7ba9fbf12ee..142028afd049 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -187,6 +187,7 @@
#define X86_FEATURE_ARAT ( 7*32+ 1) /* Always Running APIC Timer */
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 4) /* Effectively INVPCID && CR4.PCIDE=1 */
#define X86_FEATURE_PLN ( 7*32+ 5) /* Intel Power Limit Notification */
#define X86_FEATURE_PTS ( 7*32+ 6) /* Intel Package Thermal Status */
#define X86_FEATURE_DTHERM ( 7*32+ 7) /* Digital Thermal Sensor */
@@ -199,6 +200,9 @@
#define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
@@ -273,6 +277,9 @@
#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
@@ -355,6 +362,8 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
set_bit(bit, (unsigned long *)cpu_caps_set); \
} while (0)
+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
+
#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU)
#define cpu_has_de boot_cpu_has(X86_FEATURE_DE)
#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4e10d73cf018..880db91d9457 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -43,7 +43,7 @@ struct gdt_page {
struct desc_struct gdt[GDT_ENTRIES];
} __attribute__((aligned(PAGE_SIZE)));
-DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
+DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page);
static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
{
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index f226df064660..8b17c2ad1048 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -21,11 +21,13 @@
# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31))
# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31))
+# define DISABLE_PCID 0
#else
# define DISABLE_VME 0
# define DISABLE_K6_MTRR 0
# define DISABLE_CYRIX_ARR 0
# define DISABLE_CENTAUR_MCR 0
+# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31))
#endif /* CONFIG_X86_64 */
/*
@@ -35,7 +37,7 @@
#define DISABLED_MASK1 0
#define DISABLED_MASK2 0
#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
-#define DISABLED_MASK4 0
+#define DISABLED_MASK4 (DISABLE_PCID)
#define DISABLED_MASK5 0
#define DISABLED_MASK6 0
#define DISABLED_MASK7 0
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 7178043b0e1d..9b76cd331990 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -22,12 +22,8 @@ typedef struct {
#ifdef CONFIG_SMP
unsigned int irq_resched_count;
unsigned int irq_call_count;
- /*
- * irq_tlb_count is double-counted in irq_call_count, so it must be
- * subtracted from irq_call_count when displaying irq_call_count
- */
- unsigned int irq_tlb_count;
#endif
+ unsigned int irq_tlb_count;
#ifdef CONFIG_X86_THERMAL_VECTOR
unsigned int irq_thermal_count;
#endif
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 59caa55fb9b5..ee52ff858699 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -187,7 +187,7 @@ extern char irq_entries_start[];
#define VECTOR_RETRIGGERED ((void *)~0UL)
typedef struct irq_desc* vector_irq_t[NR_VECTORS];
-DECLARE_PER_CPU(vector_irq_t, vector_irq);
+DECLARE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq);
#endif /* !ASSEMBLY_ */
diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h
new file mode 100644
index 000000000000..48c791a411ab
--- /dev/null
+++ b/arch/x86/include/asm/kaiser.h
@@ -0,0 +1,151 @@
+#ifndef _ASM_X86_KAISER_H
+#define _ASM_X86_KAISER_H
+
+#include <uapi/asm/processor-flags.h> /* For PCID constants */
+
+/*
+ * This file includes the definitions for the KAISER feature.
+ * KAISER is a counter measure against x86_64 side channel attacks on
+ * the kernel virtual memory. It has a shadow pgd for every process: the
+ * shadow pgd has a minimalistic kernel-set mapped, but includes the whole
+ * user memory. Within a kernel context switch, or when an interrupt is handled,
+ * the pgd is switched to the normal one. When the system switches to user mode,
+ * the shadow pgd is enabled. By this, the virtual memory caches are freed,
+ * and the user may not attack the whole kernel memory.
+ *
+ * A minimalistic kernel mapping holds the parts needed to be mapped in user
+ * mode, such as the entry/exit functions of the user space, or the stacks.
+ */
+
+#define KAISER_SHADOW_PGD_OFFSET 0x1000
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * A page table address must have this alignment to stay the same when
+ * KAISER_SHADOW_PGD_OFFSET mask is applied
+ */
+#define KAISER_KERNEL_PGD_ALIGNMENT (KAISER_SHADOW_PGD_OFFSET << 1)
+#else
+#define KAISER_KERNEL_PGD_ALIGNMENT PAGE_SIZE
+#endif
+
+#ifdef __ASSEMBLY__
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+
+.macro _SWITCH_TO_KERNEL_CR3 reg
+movq %cr3, \reg
+andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+/* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */
+ALTERNATIVE "", "bts $63, \reg", X86_FEATURE_PCID
+movq \reg, %cr3
+.endm
+
+.macro _SWITCH_TO_USER_CR3 reg regb
+/*
+ * regb must be the low byte portion of reg: because we have arranged
+ * for the low byte of the user PCID to serve as the high byte of NOFLUSH
+ * (0x80 for each when PCID is enabled, or 0x00 when PCID and NOFLUSH are
+ * not enabled): so that the one register can update both memory and cr3.
+ */
+movq %cr3, \reg
+orq PER_CPU_VAR(x86_cr3_pcid_user), \reg
+js 9f
+/* If PCID enabled, FLUSH this time, reset to NOFLUSH for next time */
+movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7)
+9:
+movq \reg, %cr3
+.endm
+
+.macro SWITCH_KERNEL_CR3
+ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER
+_SWITCH_TO_KERNEL_CR3 %rax
+popq %rax
+8:
+.endm
+
+.macro SWITCH_USER_CR3
+ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER
+_SWITCH_TO_USER_CR3 %rax %al
+popq %rax
+8:
+.endm
+
+.macro SWITCH_KERNEL_CR3_NO_STACK
+ALTERNATIVE "jmp 8f", \
+ __stringify(movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)), \
+ X86_FEATURE_KAISER
+_SWITCH_TO_KERNEL_CR3 %rax
+movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+8:
+.endm
+
+#else /* CONFIG_PAGE_TABLE_ISOLATION */
+
+.macro SWITCH_KERNEL_CR3
+.endm
+.macro SWITCH_USER_CR3
+.endm
+.macro SWITCH_KERNEL_CR3_NO_STACK
+.endm
+
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Upon kernel/user mode switch, it may happen that the address
+ * space has to be switched before the registers have been
+ * stored. To change the address space, another register is
+ * needed. A register therefore has to be stored/restored.
+*/
+DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+
+extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+
+extern int kaiser_enabled;
+extern void __init kaiser_check_boottime_disable(void);
+#else
+#define kaiser_enabled 0
+static inline void __init kaiser_check_boottime_disable(void) {}
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+/*
+ * Kaiser function prototypes are needed even when CONFIG_PAGE_TABLE_ISOLATION is not set,
+ * so as to build with tests on kaiser_enabled instead of #ifdefs.
+ */
+
+/**
+ * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
+ * @addr: the start address of the range
+ * @size: the size of the range
+ * @flags: The mapping flags of the pages
+ *
+ * The mapping is done on a global scope, so no bigger
+ * synchronization has to be done. the pages have to be
+ * manually unmapped again when they are not needed any longer.
+ */
+extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
+
+/**
+ * kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping
+ * @addr: the start address of the range
+ * @size: the size of the range
+ */
+extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
+
+/**
+ * kaiser_init - Initialize the shadow mapping
+ *
+ * Most parts of the shadow mapping can be mapped upon boot
+ * time. Only per-process things like the thread stacks
+ * or a new LDT have to be mapped at runtime. These boot-
+ * time mappings are permanent and never unmapped.
+ */
+extern void kaiser_init(void);
+
+#endif /* __ASSEMBLY */
+
+#endif /* _ASM_X86_KAISER_H */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 55234d5e7160..7680b76adafc 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -24,12 +24,6 @@ typedef struct {
atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
} mm_context_t;
-#ifdef CONFIG_SMP
void leave_mm(int cpu);
-#else
-static inline void leave_mm(int cpu)
-{
-}
-#endif
#endif /* _ASM_X86_MMU_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index bfd9b2a35a0b..9bfc5fd77015 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -98,109 +98,16 @@ static inline void load_mm_ldt(struct mm_struct *mm)
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
-#ifdef CONFIG_SMP
if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
-#endif
}
-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
- struct task_struct *tsk)
-{
- unsigned cpu = smp_processor_id();
-
- if (likely(prev != next)) {
-#ifdef CONFIG_SMP
- this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
- this_cpu_write(cpu_tlbstate.active_mm, next);
-#endif
- cpumask_set_cpu(cpu, mm_cpumask(next));
-
- /*
- * Re-load page tables.
- *
- * This logic has an ordering constraint:
- *
- * CPU 0: Write to a PTE for 'next'
- * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
- * CPU 1: set bit 1 in next's mm_cpumask
- * CPU 1: load from the PTE that CPU 0 writes (implicit)
- *
- * We need to prevent an outcome in which CPU 1 observes
- * the new PTE value and CPU 0 observes bit 1 clear in
- * mm_cpumask. (If that occurs, then the IPI will never
- * be sent, and CPU 0's TLB will contain a stale entry.)
- *
- * The bad outcome can occur if either CPU's load is
- * reordered before that CPU's store, so both CPUs must
- * execute full barriers to prevent this from happening.
- *
- * Thus, switch_mm needs a full barrier between the
- * store to mm_cpumask and any operation that could load
- * from next->pgd. TLB fills are special and can happen
- * due to instruction fetches or for no reason at all,
- * and neither LOCK nor MFENCE orders them.
- * Fortunately, load_cr3() is serializing and gives the
- * ordering guarantee we need.
- *
- */
- load_cr3(next->pgd);
-
- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-
- /* Stop flush ipis for the previous mm */
- cpumask_clear_cpu(cpu, mm_cpumask(prev));
+extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk);
- /* Load per-mm CR4 state */
- load_mm_cr4(next);
-
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
- /*
- * Load the LDT, if the LDT is different.
- *
- * It's possible that prev->context.ldt doesn't match
- * the LDT register. This can happen if leave_mm(prev)
- * was called and then modify_ldt changed
- * prev->context.ldt but suppressed an IPI to this CPU.
- * In this case, prev->context.ldt != NULL, because we
- * never set context.ldt to NULL while the mm still
- * exists. That means that next->context.ldt !=
- * prev->context.ldt, because mms never share an LDT.
- */
- if (unlikely(prev->context.ldt != next->context.ldt))
- load_mm_ldt(next);
-#endif
- }
-#ifdef CONFIG_SMP
- else {
- this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
- BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
-
- if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
- /*
- * On established mms, the mm_cpumask is only changed
- * from irq context, from ptep_clear_flush() while in
- * lazy tlb mode, and here. Irqs are blocked during
- * schedule, protecting us from simultaneous changes.
- */
- cpumask_set_cpu(cpu, mm_cpumask(next));
-
- /*
- * We were in lazy tlb mode and leave_mm disabled
- * tlb flush IPI delivery. We must reload CR3
- * to make sure to use no freed page tables.
- *
- * As above, load_cr3() is serializing and orders TLB
- * fills with respect to the mm_cpumask write.
- */
- load_cr3(next->pgd);
- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
- load_mm_cr4(next);
- load_mm_ldt(next);
- }
- }
-#endif
-}
+extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk);
+#define switch_mm_irqs_off switch_mm_irqs_off
#define activate_mm(prev, next) \
do { \
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 6ec0c8b2e9df..84c62d950023 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -18,6 +18,12 @@
#ifndef __ASSEMBLY__
#include <asm/x86_init.h>
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+extern int kaiser_enabled;
+#else
+#define kaiser_enabled 0
+#endif
+
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
void ptdump_walk_pgd_level_checkwx(void);
@@ -653,7 +659,17 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
static inline int pgd_bad(pgd_t pgd)
{
- return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
+ pgdval_t ignore_flags = _PAGE_USER;
+ /*
+ * We set NX on KAISER pgds that map userspace memory so
+ * that userspace can not meaningfully use the kernel
+ * page table by accident; it will fault on the first
+ * instruction it tries to run. See native_set_pgd().
+ */
+ if (kaiser_enabled)
+ ignore_flags |= _PAGE_NX;
+
+ return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
}
static inline int pgd_none(pgd_t pgd)
@@ -855,7 +871,15 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
*/
static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
- memcpy(dst, src, count * sizeof(pgd_t));
+ memcpy(dst, src, count * sizeof(pgd_t));
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ if (kaiser_enabled) {
+ /* Clone the shadow pgd part as well */
+ memcpy(native_get_shadow_pgd(dst),
+ native_get_shadow_pgd(src),
+ count * sizeof(pgd_t));
+ }
+#endif
}
#define PTE_SHIFT ilog2(PTRS_PER_PTE)
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 2ee781114d34..c810226e741a 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -106,9 +106,32 @@ static inline void native_pud_clear(pud_t *pud)
native_set_pud(pud, native_make_pud(0));
}
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd);
+
+static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+{
+#ifdef CONFIG_DEBUG_VM
+ /* linux/mmdebug.h may not have been included at this point */
+ BUG_ON(!kaiser_enabled);
+#endif
+ return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE);
+}
+#else
+static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ return pgd;
+}
+static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+{
+ BUILD_BUG_ON(1);
+ return NULL;
+}
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
- *pgdp = pgd;
+ *pgdp = kaiser_set_shadow_pgd(pgdp, pgd);
}
static inline void native_pgd_clear(pgd_t *pgd)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 79c91853e50e..8dba273da25a 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -89,7 +89,7 @@
#define _PAGE_NX (_AT(pteval_t, 0))
#endif
-#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
+#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
_PAGE_ACCESSED | _PAGE_DIRTY)
@@ -102,6 +102,33 @@
_PAGE_SOFT_DIRTY)
#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
+/* The ASID is the lower 12 bits of CR3 */
+#define X86_CR3_PCID_ASID_MASK (_AC((1<<12)-1,UL))
+
+/* Mask for all the PCID-related bits in CR3: */
+#define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK)
+#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL))
+
+#if defined(CONFIG_PAGE_TABLE_ISOLATION) && defined(CONFIG_X86_64)
+/* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */
+#define X86_CR3_PCID_ASID_USER (_AC(0x80,UL))
+
+#define X86_CR3_PCID_KERN_FLUSH (X86_CR3_PCID_ASID_KERN)
+#define X86_CR3_PCID_USER_FLUSH (X86_CR3_PCID_ASID_USER)
+#define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN)
+#define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER)
+#else
+#define X86_CR3_PCID_ASID_USER (_AC(0x0,UL))
+/*
+ * PCIDs are unsupported on 32-bit and none of these bits can be
+ * set in CR3:
+ */
+#define X86_CR3_PCID_KERN_FLUSH (0)
+#define X86_CR3_PCID_USER_FLUSH (0)
+#define X86_CR3_PCID_KERN_NOFLUSH (0)
+#define X86_CR3_PCID_USER_NOFLUSH (0)
+#endif
+
/*
* The cache modes defined here are used to translate between pure SW usage
* and the HW defined cache mode bits and/or PAT entries.
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2d5a50cb61a2..c124d6ab4bf9 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -156,8 +156,8 @@ extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 new_cpu_data;
extern struct tss_struct doublefault_tss;
-extern __u32 cpu_caps_cleared[NCAPINTS];
-extern __u32 cpu_caps_set[NCAPINTS];
+extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
#ifdef CONFIG_SMP
DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
@@ -305,7 +305,7 @@ struct tss_struct {
} ____cacheline_aligned;
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss);
#ifdef CONFIG_X86_32
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index baad72e4c100..c926255745e1 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -4,6 +4,15 @@
#include <linux/clocksource.h>
#include <asm/pvclock-abi.h>
+#ifdef CONFIG_KVM_GUEST
+extern struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void);
+#else
+static inline struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
+{
+ return NULL;
+}
+#endif
+
/* some helper functions for xen and kvm pv clock sources */
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 91dfcafe27a6..bad25bb80679 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -21,7 +21,7 @@ asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
asmlinkage long sys_iopl(unsigned int);
/* kernel/ldt.c */
-asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
+asmlinkage long sys_modify_ldt(int, void __user *, unsigned long);
/* kernel/signal.c */
asmlinkage long sys_rt_sigreturn(void);
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6433e28dc9c8..a691b66cc40a 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -6,6 +6,55 @@
#include <asm/processor.h>
#include <asm/special_insns.h>
+#include <asm/smp.h>
+
+static inline void __invpcid(unsigned long pcid, unsigned long addr,
+ unsigned long type)
+{
+ struct { u64 d[2]; } desc = { { pcid, addr } };
+
+ /*
+ * The memory clobber is because the whole point is to invalidate
+ * stale TLB entries and, especially if we're flushing global
+ * mappings, we don't want the compiler to reorder any subsequent
+ * memory accesses before the TLB flush.
+ *
+ * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+ * invpcid (%rcx), %rax in long mode.
+ */
+ asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+ : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+}
+
+#define INVPCID_TYPE_INDIV_ADDR 0
+#define INVPCID_TYPE_SINGLE_CTXT 1
+#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
+#define INVPCID_TYPE_ALL_NON_GLOBAL 3
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invpcid_flush_one(unsigned long pcid,
+ unsigned long addr)
+{
+ __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invpcid_flush_single_context(unsigned long pcid)
+{
+ __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invpcid_flush_all(void)
+{
+ __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invpcid_flush_all_nonglobals(void)
+{
+ __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+}
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
@@ -16,10 +65,8 @@
#endif
struct tlb_state {
-#ifdef CONFIG_SMP
struct mm_struct *active_mm;
int state;
-#endif
/*
* Access to this CR4 shadow and to H/W CR4 is protected by
@@ -84,6 +131,24 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
cr4_set_bits(mask);
}
+/*
+ * Declare a couple of kaiser interfaces here for convenience,
+ * to avoid the need for asm/kaiser.h in unexpected places.
+ */
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+extern int kaiser_enabled;
+extern void kaiser_setup_pcid(void);
+extern void kaiser_flush_tlb_on_return_to_user(void);
+#else
+#define kaiser_enabled 0
+static inline void kaiser_setup_pcid(void)
+{
+}
+static inline void kaiser_flush_tlb_on_return_to_user(void)
+{
+}
+#endif
+
static inline void __native_flush_tlb(void)
{
/*
@@ -92,6 +157,8 @@ static inline void __native_flush_tlb(void)
* back:
*/
preempt_disable();
+ if (kaiser_enabled)
+ kaiser_flush_tlb_on_return_to_user();
native_write_cr3(native_read_cr3());
preempt_enable();
}
@@ -101,39 +168,84 @@ static inline void __native_flush_tlb_global_irq_disabled(void)
unsigned long cr4;
cr4 = this_cpu_read(cpu_tlbstate.cr4);
- /* clear PGE */
- native_write_cr4(cr4 & ~X86_CR4_PGE);
- /* write old PGE again and flush TLBs */
- native_write_cr4(cr4);
+ if (cr4 & X86_CR4_PGE) {
+ /* clear PGE and flush TLB of all entries */
+ native_write_cr4(cr4 & ~X86_CR4_PGE);
+ /* restore PGE as it was before */
+ native_write_cr4(cr4);
+ } else {
+ /* do it with cr3, letting kaiser flush user PCID */
+ __native_flush_tlb();
+ }
}
static inline void __native_flush_tlb_global(void)
{
unsigned long flags;
+ if (this_cpu_has(X86_FEATURE_INVPCID)) {
+ /*
+ * Using INVPCID is considerably faster than a pair of writes
+ * to CR4 sandwiched inside an IRQ flag save/restore.
+ *
+ * Note, this works with CR4.PCIDE=0 or 1.
+ */
+ invpcid_flush_all();
+ return;
+ }
+
/*
* Read-modify-write to CR4 - protect it from preemption and
* from interrupts. (Use the raw variant because this code can
* be called from deep inside debugging code.)
*/
raw_local_irq_save(flags);
-
__native_flush_tlb_global_irq_disabled();
-
raw_local_irq_restore(flags);
}
static inline void __native_flush_tlb_single(unsigned long addr)
{
- asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ /*
+ * SIMICS #GP's if you run INVPCID with type 2/3
+ * and X86_CR4_PCIDE clear. Shame!
+ *
+ * The ASIDs used below are hard-coded. But, we must not
+ * call invpcid(type=1/2) before CR4.PCIDE=1. Just call
+ * invlpg in the case we are called early.
+ */
+
+ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
+ if (kaiser_enabled)
+ kaiser_flush_tlb_on_return_to_user();
+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ return;
+ }
+ /* Flush the address out of both PCIDs. */
+ /*
+ * An optimization here might be to determine addresses
+ * that are only kernel-mapped and only flush the kernel
+ * ASID. But, userspace flushes are probably much more
+ * important performance-wise.
+ *
+ * Make sure to do only a single invpcid when KAISER is
+ * disabled and we have only a single ASID.
+ */
+ if (kaiser_enabled)
+ invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
+ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
}
static inline void __flush_tlb_all(void)
{
- if (cpu_has_pge)
- __flush_tlb_global();
- else
- __flush_tlb();
+ __flush_tlb_global();
+ /*
+ * Note: if we somehow had PCID but not PGE, then this wouldn't work --
+ * we'd end up flushing kernel translations for the current ASID but
+ * we might fail to flush kernel translations for other cached ASIDs.
+ *
+ * To avoid this issue, we force PCID off if PGE is off.
+ */
}
static inline void __flush_tlb_one(unsigned long addr)
@@ -147,7 +259,6 @@ static inline void __flush_tlb_one(unsigned long addr)
/*
* TLB flushing:
*
- * - flush_tlb() flushes the current mm struct TLBs
* - flush_tlb_all() flushes all processes TLBs
* - flush_tlb_mm(mm) flushes the specified mm context TLB's
* - flush_tlb_page(vma, vmaddr) flushes one page
@@ -159,84 +270,6 @@ static inline void __flush_tlb_one(unsigned long addr)
* and page-granular flushes are available only on i486 and up.
*/
-#ifndef CONFIG_SMP
-
-/* "_up" is for UniProcessor.
- *
- * This is a helper for other header functions. *Not* intended to be called
- * directly. All global TLB flushes need to either call this, or to bump the
- * vm statistics themselves.
- */
-static inline void __flush_tlb_up(void)
-{
- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
- __flush_tlb();
-}
-
-static inline void flush_tlb_all(void)
-{
- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
- __flush_tlb_all();
-}
-
-static inline void flush_tlb(void)
-{
- __flush_tlb_up();
-}
-
-static inline void local_flush_tlb(void)
-{
- __flush_tlb_up();
-}
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
- if (mm == current->active_mm)
- __flush_tlb_up();
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma,
- unsigned long addr)
-{
- if (vma->vm_mm == current->active_mm)
- __flush_tlb_one(addr);
-}
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
- if (vma->vm_mm == current->active_mm)
- __flush_tlb_up();
-}
-
-static inline void flush_tlb_mm_range(struct mm_struct *mm,
- unsigned long start, unsigned long end, unsigned long vmflag)
-{
- if (mm == current->active_mm)
- __flush_tlb_up();
-}
-
-static inline void native_flush_tlb_others(const struct cpumask *cpumask,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
-{
-}
-
-static inline void reset_lazy_tlbstate(void)
-{
-}
-
-static inline void flush_tlb_kernel_range(unsigned long start,
- unsigned long end)
-{
- flush_tlb_all();
-}
-
-#else /* SMP */
-
-#include <asm/smp.h>
-
#define local_flush_tlb() __flush_tlb()
#define flush_tlb_mm(mm) flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)
@@ -245,13 +278,14 @@ static inline void flush_tlb_kernel_range(unsigned long start,
flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
extern void flush_tlb_all(void);
-extern void flush_tlb_current_task(void);
-extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned long vmflag);
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
-#define flush_tlb() flush_tlb_current_task()
+static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
+{
+ flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);
+}
void native_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
@@ -266,14 +300,6 @@ static inline void reset_lazy_tlbstate(void)
this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
}
-#endif /* SMP */
-
-/* Not inlined due to inc_irq_stat not being defined yet */
-#define flush_tlb_local() { \
- inc_irq_stat(irq_tlb_count); \
- local_flush_tlb(); \
-}
-
#ifndef CONFIG_PARAVIRT
#define flush_tlb_others(mask, mm, start, end) \
native_flush_tlb_others(mask, mm, start, end)
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 756de9190aec..deabaf9759b6 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -22,6 +22,7 @@ struct vdso_image {
long sym_vvar_page;
long sym_hpet_page;
+ long sym_pvclock_page;
long sym_VDSO32_NOTE_MASK;
long sym___kernel_sigreturn;
long sym___kernel_rt_sigreturn;
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index 6ba66ee79710..4865e10dbb55 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -12,12 +12,14 @@ extern void map_vsyscall(void);
* Returns true if handled.
*/
extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
+extern bool vsyscall_enabled(void);
#else
static inline void map_vsyscall(void) {}
static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
{
return false;
}
+static inline bool vsyscall_enabled(void) { return false; }
#endif
#endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 79887abcb5e1..1361779f44fe 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -77,7 +77,8 @@
#define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT)
#define X86_CR3_PCD_BIT 4 /* Page Cache Disable */
#define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT)
-#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */
+#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
+#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
/*
* Intel CPU features in CR4
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b1b78ffe01d0..616ebd22ef9a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -16,9 +16,21 @@ CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
endif
-KASAN_SANITIZE_head$(BITS).o := n
-KASAN_SANITIZE_dumpstack.o := n
-KASAN_SANITIZE_dumpstack_$(BITS).o := n
+KASAN_SANITIZE_head$(BITS).o := n
+KASAN_SANITIZE_dumpstack.o := n
+KASAN_SANITIZE_dumpstack_$(BITS).o := n
+KASAN_SANITIZE_stacktrace.o := n
+
+OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_mcount_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_test_nx.o := y
+
+# If instrumentation of this dir is enabled, boot hangs during first second.
+# Probably could be more selective here, but note that files related to irqs,
+# boot, dumpstack/stacktrace, etc are either non-interesting or can lead to
+# non-deterministic coverage.
+KCOV_INSTRUMENT := n
CFLAGS_irq.o := -I$(src)/../include/asm/trace
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 1e5eb9f2ff5f..a1e4a6c3f394 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -321,13 +321,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
#ifdef CONFIG_X86_IO_APIC
#define MP_ISA_BUS 0
+static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
+ u8 trigger, u32 gsi);
+
static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
u32 gsi)
{
- int ioapic;
- int pin;
- struct mpc_intsrc mp_irq;
-
/*
* Check bus_irq boundary.
*/
@@ -337,14 +336,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
}
/*
- * Convert 'gsi' to 'ioapic.pin'.
- */
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0)
- return;
- pin = mp_find_ioapic_pin(ioapic, gsi);
-
- /*
* TBD: This check is for faulty timer entries, where the override
* erroneously sets the trigger to level, resulting in a HUGE
* increase of timer interrupts!
@@ -352,16 +343,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
if ((bus_irq == 0) && (trigger == 3))
trigger = 1;
- mp_irq.type = MP_INTSRC;
- mp_irq.irqtype = mp_INT;
- mp_irq.irqflag = (trigger << 2) | polarity;
- mp_irq.srcbus = MP_ISA_BUS;
- mp_irq.srcbusirq = bus_irq; /* IRQ */
- mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */
- mp_irq.dstirq = pin; /* INTIN# */
-
- mp_save_irq(&mp_irq);
-
+ if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0)
+ return;
/*
* Reset default identity mapping if gsi is also an legacy IRQ,
* otherwise there will be more than one entry with the same GSI
@@ -408,6 +391,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
return 0;
}
+static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
+ u8 trigger, u32 gsi)
+{
+ struct mpc_intsrc mp_irq;
+ int ioapic, pin;
+
+ /* Convert 'gsi' to 'ioapic.pin'(INTIN#) */
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0) {
+ pr_warn("Failed to find ioapic for gsi : %u\n", gsi);
+ return ioapic;
+ }
+
+ pin = mp_find_ioapic_pin(ioapic, gsi);
+
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+ mp_irq.irqflag = (trigger << 2) | polarity;
+ mp_irq.srcbus = MP_ISA_BUS;
+ mp_irq.srcbusirq = bus_irq;
+ mp_irq.dstapic = mpc_ioapic_id(ioapic);
+ mp_irq.dstirq = pin;
+
+ mp_save_irq(&mp_irq);
+
+ return 0;
+}
+
static int __init
acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
{
@@ -452,7 +463,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
- mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+ if (bus_irq < NR_IRQS_LEGACY)
+ mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+ else
+ mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi);
+
acpi_penalize_sci_irq(bus_irq, trigger, polarity);
/*
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 8c35df468104..48e3979d174a 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -107,6 +107,15 @@ ENTRY(do_suspend_lowlevel)
movq pt_regs_r14(%rax), %r14
movq pt_regs_r15(%rax), %r15
+#ifdef CONFIG_KASAN
+ /*
+ * The suspend path may have poisoned some areas deeper in the stack,
+ * which we now need to unpoison.
+ */
+ movq %rsp, %rdi
+ call kasan_unpoison_task_stack_below
+#endif
+
xorl %eax, %eax
addq $8, %rsp
jmp restore_processor_state
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 25f909362b7a..d6f375f1b928 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -339,9 +339,12 @@ done:
static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
{
unsigned long flags;
+ int i;
- if (instr[0] != 0x90)
- return;
+ for (i = 0; i < a->padlen; i++) {
+ if (instr[i] != 0x90)
+ return;
+ }
local_irq_save(flags);
add_nops(instr + (a->instrlen - a->padlen), a->padlen);
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 8bb12ddc5db8..8e63ebdcbd0b 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -2,6 +2,10 @@
# Makefile for local APIC drivers and for the IO-APIC code
#
+# Leads to non-deterministic coverage that is not a function of syscall inputs.
+# In particualr, smp_apic_timer_interrupt() is called in random places.
+KCOV_INSTRUMENT := n
+
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o vector.o
obj-y += hw_nmi.o
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 58031303e304..606ebe494756 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -8,6 +8,10 @@ CFLAGS_REMOVE_common.o = -pg
CFLAGS_REMOVE_perf_event.o = -pg
endif
+# If these files are instrumented, boot hangs during the first second.
+KCOV_INSTRUMENT_common.o := n
+KCOV_INSTRUMENT_perf_event.o := n
+
# Make sure load_percpu_segment has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_common.o := $(nostackp)
@@ -16,13 +20,11 @@ obj-y := intel_cacheinfo.o scattered.o topology.o
obj-y += common.o
obj-y += rdrand.o
obj-y += match.o
+obj-y += bugs.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
-obj-$(CONFIG_X86_32) += bugs.o
-obj-$(CONFIG_X86_64) += bugs_64.o
-
obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index bd17db15a2c1..cd46f9039119 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -9,6 +9,7 @@
*/
#include <linux/init.h>
#include <linux/utsname.h>
+#include <linux/cpu.h>
#include <asm/bugs.h>
#include <asm/processor.h>
#include <asm/processor-flags.h>
@@ -16,15 +17,27 @@
#include <asm/msr.h>
#include <asm/paravirt.h>
#include <asm/alternative.h>
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
void __init check_bugs(void)
{
- identify_boot_cpu();
-#ifndef CONFIG_SMP
- pr_info("CPU: ");
- print_cpu_info(&boot_cpu_data);
+#ifdef CONFIG_X86_32
+ /*
+ * Regardless of whether PCID is enumerated, the SDM says
+ * that it can't be enabled in 32-bit mode.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
#endif
+ identify_boot_cpu();
+
+ if (!IS_ENABLED(CONFIG_SMP)) {
+ pr_info("CPU: ");
+ print_cpu_info(&boot_cpu_data);
+ }
+
+#ifdef CONFIG_X86_32
/*
* Check whether we are able to run this kernel safely on SMP.
*
@@ -40,4 +53,46 @@ void __init check_bugs(void)
alternative_instructions();
fpu__init_check_bugs();
+#else /* CONFIG_X86_64 */
+ alternative_instructions();
+
+ /*
+ * Make sure the first 2MB area is not mapped by huge pages
+ * There are typically fixed size MTRRs in there and overlapping
+ * MTRRs into large pages causes slow downs.
+ *
+ * Right now we don't do that with gbpages because there seems
+ * very little benefit for that case.
+ */
+ if (!direct_gbpages)
+ set_memory_4k((unsigned long)__va(0), 1);
+#endif
+}
+
+#ifdef CONFIG_SYSFS
+ssize_t cpu_show_meltdown(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+ return sprintf(buf, "Not affected\n");
+ if (boot_cpu_has(X86_FEATURE_KAISER))
+ return sprintf(buf, "Mitigation: PTI\n");
+ return sprintf(buf, "Vulnerable\n");
}
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
+ return sprintf(buf, "Not affected\n");
+ return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ return sprintf(buf, "Not affected\n");
+ return sprintf(buf, "Vulnerable\n");
+}
+#endif
diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
deleted file mode 100644
index 04f0fe5af83e..000000000000
--- a/arch/x86/kernel/cpu/bugs_64.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 1994 Linus Torvalds
- * Copyright (C) 2000 SuSE
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <asm/alternative.h>
-#include <asm/bugs.h>
-#include <asm/processor.h>
-#include <asm/mtrr.h>
-#include <asm/cacheflush.h>
-
-void __init check_bugs(void)
-{
- identify_boot_cpu();
-#if !defined(CONFIG_SMP)
- printk(KERN_INFO "CPU: ");
- print_cpu_info(&boot_cpu_data);
-#endif
- alternative_instructions();
-
- /*
- * Make sure the first 2MB area is not mapped by huge pages
- * There are typically fixed size MTRRs in there and overlapping
- * MTRRs into large pages causes slow downs.
- *
- * Right now we don't do that with gbpages because there seems
- * very little benefit for that case.
- */
- if (!direct_gbpages)
- set_memory_4k((unsigned long)__va(0), 1);
-}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 637ca414d431..dc4dfad66a70 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -92,7 +92,7 @@ static const struct cpu_dev default_cpu = {
static const struct cpu_dev *this_cpu = &default_cpu;
-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
+DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page) = { .gdt = {
#ifdef CONFIG_X86_64
/*
* We need valid kernel segments for data and code in long mode too
@@ -162,6 +162,40 @@ static int __init x86_mpx_setup(char *s)
}
__setup("nompx", x86_mpx_setup);
+#ifdef CONFIG_X86_64
+static int __init x86_pcid_setup(char *s)
+{
+ /* require an exact match without trailing characters */
+ if (strlen(s))
+ return 0;
+
+ /* do not emit a message if the feature is not present */
+ if (!boot_cpu_has(X86_FEATURE_PCID))
+ return 1;
+
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+ pr_info("nopcid: PCID feature disabled\n");
+ return 1;
+}
+__setup("nopcid", x86_pcid_setup);
+#endif
+
+static int __init x86_noinvpcid_setup(char *s)
+{
+ /* noinvpcid doesn't accept parameters */
+ if (s)
+ return -EINVAL;
+
+ /* do not emit a message if the feature is not present */
+ if (!boot_cpu_has(X86_FEATURE_INVPCID))
+ return 0;
+
+ setup_clear_cpu_cap(X86_FEATURE_INVPCID);
+ pr_info("noinvpcid: INVPCID feature disabled\n");
+ return 0;
+}
+early_param("noinvpcid", x86_noinvpcid_setup);
+
#ifdef CONFIG_X86_32
static int cachesize_override = -1;
static int disable_x86_serial_nr = 1;
@@ -287,6 +321,39 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
}
}
+static void setup_pcid(struct cpuinfo_x86 *c)
+{
+ if (cpu_has(c, X86_FEATURE_PCID)) {
+ if (cpu_has(c, X86_FEATURE_PGE) || kaiser_enabled) {
+ cr4_set_bits(X86_CR4_PCIDE);
+ /*
+ * INVPCID has two "groups" of types:
+ * 1/2: Invalidate an individual address
+ * 3/4: Invalidate all contexts
+ *
+ * 1/2 take a PCID, but 3/4 do not. So, 3/4
+ * ignore the PCID argument in the descriptor.
+ * But, we have to be careful not to call 1/2
+ * with an actual non-zero PCID in them before
+ * we do the above cr4_set_bits().
+ */
+ if (cpu_has(c, X86_FEATURE_INVPCID))
+ set_cpu_cap(c, X86_FEATURE_INVPCID_SINGLE);
+ } else {
+ /*
+ * flush_tlb_all(), as currently implemented, won't
+ * work if PCID is on but PGE is not. Since that
+ * combination doesn't exist on real hardware, there's
+ * no reason to try to fully support it, but it's
+ * polite to avoid corrupting data if we're on
+ * an improperly configured VM.
+ */
+ clear_cpu_cap(c, X86_FEATURE_PCID);
+ }
+ }
+ kaiser_setup_pcid();
+}
+
/*
* Some CPU features depend on higher CPUID levels, which may not always
* be available due to CPUID level capping or broken virtualization
@@ -365,8 +432,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
return NULL; /* Not found */
}
-__u32 cpu_caps_cleared[NCAPINTS];
-__u32 cpu_caps_set[NCAPINTS];
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
void load_percpu_segment(int cpu)
{
@@ -597,6 +664,16 @@ void cpu_detect(struct cpuinfo_x86 *c)
}
}
+static void apply_forced_caps(struct cpuinfo_x86 *c)
+{
+ int i;
+
+ for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
+ c->x86_capability[i] &= ~cpu_caps_cleared[i];
+ c->x86_capability[i] |= cpu_caps_set[i];
+ }
+}
+
void get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 tfms, xlvl;
@@ -753,6 +830,13 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
}
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+ /* Assume for now that ALL x86 CPUs are insecure */
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
fpu__init_system(c);
}
@@ -888,11 +972,8 @@ static void identify_cpu(struct cpuinfo_x86 *c)
if (this_cpu->c_identify)
this_cpu->c_identify(c);
- /* Clear/Set all flags overriden by options, after probe */
- for (i = 0; i < NCAPINTS; i++) {
- c->x86_capability[i] &= ~cpu_caps_cleared[i];
- c->x86_capability[i] |= cpu_caps_set[i];
- }
+ /* Clear/Set all flags overridden by options, after probe */
+ apply_forced_caps(c);
#ifdef CONFIG_X86_64
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
@@ -918,6 +999,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
setup_smep(c);
setup_smap(c);
+ /* Set up PCID */
+ setup_pcid(c);
+
/*
* The vendor-specific functions might have changed features.
* Now we do "generic changes."
@@ -950,10 +1034,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
* Clear/Set all flags overriden by options, need do it
* before following smp all cpus cap AND.
*/
- for (i = 0; i < NCAPINTS; i++) {
- c->x86_capability[i] &= ~cpu_caps_cleared[i];
- c->x86_capability[i] |= cpu_caps_set[i];
- }
+ apply_forced_caps(c);
/*
* On SMP, boot_cpu_data holds the common feature set between
@@ -1173,7 +1254,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
[DEBUG_STACK - 1] = DEBUG_STKSZ
};
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(char, exception_stacks
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
/* May not be marked __init: used by software suspend */
@@ -1336,6 +1417,14 @@ void cpu_init(void)
* try to read it.
*/
cr4_init_shadow();
+ if (!kaiser_enabled) {
+ /*
+ * secondary_startup_64() deferred setting PGE in cr4:
+ * probe_page_size_mask() sets it on the boot cpu,
+ * but it needs to be set on each secondary cpu.
+ */
+ cr4_set_bits(X86_CR4_PGE);
+ }
/*
* Load microcode on this cpu if a valid microcode is available.
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 2233f8a76615..2a0f44d225fe 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -580,6 +580,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
#define F14H_MPB_MAX_SIZE 1824
#define F15H_MPB_MAX_SIZE 4096
#define F16H_MPB_MAX_SIZE 3458
+#define F17H_MPB_MAX_SIZE 3200
switch (family) {
case 0x14:
@@ -591,6 +592,9 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
case 0x16:
max_size = F16H_MPB_MAX_SIZE;
break;
+ case 0x17:
+ max_size = F17H_MPB_MAX_SIZE;
+ break;
default:
max_size = F1XH_MPB_MAX_SIZE;
break;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index abf581ade8d2..b428a8174be1 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -994,9 +994,17 @@ static bool is_blacklisted(unsigned int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
- if (c->x86 == 6 && c->x86_model == 79) {
- pr_err_once("late loading on model 79 is disabled.\n");
- return true;
+ /*
+ * Late loading on model 79 with microcode revision less than 0x0b000021
+ * may result in a system hang. This behavior is documented in item
+ * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
+ */
+ if (c->x86 == 6 &&
+ c->x86_model == 79 &&
+ c->x86_mask == 0x01 &&
+ c->microcode < 0x0b000021) {
+ pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+ pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
}
return false;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 1e7de3cefc9c..f01b3a12dce0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -2,11 +2,15 @@
#include <linux/types.h>
#include <linux/slab.h>
+#include <asm/kaiser.h>
#include <asm/perf_event.h>
#include <asm/insn.h>
#include "perf_event.h"
+static
+DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct debug_store, cpu_debug_store);
+
/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE 24
@@ -268,6 +272,39 @@ void fini_debug_store_on_cpu(int cpu)
static DEFINE_PER_CPU(void *, insn_buffer);
+static void *dsalloc(size_t size, gfp_t flags, int node)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ unsigned int order = get_order(size);
+ struct page *page;
+ unsigned long addr;
+
+ page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
+ if (!page)
+ return NULL;
+ addr = (unsigned long)page_address(page);
+ if (kaiser_add_mapping(addr, size, __PAGE_KERNEL) < 0) {
+ __free_pages(page, order);
+ addr = 0;
+ }
+ return (void *)addr;
+#else
+ return kmalloc_node(size, flags | __GFP_ZERO, node);
+#endif
+}
+
+static void dsfree(const void *buffer, size_t size)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ if (!buffer)
+ return;
+ kaiser_remove_mapping((unsigned long)buffer, size);
+ free_pages((unsigned long)buffer, get_order(size));
+#else
+ kfree(buffer);
+#endif
+}
+
static int alloc_pebs_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -278,7 +315,7 @@ static int alloc_pebs_buffer(int cpu)
if (!x86_pmu.pebs)
return 0;
- buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+ buffer = dsalloc(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
if (unlikely(!buffer))
return -ENOMEM;
@@ -289,7 +326,7 @@ static int alloc_pebs_buffer(int cpu)
if (x86_pmu.intel_cap.pebs_format < 2) {
ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
if (!ibuffer) {
- kfree(buffer);
+ dsfree(buffer, x86_pmu.pebs_buffer_size);
return -ENOMEM;
}
per_cpu(insn_buffer, cpu) = ibuffer;
@@ -315,7 +352,8 @@ static void release_pebs_buffer(int cpu)
kfree(per_cpu(insn_buffer, cpu));
per_cpu(insn_buffer, cpu) = NULL;
- kfree((void *)(unsigned long)ds->pebs_buffer_base);
+ dsfree((void *)(unsigned long)ds->pebs_buffer_base,
+ x86_pmu.pebs_buffer_size);
ds->pebs_buffer_base = 0;
}
@@ -329,7 +367,7 @@ static int alloc_bts_buffer(int cpu)
if (!x86_pmu.bts)
return 0;
- buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+ buffer = dsalloc(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
if (unlikely(!buffer)) {
WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
return -ENOMEM;
@@ -355,19 +393,15 @@ static void release_bts_buffer(int cpu)
if (!ds || !x86_pmu.bts)
return;
- kfree((void *)(unsigned long)ds->bts_buffer_base);
+ dsfree((void *)(unsigned long)ds->bts_buffer_base, BTS_BUFFER_SIZE);
ds->bts_buffer_base = 0;
}
static int alloc_ds_buffer(int cpu)
{
- int node = cpu_to_node(cpu);
- struct debug_store *ds;
-
- ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
- if (unlikely(!ds))
- return -ENOMEM;
+ struct debug_store *ds = per_cpu_ptr(&cpu_debug_store, cpu);
+ memset(ds, 0, sizeof(*ds));
per_cpu(cpu_hw_events, cpu).ds = ds;
return 0;
@@ -381,7 +415,6 @@ static void release_ds_buffer(int cpu)
return;
per_cpu(cpu_hw_events, cpu).ds = NULL;
- kfree(ds);
}
void release_ds_buffers(void)
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 4d38416e2a7f..b02cb2ec6726 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -41,6 +41,7 @@
#include <asm/pgalloc.h>
#include <asm/setup.h>
#include <asm/espfix.h>
+#include <asm/kaiser.h>
/*
* Note: we only need 6*8 = 48 bytes for the espfix stack, but round
@@ -126,6 +127,15 @@ void __init init_espfix_bsp(void)
/* Install the espfix pud into the kernel page directory */
pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
+ /*
+ * Just copy the top-level PGD that is mapping the espfix
+ * area to ensure it is mapped into the shadow user page
+ * tables.
+ */
+ if (kaiser_enabled) {
+ set_pgd(native_get_shadow_pgd(pgd_p),
+ __pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page)));
+ }
/* Randomize the locations */
init_espfix_random();
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ffdc0e860390..4034e905741a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -183,8 +183,8 @@ ENTRY(secondary_startup_64)
movq $(init_level4_pgt - __START_KERNEL_map), %rax
1:
- /* Enable PAE mode and PGE */
- movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
+ /* Enable PAE and PSE, but defer PGE until kaiser_enabled is decided */
+ movl $(X86_CR4_PAE | X86_CR4_PSE), %ecx
movq %rcx, %cr4
/* Setup early boot stage 4 level pagetables. */
@@ -441,6 +441,27 @@ early_idt_ripmsg:
.balign PAGE_SIZE; \
GLOBAL(name)
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Each PGD needs to be 8k long and 8k aligned. We do not
+ * ever go out to userspace with these, so we do not
+ * strictly *need* the second page, but this allows us to
+ * have a single set_pgd() implementation that does not
+ * need to worry about whether it has 4k or 8k to work
+ * with.
+ *
+ * This ensures PGDs are 8k long:
+ */
+#define KAISER_USER_PGD_FILL 512
+/* This ensures they are 8k-aligned: */
+#define NEXT_PGD_PAGE(name) \
+ .balign 2 * PAGE_SIZE; \
+GLOBAL(name)
+#else
+#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
+#define KAISER_USER_PGD_FILL 0
+#endif
+
/* Automate the creation of 1 to 1 mapping pmd entries */
#define PMDS(START, PERM, COUNT) \
i = 0 ; \
@@ -450,9 +471,10 @@ GLOBAL(name)
.endr
__INITDATA
-NEXT_PAGE(early_level4_pgt)
+NEXT_PGD_PAGE(early_level4_pgt)
.fill 511,8,0
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+ .fill KAISER_USER_PGD_FILL,8,0
NEXT_PAGE(early_dynamic_pgts)
.fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
@@ -460,16 +482,18 @@ NEXT_PAGE(early_dynamic_pgts)
.data
#ifndef CONFIG_XEN
-NEXT_PAGE(init_level4_pgt)
+NEXT_PGD_PAGE(init_level4_pgt)
.fill 512,8,0
+ .fill KAISER_USER_PGD_FILL,8,0
#else
-NEXT_PAGE(init_level4_pgt)
+NEXT_PGD_PAGE(init_level4_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
.org init_level4_pgt + L4_PAGE_OFFSET*8, 0
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
.org init_level4_pgt + L4_START_KERNEL*8, 0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+ .fill KAISER_USER_PGD_FILL,8,0
NEXT_PAGE(level3_ident_pgt)
.quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
@@ -480,6 +504,7 @@ NEXT_PAGE(level2_ident_pgt)
*/
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
#endif
+ .fill KAISER_USER_PGD_FILL,8,0
NEXT_PAGE(level3_kernel_pgt)
.fill L3_START_KERNEL,8,0
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index acc9b8f19ca8..f48eb8eeefe2 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -353,7 +353,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer)
irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq));
irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
- disable_irq(hdev->irq);
+ disable_hardirq(hdev->irq);
irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
enable_irq(hdev->irq);
}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 61521dc19c10..9f669fdd2010 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -102,8 +102,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_puts(p, " Rescheduling interrupts\n");
seq_printf(p, "%*s: ", prec, "CAL");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
- irq_stats(j)->irq_tlb_count);
+ seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
seq_puts(p, " Function call interrupts\n");
seq_printf(p, "%*s: ", prec, "TLB");
for_each_online_cpu(j)
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 1423ab1b0312..f480b38a03c3 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -51,7 +51,7 @@ static struct irqaction irq2 = {
.flags = IRQF_NO_THREAD,
};
-DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
+DEFINE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq) = {
[0 ... NR_VECTORS - 1] = VECTOR_UNUSED,
};
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 99d293ea2b49..3eb804335458 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -49,7 +49,7 @@
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
-
+#include <linux/kasan.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
@@ -1078,6 +1078,9 @@ void jprobe_return(void)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ /* Unpoison stack redzones in the frames we are going to jump over. */
+ kasan_unpoison_stack_above_sp_to(kcb->jprobe_saved_sp);
+
asm volatile (
#ifdef CONFIG_X86_64
" xchg %%rbx,%%rsp \n"
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 5f8f0b3cc674..2c0b0b645a74 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -26,7 +26,7 @@
#include "common.h"
static nokprobe_inline
-int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+void __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb, unsigned long orig_ip)
{
/*
@@ -41,20 +41,21 @@ int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
__this_cpu_write(current_kprobe, NULL);
if (orig_ip)
regs->ip = orig_ip;
- return 1;
}
int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
- if (kprobe_ftrace(p))
- return __skip_singlestep(p, regs, kcb, 0);
- else
- return 0;
+ if (kprobe_ftrace(p)) {
+ __skip_singlestep(p, regs, kcb, 0);
+ preempt_enable_no_resched();
+ return 1;
+ }
+ return 0;
}
NOKPROBE_SYMBOL(skip_singlestep);
-/* Ftrace callback handler for kprobes */
+/* Ftrace callback handler for kprobes -- called under preepmt disabed */
void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct pt_regs *regs)
{
@@ -77,13 +78,17 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
regs->ip = ip + sizeof(kprobe_opcode_t);
+ /* To emulate trap based kprobes, preempt_disable here */
+ preempt_disable();
__this_cpu_write(current_kprobe, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
- if (!p->pre_handler || !p->pre_handler(p, regs))
+ if (!p->pre_handler || !p->pre_handler(p, regs)) {
__skip_singlestep(p, regs, kcb, orig_ip);
+ preempt_enable_no_resched();
+ }
/*
* If pre_handler returns !0, it sets regs->ip and
- * resets current kprobe.
+ * resets current kprobe, and keep preempt count +1.
*/
}
end:
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 2bd81e302427..ec1b06dc82d2 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -45,6 +45,11 @@ early_param("no-kvmclock", parse_no_kvmclock);
static struct pvclock_vsyscall_time_info *hv_clock;
static struct pvclock_wall_clock wall_clock;
+struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
+{
+ return hv_clock;
+}
+
/*
* The wallclock is the time of day when we booted. Since then, some time may
* have elapsed since the hypervisor wrote the data. So we try to account for
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 6acc9dd91f36..bc429365b72a 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,9 +12,11 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
+#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
+#include <linux/kaiser.h>
#include <asm/ldt.h>
#include <asm/desc.h>
@@ -33,11 +35,21 @@ static void flush_ldt(void *current_mm)
set_ldt(pc->ldt->entries, pc->ldt->size);
}
+static void __free_ldt_struct(struct ldt_struct *ldt)
+{
+ if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
+ vfree(ldt->entries);
+ else
+ free_page((unsigned long)ldt->entries);
+ kfree(ldt);
+}
+
/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
static struct ldt_struct *alloc_ldt_struct(int size)
{
struct ldt_struct *new_ldt;
int alloc_size;
+ int ret;
if (size > LDT_ENTRIES)
return NULL;
@@ -65,7 +77,13 @@ static struct ldt_struct *alloc_ldt_struct(int size)
return NULL;
}
+ ret = kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size,
+ __PAGE_KERNEL);
new_ldt->size = size;
+ if (ret) {
+ __free_ldt_struct(new_ldt);
+ return NULL;
+ }
return new_ldt;
}
@@ -91,12 +109,10 @@ static void free_ldt_struct(struct ldt_struct *ldt)
if (likely(!ldt))
return;
+ kaiser_remove_mapping((unsigned long)ldt->entries,
+ ldt->size * LDT_ENTRY_SIZE);
paravirt_free_ldt(ldt->entries, ldt->size);
- if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
- vfree(ldt->entries);
- else
- free_page((unsigned long)ldt->entries);
- kfree(ldt);
+ __free_ldt_struct(ldt);
}
/*
@@ -271,8 +287,8 @@ out:
return error;
}
-asmlinkage int sys_modify_ldt(int func, void __user *ptr,
- unsigned long bytecount)
+SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
+ unsigned long , bytecount)
{
int ret = -ENOSYS;
@@ -290,5 +306,14 @@ asmlinkage int sys_modify_ldt(int func, void __user *ptr,
ret = write_ldt(ptr, bytecount, 0);
break;
}
- return ret;
+ /*
+ * The SYSCALL_DEFINE() macros give us an 'unsigned long'
+ * return type, but tht ABI for sys_modify_ldt() expects
+ * 'int'. This cast gives us an int-sized value in %rax
+ * for the return code. The 'unsigned' is necessary so
+ * the compiler does not try to sign-extend the negative
+ * return codes into the high half of the register when
+ * taking the value from int->long.
+ */
+ return (unsigned int)ret;
}
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 8aa05583bc42..0677bf8d3a42 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -9,7 +9,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
@@ -62,7 +61,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_mmu_ops, read_cr3);
PATCH_SITE(pv_mmu_ops, write_cr3);
PATCH_SITE(pv_cpu_ops, clts);
- PATCH_SITE(pv_mmu_ops, flush_tlb_single);
PATCH_SITE(pv_cpu_ops, wbinvd);
#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 57eca132962f..c1b21d61b769 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -39,7 +39,7 @@
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = {
.x86_tss = {
.sp0 = TOP_OF_INIT_STACK,
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index f660d63f40fe..9a16932c7258 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -93,6 +93,10 @@ void __noreturn machine_real_restart(unsigned int type)
load_cr3(initial_page_table);
#else
write_cr3(real_mode_header->trampoline_pgd);
+
+ /* Exiting long mode will fail if CR4.PCIDE is set. */
+ if (static_cpu_has(X86_FEATURE_PCID))
+ cr4_clear_bits(X86_CR4_PCIDE);
#endif
/* Jump to the identity-mapped low memory code */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index e67b834279b2..bbaae4cf9e8e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -112,6 +112,7 @@
#include <asm/alternative.h>
#include <asm/prom.h>
#include <asm/microcode.h>
+#include <asm/kaiser.h>
/*
* max_low_pfn_mapped: highest direct mapped pfn under 4GB
@@ -1016,6 +1017,12 @@ void __init setup_arch(char **cmdline_p)
*/
init_hypervisor_platform();
+ /*
+ * This needs to happen right after XENPV is set on xen and
+ * kaiser_enabled is checked below in cleanup_highmap().
+ */
+ kaiser_check_boottime_disable();
+
x86_init.resources.probe_roms();
/* after parse_early_param, so could debug it */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fbabe4fcc7fb..fe89f938e0f0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -104,14 +104,10 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
spin_lock_irqsave(&rtc_lock, flags);
CMOS_WRITE(0xa, 0xf);
spin_unlock_irqrestore(&rtc_lock, flags);
- local_flush_tlb();
- pr_debug("1.\n");
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
start_eip >> 4;
- pr_debug("2.\n");
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
start_eip & 0xf;
- pr_debug("3.\n");
}
static inline void smpboot_restore_warm_reset_vector(void)
@@ -119,11 +115,6 @@ static inline void smpboot_restore_warm_reset_vector(void)
unsigned long flags;
/*
- * Install writable page 0 entry to set BIOS data area.
- */
- local_flush_tlb();
-
- /*
* Paranoid: Set warm reset code and vector here back
* to default values.
*/
diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c
index 1c113db9ed57..2bb5ee464df3 100644
--- a/arch/x86/kernel/tracepoint.c
+++ b/arch/x86/kernel/tracepoint.c
@@ -9,10 +9,12 @@
#include <linux/atomic.h>
atomic_t trace_idt_ctr = ATOMIC_INIT(0);
+__aligned(PAGE_SIZE)
struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
(unsigned long) trace_idt_table };
/* No need to be aligned, but done to keep all IDTs defined the same way. */
+__aligned(PAGE_SIZE)
gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss;
static int trace_irq_vector_refcount;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 524619351961..510e80da7de4 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -187,7 +187,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
pte_unmap_unlock(pte, ptl);
out:
up_write(&mm->mmap_sem);
- flush_tlb();
+ flush_tlb_mm_range(mm, 0xA0000, 0xA0000 + 32*PAGE_SIZE, 0UL);
}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index fe133b710bef..1d0e36f909eb 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -101,6 +101,7 @@ SECTIONS
KPROBES_TEXT
ENTRY_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
/* End of text section */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 684edebb4a0c..00045499f6c2 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2383,9 +2383,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
}
static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
- u64 cr0, u64 cr4)
+ u64 cr0, u64 cr3, u64 cr4)
{
int bad;
+ u64 pcid;
+
+ /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
+ pcid = 0;
+ if (cr4 & X86_CR4_PCIDE) {
+ pcid = cr3 & 0xfff;
+ cr3 &= ~0xfff;
+ }
+
+ bad = ctxt->ops->set_cr(ctxt, 3, cr3);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
/*
* First enable PAE, long mode needs it before CR0.PG = 1 is set.
@@ -2404,6 +2416,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
bad = ctxt->ops->set_cr(ctxt, 4, cr4);
if (bad)
return X86EMUL_UNHANDLEABLE;
+ if (pcid) {
+ bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+ }
+
}
return X86EMUL_CONTINUE;
@@ -2414,11 +2432,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
struct desc_struct desc;
struct desc_ptr dt;
u16 selector;
- u32 val, cr0, cr4;
+ u32 val, cr0, cr3, cr4;
int i;
cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
- ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
+ cr3 = GET_SMSTATE(u32, smbase, 0x7ff8);
ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
@@ -2460,14 +2478,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
- return rsm_enter_protected_mode(ctxt, cr0, cr4);
+ return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
}
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
{
struct desc_struct desc;
struct desc_ptr dt;
- u64 val, cr0, cr4;
+ u64 val, cr0, cr3, cr4;
u32 base3;
u16 selector;
int i, r;
@@ -2484,7 +2502,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
- ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50));
+ cr3 = GET_SMSTATE(u64, smbase, 0x7f50);
cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
val = GET_SMSTATE(u64, smbase, 0x7ed0);
@@ -2512,7 +2530,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
ctxt->ops->set_gdt(ctxt, &dt);
- r = rsm_enter_protected_mode(ctxt, cr0, cr4);
+ r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
if (r != X86EMUL_CONTINUE)
return r;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 4e1b254c3695..900ffb6c28b5 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1696,6 +1696,8 @@ static int ud_interception(struct vcpu_svm *svm)
int er;
er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
+ if (er == EMULATE_USER_EXIT)
+ return 0;
if (er != EMULATE_DONE)
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
@@ -3854,6 +3856,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
"mov %%r14, %c[r14](%[svm]) \n\t"
"mov %%r15, %c[r15](%[svm]) \n\t"
#endif
+ /*
+ * Clear host registers marked as clobbered to prevent
+ * speculative use.
+ */
+ "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
+ "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
+ "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
+ "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
+ "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
+#ifdef CONFIG_X86_64
+ "xor %%r8, %%r8 \n\t"
+ "xor %%r9, %%r9 \n\t"
+ "xor %%r10, %%r10 \n\t"
+ "xor %%r11, %%r11 \n\t"
+ "xor %%r12, %%r12 \n\t"
+ "xor %%r13, %%r13 \n\t"
+ "xor %%r14, %%r14 \n\t"
+ "xor %%r15, %%r15 \n\t"
+#endif
"pop %%" _ASM_BP
:
: [svm]"a"(svm),
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 67ba0d8f87c7..c26255f19603 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -828,8 +828,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
{
BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
- vmcs_field_to_offset_table[field] == 0)
+ if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
+ return -ENOENT;
+
+ /*
+ * FIXME: Mitigation for CVE-2017-5753. To be replaced with a
+ * generic mechanism.
+ */
+ asm("lfence");
+
+ if (vmcs_field_to_offset_table[field] == 0)
return -ENOENT;
return vmcs_field_to_offset_table[field];
@@ -1107,6 +1115,11 @@ static inline bool cpu_has_vmx_invvpid_global(void)
return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
}
+static inline bool cpu_has_vmx_invvpid(void)
+{
+ return vmx_capability.vpid & VMX_VPID_INVVPID_BIT;
+}
+
static inline bool cpu_has_vmx_ept(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -5267,6 +5280,8 @@ static int handle_exception(struct kvm_vcpu *vcpu)
return 1;
}
er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
+ if (er == EMULATE_USER_EXIT)
+ return 0;
if (er != EMULATE_DONE)
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
@@ -6180,12 +6195,7 @@ static __init int hardware_setup(void)
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
- /*
- * Allow direct access to the PC debug port (it is often used for I/O
- * delays, but the vmexits simply slow things down).
- */
memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
- clear_bit(0x80, vmx_io_bitmap_a);
memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
@@ -6202,8 +6212,10 @@ static __init int hardware_setup(void)
if (boot_cpu_has(X86_FEATURE_NX))
kvm_enable_efer_bits(EFER_NX);
- if (!cpu_has_vmx_vpid())
+ if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
+ !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
enable_vpid = 0;
+
if (!cpu_has_vmx_shadow_vmcs())
enable_shadow_vmcs = 0;
if (enable_shadow_vmcs)
@@ -6927,9 +6939,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
static int handle_vmclear(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u32 zero = 0;
gpa_t vmptr;
- struct vmcs12 *vmcs12;
- struct page *page;
if (!nested_vmx_check_permission(vcpu))
return 1;
@@ -6940,22 +6951,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
if (vmptr == vmx->nested.current_vmptr)
nested_release_vmcs12(vmx);
- page = nested_get_page(vcpu, vmptr);
- if (page == NULL) {
- /*
- * For accurate processor emulation, VMCLEAR beyond available
- * physical memory should do nothing at all. However, it is
- * possible that a nested vmx bug, not a guest hypervisor bug,
- * resulted in this case, so let's shut down before doing any
- * more damage:
- */
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
- return 1;
- }
- vmcs12 = kmap(page);
- vmcs12->launch_state = 0;
- kunmap(page);
- nested_release_page(page);
+ kvm_vcpu_write_guest(vcpu,
+ vmptr + offsetof(struct vmcs12, launch_state),
+ &zero, sizeof(zero));
nested_free_vmcs02(vmx, vmptr);
@@ -8633,6 +8631,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
/* Save guest registers, load host registers, keep flags */
"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
"pop %0 \n\t"
+ "setbe %c[fail](%0)\n\t"
"mov %%" _ASM_AX ", %c[rax](%0) \n\t"
"mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
__ASM_SIZE(pop) " %c[rcx](%0) \n\t"
@@ -8649,12 +8648,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
"mov %%r13, %c[r13](%0) \n\t"
"mov %%r14, %c[r14](%0) \n\t"
"mov %%r15, %c[r15](%0) \n\t"
+ "xor %%r8d, %%r8d \n\t"
+ "xor %%r9d, %%r9d \n\t"
+ "xor %%r10d, %%r10d \n\t"
+ "xor %%r11d, %%r11d \n\t"
+ "xor %%r12d, %%r12d \n\t"
+ "xor %%r13d, %%r13d \n\t"
+ "xor %%r14d, %%r14d \n\t"
+ "xor %%r15d, %%r15d \n\t"
#endif
"mov %%cr2, %%" _ASM_AX " \n\t"
"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
+ "xor %%eax, %%eax \n\t"
+ "xor %%ebx, %%ebx \n\t"
+ "xor %%esi, %%esi \n\t"
+ "xor %%edi, %%edi \n\t"
"pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
- "setbe %c[fail](%0) \n\t"
".pushsection .rodata \n\t"
".global vmx_return \n\t"
"vmx_return: " _ASM_PTR " 2b \n\t"
@@ -10572,8 +10582,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
*/
static void vmx_leave_nested(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu))
+ if (is_guest_mode(vcpu)) {
+ to_vmx(vcpu)->nested.nested_run_pending = 0;
nested_vmx_vmexit(vcpu, -1, 0, 0);
+ }
free_nested(to_vmx(vcpu));
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3ffd5900da5b..f973cfa8ff4f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -759,7 +759,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
- if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
+ if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_ASID_MASK) ||
+ !is_long_mode(vcpu))
return 1;
}
@@ -1812,6 +1813,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
*/
BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
+ if (guest_hv_clock.version & 1)
+ ++guest_hv_clock.version; /* first time write, random junk */
+
vcpu->hv_clock.version = guest_hv_clock.version + 1;
kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
&vcpu->hv_clock,
@@ -4110,7 +4114,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
addr, n, v))
&& kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
break;
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
handled += n;
addr += n;
len -= n;
@@ -4358,7 +4362,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
{
if (vcpu->mmio_read_completed) {
trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
- vcpu->mmio_fragments[0].gpa, *(u64 *)val);
+ vcpu->mmio_fragments[0].gpa, val);
vcpu->mmio_read_completed = 0;
return 1;
}
@@ -4380,14 +4384,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
{
- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
return vcpu_mmio_write(vcpu, gpa, bytes, val);
}
static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes)
{
- trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
return X86EMUL_IO_NEEDED;
}
@@ -5426,6 +5430,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
emulation_type))
return EMULATE_DONE;
+ if (ctxt->have_exception && inject_emulated_exception(vcpu))
+ return EMULATE_DONE;
if (emulation_type & EMULTYPE_SKIP)
return EMULATE_FAIL;
return handle_emulation_failure(vcpu);
@@ -6936,7 +6942,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
#endif
kvm_rip_write(vcpu, regs->rip);
- kvm_set_rflags(vcpu, regs->rflags);
+ kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
vcpu->arch.exception.pending = false;
@@ -8225,11 +8231,11 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
{
struct x86_exception fault;
- trace_kvm_async_pf_ready(work->arch.token, work->gva);
if (work->wakeup_all)
work->arch.token = ~0; /* broadcast wakeup */
else
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
+ trace_kvm_async_pf_ready(work->arch.token, work->gva);
if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index db4a1c6ea785..2b6561d97a46 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -2,6 +2,9 @@
# Makefile for x86 specific library files.
#
+# Produces uninteresting flaky coverage.
+KCOV_INSTRUMENT_delay.o := n
+
inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
quiet_cmd_inat_tables = GEN $@
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
index 422db000d727..a744506856b1 100644
--- a/arch/x86/lib/cmdline.c
+++ b/arch/x86/lib/cmdline.c
@@ -82,3 +82,108 @@ int cmdline_find_option_bool(const char *cmdline, const char *option)
return 0; /* Buffer overrun */
}
+
+/*
+ * Find a non-boolean option (i.e. option=argument). In accordance with
+ * standard Linux practice, if this option is repeated, this returns the
+ * last instance on the command line.
+ *
+ * @cmdline: the cmdline string
+ * @max_cmdline_size: the maximum size of cmdline
+ * @option: option string to look for
+ * @buffer: memory buffer to return the option argument
+ * @bufsize: size of the supplied memory buffer
+ *
+ * Returns the length of the argument (regardless of if it was
+ * truncated to fit in the buffer), or -1 on not found.
+ */
+static int
+__cmdline_find_option(const char *cmdline, int max_cmdline_size,
+ const char *option, char *buffer, int bufsize)
+{
+ char c;
+ int pos = 0, len = -1;
+ const char *opptr = NULL;
+ char *bufptr = buffer;
+ enum {
+ st_wordstart = 0, /* Start of word/after whitespace */
+ st_wordcmp, /* Comparing this word */
+ st_wordskip, /* Miscompare, skip */
+ st_bufcpy, /* Copying this to buffer */
+ } state = st_wordstart;
+
+ if (!cmdline)
+ return -1; /* No command line */
+
+ /*
+ * This 'pos' check ensures we do not overrun
+ * a non-NULL-terminated 'cmdline'
+ */
+ while (pos++ < max_cmdline_size) {
+ c = *(char *)cmdline++;
+ if (!c)
+ break;
+
+ switch (state) {
+ case st_wordstart:
+ if (myisspace(c))
+ break;
+
+ state = st_wordcmp;
+ opptr = option;
+ /* fall through */
+
+ case st_wordcmp:
+ if ((c == '=') && !*opptr) {
+ /*
+ * We matched all the way to the end of the
+ * option we were looking for, prepare to
+ * copy the argument.
+ */
+ len = 0;
+ bufptr = buffer;
+ state = st_bufcpy;
+ break;
+ } else if (c == *opptr++) {
+ /*
+ * We are currently matching, so continue
+ * to the next character on the cmdline.
+ */
+ break;
+ }
+ state = st_wordskip;
+ /* fall through */
+
+ case st_wordskip:
+ if (myisspace(c))
+ state = st_wordstart;
+ break;
+
+ case st_bufcpy:
+ if (myisspace(c)) {
+ state = st_wordstart;
+ } else {
+ /*
+ * Increment len, but don't overrun the
+ * supplied buffer and leave room for the
+ * NULL terminator.
+ */
+ if (++len < bufsize)
+ *bufptr++ = c;
+ }
+ break;
+ }
+ }
+
+ if (bufsize)
+ *bufptr = '\0';
+
+ return len;
+}
+
+int cmdline_find_option(const char *cmdline, const char *option, char *buffer,
+ int bufsize)
+{
+ return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option,
+ buffer, bufsize);
+}
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 65c47fda26fc..eaf852500be4 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,8 @@
+# Kernel does not boot with instrumentation of tlb.c.
+KCOV_INSTRUMENT_tlb.o := n
+
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o physaddr.o gup.o setup_nx.o
+ pat.o pgtable.o physaddr.o gup.o setup_nx.o tlb.o
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
@@ -9,7 +12,6 @@ CFLAGS_setup_nx.o := $(nostackp)
CFLAGS_fault.o := -I$(src)/../include/asm/trace
obj-$(CONFIG_X86_PAT) += pat_rbtree.o
-obj-$(CONFIG_SMP) += tlb.o
obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
@@ -33,3 +35,4 @@ obj-$(CONFIG_ACPI_NUMA) += srat.o
obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
+obj-$(CONFIG_PAGE_TABLE_ISOLATION) += kaiser.o
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 3aebbd6c6f5f..151fd33e9043 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -165,7 +165,7 @@ static void __init probe_page_size_mask(void)
cr4_set_bits_and_update_boot(X86_CR4_PSE);
/* Enable PGE if available */
- if (cpu_has_pge) {
+ if (cpu_has_pge && !kaiser_enabled) {
cr4_set_bits_and_update_boot(X86_CR4_PGE);
__supported_pte_mask |= _PAGE_GLOBAL;
} else
@@ -753,13 +753,11 @@ void __init zone_sizes_init(void)
}
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
-#ifdef CONFIG_SMP
.active_mm = &init_mm,
.state = 0,
-#endif
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
};
-EXPORT_SYMBOL_GPL(cpu_tlbstate);
+EXPORT_PER_CPU_SYMBOL(cpu_tlbstate);
void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
{
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e08d141844ee..97b6b0164dcb 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -395,6 +395,16 @@ void __init cleanup_highmap(void)
continue;
if (vaddr < (unsigned long) _text || vaddr > end)
set_pmd(pmd, __pmd(0));
+ else if (kaiser_enabled) {
+ /*
+ * level2_kernel_pgt is initialized with _PAGE_GLOBAL:
+ * clear that now. This is not important, so long as
+ * CR4.PGE remains clear, but it removes an anomaly.
+ * Physical mapping setup below avoids _PAGE_GLOBAL
+ * by use of massage_pgprot() inside pfn_pte() etc.
+ */
+ set_pmd(pmd, pmd_clear_flags(*pmd, _PAGE_GLOBAL));
+ }
}
}
diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
new file mode 100644
index 000000000000..8af98513d36c
--- /dev/null
+++ b/arch/x86/mm/kaiser.c
@@ -0,0 +1,484 @@
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/bug.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
+
+#include <asm/kaiser.h>
+#include <asm/tlbflush.h> /* to verify its kaiser declarations */
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/desc.h>
+#include <asm/cmdline.h>
+#include <asm/vsyscall.h>
+
+int kaiser_enabled __read_mostly = 1;
+EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */
+
+__visible
+DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+/*
+ * These can have bit 63 set, so we can not just use a plain "or"
+ * instruction to get their value or'd into CR3. It would take
+ * another register. So, we use a memory reference to these instead.
+ *
+ * This is also handy because systems that do not support PCIDs
+ * just end up or'ing a 0 into their CR3, which does no harm.
+ */
+DEFINE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+
+/*
+ * At runtime, the only things we map are some things for CPU
+ * hotplug, and stacks for new processes. No two CPUs will ever
+ * be populating the same addresses, so we only need to ensure
+ * that we protect between two CPUs trying to allocate and
+ * populate the same page table page.
+ *
+ * Only take this lock when doing a set_p[4um]d(), but it is not
+ * needed for doing a set_pte(). We assume that only the *owner*
+ * of a given allocation will be doing this for _their_
+ * allocation.
+ *
+ * This ensures that once a system has been running for a while
+ * and there have been stacks all over and these page tables
+ * are fully populated, there will be no further acquisitions of
+ * this lock.
+ */
+static DEFINE_SPINLOCK(shadow_table_allocation_lock);
+
+/*
+ * Returns -1 on error.
+ */
+static inline unsigned long get_pa_from_mapping(unsigned long vaddr)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = pgd_offset_k(vaddr);
+ /*
+ * We made all the kernel PGDs present in kaiser_init().
+ * We expect them to stay that way.
+ */
+ BUG_ON(pgd_none(*pgd));
+ /*
+ * PGDs are either 512GB or 128TB on all x86_64
+ * configurations. We don't handle these.
+ */
+ BUG_ON(pgd_large(*pgd));
+
+ pud = pud_offset(pgd, vaddr);
+ if (pud_none(*pud)) {
+ WARN_ON_ONCE(1);
+ return -1;
+ }
+
+ if (pud_large(*pud))
+ return (pud_pfn(*pud) << PAGE_SHIFT) | (vaddr & ~PUD_PAGE_MASK);
+
+ pmd = pmd_offset(pud, vaddr);
+ if (pmd_none(*pmd)) {
+ WARN_ON_ONCE(1);
+ return -1;
+ }
+
+ if (pmd_large(*pmd))
+ return (pmd_pfn(*pmd) << PAGE_SHIFT) | (vaddr & ~PMD_PAGE_MASK);
+
+ pte = pte_offset_kernel(pmd, vaddr);
+ if (pte_none(*pte)) {
+ WARN_ON_ONCE(1);
+ return -1;
+ }
+
+ return (pte_pfn(*pte) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
+}
+
+/*
+ * This is a relatively normal page table walk, except that it
+ * also tries to allocate page tables pages along the way.
+ *
+ * Returns a pointer to a PTE on success, or NULL on failure.
+ */
+static pte_t *kaiser_pagetable_walk(unsigned long address, bool user)
+{
+ pmd_t *pmd;
+ pud_t *pud;
+ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+ unsigned long prot = _KERNPG_TABLE;
+
+ if (pgd_none(*pgd)) {
+ WARN_ONCE(1, "All shadow pgds should have been populated");
+ return NULL;
+ }
+ BUILD_BUG_ON(pgd_large(*pgd) != 0);
+
+ if (user) {
+ /*
+ * The vsyscall page is the only page that will have
+ * _PAGE_USER set. Catch everything else.
+ */
+ BUG_ON(address != VSYSCALL_ADDR);
+
+ set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
+ prot = _PAGE_TABLE;
+ }
+
+ pud = pud_offset(pgd, address);
+ /* The shadow page tables do not use large mappings: */
+ if (pud_large(*pud)) {
+ WARN_ON(1);
+ return NULL;
+ }
+ if (pud_none(*pud)) {
+ unsigned long new_pmd_page = __get_free_page(gfp);
+ if (!new_pmd_page)
+ return NULL;
+ spin_lock(&shadow_table_allocation_lock);
+ if (pud_none(*pud)) {
+ set_pud(pud, __pud(prot | __pa(new_pmd_page)));
+ __inc_zone_page_state(virt_to_page((void *)
+ new_pmd_page), NR_KAISERTABLE);
+ } else
+ free_page(new_pmd_page);
+ spin_unlock(&shadow_table_allocation_lock);
+ }
+
+ pmd = pmd_offset(pud, address);
+ /* The shadow page tables do not use large mappings: */
+ if (pmd_large(*pmd)) {
+ WARN_ON(1);
+ return NULL;
+ }
+ if (pmd_none(*pmd)) {
+ unsigned long new_pte_page = __get_free_page(gfp);
+ if (!new_pte_page)
+ return NULL;
+ spin_lock(&shadow_table_allocation_lock);
+ if (pmd_none(*pmd)) {
+ set_pmd(pmd, __pmd(prot | __pa(new_pte_page)));
+ __inc_zone_page_state(virt_to_page((void *)
+ new_pte_page), NR_KAISERTABLE);
+ } else
+ free_page(new_pte_page);
+ spin_unlock(&shadow_table_allocation_lock);
+ }
+
+ return pte_offset_kernel(pmd, address);
+}
+
+static int kaiser_add_user_map(const void *__start_addr, unsigned long size,
+ unsigned long flags)
+{
+ int ret = 0;
+ pte_t *pte;
+ unsigned long start_addr = (unsigned long )__start_addr;
+ unsigned long address = start_addr & PAGE_MASK;
+ unsigned long end_addr = PAGE_ALIGN(start_addr + size);
+ unsigned long target_address;
+
+ /*
+ * It is convenient for callers to pass in __PAGE_KERNEL etc,
+ * and there is no actual harm from setting _PAGE_GLOBAL, so
+ * long as CR4.PGE is not set. But it is nonetheless troubling
+ * to see Kaiser itself setting _PAGE_GLOBAL (now that "nokaiser"
+ * requires that not to be #defined to 0): so mask it off here.
+ */
+ flags &= ~_PAGE_GLOBAL;
+ if (!(__supported_pte_mask & _PAGE_NX))
+ flags &= ~_PAGE_NX;
+
+ for (; address < end_addr; address += PAGE_SIZE) {
+ target_address = get_pa_from_mapping(address);
+ if (target_address == -1) {
+ ret = -EIO;
+ break;
+ }
+ pte = kaiser_pagetable_walk(address, flags & _PAGE_USER);
+ if (!pte) {
+ ret = -ENOMEM;
+ break;
+ }
+ if (pte_none(*pte)) {
+ set_pte(pte, __pte(flags | target_address));
+ } else {
+ pte_t tmp;
+ set_pte(&tmp, __pte(flags | target_address));
+ WARN_ON_ONCE(!pte_same(*pte, tmp));
+ }
+ }
+ return ret;
+}
+
+static int kaiser_add_user_map_ptrs(const void *start, const void *end, unsigned long flags)
+{
+ unsigned long size = end - start;
+
+ return kaiser_add_user_map(start, size, flags);
+}
+
+/*
+ * Ensure that the top level of the (shadow) page tables are
+ * entirely populated. This ensures that all processes that get
+ * forked have the same entries. This way, we do not have to
+ * ever go set up new entries in older processes.
+ *
+ * Note: we never free these, so there are no updates to them
+ * after this.
+ */
+static void __init kaiser_init_all_pgds(void)
+{
+ pgd_t *pgd;
+ int i = 0;
+
+ pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0));
+ for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
+ pgd_t new_pgd;
+ pud_t *pud = pud_alloc_one(&init_mm,
+ PAGE_OFFSET + i * PGDIR_SIZE);
+ if (!pud) {
+ WARN_ON(1);
+ break;
+ }
+ inc_zone_page_state(virt_to_page(pud), NR_KAISERTABLE);
+ new_pgd = __pgd(_KERNPG_TABLE |__pa(pud));
+ /*
+ * Make sure not to stomp on some other pgd entry.
+ */
+ if (!pgd_none(pgd[i])) {
+ WARN_ON(1);
+ continue;
+ }
+ set_pgd(pgd + i, new_pgd);
+ }
+}
+
+#define kaiser_add_user_map_early(start, size, flags) do { \
+ int __ret = kaiser_add_user_map(start, size, flags); \
+ WARN_ON(__ret); \
+} while (0)
+
+#define kaiser_add_user_map_ptrs_early(start, end, flags) do { \
+ int __ret = kaiser_add_user_map_ptrs(start, end, flags); \
+ WARN_ON(__ret); \
+} while (0)
+
+void __init kaiser_check_boottime_disable(void)
+{
+ bool enable = true;
+ char arg[5];
+ int ret;
+
+ if (boot_cpu_has(X86_FEATURE_XENPV))
+ goto silent_disable;
+
+ ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
+ if (ret > 0) {
+ if (!strncmp(arg, "on", 2))
+ goto enable;
+
+ if (!strncmp(arg, "off", 3))
+ goto disable;
+
+ if (!strncmp(arg, "auto", 4))
+ goto skip;
+ }
+
+ if (cmdline_find_option_bool(boot_command_line, "nopti"))
+ goto disable;
+
+skip:
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ goto disable;
+
+enable:
+ if (enable)
+ setup_force_cpu_cap(X86_FEATURE_KAISER);
+
+ return;
+
+disable:
+ pr_info("disabled\n");
+
+silent_disable:
+ kaiser_enabled = 0;
+ setup_clear_cpu_cap(X86_FEATURE_KAISER);
+}
+
+/*
+ * If anything in here fails, we will likely die on one of the
+ * first kernel->user transitions and init will die. But, we
+ * will have most of the kernel up by then and should be able to
+ * get a clean warning out of it. If we BUG_ON() here, we run
+ * the risk of being before we have good console output.
+ */
+void __init kaiser_init(void)
+{
+ int cpu;
+
+ if (!kaiser_enabled)
+ return;
+
+ kaiser_init_all_pgds();
+
+ /*
+ * Note that this sets _PAGE_USER and it needs to happen when the
+ * pagetable hierarchy gets created, i.e., early. Otherwise
+ * kaiser_pagetable_walk() will encounter initialized PTEs in the
+ * hierarchy and not set the proper permissions, leading to the
+ * pagefaults with page-protection violations when trying to read the
+ * vsyscall page. For example.
+ */
+ if (vsyscall_enabled())
+ kaiser_add_user_map_early((void *)VSYSCALL_ADDR,
+ PAGE_SIZE,
+ __PAGE_KERNEL_VSYSCALL);
+
+ for_each_possible_cpu(cpu) {
+ void *percpu_vaddr = __per_cpu_user_mapped_start +
+ per_cpu_offset(cpu);
+ unsigned long percpu_sz = __per_cpu_user_mapped_end -
+ __per_cpu_user_mapped_start;
+ kaiser_add_user_map_early(percpu_vaddr, percpu_sz,
+ __PAGE_KERNEL);
+ }
+
+ /*
+ * Map the entry/exit text section, which is needed at
+ * switches from user to and from kernel.
+ */
+ kaiser_add_user_map_ptrs_early(__entry_text_start, __entry_text_end,
+ __PAGE_KERNEL_RX);
+
+#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
+ kaiser_add_user_map_ptrs_early(__irqentry_text_start,
+ __irqentry_text_end,
+ __PAGE_KERNEL_RX);
+#endif
+ kaiser_add_user_map_early((void *)idt_descr.address,
+ sizeof(gate_desc) * NR_VECTORS,
+ __PAGE_KERNEL_RO);
+#ifdef CONFIG_TRACING
+ kaiser_add_user_map_early(&trace_idt_descr,
+ sizeof(trace_idt_descr),
+ __PAGE_KERNEL);
+ kaiser_add_user_map_early(&trace_idt_table,
+ sizeof(gate_desc) * NR_VECTORS,
+ __PAGE_KERNEL);
+#endif
+ kaiser_add_user_map_early(&debug_idt_descr, sizeof(debug_idt_descr),
+ __PAGE_KERNEL);
+ kaiser_add_user_map_early(&debug_idt_table,
+ sizeof(gate_desc) * NR_VECTORS,
+ __PAGE_KERNEL);
+
+ pr_info("enabled\n");
+}
+
+/* Add a mapping to the shadow mapping, and synchronize the mappings */
+int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
+{
+ if (!kaiser_enabled)
+ return 0;
+ return kaiser_add_user_map((const void *)addr, size, flags);
+}
+
+void kaiser_remove_mapping(unsigned long start, unsigned long size)
+{
+ extern void unmap_pud_range_nofree(pgd_t *pgd,
+ unsigned long start, unsigned long end);
+ unsigned long end = start + size;
+ unsigned long addr, next;
+ pgd_t *pgd;
+
+ if (!kaiser_enabled)
+ return;
+ pgd = native_get_shadow_pgd(pgd_offset_k(start));
+ for (addr = start; addr < end; pgd++, addr = next) {
+ next = pgd_addr_end(addr, end);
+ unmap_pud_range_nofree(pgd, addr, next);
+ }
+}
+
+/*
+ * Page table pages are page-aligned. The lower half of the top
+ * level is used for userspace and the top half for the kernel.
+ * This returns true for user pages that need to get copied into
+ * both the user and kernel copies of the page tables, and false
+ * for kernel pages that should only be in the kernel copy.
+ */
+static inline bool is_userspace_pgd(pgd_t *pgdp)
+{
+ return ((unsigned long)pgdp % PAGE_SIZE) < (PAGE_SIZE / 2);
+}
+
+pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ if (!kaiser_enabled)
+ return pgd;
+ /*
+ * Do we need to also populate the shadow pgd? Check _PAGE_USER to
+ * skip cases like kexec and EFI which make temporary low mappings.
+ */
+ if (pgd.pgd & _PAGE_USER) {
+ if (is_userspace_pgd(pgdp)) {
+ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
+ /*
+ * Even if the entry is *mapping* userspace, ensure
+ * that userspace can not use it. This way, if we
+ * get out to userspace running on the kernel CR3,
+ * userspace will crash instead of running.
+ */
+ if (__supported_pte_mask & _PAGE_NX)
+ pgd.pgd |= _PAGE_NX;
+ }
+ } else if (!pgd.pgd) {
+ /*
+ * pgd_clear() cannot check _PAGE_USER, and is even used to
+ * clear corrupted pgd entries: so just rely on cases like
+ * kexec and EFI never to be using pgd_clear().
+ */
+ if (!WARN_ON_ONCE((unsigned long)pgdp & PAGE_SIZE) &&
+ is_userspace_pgd(pgdp))
+ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
+ }
+ return pgd;
+}
+
+void kaiser_setup_pcid(void)
+{
+ unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET;
+
+ if (this_cpu_has(X86_FEATURE_PCID))
+ user_cr3 |= X86_CR3_PCID_USER_NOFLUSH;
+ /*
+ * These variables are used by the entry/exit
+ * code to change PCID and pgd and TLB flushing.
+ */
+ this_cpu_write(x86_cr3_pcid_user, user_cr3);
+}
+
+/*
+ * Make a note that this cpu will need to flush USER tlb on return to user.
+ * If cpu does not have PCID, then the NOFLUSH bit will never have been set.
+ */
+void kaiser_flush_tlb_on_return_to_user(void)
+{
+ if (this_cpu_has(X86_FEATURE_PCID))
+ this_cpu_write(x86_cr3_pcid_user,
+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+}
+EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 4e5ac46adc9d..fdfa25c83119 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -121,11 +121,22 @@ void __init kasan_init(void)
kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
(void *)KASAN_SHADOW_END);
- memset(kasan_zero_page, 0, PAGE_SIZE);
-
load_cr3(init_level4_pgt);
__flush_tlb_all();
- init_task.kasan_depth = 0;
+ /*
+ * kasan_zero_page has been used as early shadow memory, thus it may
+ * contain some garbage. Now we can clear and write protect it, since
+ * after the TLB flush no one should write to it.
+ */
+ memset(kasan_zero_page, 0, PAGE_SIZE);
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO);
+ set_pte(&kasan_zero_pte[i], pte);
+ }
+ /* Flush TLBs again to be sure that write protection applied. */
+ __flush_tlb_all();
+
+ init_task.kasan_depth = 0;
pr_info("KernelAddressSanitizer initialized\n");
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 4540e8880cd9..ac9c7797b632 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -52,6 +52,7 @@ static DEFINE_SPINLOCK(cpa_lock);
#define CPA_FLUSHTLB 1
#define CPA_ARRAY 2
#define CPA_PAGES_ARRAY 4
+#define CPA_FREE_PAGETABLES 8
#ifdef CONFIG_PROC_FS
static unsigned long direct_pages_count[PG_LEVEL_NUM];
@@ -723,10 +724,13 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
return 0;
}
-static bool try_to_free_pte_page(pte_t *pte)
+static bool try_to_free_pte_page(struct cpa_data *cpa, pte_t *pte)
{
int i;
+ if (!(cpa->flags & CPA_FREE_PAGETABLES))
+ return false;
+
for (i = 0; i < PTRS_PER_PTE; i++)
if (!pte_none(pte[i]))
return false;
@@ -735,10 +739,13 @@ static bool try_to_free_pte_page(pte_t *pte)
return true;
}
-static bool try_to_free_pmd_page(pmd_t *pmd)
+static bool try_to_free_pmd_page(struct cpa_data *cpa, pmd_t *pmd)
{
int i;
+ if (!(cpa->flags & CPA_FREE_PAGETABLES))
+ return false;
+
for (i = 0; i < PTRS_PER_PMD; i++)
if (!pmd_none(pmd[i]))
return false;
@@ -759,7 +766,9 @@ static bool try_to_free_pud_page(pud_t *pud)
return true;
}
-static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
+static bool unmap_pte_range(struct cpa_data *cpa, pmd_t *pmd,
+ unsigned long start,
+ unsigned long end)
{
pte_t *pte = pte_offset_kernel(pmd, start);
@@ -770,22 +779,23 @@ static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
pte++;
}
- if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
+ if (try_to_free_pte_page(cpa, (pte_t *)pmd_page_vaddr(*pmd))) {
pmd_clear(pmd);
return true;
}
return false;
}
-static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
+static void __unmap_pmd_range(struct cpa_data *cpa, pud_t *pud, pmd_t *pmd,
unsigned long start, unsigned long end)
{
- if (unmap_pte_range(pmd, start, end))
- if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
+ if (unmap_pte_range(cpa, pmd, start, end))
+ if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud)))
pud_clear(pud);
}
-static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
+static void unmap_pmd_range(struct cpa_data *cpa, pud_t *pud,
+ unsigned long start, unsigned long end)
{
pmd_t *pmd = pmd_offset(pud, start);
@@ -796,7 +806,7 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
unsigned long pre_end = min_t(unsigned long, end, next_page);
- __unmap_pmd_range(pud, pmd, start, pre_end);
+ __unmap_pmd_range(cpa, pud, pmd, start, pre_end);
start = pre_end;
pmd++;
@@ -809,7 +819,8 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
if (pmd_large(*pmd))
pmd_clear(pmd);
else
- __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
+ __unmap_pmd_range(cpa, pud, pmd,
+ start, start + PMD_SIZE);
start += PMD_SIZE;
pmd++;
@@ -819,17 +830,19 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
* 4K leftovers?
*/
if (start < end)
- return __unmap_pmd_range(pud, pmd, start, end);
+ return __unmap_pmd_range(cpa, pud, pmd, start, end);
/*
* Try again to free the PMD page if haven't succeeded above.
*/
if (!pud_none(*pud))
- if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
+ if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud)))
pud_clear(pud);
}
-static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
+static void __unmap_pud_range(struct cpa_data *cpa, pgd_t *pgd,
+ unsigned long start,
+ unsigned long end)
{
pud_t *pud = pud_offset(pgd, start);
@@ -840,7 +853,7 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
unsigned long pre_end = min_t(unsigned long, end, next_page);
- unmap_pmd_range(pud, start, pre_end);
+ unmap_pmd_range(cpa, pud, start, pre_end);
start = pre_end;
pud++;
@@ -854,7 +867,7 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
if (pud_large(*pud))
pud_clear(pud);
else
- unmap_pmd_range(pud, start, start + PUD_SIZE);
+ unmap_pmd_range(cpa, pud, start, start + PUD_SIZE);
start += PUD_SIZE;
pud++;
@@ -864,7 +877,7 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
* 2M leftovers?
*/
if (start < end)
- unmap_pmd_range(pud, start, end);
+ unmap_pmd_range(cpa, pud, start, end);
/*
* No need to try to free the PUD page because we'll free it in
@@ -872,6 +885,24 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
*/
}
+static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
+{
+ struct cpa_data cpa = {
+ .flags = CPA_FREE_PAGETABLES,
+ };
+
+ __unmap_pud_range(&cpa, pgd, start, end);
+}
+
+void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end)
+{
+ struct cpa_data cpa = {
+ .flags = 0,
+ };
+
+ __unmap_pud_range(&cpa, pgd, start, end);
+}
+
static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end)
{
pgd_t *pgd_entry = root + pgd_index(addr);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 3f1bb4f93a5a..3146b1da6d72 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -750,11 +750,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
return 1;
while (cursor < to) {
- if (!devmem_is_allowed(pfn)) {
- pr_info("x86/PAT: Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
- current->comm, from, to - 1);
+ if (!devmem_is_allowed(pfn))
return 0;
- }
cursor += PAGE_SIZE;
pfn++;
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index fb0a9dd1d6e4..dbc27a2b4ad5 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -6,7 +6,7 @@
#include <asm/fixmap.h>
#include <asm/mtrr.h>
-#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
+#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
#ifdef CONFIG_HIGHPTE
#define PGALLOC_USER_GFP __GFP_HIGHMEM
@@ -340,14 +340,24 @@ static inline void _pgd_free(pgd_t *pgd)
kmem_cache_free(pgd_cache, pgd);
}
#else
+
+/*
+ * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is
+ * both 8k in size and 8k-aligned. That lets us just flip bit 12
+ * in a pointer to swap between the two 4k halves.
+ */
+#define PGD_ALLOCATION_ORDER kaiser_enabled
+
static inline pgd_t *_pgd_alloc(void)
{
- return (pgd_t *)__get_free_page(PGALLOC_GFP);
+ /* No __GFP_REPEAT: to avoid page allocation stalls in order-1 case */
+ return (pgd_t *)__get_free_pages(PGALLOC_GFP & ~__GFP_REPEAT,
+ PGD_ALLOCATION_ORDER);
}
static inline void _pgd_free(pgd_t *pgd)
{
- free_page((unsigned long)pgd);
+ free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
}
#endif /* CONFIG_X86_PAE */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 5a760fd66bec..7cad01af6dcd 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -6,16 +6,17 @@
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/cpu.h>
+#include <linux/debugfs.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
-#include <linux/debugfs.h>
+#include <asm/kaiser.h>
/*
- * Smarter SMP flushing macros.
+ * TLB flushing, formerly SMP-only
* c/o Linus Torvalds.
*
* These mean you can really definitely utterly forget about
@@ -34,6 +35,36 @@ struct flush_tlb_info {
unsigned long flush_end;
};
+static void load_new_mm_cr3(pgd_t *pgdir)
+{
+ unsigned long new_mm_cr3 = __pa(pgdir);
+
+ if (kaiser_enabled) {
+ /*
+ * We reuse the same PCID for different tasks, so we must
+ * flush all the entries for the PCID out when we change tasks.
+ * Flush KERN below, flush USER when returning to userspace in
+ * kaiser's SWITCH_USER_CR3 (_SWITCH_TO_USER_CR3) macro.
+ *
+ * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could
+ * do it here, but can only be used if X86_FEATURE_INVPCID is
+ * available - and many machines support pcid without invpcid.
+ *
+ * If X86_CR3_PCID_KERN_FLUSH actually added something, then it
+ * would be needed in the write_cr3() below - if PCIDs enabled.
+ */
+ BUILD_BUG_ON(X86_CR3_PCID_KERN_FLUSH);
+ kaiser_flush_tlb_on_return_to_user();
+ }
+
+ /*
+ * Caution: many callers of this function expect
+ * that load_cr3() is serializing and orders TLB
+ * fills with respect to the mm_cpumask writes.
+ */
+ write_cr3(new_mm_cr3);
+}
+
/*
* We cannot call mmdrop() because we are in interrupt context,
* instead update mm->cpu_vm_mask.
@@ -45,7 +76,7 @@ void leave_mm(int cpu)
BUG();
if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
- load_cr3(swapper_pg_dir);
+ load_new_mm_cr3(swapper_pg_dir);
/*
* This gets called in the idle path where RCU
* functions differently. Tracing normally
@@ -57,6 +88,109 @@ void leave_mm(int cpu)
}
EXPORT_SYMBOL_GPL(leave_mm);
+void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ switch_mm_irqs_off(prev, next, tsk);
+ local_irq_restore(flags);
+}
+
+void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ unsigned cpu = smp_processor_id();
+
+ if (likely(prev != next)) {
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ this_cpu_write(cpu_tlbstate.active_mm, next);
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
+ /*
+ * Re-load page tables.
+ *
+ * This logic has an ordering constraint:
+ *
+ * CPU 0: Write to a PTE for 'next'
+ * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
+ * CPU 1: set bit 1 in next's mm_cpumask
+ * CPU 1: load from the PTE that CPU 0 writes (implicit)
+ *
+ * We need to prevent an outcome in which CPU 1 observes
+ * the new PTE value and CPU 0 observes bit 1 clear in
+ * mm_cpumask. (If that occurs, then the IPI will never
+ * be sent, and CPU 0's TLB will contain a stale entry.)
+ *
+ * The bad outcome can occur if either CPU's load is
+ * reordered before that CPU's store, so both CPUs must
+ * execute full barriers to prevent this from happening.
+ *
+ * Thus, switch_mm needs a full barrier between the
+ * store to mm_cpumask and any operation that could load
+ * from next->pgd. TLB fills are special and can happen
+ * due to instruction fetches or for no reason at all,
+ * and neither LOCK nor MFENCE orders them.
+ * Fortunately, load_cr3() is serializing and gives the
+ * ordering guarantee we need.
+ *
+ */
+ load_new_mm_cr3(next->pgd);
+
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+
+ /* Stop flush ipis for the previous mm */
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
+
+ /* Load per-mm CR4 state */
+ load_mm_cr4(next);
+
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+ /*
+ * Load the LDT, if the LDT is different.
+ *
+ * It's possible that prev->context.ldt doesn't match
+ * the LDT register. This can happen if leave_mm(prev)
+ * was called and then modify_ldt changed
+ * prev->context.ldt but suppressed an IPI to this CPU.
+ * In this case, prev->context.ldt != NULL, because we
+ * never set context.ldt to NULL while the mm still
+ * exists. That means that next->context.ldt !=
+ * prev->context.ldt, because mms never share an LDT.
+ */
+ if (unlikely(prev->context.ldt != next->context.ldt))
+ load_mm_ldt(next);
+#endif
+ } else {
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
+
+ if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
+ /*
+ * On established mms, the mm_cpumask is only changed
+ * from irq context, from ptep_clear_flush() while in
+ * lazy tlb mode, and here. Irqs are blocked during
+ * schedule, protecting us from simultaneous changes.
+ */
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
+ /*
+ * We were in lazy tlb mode and leave_mm disabled
+ * tlb flush IPI delivery. We must reload CR3
+ * to make sure to use no freed page tables.
+ *
+ * As above, load_cr3() is serializing and orders TLB
+ * fills with respect to the mm_cpumask write.
+ */
+ load_new_mm_cr3(next->pgd);
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ load_mm_cr4(next);
+ load_mm_ldt(next);
+ }
+ }
+}
+
/*
* The flush IPI assumes that a thread switch happens in this order:
* [cpu0: the cpu that switches]
@@ -104,7 +238,7 @@ static void flush_tlb_func(void *info)
inc_irq_stat(irq_tlb_count);
- if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
+ if (f->flush_mm && f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
return;
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
@@ -158,23 +292,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
}
-void flush_tlb_current_task(void)
-{
- struct mm_struct *mm = current->mm;
-
- preempt_disable();
-
- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-
- /* This is an implicit full barrier that synchronizes with switch_mm. */
- local_flush_tlb();
-
- trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
- if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
- flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
- preempt_enable();
-}
-
/*
* See Documentation/x86/tlb.txt for details. We choose 33
* because it is large enough to cover the vast majority (at
@@ -195,6 +312,12 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
preempt_disable();
+
+ if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
+ base_pages_to_flush = (end - start) >> PAGE_SHIFT;
+ if (base_pages_to_flush > tlb_single_page_flush_ceiling)
+ base_pages_to_flush = TLB_FLUSH_ALL;
+
if (current->active_mm != mm) {
/* Synchronize with switch_mm. */
smp_mb();
@@ -211,15 +334,11 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
goto out;
}
- if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
- base_pages_to_flush = (end - start) >> PAGE_SHIFT;
-
/*
* Both branches below are implicit full barriers (MOV to CR or
* INVLPG) that synchronize with switch_mm.
*/
- if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
- base_pages_to_flush = TLB_FLUSH_ALL;
+ if (base_pages_to_flush == TLB_FLUSH_ALL) {
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
local_flush_tlb();
} else {
@@ -240,33 +359,6 @@ out:
preempt_enable();
}
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
-{
- struct mm_struct *mm = vma->vm_mm;
-
- preempt_disable();
-
- if (current->active_mm == mm) {
- if (current->mm) {
- /*
- * Implicit full barrier (INVLPG) that synchronizes
- * with switch_mm.
- */
- __flush_tlb_one(start);
- } else {
- leave_mm(smp_processor_id());
-
- /* Synchronize with switch_mm. */
- smp_mb();
- }
- }
-
- if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
- flush_tlb_others(mm_cpumask(mm), mm, start, start + PAGE_SIZE);
-
- preempt_enable();
-}
-
static void do_flush_tlb_all(void *info)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c
index bb461cfd01ab..526536c81ddc 100644
--- a/arch/x86/pci/broadcom_bus.c
+++ b/arch/x86/pci/broadcom_bus.c
@@ -97,7 +97,7 @@ static int __init broadcom_postcore_init(void)
* We should get host bridge information from ACPI unless the BIOS
* doesn't support it.
*/
- if (acpi_os_get_root_pointer())
+ if (!acpi_disabled && acpi_os_get_root_pointer())
return 0;
#endif
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index ea48449b2e63..64fbc7e33226 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -28,8 +28,7 @@ struct bmp_header {
void __init efi_bgrt_init(void)
{
acpi_status status;
- void __iomem *image;
- bool ioremapped = false;
+ void *image;
struct bmp_header bmp_header;
if (acpi_disabled)
@@ -70,20 +69,14 @@ void __init efi_bgrt_init(void)
return;
}
- image = efi_lookup_mapped_addr(bgrt_tab->image_address);
+ image = memremap(bgrt_tab->image_address, sizeof(bmp_header), MEMREMAP_WB);
if (!image) {
- image = early_ioremap(bgrt_tab->image_address,
- sizeof(bmp_header));
- ioremapped = true;
- if (!image) {
- pr_err("Ignoring BGRT: failed to map image header memory\n");
- return;
- }
+ pr_err("Ignoring BGRT: failed to map image header memory\n");
+ return;
}
- memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
- if (ioremapped)
- early_iounmap(image, sizeof(bmp_header));
+ memcpy(&bmp_header, image, sizeof(bmp_header));
+ memunmap(image);
bgrt_image_size = bmp_header.size;
bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN);
@@ -93,18 +86,14 @@ void __init efi_bgrt_init(void)
return;
}
- if (ioremapped) {
- image = early_ioremap(bgrt_tab->image_address,
- bmp_header.size);
- if (!image) {
- pr_err("Ignoring BGRT: failed to map image memory\n");
- kfree(bgrt_image);
- bgrt_image = NULL;
- return;
- }
+ image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB);
+ if (!image) {
+ pr_err("Ignoring BGRT: failed to map image memory\n");
+ kfree(bgrt_image);
+ bgrt_image = NULL;
+ return;
}
- memcpy_fromio(bgrt_image, image, bgrt_image_size);
- if (ioremapped)
- early_iounmap(image, bmp_header.size);
+ memcpy(bgrt_image, image, bgrt_image_size);
+ memunmap(image);
}
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 0b7a63d98440..805a3271a137 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -4,6 +4,7 @@
#include <asm/cacheflush.h>
#include <asm/pgtable.h>
#include <asm/realmode.h>
+#include <asm/kaiser.h>
struct real_mode_header *real_mode_header;
u32 *trampoline_cr4_features;
@@ -15,7 +16,8 @@ void __init reserve_real_mode(void)
size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
/* Has to be under 1M so we can execute real-mode AP code. */
- mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
+ mem = memblock_find_in_range(0, 1 << 20, size,
+ KAISER_KERNEL_PGD_ALIGNMENT);
if (!mem)
panic("Cannot allocate trampoline\n");
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index 2730d775ef9a..59610be05468 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -8,6 +8,9 @@
#
KASAN_SANITIZE := n
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT := n
+
always := realmode.bin realmode.relocs
wakeup-objs := wakeup_asm.o wakemain.o video-mode.o
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index dac7b20d2f9d..781cca63f795 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -30,6 +30,7 @@
#include <asm/msr.h>
#include <asm/segment.h>
#include <asm/processor-flags.h>
+#include <asm/kaiser.h>
#include "realmode.h"
.text
@@ -139,7 +140,7 @@ tr_gdt:
tr_gdt_end:
.bss
- .balign PAGE_SIZE
+ .balign KAISER_KERNEL_PGD_ALIGNMENT
GLOBAL(trampoline_pgd) .space PAGE_SIZE
.balign 8
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 836a1eb5df43..3ee234b6234d 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -6,6 +6,7 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <os.h>
@@ -369,7 +370,9 @@ void free_ldt(struct mm_context *mm)
mm->arch.ldt.entry_count = 0;
}
-int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
+ unsigned long , bytecount)
{
- return do_modify_ldt_skas(func, ptr, bytecount);
+ /* See non-um modify_ldt() for why we do this cast */
+ return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount);
}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index ffa41591bff9..cbef64b508e1 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -433,6 +433,12 @@ static void __init xen_init_cpuid_mask(void)
~((1 << X86_FEATURE_MTRR) | /* disable MTRR */
(1 << X86_FEATURE_ACC)); /* thermal monitoring */
+ /*
+ * Xen PV would need some work to support PCID: CR3 handling as well
+ * as xen_flush_tlb_others() would need updating.
+ */
+ cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_PCID % 32)); /* disable PCID */
+
if (!xen_initial_domain())
cpuid_leaf1_edx_mask &=
~((1 << X86_FEATURE_ACPI)); /* disable ACPI */
diff --git a/block/bio.c b/block/bio.c
index 68bbc835bacc..63363a689922 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1268,6 +1268,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
int ret, offset;
struct iov_iter i;
struct iovec iov;
+ struct bio_vec *bvec;
iov_for_each(iov, i, *iter) {
unsigned long uaddr = (unsigned long) iov.iov_base;
@@ -1312,7 +1313,12 @@ struct bio *bio_map_user_iov(struct request_queue *q,
ret = get_user_pages_fast(uaddr, local_nr_pages,
(iter->type & WRITE) != WRITE,
&pages[cur_page]);
- if (ret < local_nr_pages) {
+ if (unlikely(ret < local_nr_pages)) {
+ for (j = cur_page; j < page_limit; j++) {
+ if (!pages[j])
+ break;
+ put_page(pages[j]);
+ }
ret = -EFAULT;
goto out_unmap;
}
@@ -1374,10 +1380,8 @@ struct bio *bio_map_user_iov(struct request_queue *q,
return bio;
out_unmap:
- for (j = 0; j < nr_pages; j++) {
- if (!pages[j])
- break;
- page_cache_release(pages[j]);
+ bio_for_each_segment_all(bvec, bio, j) {
+ put_page(bvec->bv_page);
}
out:
kfree(pages);
diff --git a/block/blk-core.c b/block/blk-core.c
index 0aa722c93d9f..a2378344b12c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -528,8 +528,8 @@ void blk_set_queue_dying(struct request_queue *q)
blk_queue_for_each_rl(rl, q) {
if (rl->rq_pool) {
- wake_up(&rl->wait[BLK_RW_SYNC]);
- wake_up(&rl->wait[BLK_RW_ASYNC]);
+ wake_up_all(&rl->wait[BLK_RW_SYNC]);
+ wake_up_all(&rl->wait[BLK_RW_ASYNC]);
}
}
}
@@ -3579,76 +3579,43 @@ int __init blk_dev_init(void)
* TODO : If necessary, we can make the histograms per-cpu and aggregate
* them when printing them out.
*/
-void
-blk_zero_latency_hist(struct io_latency_state *s)
-{
- memset(s->latency_y_axis_read, 0,
- sizeof(s->latency_y_axis_read));
- memset(s->latency_y_axis_write, 0,
- sizeof(s->latency_y_axis_write));
- s->latency_reads_elems = 0;
- s->latency_writes_elems = 0;
-}
-EXPORT_SYMBOL(blk_zero_latency_hist);
-
ssize_t
-blk_latency_hist_show(struct io_latency_state *s, char *buf)
+blk_latency_hist_show(char* name, struct io_latency_state *s, char *buf,
+ int buf_size)
{
int i;
int bytes_written = 0;
u_int64_t num_elem, elem;
int pct;
-
- num_elem = s->latency_reads_elems;
- if (num_elem > 0) {
- bytes_written += scnprintf(buf + bytes_written,
- PAGE_SIZE - bytes_written,
- "IO svc_time Read Latency Histogram (n = %llu):\n",
- num_elem);
- for (i = 0;
- i < ARRAY_SIZE(latency_x_axis_us);
- i++) {
- elem = s->latency_y_axis_read[i];
- pct = div64_u64(elem * 100, num_elem);
- bytes_written += scnprintf(buf + bytes_written,
- PAGE_SIZE - bytes_written,
- "\t< %5lluus%15llu%15d%%\n",
- latency_x_axis_us[i],
- elem, pct);
- }
- /* Last element in y-axis table is overflow */
- elem = s->latency_y_axis_read[i];
- pct = div64_u64(elem * 100, num_elem);
- bytes_written += scnprintf(buf + bytes_written,
- PAGE_SIZE - bytes_written,
- "\t> %5dms%15llu%15d%%\n", 10,
- elem, pct);
- }
- num_elem = s->latency_writes_elems;
- if (num_elem > 0) {
- bytes_written += scnprintf(buf + bytes_written,
- PAGE_SIZE - bytes_written,
- "IO svc_time Write Latency Histogram (n = %llu):\n",
- num_elem);
- for (i = 0;
- i < ARRAY_SIZE(latency_x_axis_us);
- i++) {
- elem = s->latency_y_axis_write[i];
- pct = div64_u64(elem * 100, num_elem);
- bytes_written += scnprintf(buf + bytes_written,
- PAGE_SIZE - bytes_written,
- "\t< %5lluus%15llu%15d%%\n",
- latency_x_axis_us[i],
- elem, pct);
- }
- /* Last element in y-axis table is overflow */
- elem = s->latency_y_axis_write[i];
- pct = div64_u64(elem * 100, num_elem);
- bytes_written += scnprintf(buf + bytes_written,
- PAGE_SIZE - bytes_written,
- "\t> %5dms%15llu%15d%%\n", 10,
- elem, pct);
+ u_int64_t average;
+
+ num_elem = s->latency_elems;
+ if (num_elem > 0) {
+ average = div64_u64(s->latency_sum, s->latency_elems);
+ bytes_written += scnprintf(buf + bytes_written,
+ buf_size - bytes_written,
+ "IO svc_time %s Latency Histogram (n = %llu,"
+ " average = %llu):\n", name, num_elem, average);
+ for (i = 0;
+ i < ARRAY_SIZE(latency_x_axis_us);
+ i++) {
+ elem = s->latency_y_axis[i];
+ pct = div64_u64(elem * 100, num_elem);
+ bytes_written += scnprintf(buf + bytes_written,
+ PAGE_SIZE - bytes_written,
+ "\t< %6lluus%15llu%15d%%\n",
+ latency_x_axis_us[i],
+ elem, pct);
+ }
+ /* Last element in y-axis table is overflow */
+ elem = s->latency_y_axis[i];
+ pct = div64_u64(elem * 100, num_elem);
+ bytes_written += scnprintf(buf + bytes_written,
+ PAGE_SIZE - bytes_written,
+ "\t>=%6lluus%15llu%15d%%\n",
+ latency_x_axis_us[i - 1], elem, pct);
}
+
return bytes_written;
}
EXPORT_SYMBOL(blk_latency_hist_show);
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 43f5bdb6b570..eb58b73ca925 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -168,6 +168,18 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
spawn->alg = NULL;
spawns = &inst->alg.cra_users;
+
+ /*
+ * We may encounter an unregistered instance here, since
+ * an instance's spawns are set up prior to the instance
+ * being registered. An unregistered instance will have
+ * NULL ->cra_users.next, since ->cra_users isn't
+ * properly initialized until registration. But an
+ * unregistered instance cannot have any users, so treat
+ * it the same as ->cra_users being empty.
+ */
+ if (spawns->next == NULL)
+ break;
}
} while ((spawns = crypto_more_spawns(alg, &stack, &top,
&secondary_spawns)));
diff --git a/crypto/api.c b/crypto/api.c
index bbc147cb5dec..e5c1abfd451f 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -24,6 +24,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/completion.h>
#include "internal.h"
LIST_HEAD(crypto_alg_list);
@@ -611,5 +612,17 @@ int crypto_has_alg(const char *name, u32 type, u32 mask)
}
EXPORT_SYMBOL_GPL(crypto_has_alg);
+void crypto_req_done(struct crypto_async_request *req, int err)
+{
+ struct crypto_wait *wait = req->data;
+
+ if (err == -EINPROGRESS)
+ return;
+
+ wait->err = err;
+ complete(&wait->completion);
+}
+EXPORT_SYMBOL_GPL(crypto_req_done);
+
MODULE_DESCRIPTION("Cryptographic core API");
MODULE_LICENSE("GPL");
diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index 13c4e5a5fe8c..4471e7ed8c12 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -399,6 +399,8 @@ int x509_extract_key_data(void *context, size_t hdrlen,
ctx->cert->pub->pkey_algo = PKEY_ALGO_RSA;
/* Discard the BIT STRING metadata */
+ if (vlen < 1 || *(const u8 *)value != 0)
+ return -EBADMSG;
ctx->key = value + 1;
ctx->key_size = vlen - 1;
return 0;
diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c
index 99c3cce01290..0214600ba071 100644
--- a/crypto/chacha20poly1305.c
+++ b/crypto/chacha20poly1305.c
@@ -600,6 +600,11 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
CRYPTO_ALG_TYPE_AHASH_MASK);
if (IS_ERR(poly))
return PTR_ERR(poly);
+ poly_hash = __crypto_hash_alg_common(poly);
+
+ err = -EINVAL;
+ if (poly_hash->digestsize != POLY1305_DIGEST_SIZE)
+ goto out_put_poly;
err = -ENOMEM;
inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
@@ -608,7 +613,6 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
ctx = aead_instance_ctx(inst);
ctx->saltlen = CHACHAPOLY_IV_SIZE - ivsize;
- poly_hash = __crypto_hash_alg_common(poly);
err = crypto_init_ahash_spawn(&ctx->poly, poly_hash,
aead_crypto_instance(inst));
if (err)
diff --git a/crypto/hmac.c b/crypto/hmac.c
index 72e38c098bb3..ba07fb6221ae 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -194,11 +194,15 @@ static int hmac_create(struct crypto_template *tmpl, struct rtattr **tb)
salg = shash_attr_alg(tb[1], 0, 0);
if (IS_ERR(salg))
return PTR_ERR(salg);
+ alg = &salg->base;
+ /* The underlying hash algorithm must be unkeyed */
err = -EINVAL;
+ if (crypto_shash_alg_has_setkey(salg))
+ goto out_put_alg;
+
ds = salg->digestsize;
ss = salg->statesize;
- alg = &salg->base;
if (ds > alg->cra_blocksize ||
ss < alg->cra_blocksize)
goto out_put_alg;
diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c
index b4f3930266b1..f620fe09d20a 100644
--- a/crypto/mcryptd.c
+++ b/crypto/mcryptd.c
@@ -80,6 +80,7 @@ static int mcryptd_init_queue(struct mcryptd_queue *queue,
pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
+ spin_lock_init(&cpu_queue->q_lock);
}
return 0;
}
@@ -103,15 +104,16 @@ static int mcryptd_enqueue_request(struct mcryptd_queue *queue,
int cpu, err;
struct mcryptd_cpu_queue *cpu_queue;
- cpu = get_cpu();
- cpu_queue = this_cpu_ptr(queue->cpu_queue);
- rctx->tag.cpu = cpu;
+ cpu_queue = raw_cpu_ptr(queue->cpu_queue);
+ spin_lock(&cpu_queue->q_lock);
+ cpu = smp_processor_id();
+ rctx->tag.cpu = smp_processor_id();
err = crypto_enqueue_request(&cpu_queue->queue, request);
pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
cpu, cpu_queue, request);
+ spin_unlock(&cpu_queue->q_lock);
queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
- put_cpu();
return err;
}
@@ -164,16 +166,11 @@ static void mcryptd_queue_worker(struct work_struct *work)
cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
i = 0;
while (i < MCRYPTD_BATCH || single_task_running()) {
- /*
- * preempt_disable/enable is used to prevent
- * being preempted by mcryptd_enqueue_request()
- */
- local_bh_disable();
- preempt_disable();
+
+ spin_lock_bh(&cpu_queue->q_lock);
backlog = crypto_get_backlog(&cpu_queue->queue);
req = crypto_dequeue_request(&cpu_queue->queue);
- preempt_enable();
- local_bh_enable();
+ spin_unlock_bh(&cpu_queue->q_lock);
if (!req) {
mcryptd_opportunistic_flush();
@@ -188,7 +185,7 @@ static void mcryptd_queue_worker(struct work_struct *work)
++i;
}
if (cpu_queue->queue.qlen)
- queue_work(kcrypto_wq, &cpu_queue->work);
+ queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
}
void mcryptd_flusher(struct work_struct *__work)
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index ee9cfb99fe25..f8ec3d4ba4a8 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -254,6 +254,14 @@ static void pcrypt_aead_exit_tfm(struct crypto_aead *tfm)
crypto_free_aead(ctx->child);
}
+static void pcrypt_free(struct aead_instance *inst)
+{
+ struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst);
+
+ crypto_drop_aead(&ctx->spawn);
+ kfree(inst);
+}
+
static int pcrypt_init_instance(struct crypto_instance *inst,
struct crypto_alg *alg)
{
@@ -319,6 +327,8 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb,
inst->alg.encrypt = pcrypt_aead_encrypt;
inst->alg.decrypt = pcrypt_aead_decrypt;
+ inst->free = pcrypt_free;
+
err = aead_register_instance(tmpl, inst);
if (err)
goto out_drop_aead;
@@ -349,14 +359,6 @@ static int pcrypt_create(struct crypto_template *tmpl, struct rtattr **tb)
return -EINVAL;
}
-static void pcrypt_free(struct crypto_instance *inst)
-{
- struct pcrypt_instance_ctx *ctx = crypto_instance_ctx(inst);
-
- crypto_drop_aead(&ctx->spawn);
- kfree(inst);
-}
-
static int pcrypt_cpumask_change_notify(struct notifier_block *self,
unsigned long val, void *data)
{
@@ -469,7 +471,6 @@ static void pcrypt_fini_padata(struct padata_pcrypt *pcrypt)
static struct crypto_template pcrypt_tmpl = {
.name = "pcrypt",
.create = pcrypt_create,
- .free = pcrypt_free,
.module = THIS_MODULE,
};
diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c
index f550b5d94630..d7da0eea5622 100644
--- a/crypto/salsa20_generic.c
+++ b/crypto/salsa20_generic.c
@@ -188,13 +188,6 @@ static int encrypt(struct blkcipher_desc *desc,
salsa20_ivsetup(ctx, walk.iv);
- if (likely(walk.nbytes == nbytes))
- {
- salsa20_encrypt_bytes(ctx, walk.dst.virt.addr,
- walk.src.virt.addr, nbytes);
- return blkcipher_walk_done(desc, &walk, 0);
- }
-
while (walk.nbytes >= 64) {
salsa20_encrypt_bytes(ctx, walk.dst.virt.addr,
walk.src.virt.addr,
diff --git a/crypto/shash.c b/crypto/shash.c
index eba42e28af4c..641568d35599 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -24,11 +24,12 @@
static const struct crypto_type crypto_shash_type;
-static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
- unsigned int keylen)
+int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
+ unsigned int keylen)
{
return -ENOSYS;
}
+EXPORT_SYMBOL_GPL(shash_no_setkey);
static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key,
unsigned int keylen)
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 46a4a757d478..f522828d45c9 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -410,7 +410,7 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
}
sg_init_aead(sg, xbuf,
- *b_size + (enc ? authsize : 0));
+ *b_size + (enc ? 0 : authsize));
sg_init_aead(sgout, xoutbuf,
*b_size + (enc ? authsize : 0));
@@ -418,7 +418,9 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
sg_set_buf(&sg[0], assoc, aad_size);
sg_set_buf(&sgout[0], assoc, aad_size);
- aead_request_set_crypt(req, sg, sgout, *b_size, iv);
+ aead_request_set_crypt(req, sg, sgout,
+ *b_size + (enc ? 0 : authsize),
+ iv);
aead_request_set_ad(req, aad_size);
if (secs)
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 6682c5daf742..4c9be45ea328 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -1020,7 +1020,7 @@ skip:
/* The record may be cleared by others, try read next record */
if (len == -ENOENT)
goto skip;
- else if (len < sizeof(*rcd)) {
+ else if (len < 0 || len < sizeof(*rcd)) {
rc = -EIO;
goto out;
}
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index bd9d7861c22f..87ab35bae6b0 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -513,8 +513,6 @@ struct binder_priority {
* (protected by @inner_lock)
* @todo: list of work for this process
* (protected by @inner_lock)
- * @wait: wait queue head to wait for proc work
- * (invariant after initialized)
* @stats: per-process binder statistics
* (atomics, no lock needed)
* @delivered_death: list of delivered death notification
@@ -555,7 +553,6 @@ struct binder_proc {
bool is_dead;
struct list_head todo;
- wait_queue_head_t wait;
struct binder_stats stats;
struct list_head delivered_death;
int max_threads;
@@ -2407,7 +2404,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
debug_id, (u64)fda->num_fds);
continue;
}
- fd_array = (u32 *)(parent_buffer + fda->parent_offset);
+ fd_array = (u32 *)(parent_buffer + (uintptr_t)fda->parent_offset);
for (fd_index = 0; fd_index < fda->num_fds; fd_index++)
task_close_fd(proc, fd_array[fd_index]);
} break;
@@ -2631,7 +2628,7 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda,
*/
parent_buffer = parent->buffer -
binder_alloc_get_user_buffer_offset(&target_proc->alloc);
- fd_array = (u32 *)(parent_buffer + fda->parent_offset);
+ fd_array = (u32 *)(parent_buffer + (uintptr_t)fda->parent_offset);
if (!IS_ALIGNED((unsigned long)fd_array, sizeof(u32))) {
binder_user_error("%d:%d parent offset not aligned correctly.\n",
proc->pid, thread->pid);
@@ -2697,7 +2694,7 @@ static int binder_fixup_parent(struct binder_transaction *t,
proc->pid, thread->pid);
return -EINVAL;
}
- parent_buffer = (u8 *)(parent->buffer -
+ parent_buffer = (u8 *)((uintptr_t)parent->buffer -
binder_alloc_get_user_buffer_offset(
&target_proc->alloc));
*(binder_uintptr_t *)(parent_buffer + bp->parent_offset) = bp->buffer;
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 7dbba387d12a..18de4c457068 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -1480,7 +1480,6 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
break;
default:
- WARN_ON_ONCE(1);
return AC_ERR_SYSTEM;
}
diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c
index 527bbd595e37..d9b762a62e25 100644
--- a/drivers/atm/horizon.c
+++ b/drivers/atm/horizon.c
@@ -2804,7 +2804,7 @@ out:
return err;
out_free_irq:
- free_irq(dev->irq, dev);
+ free_irq(irq, dev);
out_free:
kfree(dev);
out_release:
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 98504ec99c7d..59992788966c 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -223,6 +223,9 @@ config GENERIC_CPU_DEVICES
config GENERIC_CPU_AUTOPROBE
bool
+config GENERIC_CPU_VULNERABILITIES
+ bool
+
config SOC_BUS
bool
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 91bbb1959d8d..3db71afbba93 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -498,10 +498,58 @@ static void __init cpu_dev_register_generic(void)
#endif
}
+#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
+
+ssize_t __weak cpu_show_meltdown(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
+static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+
+static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+ &dev_attr_meltdown.attr,
+ &dev_attr_spectre_v1.attr,
+ &dev_attr_spectre_v2.attr,
+ NULL
+};
+
+static const struct attribute_group cpu_root_vulnerabilities_group = {
+ .name = "vulnerabilities",
+ .attrs = cpu_root_vulnerabilities_attrs,
+};
+
+static void __init cpu_register_vulnerabilities(void)
+{
+ if (sysfs_create_group(&cpu_subsys.dev_root->kobj,
+ &cpu_root_vulnerabilities_group))
+ pr_err("Unable to register CPU vulnerabilities\n");
+}
+
+#else
+static inline void cpu_register_vulnerabilities(void) { }
+#endif
+
void __init cpu_dev_init(void)
{
if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
panic("Failed to register CPU subsystem");
cpu_dev_register_generic();
+ cpu_register_vulnerabilities();
}
diff --git a/drivers/base/isa.c b/drivers/base/isa.c
index 91dba65d7264..901d8185309e 100644
--- a/drivers/base/isa.c
+++ b/drivers/base/isa.c
@@ -39,7 +39,7 @@ static int isa_bus_probe(struct device *dev)
{
struct isa_driver *isa_driver = dev->platform_data;
- if (isa_driver->probe)
+ if (isa_driver && isa_driver->probe)
return isa_driver->probe(dev, to_isa_dev(dev)->id);
return 0;
@@ -49,7 +49,7 @@ static int isa_bus_remove(struct device *dev)
{
struct isa_driver *isa_driver = dev->platform_data;
- if (isa_driver->remove)
+ if (isa_driver && isa_driver->remove)
return isa_driver->remove(dev, to_isa_dev(dev)->id);
return 0;
@@ -59,7 +59,7 @@ static void isa_bus_shutdown(struct device *dev)
{
struct isa_driver *isa_driver = dev->platform_data;
- if (isa_driver->shutdown)
+ if (isa_driver && isa_driver->shutdown)
isa_driver->shutdown(dev, to_isa_dev(dev)->id);
}
@@ -67,7 +67,7 @@ static int isa_bus_suspend(struct device *dev, pm_message_t state)
{
struct isa_driver *isa_driver = dev->platform_data;
- if (isa_driver->suspend)
+ if (isa_driver && isa_driver->suspend)
return isa_driver->suspend(dev, to_isa_dev(dev)->id, state);
return 0;
@@ -77,7 +77,7 @@ static int isa_bus_resume(struct device *dev)
{
struct isa_driver *isa_driver = dev->platform_data;
- if (isa_driver->resume)
+ if (isa_driver && isa_driver->resume)
return isa_driver->resume(dev, to_isa_dev(dev)->id);
return 0;
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index ca3bcc81b623..e0699a20859f 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3767,7 +3767,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
segment_size = rbd_obj_bytes(&rbd_dev->header);
blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
q->limits.max_sectors = queue_max_hw_sectors(q);
- blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
+ blk_queue_max_segments(q, USHRT_MAX);
blk_queue_max_segment_size(q, segment_size);
blk_queue_io_min(q, segment_size);
blk_queue_io_opt(q, segment_size);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 62a93b685c54..502406c9e6e1 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1247,6 +1247,8 @@ static int zram_add(void)
blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
+ zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE;
+ zram->disk->queue->limits.chunk_sectors = 0;
blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
/*
* zram_bio_discard() will clear all logical blocks if logical block
diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c
index 0f54cb7ddcbb..e764e8ebb86b 100644
--- a/drivers/bus/arm-ccn.c
+++ b/drivers/bus/arm-ccn.c
@@ -1260,6 +1260,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn)
/* Perf driver registration */
ccn->dt.pmu = (struct pmu) {
+ .module = THIS_MODULE,
.attr_groups = arm_ccn_pmu_attr_groups,
.task_ctx_nr = perf_invalid_context,
.event_init = arm_ccn_pmu_event_init,
diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c
index 25996e256110..0ffb247b42d6 100644
--- a/drivers/bus/sunxi-rsb.c
+++ b/drivers/bus/sunxi-rsb.c
@@ -178,6 +178,7 @@ static struct bus_type sunxi_rsb_bus = {
.match = sunxi_rsb_device_match,
.probe = sunxi_rsb_device_probe,
.remove = sunxi_rsb_device_remove,
+ .uevent = of_device_uevent_modalias,
};
static void sunxi_rsb_dev_release(struct device *dev)
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index cf25020576fa..340f96e44642 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -238,7 +238,10 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,
goto out;
}
- mutex_lock(&reading_mutex);
+ if (mutex_lock_interruptible(&reading_mutex)) {
+ err = -ERESTARTSYS;
+ goto out_put;
+ }
if (!data_avail) {
bytes_read = rng_get_data(rng, rng_buffer,
rng_buffer_size(),
@@ -288,6 +291,7 @@ out:
out_unlock_reading:
mutex_unlock(&reading_mutex);
+out_put:
put_rng(rng);
goto out;
}
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 4cc72fa017c7..2f9abe0d04dc 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -239,6 +239,9 @@ struct smi_info {
/* The timer for this si. */
struct timer_list si_timer;
+ /* This flag is set, if the timer can be set */
+ bool timer_can_start;
+
/* This flag is set, if the timer is running (timer_pending() isn't enough) */
bool timer_running;
@@ -414,6 +417,8 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info)
static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
{
+ if (!smi_info->timer_can_start)
+ return;
smi_info->last_timeout_jiffies = jiffies;
mod_timer(&smi_info->si_timer, new_val);
smi_info->timer_running = true;
@@ -433,21 +438,18 @@ static void start_new_msg(struct smi_info *smi_info, unsigned char *msg,
smi_info->handlers->start_transaction(smi_info->si_sm, msg, size);
}
-static void start_check_enables(struct smi_info *smi_info, bool start_timer)
+static void start_check_enables(struct smi_info *smi_info)
{
unsigned char msg[2];
msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
- if (start_timer)
- start_new_msg(smi_info, msg, 2);
- else
- smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
+ start_new_msg(smi_info, msg, 2);
smi_info->si_state = SI_CHECKING_ENABLES;
}
-static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
+static void start_clear_flags(struct smi_info *smi_info)
{
unsigned char msg[3];
@@ -456,10 +458,7 @@ static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD;
msg[2] = WDT_PRE_TIMEOUT_INT;
- if (start_timer)
- start_new_msg(smi_info, msg, 3);
- else
- smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
+ start_new_msg(smi_info, msg, 3);
smi_info->si_state = SI_CLEARING_FLAGS;
}
@@ -494,11 +493,11 @@ static void start_getting_events(struct smi_info *smi_info)
* Note that we cannot just use disable_irq(), since the interrupt may
* be shared.
*/
-static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer)
+static inline bool disable_si_irq(struct smi_info *smi_info)
{
if ((smi_info->irq) && (!smi_info->interrupt_disabled)) {
smi_info->interrupt_disabled = true;
- start_check_enables(smi_info, start_timer);
+ start_check_enables(smi_info);
return true;
}
return false;
@@ -508,7 +507,7 @@ static inline bool enable_si_irq(struct smi_info *smi_info)
{
if ((smi_info->irq) && (smi_info->interrupt_disabled)) {
smi_info->interrupt_disabled = false;
- start_check_enables(smi_info, true);
+ start_check_enables(smi_info);
return true;
}
return false;
@@ -526,7 +525,7 @@ static struct ipmi_smi_msg *alloc_msg_handle_irq(struct smi_info *smi_info)
msg = ipmi_alloc_smi_msg();
if (!msg) {
- if (!disable_si_irq(smi_info, true))
+ if (!disable_si_irq(smi_info))
smi_info->si_state = SI_NORMAL;
} else if (enable_si_irq(smi_info)) {
ipmi_free_smi_msg(msg);
@@ -542,7 +541,7 @@ static void handle_flags(struct smi_info *smi_info)
/* Watchdog pre-timeout */
smi_inc_stat(smi_info, watchdog_pretimeouts);
- start_clear_flags(smi_info, true);
+ start_clear_flags(smi_info);
smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT;
if (smi_info->intf)
ipmi_smi_watchdog_pretimeout(smi_info->intf);
@@ -925,7 +924,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info,
* disable and messages disabled.
*/
if (smi_info->supports_event_msg_buff || smi_info->irq) {
- start_check_enables(smi_info, true);
+ start_check_enables(smi_info);
} else {
smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
if (!smi_info->curr_msg)
@@ -1232,6 +1231,7 @@ static int smi_start_processing(void *send_info,
/* Set up the timer that drives the interface. */
setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi);
+ new_smi->timer_can_start = true;
smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES);
/* Try to claim any interrupts. */
@@ -3434,10 +3434,12 @@ static void check_for_broken_irqs(struct smi_info *smi_info)
check_set_rcv_irq(smi_info);
}
-static inline void wait_for_timer_and_thread(struct smi_info *smi_info)
+static inline void stop_timer_and_thread(struct smi_info *smi_info)
{
if (smi_info->thread != NULL)
kthread_stop(smi_info->thread);
+
+ smi_info->timer_can_start = false;
if (smi_info->timer_running)
del_timer_sync(&smi_info->si_timer);
}
@@ -3635,7 +3637,7 @@ static int try_smi_init(struct smi_info *new_smi)
* Start clearing the flags before we enable interrupts or the
* timer to avoid racing with the timer.
*/
- start_clear_flags(new_smi, false);
+ start_clear_flags(new_smi);
/*
* IRQ is defined to be set when non-zero. req_events will
@@ -3713,7 +3715,7 @@ static int try_smi_init(struct smi_info *new_smi)
return 0;
out_err_stop_timer:
- wait_for_timer_and_thread(new_smi);
+ stop_timer_and_thread(new_smi);
out_err:
new_smi->interrupt_disabled = true;
@@ -3919,7 +3921,7 @@ static void cleanup_one_si(struct smi_info *to_clean)
*/
if (to_clean->irq_cleanup)
to_clean->irq_cleanup(to_clean);
- wait_for_timer_and_thread(to_clean);
+ stop_timer_and_thread(to_clean);
/*
* Timeouts are stopped, now make sure the interrupts are off
@@ -3930,7 +3932,7 @@ static void cleanup_one_si(struct smi_info *to_clean)
poll(to_clean);
schedule_timeout_uninterruptible(1);
}
- disable_si_irq(to_clean, false);
+ disable_si_irq(to_clean);
while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
poll(to_clean);
schedule_timeout_uninterruptible(1);
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 2898d19fadf5..23f52a897283 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -70,12 +70,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
u64 cursor = from;
while (cursor < to) {
- if (!devmem_is_allowed(pfn)) {
- printk(KERN_INFO
- "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
- current->comm, from, to);
+ if (!devmem_is_allowed(pfn))
return 0;
- }
cursor += PAGE_SIZE;
pfn++;
}
diff --git a/drivers/clk/imx/clk-imx6q.c b/drivers/clk/imx/clk-imx6q.c
index aab64205d866..a0df83e6b84b 100644
--- a/drivers/clk/imx/clk-imx6q.c
+++ b/drivers/clk/imx/clk-imx6q.c
@@ -419,7 +419,7 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node)
clk[IMX6QDL_CLK_GPU2D_CORE] = imx_clk_gate2("gpu2d_core", "gpu2d_core_podf", base + 0x6c, 24);
clk[IMX6QDL_CLK_GPU3D_CORE] = imx_clk_gate2("gpu3d_core", "gpu3d_core_podf", base + 0x6c, 26);
clk[IMX6QDL_CLK_HDMI_IAHB] = imx_clk_gate2("hdmi_iahb", "ahb", base + 0x70, 0);
- clk[IMX6QDL_CLK_HDMI_ISFR] = imx_clk_gate2("hdmi_isfr", "video_27m", base + 0x70, 4);
+ clk[IMX6QDL_CLK_HDMI_ISFR] = imx_clk_gate2("hdmi_isfr", "mipi_core_cfg", base + 0x70, 4);
clk[IMX6QDL_CLK_I2C1] = imx_clk_gate2("i2c1", "ipg_per", base + 0x70, 6);
clk[IMX6QDL_CLK_I2C2] = imx_clk_gate2("i2c2", "ipg_per", base + 0x70, 8);
clk[IMX6QDL_CLK_I2C3] = imx_clk_gate2("i2c3", "ipg_per", base + 0x70, 10);
diff --git a/drivers/clk/mediatek/clk-mtk.h b/drivers/clk/mediatek/clk-mtk.h
index 32d2e455eb3f..8e501c219946 100644
--- a/drivers/clk/mediatek/clk-mtk.h
+++ b/drivers/clk/mediatek/clk-mtk.h
@@ -174,6 +174,7 @@ struct mtk_pll_data {
uint32_t pcw_reg;
int pcw_shift;
const struct mtk_pll_div_table *div_table;
+ const char *parent_name;
};
void mtk_clk_register_plls(struct device_node *node,
diff --git a/drivers/clk/mediatek/clk-pll.c b/drivers/clk/mediatek/clk-pll.c
index 966cab1348da..1c5b081ad5a1 100644
--- a/drivers/clk/mediatek/clk-pll.c
+++ b/drivers/clk/mediatek/clk-pll.c
@@ -302,7 +302,10 @@ static struct clk *mtk_clk_register_pll(const struct mtk_pll_data *data,
init.name = data->name;
init.ops = &mtk_pll_ops;
- init.parent_names = &parent_name;
+ if (data->parent_name)
+ init.parent_names = &data->parent_name;
+ else
+ init.parent_names = &parent_name;
init.num_parents = 1;
clk = clk_register(NULL, &pll->hw);
diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c
index b90db615c29e..8c41c6fcb9ee 100644
--- a/drivers/clk/tegra/clk-tegra30.c
+++ b/drivers/clk/tegra/clk-tegra30.c
@@ -1063,7 +1063,7 @@ static void __init tegra30_super_clk_init(void)
* U71 divider of cclk_lp.
*/
clk = tegra_clk_register_divider("pll_p_out3_cclklp", "pll_p_out3",
- clk_base + SUPER_CCLKG_DIVIDER, 0,
+ clk_base + SUPER_CCLKLP_DIVIDER, 0,
TEGRA_DIVIDER_INT, 16, 8, 1, NULL);
clk_register_clkdev(clk, "pll_p_out3_cclklp", NULL);
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 845bafcfa792..d5c5a476360f 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -160,6 +160,24 @@ static int powernv_cpuidle_driver_init(void)
drv->state_count += 1;
}
+ /*
+ * On the PowerNV platform cpu_present may be less than cpu_possible in
+ * cases when firmware detects the CPU, but it is not available to the
+ * OS. If CONFIG_HOTPLUG_CPU=n, then such CPUs are not hotplugable at
+ * run time and hence cpu_devices are not created for those CPUs by the
+ * generic topology_init().
+ *
+ * drv->cpumask defaults to cpu_possible_mask in
+ * __cpuidle_driver_init(). This breaks cpuidle on PowerNV where
+ * cpu_devices are not created for CPUs in cpu_possible_mask that
+ * cannot be hot-added later at run time.
+ *
+ * Trying cpuidle_register_device() on a CPU without a cpu_device is
+ * incorrect, so pass a correct CPU mask to the generic cpuidle driver.
+ */
+
+ drv->cpumask = (struct cpumask *)cpu_present_mask;
+
return 0;
}
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 151971627757..6b68416cf9df 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -189,6 +189,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
return -EBUSY;
}
target_state = &drv->states[index];
+ broadcast = false;
}
/* Take note of the planned idle state. */
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index 832a2c3f01ff..9e98a5fbbc1d 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -613,6 +613,18 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev)
struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
int error;
+ /*
+ * Return if cpu_device is not setup for this CPU.
+ *
+ * This could happen if the arch did not set up cpu_device
+ * since this CPU is not in cpu_present mask and the
+ * driver did not send a correct CPU mask during registration.
+ * Without this check we would end up passing bogus
+ * value for &cpu_dev->kobj in kobject_init_and_add()
+ */
+ if (!cpu_dev)
+ return -ENODEV;
+
kdev = kzalloc(sizeof(*kdev), GFP_KERNEL);
if (!kdev)
return -ENOMEM;
diff --git a/drivers/crypto/amcc/crypto4xx_core.h b/drivers/crypto/amcc/crypto4xx_core.h
index bac0bdeb4b5f..b6529b9fcbe2 100644
--- a/drivers/crypto/amcc/crypto4xx_core.h
+++ b/drivers/crypto/amcc/crypto4xx_core.h
@@ -32,12 +32,12 @@
#define PPC405EX_CE_RESET 0x00000008
#define CRYPTO4XX_CRYPTO_PRIORITY 300
-#define PPC4XX_LAST_PD 63
-#define PPC4XX_NUM_PD 64
-#define PPC4XX_LAST_GD 1023
+#define PPC4XX_NUM_PD 256
+#define PPC4XX_LAST_PD (PPC4XX_NUM_PD - 1)
#define PPC4XX_NUM_GD 1024
-#define PPC4XX_LAST_SD 63
-#define PPC4XX_NUM_SD 64
+#define PPC4XX_LAST_GD (PPC4XX_NUM_GD - 1)
+#define PPC4XX_NUM_SD 256
+#define PPC4XX_LAST_SD (PPC4XX_NUM_SD - 1)
#define PPC4XX_SD_BUFFER_SIZE 2048
#define PD_ENTRY_INUSE 1
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 5450880abb7b..5a9083021fa0 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -1641,6 +1641,7 @@ static int queue_cache_init(void)
CWQ_ENTRY_SIZE, 0, NULL);
if (!queue_cache[HV_NCS_QTYPE_CWQ - 1]) {
kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]);
+ queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL;
return -ENOMEM;
}
return 0;
@@ -1650,6 +1651,8 @@ static void queue_cache_destroy(void)
{
kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]);
kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]);
+ queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL;
+ queue_cache[HV_NCS_QTYPE_CWQ - 1] = NULL;
}
static int spu_queue_register(struct spu_queue *p, unsigned long q_type)
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index f214a8755827..fd39893079d5 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -664,8 +664,9 @@ static int s5p_aes_probe(struct platform_device *pdev)
dev_warn(dev, "feed control interrupt is not available.\n");
goto err_irq;
}
- err = devm_request_irq(dev, pdata->irq_fc, s5p_aes_interrupt,
- IRQF_SHARED, pdev->name, pdev);
+ err = devm_request_threaded_irq(dev, pdata->irq_fc, NULL,
+ s5p_aes_interrupt, IRQF_ONESHOT,
+ pdev->name, pdev);
if (err < 0) {
dev_warn(dev, "feed control interrupt is not available.\n");
goto err_irq;
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 002ed81c277b..0a0e3294ab30 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1027,12 +1027,14 @@ static struct dmaengine_unmap_pool *__get_unmap_pool(int nr)
switch (order) {
case 0 ... 1:
return &unmap_pool[0];
+#if IS_ENABLED(CONFIG_DMA_ENGINE_RAID)
case 2 ... 4:
return &unmap_pool[1];
case 5 ... 7:
return &unmap_pool[2];
case 8:
return &unmap_pool[3];
+#endif
default:
BUG();
return NULL;
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 1c7568c0055a..7254c20007f8 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -148,6 +148,12 @@ MODULE_PARM_DESC(run, "Run the test (default: false)");
#define PATTERN_OVERWRITE 0x20
#define PATTERN_COUNT_MASK 0x1f
+/* poor man's completion - we want to use wait_event_freezable() on it */
+struct dmatest_done {
+ bool done;
+ wait_queue_head_t *wait;
+};
+
struct dmatest_thread {
struct list_head node;
struct dmatest_info *info;
@@ -156,6 +162,8 @@ struct dmatest_thread {
u8 **srcs;
u8 **dsts;
enum dma_transaction_type type;
+ wait_queue_head_t done_wait;
+ struct dmatest_done test_done;
bool done;
};
@@ -316,18 +324,25 @@ static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
return error_count;
}
-/* poor man's completion - we want to use wait_event_freezable() on it */
-struct dmatest_done {
- bool done;
- wait_queue_head_t *wait;
-};
static void dmatest_callback(void *arg)
{
struct dmatest_done *done = arg;
-
- done->done = true;
- wake_up_all(done->wait);
+ struct dmatest_thread *thread =
+ container_of(arg, struct dmatest_thread, done_wait);
+ if (!thread->done) {
+ done->done = true;
+ wake_up_all(done->wait);
+ } else {
+ /*
+ * If thread->done, it means that this callback occurred
+ * after the parent thread has cleaned up. This can
+ * happen in the case that driver doesn't implement
+ * the terminate_all() functionality and a dma operation
+ * did not occur within the timeout period
+ */
+ WARN(1, "dmatest: Kernel memory may be corrupted!!\n");
+ }
}
static unsigned int min_odd(unsigned int x, unsigned int y)
@@ -398,9 +413,8 @@ static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len)
*/
static int dmatest_func(void *data)
{
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_wait);
struct dmatest_thread *thread = data;
- struct dmatest_done done = { .wait = &done_wait };
+ struct dmatest_done *done = &thread->test_done;
struct dmatest_info *info;
struct dmatest_params *params;
struct dma_chan *chan;
@@ -605,9 +619,9 @@ static int dmatest_func(void *data)
continue;
}
- done.done = false;
+ done->done = false;
tx->callback = dmatest_callback;
- tx->callback_param = &done;
+ tx->callback_param = done;
cookie = tx->tx_submit(tx);
if (dma_submit_error(cookie)) {
@@ -620,21 +634,12 @@ static int dmatest_func(void *data)
}
dma_async_issue_pending(chan);
- wait_event_freezable_timeout(done_wait, done.done,
+ wait_event_freezable_timeout(thread->done_wait, done->done,
msecs_to_jiffies(params->timeout));
status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
- if (!done.done) {
- /*
- * We're leaving the timed out dma operation with
- * dangling pointer to done_wait. To make this
- * correct, we'll need to allocate wait_done for
- * each test iteration and perform "who's gonna
- * free it this time?" dancing. For now, just
- * leave it dangling.
- */
- WARN(1, "dmatest: Kernel stack may be corrupted!!\n");
+ if (!done->done) {
dmaengine_unmap_put(um);
result("test timed out", total_tests, src_off, dst_off,
len, 0);
@@ -708,7 +713,7 @@ err_thread_type:
dmatest_KBs(runtime, total_len), ret);
/* terminate all transfers on specified channels */
- if (ret)
+ if (ret || failed_tests)
dmaengine_terminate_all(chan);
thread->done = true;
@@ -766,6 +771,8 @@ static int dmatest_add_threads(struct dmatest_info *info,
thread->info = info;
thread->chan = dtc->chan;
thread->type = type;
+ thread->test_done.wait = &thread->done_wait;
+ init_waitqueue_head(&thread->done_wait);
smp_wmb();
thread->task = kthread_create(dmatest_func, thread, "%s-%s%u",
dma_chan_name(chan), op, i);
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 4a885c95bc37..766ab72d119e 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -1845,7 +1845,6 @@ static bool _chan_ns(const struct pl330_dmac *pl330, int i)
static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330)
{
struct pl330_thread *thrd = NULL;
- unsigned long flags;
int chans, i;
if (pl330->state == DYING)
@@ -1853,8 +1852,6 @@ static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330)
chans = pl330->pcfg.num_chan;
- spin_lock_irqsave(&pl330->lock, flags);
-
for (i = 0; i < chans; i++) {
thrd = &pl330->channels[i];
if ((thrd->free) && (!_manager_ns(thrd) ||
@@ -1872,8 +1869,6 @@ static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330)
thrd = NULL;
}
- spin_unlock_irqrestore(&pl330->lock, flags);
-
return thrd;
}
@@ -1891,7 +1886,6 @@ static inline void _free_event(struct pl330_thread *thrd, int ev)
static void pl330_release_channel(struct pl330_thread *thrd)
{
struct pl330_dmac *pl330;
- unsigned long flags;
if (!thrd || thrd->free)
return;
@@ -1903,10 +1897,8 @@ static void pl330_release_channel(struct pl330_thread *thrd)
pl330 = thrd->dmac;
- spin_lock_irqsave(&pl330->lock, flags);
_free_event(thrd, thrd->ev);
thrd->free = true;
- spin_unlock_irqrestore(&pl330->lock, flags);
}
/* Initialize the structure for PL330 configuration, that can be used
@@ -2276,19 +2268,19 @@ static int pl330_alloc_chan_resources(struct dma_chan *chan)
struct pl330_dmac *pl330 = pch->dmac;
unsigned long flags;
- spin_lock_irqsave(&pch->lock, flags);
+ spin_lock_irqsave(&pl330->lock, flags);
dma_cookie_init(chan);
pch->thread = pl330_request_channel(pl330);
if (!pch->thread) {
- spin_unlock_irqrestore(&pch->lock, flags);
+ spin_unlock_irqrestore(&pl330->lock, flags);
return -ENOMEM;
}
tasklet_init(&pch->task, pl330_tasklet, (unsigned long) pch);
- spin_unlock_irqrestore(&pch->lock, flags);
+ spin_unlock_irqrestore(&pl330->lock, flags);
return 1;
}
@@ -2391,19 +2383,20 @@ static int pl330_pause(struct dma_chan *chan)
static void pl330_free_chan_resources(struct dma_chan *chan)
{
struct dma_pl330_chan *pch = to_pchan(chan);
+ struct pl330_dmac *pl330 = pch->dmac;
unsigned long flags;
tasklet_kill(&pch->task);
pm_runtime_get_sync(pch->dmac->ddma.dev);
- spin_lock_irqsave(&pch->lock, flags);
+ spin_lock_irqsave(&pl330->lock, flags);
pl330_release_channel(pch->thread);
pch->thread = NULL;
list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool);
- spin_unlock_irqrestore(&pch->lock, flags);
+ spin_unlock_irqrestore(&pl330->lock, flags);
pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
}
diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c
index 149ec2bd9bc6..8100ede095d5 100644
--- a/drivers/dma/ti-dma-crossbar.c
+++ b/drivers/dma/ti-dma-crossbar.c
@@ -46,12 +46,12 @@ struct ti_am335x_xbar_data {
struct ti_am335x_xbar_map {
u16 dma_line;
- u16 mux_val;
+ u8 mux_val;
};
-static inline void ti_am335x_xbar_write(void __iomem *iomem, int event, u16 val)
+static inline void ti_am335x_xbar_write(void __iomem *iomem, int event, u8 val)
{
- writeb_relaxed(val & 0x1f, iomem + event);
+ writeb_relaxed(val, iomem + event);
}
static void ti_am335x_xbar_free(struct device *dev, void *route_data)
@@ -102,7 +102,7 @@ static void *ti_am335x_xbar_route_allocate(struct of_phandle_args *dma_spec,
}
map->dma_line = (u16)dma_spec->args[0];
- map->mux_val = (u16)dma_spec->args[2];
+ map->mux_val = (u8)dma_spec->args[2];
dma_spec->args[2] = 0;
dma_spec->args_count = 2;
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c
index 72e07e3cf718..16e0eb523439 100644
--- a/drivers/edac/i5000_edac.c
+++ b/drivers/edac/i5000_edac.c
@@ -227,7 +227,7 @@
#define NREC_RDWR(x) (((x)>>11) & 1)
#define NREC_RANK(x) (((x)>>8) & 0x7)
#define NRECMEMB 0xC0
-#define NREC_CAS(x) (((x)>>16) & 0xFFFFFF)
+#define NREC_CAS(x) (((x)>>16) & 0xFFF)
#define NREC_RAS(x) ((x) & 0x7FFF)
#define NRECFGLOG 0xC4
#define NREEECFBDA 0xC8
@@ -371,7 +371,7 @@ struct i5000_error_info {
/* These registers are input ONLY if there was a
* Non-Recoverable Error */
u16 nrecmema; /* Non-Recoverable Mem log A */
- u16 nrecmemb; /* Non-Recoverable Mem log B */
+ u32 nrecmemb; /* Non-Recoverable Mem log B */
};
@@ -407,7 +407,7 @@ static void i5000_get_error_info(struct mem_ctl_info *mci,
NERR_FAT_FBD, &info->nerr_fat_fbd);
pci_read_config_word(pvt->branchmap_werrors,
NRECMEMA, &info->nrecmema);
- pci_read_config_word(pvt->branchmap_werrors,
+ pci_read_config_dword(pvt->branchmap_werrors,
NRECMEMB, &info->nrecmemb);
/* Clear the error bits, by writing them back */
@@ -1293,7 +1293,7 @@ static int i5000_init_csrows(struct mem_ctl_info *mci)
dimm->mtype = MEM_FB_DDR2;
/* ask what device type on this row */
- if (MTR_DRAM_WIDTH(mtr))
+ if (MTR_DRAM_WIDTH(mtr) == 8)
dimm->dtype = DEV_X8;
else
dimm->dtype = DEV_X4;
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
index 6ef6ad1ba16e..2ea2f32e608b 100644
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -368,7 +368,7 @@ struct i5400_error_info {
/* These registers are input ONLY if there was a Non-Rec Error */
u16 nrecmema; /* Non-Recoverable Mem log A */
- u16 nrecmemb; /* Non-Recoverable Mem log B */
+ u32 nrecmemb; /* Non-Recoverable Mem log B */
};
@@ -458,7 +458,7 @@ static void i5400_get_error_info(struct mem_ctl_info *mci,
NERR_FAT_FBD, &info->nerr_fat_fbd);
pci_read_config_word(pvt->branchmap_werrors,
NRECMEMA, &info->nrecmema);
- pci_read_config_word(pvt->branchmap_werrors,
+ pci_read_config_dword(pvt->branchmap_werrors,
NRECMEMB, &info->nrecmemb);
/* Clear the error bits, by writing them back */
@@ -1207,13 +1207,14 @@ static int i5400_init_dimms(struct mem_ctl_info *mci)
dimm->nr_pages = size_mb << 8;
dimm->grain = 8;
- dimm->dtype = MTR_DRAM_WIDTH(mtr) ? DEV_X8 : DEV_X4;
+ dimm->dtype = MTR_DRAM_WIDTH(mtr) == 8 ?
+ DEV_X8 : DEV_X4;
dimm->mtype = MEM_FB_DDR2;
/*
* The eccc mechanism is SDDC (aka SECC), with
* is similar to Chipkill.
*/
- dimm->edac_mode = MTR_DRAM_WIDTH(mtr) ?
+ dimm->edac_mode = MTR_DRAM_WIDTH(mtr) == 8 ?
EDAC_S8ECD8ED : EDAC_S4ECD4ED;
ndimms++;
}
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index ca64b174f8a3..a4e1f6939c39 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -1773,6 +1773,7 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci,
break;
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA:
pvt->pci_ta = pdev;
+ break;
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS:
pvt->pci_ras = pdev;
break;
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index c51f3b2fe3c0..a149337229d2 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -115,8 +115,7 @@ static ssize_t systab_show(struct kobject *kobj,
return str - buf;
}
-static struct kobj_attribute efi_attr_systab =
- __ATTR(systab, 0400, systab_show, NULL);
+static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, 0400);
#define EFI_FIELD(var) efi.var
@@ -313,7 +312,6 @@ int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md)
early_memunmap(md, sizeof (*md));
}
- pr_err_once("requested map not found.\n");
return -ENOENT;
}
@@ -327,38 +325,6 @@ u64 __init efi_mem_desc_end(efi_memory_desc_t *md)
return end;
}
-/*
- * We can't ioremap data in EFI boot services RAM, because we've already mapped
- * it as RAM. So, look it up in the existing EFI memory map instead. Only
- * callable after efi_enter_virtual_mode and before efi_free_boot_services.
- */
-void __iomem *efi_lookup_mapped_addr(u64 phys_addr)
-{
- struct efi_memory_map *map;
- void *p;
- map = efi.memmap;
- if (!map)
- return NULL;
- if (WARN_ON(!map->map))
- return NULL;
- for (p = map->map; p < map->map_end; p += map->desc_size) {
- efi_memory_desc_t *md = p;
- u64 size = md->num_pages << EFI_PAGE_SHIFT;
- u64 end = md->phys_addr + size;
- if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
- md->type != EFI_BOOT_SERVICES_CODE &&
- md->type != EFI_BOOT_SERVICES_DATA)
- continue;
- if (!md->virt_addr)
- continue;
- if (phys_addr >= md->phys_addr && phys_addr < end) {
- phys_addr += md->virt_addr - md->phys_addr;
- return (__force void __iomem *)(unsigned long)phys_addr;
- }
- }
- return NULL;
-}
-
static __initdata efi_config_table_type_t common_tables[] = {
{ACPI_20_TABLE_GUID, "ACPI 2.0", &efi.acpi20},
{ACPI_TABLE_GUID, "ACPI", &efi.acpi},
diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c
index 22c5285f7705..341b8c686ec7 100644
--- a/drivers/firmware/efi/esrt.c
+++ b/drivers/firmware/efi/esrt.c
@@ -105,7 +105,7 @@ static const struct sysfs_ops esre_attr_ops = {
};
/* Generic ESRT Entry ("ESRE") support. */
-static ssize_t esre_fw_class_show(struct esre_entry *entry, char *buf)
+static ssize_t fw_class_show(struct esre_entry *entry, char *buf)
{
char *str = buf;
@@ -116,18 +116,16 @@ static ssize_t esre_fw_class_show(struct esre_entry *entry, char *buf)
return str - buf;
}
-static struct esre_attribute esre_fw_class = __ATTR(fw_class, 0400,
- esre_fw_class_show, NULL);
+static struct esre_attribute esre_fw_class = __ATTR_RO_MODE(fw_class, 0400);
#define esre_attr_decl(name, size, fmt) \
-static ssize_t esre_##name##_show(struct esre_entry *entry, char *buf) \
+static ssize_t name##_show(struct esre_entry *entry, char *buf) \
{ \
return sprintf(buf, fmt "\n", \
le##size##_to_cpu(entry->esre.esre1->name)); \
} \
\
-static struct esre_attribute esre_##name = __ATTR(name, 0400, \
- esre_##name##_show, NULL)
+static struct esre_attribute esre_##name = __ATTR_RO_MODE(name, 0400)
esre_attr_decl(fw_type, 32, "%u");
esre_attr_decl(fw_version, 32, "%u");
@@ -195,14 +193,13 @@ static int esre_create_sysfs_entry(void *esre, int entry_num)
/* support for displaying ESRT fields at the top level */
#define esrt_attr_decl(name, size, fmt) \
-static ssize_t esrt_##name##_show(struct kobject *kobj, \
+static ssize_t name##_show(struct kobject *kobj, \
struct kobj_attribute *attr, char *buf)\
{ \
return sprintf(buf, fmt "\n", le##size##_to_cpu(esrt->name)); \
} \
\
-static struct kobj_attribute esrt_##name = __ATTR(name, 0400, \
- esrt_##name##_show, NULL)
+static struct kobj_attribute esrt_##name = __ATTR_RO_MODE(name, 0400)
esrt_attr_decl(fw_resource_count, 32, "%u");
esrt_attr_decl(fw_resource_count_max, 32, "%u");
@@ -256,7 +253,7 @@ void __init efi_esrt_init(void)
rc = efi_mem_desc_lookup(efi.esrt, &md);
if (rc < 0) {
- pr_err("ESRT header is not in the memory map.\n");
+ pr_warn("ESRT header is not in the memory map.\n");
return;
}
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 0e4b85e31b69..2990a4bdaa74 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -23,6 +23,9 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
GCOV_PROFILE := n
KASAN_SANITIZE := n
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT := n
+
lib-y := efi-stub-helper.o
# include the stub's generic dependencies from lib/ when building for ARM/arm64
diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c
index 5c55227a34c8..2400b3e1d840 100644
--- a/drivers/firmware/efi/runtime-map.c
+++ b/drivers/firmware/efi/runtime-map.c
@@ -67,11 +67,11 @@ static ssize_t map_attr_show(struct kobject *kobj, struct attribute *attr,
return map_attr->show(entry, buf);
}
-static struct map_attribute map_type_attr = __ATTR_RO(type);
-static struct map_attribute map_phys_addr_attr = __ATTR_RO(phys_addr);
-static struct map_attribute map_virt_addr_attr = __ATTR_RO(virt_addr);
-static struct map_attribute map_num_pages_attr = __ATTR_RO(num_pages);
-static struct map_attribute map_attribute_attr = __ATTR_RO(attribute);
+static struct map_attribute map_type_attr = __ATTR_RO_MODE(type, 0400);
+static struct map_attribute map_phys_addr_attr = __ATTR_RO_MODE(phys_addr, 0400);
+static struct map_attribute map_virt_addr_attr = __ATTR_RO_MODE(virt_addr, 0400);
+static struct map_attribute map_num_pages_attr = __ATTR_RO_MODE(num_pages, 0400);
+static struct map_attribute map_attribute_attr = __ATTR_RO_MODE(attribute, 0400);
/*
* These are default attributes that are added for every memmap entry.
diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c
index 3e6661bab54a..ddf9cd3ad974 100644
--- a/drivers/gpio/gpio-altera.c
+++ b/drivers/gpio/gpio-altera.c
@@ -94,21 +94,18 @@ static int altera_gpio_irq_set_type(struct irq_data *d,
altera_gc = to_altera(irq_data_get_irq_chip_data(d));
- if (type == IRQ_TYPE_NONE)
+ if (type == IRQ_TYPE_NONE) {
+ irq_set_handler_locked(d, handle_bad_irq);
return 0;
- if (type == IRQ_TYPE_LEVEL_HIGH &&
- altera_gc->interrupt_trigger == IRQ_TYPE_LEVEL_HIGH)
- return 0;
- if (type == IRQ_TYPE_EDGE_RISING &&
- altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_RISING)
- return 0;
- if (type == IRQ_TYPE_EDGE_FALLING &&
- altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_FALLING)
- return 0;
- if (type == IRQ_TYPE_EDGE_BOTH &&
- altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_BOTH)
+ }
+ if (type == altera_gc->interrupt_trigger) {
+ if (type == IRQ_TYPE_LEVEL_HIGH)
+ irq_set_handler_locked(d, handle_level_irq);
+ else
+ irq_set_handler_locked(d, handle_simple_irq);
return 0;
-
+ }
+ irq_set_handler_locked(d, handle_bad_irq);
return -EINVAL;
}
@@ -234,7 +231,6 @@ static void altera_gpio_irq_edge_handler(struct irq_desc *desc)
chained_irq_exit(chip, desc);
}
-
static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc)
{
struct altera_gpio_chip *altera_gc;
@@ -314,7 +310,7 @@ static int altera_gpio_probe(struct platform_device *pdev)
altera_gc->interrupt_trigger = reg;
ret = gpiochip_irqchip_add(&altera_gc->mmchip.gc, &altera_irq_chip, 0,
- handle_simple_irq, IRQ_TYPE_NONE);
+ handle_bad_irq, IRQ_TYPE_NONE);
if (ret) {
dev_info(&pdev->dev, "could not add irqchip\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index f4cae5357e40..3e90ddcbb24a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1575,34 +1575,32 @@ void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev)
WREG32(mmBIOS_SCRATCH_0 + i, adev->bios_scratch[i]);
}
-/* Atom needs data in little endian format
- * so swap as appropriate when copying data to
- * or from atom. Note that atom operates on
- * dw units.
+/* Atom needs data in little endian format so swap as appropriate when copying
+ * data to or from atom. Note that atom operates on dw units.
+ *
+ * Use to_le=true when sending data to atom and provide at least
+ * ALIGN(num_bytes,4) bytes in the dst buffer.
+ *
+ * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4)
+ * byes in the src buffer.
*/
void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le)
{
#ifdef __BIG_ENDIAN
- u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */
- u32 *dst32, *src32;
+ u32 src_tmp[5], dst_tmp[5];
int i;
+ u8 align_num_bytes = ALIGN(num_bytes, 4);
- memcpy(src_tmp, src, num_bytes);
- src32 = (u32 *)src_tmp;
- dst32 = (u32 *)dst_tmp;
if (to_le) {
- for (i = 0; i < ((num_bytes + 3) / 4); i++)
- dst32[i] = cpu_to_le32(src32[i]);
- memcpy(dst, dst_tmp, num_bytes);
+ memcpy(src_tmp, src, num_bytes);
+ for (i = 0; i < align_num_bytes / 4; i++)
+ dst_tmp[i] = cpu_to_le32(src_tmp[i]);
+ memcpy(dst, dst_tmp, align_num_bytes);
} else {
- u8 dws = num_bytes & ~3;
- for (i = 0; i < ((num_bytes + 3) / 4); i++)
- dst32[i] = le32_to_cpu(src32[i]);
- memcpy(dst, dst_tmp, dws);
- if (num_bytes % 4) {
- for (i = 0; i < (num_bytes % 4); i++)
- dst[dws+i] = dst_tmp[dws+i];
- }
+ memcpy(src_tmp, src, align_num_bytes);
+ for (i = 0; i < align_num_bytes / 4; i++)
+ dst_tmp[i] = le32_to_cpu(src_tmp[i]);
+ memcpy(dst, dst_tmp, num_bytes);
}
#else
memcpy(dst, src, num_bytes);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 16302f7d59f6..fc9f14747f70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1760,8 +1760,11 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
}
r = amdgpu_late_init(adev);
- if (r)
+ if (r) {
+ if (fbcon)
+ console_unlock();
return r;
+ }
/* pin cursors */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
diff --git a/drivers/gpu/drm/armada/Makefile b/drivers/gpu/drm/armada/Makefile
index 26412d2f8c98..ffd673615772 100644
--- a/drivers/gpu/drm/armada/Makefile
+++ b/drivers/gpu/drm/armada/Makefile
@@ -4,5 +4,3 @@ armada-y += armada_510.o
armada-$(CONFIG_DEBUG_FS) += armada_debugfs.o
obj-$(CONFIG_DRM_ARMADA) := armada.o
-
-CFLAGS_armada_trace.o := -I$(src)
diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index fbe1b3174f75..34cebcdc2fc4 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -180,6 +180,8 @@ static void decon_commit(struct exynos_drm_crtc *crtc)
/* enable output and display signal */
decon_set_bits(ctx, DECON_VIDCON0, VIDCON0_ENVID | VIDCON0_ENVID_F, ~0);
+
+ decon_set_bits(ctx, DECON_UPDATE, STANDALONE_UPDATE_F, ~0);
}
static void decon_win_set_pixfmt(struct decon_context *ctx, unsigned int win,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 252eb301470c..c147043af1ca 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -245,6 +245,15 @@ struct exynos_drm_gem *exynos_drm_gem_create(struct drm_device *dev,
if (IS_ERR(exynos_gem))
return exynos_gem;
+ if (!is_drm_iommu_supported(dev) && (flags & EXYNOS_BO_NONCONTIG)) {
+ /*
+ * when no IOMMU is available, all allocated buffers are
+ * contiguous anyway, so drop EXYNOS_BO_NONCONTIG flag
+ */
+ flags &= ~EXYNOS_BO_NONCONTIG;
+ DRM_WARN("Non-contiguous allocation is not supported without IOMMU, falling back to contiguous buffer\n");
+ }
+
/* set memory type and cache attribute from user side. */
exynos_gem->flags = flags;
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index f3bee54c414f..cb4313c68f71 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -440,7 +440,9 @@ static bool
gmbus_is_index_read(struct i2c_msg *msgs, int i, int num)
{
return (i + 1 < num &&
- !(msgs[i].flags & I2C_M_RD) && msgs[i].len <= 2 &&
+ msgs[i].addr == msgs[i + 1].addr &&
+ !(msgs[i].flags & I2C_M_RD) &&
+ (msgs[i].len == 1 || msgs[i].len == 2) &&
(msgs[i + 1].flags & I2C_M_RD));
}
diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
index 27c297672076..d2d1c9a34da1 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
@@ -142,9 +142,6 @@ static int omap_gem_dmabuf_mmap(struct dma_buf *buffer,
struct drm_gem_object *obj = buffer->priv;
int ret = 0;
- if (WARN_ON(!obj->filp))
- return -EINVAL;
-
ret = drm_gem_mmap_obj(obj, omap_gem_mmap_size(obj), vma);
if (ret < 0)
return ret;
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index ebea165db1e2..17c3b9427f54 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -674,6 +674,7 @@ static int panel_simple_remove(struct device *dev)
drm_panel_remove(&panel->base);
panel_simple_disable(&panel->base);
+ panel_simple_unprepare(&panel->base);
if (panel->ddc)
put_device(&panel->ddc->dev);
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index b5760851195c..0c6216a6ee9e 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -45,34 +45,32 @@ static char *pre_emph_names[] = {
/***** radeon AUX functions *****/
-/* Atom needs data in little endian format
- * so swap as appropriate when copying data to
- * or from atom. Note that atom operates on
- * dw units.
+/* Atom needs data in little endian format so swap as appropriate when copying
+ * data to or from atom. Note that atom operates on dw units.
+ *
+ * Use to_le=true when sending data to atom and provide at least
+ * ALIGN(num_bytes,4) bytes in the dst buffer.
+ *
+ * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4)
+ * byes in the src buffer.
*/
void radeon_atom_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le)
{
#ifdef __BIG_ENDIAN
- u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */
- u32 *dst32, *src32;
+ u32 src_tmp[5], dst_tmp[5];
int i;
+ u8 align_num_bytes = ALIGN(num_bytes, 4);
- memcpy(src_tmp, src, num_bytes);
- src32 = (u32 *)src_tmp;
- dst32 = (u32 *)dst_tmp;
if (to_le) {
- for (i = 0; i < ((num_bytes + 3) / 4); i++)
- dst32[i] = cpu_to_le32(src32[i]);
- memcpy(dst, dst_tmp, num_bytes);
+ memcpy(src_tmp, src, num_bytes);
+ for (i = 0; i < align_num_bytes / 4; i++)
+ dst_tmp[i] = cpu_to_le32(src_tmp[i]);
+ memcpy(dst, dst_tmp, align_num_bytes);
} else {
- u8 dws = num_bytes & ~3;
- for (i = 0; i < ((num_bytes + 3) / 4); i++)
- dst32[i] = le32_to_cpu(src32[i]);
- memcpy(dst, dst_tmp, dws);
- if (num_bytes % 4) {
- for (i = 0; i < (num_bytes % 4); i++)
- dst[dws+i] = dst_tmp[dws+i];
- }
+ memcpy(src_tmp, src, align_num_bytes);
+ for (i = 0; i < align_num_bytes / 4; i++)
+ dst_tmp[i] = le32_to_cpu(src_tmp[i]);
+ memcpy(dst, dst_tmp, num_bytes);
}
#else
memcpy(dst, src, num_bytes);
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index 26da2f4d7b4f..a2937a693591 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -226,7 +226,6 @@ static int radeonfb_create(struct drm_fb_helper *helper,
}
info->par = rfbdev;
- info->skip_vt_switch = true;
ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj);
if (ret) {
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index b6f16804e73b..d9007cc37be1 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -3029,6 +3029,16 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
max_sclk = 75000;
max_mclk = 80000;
}
+ } else if (rdev->family == CHIP_OLAND) {
+ if ((rdev->pdev->revision == 0xC7) ||
+ (rdev->pdev->revision == 0x80) ||
+ (rdev->pdev->revision == 0x81) ||
+ (rdev->pdev->revision == 0x83) ||
+ (rdev->pdev->revision == 0x87) ||
+ (rdev->pdev->device == 0x6604) ||
+ (rdev->pdev->device == 0x6605)) {
+ max_sclk = 75000;
+ }
}
/* Apply dpm quirks */
while (p && p->chip_device != 0) {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 04fd0f2b6af0..fda8e85dd5a2 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -2678,6 +2678,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv,
}
view_type = vmw_view_cmd_to_type(header->id);
+ if (view_type == vmw_view_max)
+ return -EINVAL;
cmd = container_of(header, typeof(*cmd), header);
ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
user_surface_converter,
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 57d707ca605e..d9fcd0983c27 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -165,11 +165,11 @@ config HID_CHERRY
Support for Cherry Cymotion keyboard.
config HID_CHICONY
- tristate "Chicony Tactical pad"
+ tristate "Chicony devices"
depends on HID
default !EXPERT
---help---
- Support for Chicony Tactical pad.
+ Support for Chicony Tactical pad and special keys on Chicony keyboards.
config HID_CORSAIR
tristate "Corsair devices"
diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c
index bc3cec199fee..f04ed9aabc3f 100644
--- a/drivers/hid/hid-chicony.c
+++ b/drivers/hid/hid-chicony.c
@@ -86,6 +86,7 @@ static const struct hid_device_id ch_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
{ }
};
MODULE_DEVICE_TABLE(hid, ch_devices);
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 902897669fb9..47820119c183 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1870,6 +1870,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) },
{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
{ HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) },
@@ -2058,6 +2059,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET) },
{ HID_USB_DEVICE(USB_VENDOR_ID_X_TENSIONS, USB_DEVICE_ID_SPEEDLINK_VAD_CEZANNE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ZYDACRON, USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL) },
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 796a425fbba1..581129d68519 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -528,6 +528,7 @@
#define USB_VENDOR_ID_JESS 0x0c45
#define USB_DEVICE_ID_JESS_YUREX 0x1010
+#define USB_DEVICE_ID_JESS_ZEN_AIO_KBD 0x5112
#define USB_VENDOR_ID_JESS2 0x0f30
#define USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD 0x0111
@@ -1030,6 +1031,7 @@
#define USB_VENDOR_ID_XIN_MO 0x16c0
#define USB_DEVICE_ID_XIN_MO_DUAL_ARCADE 0x05e1
+#define USB_DEVICE_ID_THT_2P_ARCADE 0x75e1
#define USB_VENDOR_ID_XIROKU 0x1477
#define USB_DEVICE_ID_XIROKU_SPX 0x1006
diff --git a/drivers/hid/hid-xinmo.c b/drivers/hid/hid-xinmo.c
index 7df5227a7e61..9ad7731d2e10 100644
--- a/drivers/hid/hid-xinmo.c
+++ b/drivers/hid/hid-xinmo.c
@@ -46,6 +46,7 @@ static int xinmo_event(struct hid_device *hdev, struct hid_field *field,
static const struct hid_device_id xinmo_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) },
{ }
};
diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c
index cccef87963e0..975c43d446f8 100644
--- a/drivers/hwmon/asus_atk0110.c
+++ b/drivers/hwmon/asus_atk0110.c
@@ -646,6 +646,9 @@ static int atk_read_value(struct atk_sensor_data *sensor, u64 *value)
else
err = atk_read_value_new(sensor, value);
+ if (err)
+ return err;
+
sensor->is_valid = true;
sensor->last_updated = jiffies;
sensor->cached_value = *value;
diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
index 32c6a40a408f..ea85330603b2 100644
--- a/drivers/hwtracing/intel_th/pci.c
+++ b/drivers/hwtracing/intel_th/pci.c
@@ -82,6 +82,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x9da6),
.driver_data = (kernel_ulong_t)0,
},
+ {
+ /* Gemini Lake */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x318e),
+ .driver_data = (kernel_ulong_t)0,
+ },
{ 0 },
};
diff --git a/drivers/i2c/busses/i2c-riic.c b/drivers/i2c/busses/i2c-riic.c
index d8803c3bbfdc..16833365475f 100644
--- a/drivers/i2c/busses/i2c-riic.c
+++ b/drivers/i2c/busses/i2c-riic.c
@@ -218,8 +218,12 @@ static irqreturn_t riic_tend_isr(int irq, void *data)
}
if (riic->is_last || riic->err) {
- riic_clear_set_bit(riic, 0, ICIER_SPIE, RIIC_ICIER);
+ riic_clear_set_bit(riic, ICIER_TEIE, ICIER_SPIE, RIIC_ICIER);
writeb(ICCR2_SP, riic->base + RIIC_ICCR2);
+ } else {
+ /* Transfer is complete, but do not send STOP */
+ riic_clear_set_bit(riic, ICIER_TEIE, 0, RIIC_ICIER);
+ complete(&riic->msg_done);
}
return IRQ_HANDLED;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 43d5166db4c6..e354358db77b 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1353,7 +1353,7 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
return id_priv;
}
-static inline int cma_user_data_offset(struct rdma_id_private *id_priv)
+static inline u8 cma_user_data_offset(struct rdma_id_private *id_priv)
{
return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);
}
@@ -1731,7 +1731,8 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
struct rdma_id_private *listen_id, *conn_id;
struct rdma_cm_event event;
struct net_device *net_dev;
- int offset, ret;
+ u8 offset;
+ int ret;
listen_id = cma_id_from_event(cm_id, ib_event, &net_dev);
if (IS_ERR(listen_id))
@@ -3118,7 +3119,8 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
struct ib_cm_sidr_req_param req;
struct ib_cm_id *id;
void *private_data;
- int offset, ret;
+ u8 offset;
+ int ret;
memset(&req, 0, sizeof req);
offset = cma_user_data_offset(id_priv);
@@ -3175,7 +3177,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
struct rdma_route *route;
void *private_data;
struct ib_cm_id *id;
- int offset, ret;
+ u8 offset;
+ int ret;
memset(&req, 0, sizeof req);
offset = cma_user_data_offset(id_priv);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index bc147582bed9..6d62b69c898e 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -579,10 +579,10 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
ret = -EAGAIN;
goto skip_cqe;
}
- if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
+ if (unlikely(!CQE_STATUS(hw_cqe) &&
+ CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
t4_set_wq_in_error(wq);
- hw_cqe->header |= htonl(CQE_STATUS_V(T4_ERR_MSN));
- goto proc_cqe;
+ hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
}
goto proc_cqe;
}
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 1c8b7c22c822..348828271cb0 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1564,7 +1564,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->mtu_msgmax = (IB_MTU_4096 << 5) |
ilog2(dev->dev->caps.max_gso_sz);
else
- context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
+ context->mtu_msgmax = (IB_MTU_4096 << 5) | 13;
} else if (attr_mask & IB_QP_PATH_MTU) {
if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
pr_err("path MTU (%u) is invalid\n",
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 2a1fdcaa3044..dbd5adc62c3f 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1123,6 +1123,8 @@ static int create_umr_res(struct mlx5_ib_dev *dev)
qp->real_qp = qp;
qp->uobject = NULL;
qp->qp_type = MLX5_IB_QPT_REG_UMR;
+ qp->send_cq = init_attr->send_cq;
+ qp->recv_cq = init_attr->recv_cq;
attr->qp_state = IB_QPS_INIT;
attr->port_num = 1;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 8f8c3af9f4e8..d3f0a384faad 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -1044,10 +1044,15 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
ipoib_ib_dev_down(dev);
if (level == IPOIB_FLUSH_HEAVY) {
+ rtnl_lock();
if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
ipoib_ib_dev_stop(dev);
- if (ipoib_ib_dev_open(dev) != 0)
+
+ result = ipoib_ib_dev_open(dev);
+ rtnl_unlock();
+ if (result)
return;
+
if (netif_queue_stopped(dev))
netif_start_queue(dev);
}
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 8a5998e6a407..88f97ea6b366 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -450,6 +450,7 @@ struct iser_fr_desc {
struct list_head list;
struct iser_reg_resources rsc;
struct iser_pi_context *pi_ctx;
+ struct list_head all_list;
};
/**
@@ -463,6 +464,7 @@ struct iser_fr_pool {
struct list_head list;
spinlock_t lock;
int size;
+ struct list_head all_list;
};
/**
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 42f4da620f2e..0cbc7ceb9a55 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -405,6 +405,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
int i, ret;
INIT_LIST_HEAD(&fr_pool->list);
+ INIT_LIST_HEAD(&fr_pool->all_list);
spin_lock_init(&fr_pool->lock);
fr_pool->size = 0;
for (i = 0; i < cmds_max; i++) {
@@ -416,6 +417,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
}
list_add_tail(&desc->list, &fr_pool->list);
+ list_add_tail(&desc->all_list, &fr_pool->all_list);
fr_pool->size++;
}
@@ -435,13 +437,13 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
struct iser_fr_desc *desc, *tmp;
int i = 0;
- if (list_empty(&fr_pool->list))
+ if (list_empty(&fr_pool->all_list))
return;
iser_info("freeing conn %p fr pool\n", ib_conn);
- list_for_each_entry_safe(desc, tmp, &fr_pool->list, list) {
- list_del(&desc->list);
+ list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) {
+ list_del(&desc->all_list);
iser_free_reg_res(&desc->rsc);
if (desc->pi_ctx)
iser_free_pi_ctx(desc->pi_ctx);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index c52131233ba7..a73874508c3a 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -957,8 +957,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
return -ENOMEM;
attr->qp_state = IB_QPS_INIT;
- attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
- IB_ACCESS_REMOTE_WRITE;
+ attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
attr->port_num = ch->sport->port;
attr->pkey_index = 0;
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index 6f4dc0fd2ca3..51b96e9bf793 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1613,7 +1613,7 @@ static int elantech_set_properties(struct elantech_data *etd)
case 5:
etd->hw_version = 3;
break;
- case 6 ... 14:
+ case 6 ... 15:
etd->hw_version = 4;
break;
default:
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index dbf09836ff30..d1051e3ce819 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -520,6 +520,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "IC4I"),
},
},
+ {
+ /* TUXEDO BU1406 */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "N24_25BU"),
+ },
+ },
{ }
};
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 64f1eb8fdcbc..347aaaa5a7ea 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1541,13 +1541,15 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
return -ENOMEM;
arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
- smmu_domain->pgtbl_ops = pgtbl_ops;
ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
- if (IS_ERR_VALUE(ret))
+ if (IS_ERR_VALUE(ret)) {
free_io_pgtable_ops(pgtbl_ops);
+ return ret;
+ }
- return ret;
+ smmu_domain->pgtbl_ops = pgtbl_ops;
+ return 0;
}
static struct arm_smmu_group *arm_smmu_group_get(struct device *dev)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index f9711aceef54..4efec2db4ee2 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2201,10 +2201,12 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
uint64_t tmp;
if (!sg_res) {
+ unsigned int pgoff = sg->offset & ~PAGE_MASK;
+
sg_res = aligned_nrpages(sg->offset, sg->length);
- sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
+ sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
sg->dma_length = sg->length;
- pteval = page_to_phys(sg_page(sg)) | prot;
+ pteval = (sg_phys(sg) - pgoff) | prot;
phys_pfn = pteval >> VTD_PAGE_SHIFT;
}
@@ -3757,7 +3759,7 @@ static int intel_nontranslate_map_sg(struct device *hddev,
for_each_sg(sglist, sg, nelems, i) {
BUG_ON(!sg_page(sg));
- sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
+ sg->dma_address = sg_phys(sg);
sg->dma_length = sg->length;
}
return nelems;
diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c
index 63faee04a008..636187a4c1a3 100644
--- a/drivers/irqchip/irq-crossbar.c
+++ b/drivers/irqchip/irq-crossbar.c
@@ -199,7 +199,7 @@ static const struct irq_domain_ops crossbar_domain_ops = {
static int __init crossbar_of_init(struct device_node *node)
{
int i, size, reserved = 0;
- u32 max = 0, entry;
+ u32 max = 0, entry, reg_size;
const __be32 *irqsr;
int ret = -ENOMEM;
@@ -276,9 +276,9 @@ static int __init crossbar_of_init(struct device_node *node)
if (!cb->register_offsets)
goto err_irq_map;
- of_property_read_u32(node, "ti,reg-size", &size);
+ of_property_read_u32(node, "ti,reg-size", &reg_size);
- switch (size) {
+ switch (reg_size) {
case 1:
cb->write = crossbar_writeb;
break;
@@ -304,7 +304,7 @@ static int __init crossbar_of_init(struct device_node *node)
continue;
cb->register_offsets[i] = reserved;
- reserved += size;
+ reserved += reg_size;
}
of_property_read_u32(node, "ti,irqs-safe-map", &cb->safe_map);
diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index 823f6985b260..dd7e38ac29bd 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -1032,6 +1032,7 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data)
sizeof(avmb1_carddef))))
return -EFAULT;
cdef.cardtype = AVM_CARDTYPE_B1;
+ cdef.cardnr = 0;
} else {
if ((retval = copy_from_user(&cdef, data,
sizeof(avmb1_extcarddef))))
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index ea47980949ef..4d46f2ce606f 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -479,7 +479,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
if (b == -1)
goto err;
- k->ptr[i] = PTR(ca->buckets[b].gen,
+ k->ptr[i] = MAKE_PTR(ca->buckets[b].gen,
bucket_to_sector(c, b),
ca->sb.nr_this_dev);
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 243de0bf15cd..4bf15182c4da 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -584,7 +584,7 @@ static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey
return false;
for (i = 0; i < KEY_PTRS(l); i++)
- if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
+ if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
return false;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 29eba7219b01..6ed066a0e7c0 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -508,7 +508,7 @@ static void journal_reclaim(struct cache_set *c)
continue;
ja->cur_idx = next;
- k->ptr[n++] = PTR(0,
+ k->ptr[n++] = MAKE_PTR(0,
bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
ca->sb.nr_this_dev);
}
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 6c4c7caea693..e73aeb0e892c 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -468,6 +468,7 @@ struct search {
unsigned recoverable:1;
unsigned write:1;
unsigned read_dirty_data:1;
+ unsigned cache_missed:1;
unsigned long start_time;
@@ -653,6 +654,7 @@ static inline struct search *search_alloc(struct bio *bio,
s->orig_bio = bio;
s->cache_miss = NULL;
+ s->cache_missed = 0;
s->d = d;
s->recoverable = 1;
s->write = (bio->bi_rw & REQ_WRITE) != 0;
@@ -708,7 +710,14 @@ static void cached_dev_read_error(struct closure *cl)
struct search *s = container_of(cl, struct search, cl);
struct bio *bio = &s->bio.bio;
- if (s->recoverable) {
+ /*
+ * If read request hit dirty data (s->read_dirty_data is true),
+ * then recovery a failed read request from cached device may
+ * get a stale data back. So read failure recovery is only
+ * permitted when read request hit clean data in cache device,
+ * or when cache read race happened.
+ */
+ if (s->recoverable && !s->read_dirty_data) {
/* Retry from the backing device: */
trace_bcache_read_retry(s->orig_bio);
@@ -769,7 +778,7 @@ static void cached_dev_read_done_bh(struct closure *cl)
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
bch_mark_cache_accounting(s->iop.c, s->d,
- !s->cache_miss, s->iop.bypass);
+ !s->cache_missed, s->iop.bypass);
trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass);
if (s->iop.error)
@@ -788,6 +797,8 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
struct bio *miss, *cache_bio;
+ s->cache_missed = 1;
+
if (s->cache_miss || s->iop.bypass) {
miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
ret = miss == bio ? MAP_DONE : MAP_CONTINUE;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index c5ceea9222ff..8eaadd9869bc 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -2083,6 +2083,7 @@ static void bcache_exit(void)
if (bcache_major)
unregister_blkdev(bcache_major, "bcache");
unregister_reboot_notifier(&reboot);
+ mutex_destroy(&bch_register_lock);
}
static int __init bcache_init(void)
@@ -2101,14 +2102,15 @@ static int __init bcache_init(void)
bcache_major = register_blkdev(0, "bcache");
if (bcache_major < 0) {
unregister_reboot_notifier(&reboot);
+ mutex_destroy(&bch_register_lock);
return bcache_major;
}
if (!(bcache_wq = create_workqueue("bcache")) ||
!(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
- sysfs_create_files(bcache_kobj, files) ||
bch_request_init() ||
- bch_debug_init(bcache_kobj))
+ bch_debug_init(bcache_kobj) ||
+ sysfs_create_files(bcache_kobj, files))
goto err;
return 0;
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 2ec7f90e3455..969c815c90b6 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1527,7 +1527,8 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
int l;
struct dm_buffer *b, *tmp;
unsigned long freed = 0;
- unsigned long count = nr_to_scan;
+ unsigned long count = c->n_buffers[LIST_CLEAN] +
+ c->n_buffers[LIST_DIRTY];
unsigned long retain_target = get_retain_buffers(c);
for (l = 0; l < LIST_SIZE; l++) {
@@ -1564,6 +1565,7 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
struct dm_bufio_client *c;
unsigned long count;
+ unsigned long retain_target;
c = container_of(shrink, struct dm_bufio_client, shrinker);
if (sc->gfp_mask & __GFP_FS)
@@ -1572,8 +1574,9 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
return 0;
count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
+ retain_target = get_retain_buffers(c);
dm_bufio_unlock(c);
- return count;
+ return (count < retain_target) ? 0 : (count - retain_target);
}
/*
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index d6a1126d85ce..494d01d0e92a 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -821,6 +821,7 @@ static int leave(struct mddev *mddev)
lockres_free(cinfo->no_new_dev_lockres);
lockres_free(cinfo->bitmap_lockres);
dlm_release_lockspace(cinfo->lockspace, 2);
+ kfree(cinfo);
return 0;
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d55bf85b76ce..86ab6d14d782 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1681,8 +1681,11 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
struct r5dev *dev = &sh->dev[i];
if (dev->written || i == pd_idx || i == qd_idx) {
- if (!discard && !test_bit(R5_SkipCopy, &dev->flags))
+ if (!discard && !test_bit(R5_SkipCopy, &dev->flags)) {
set_bit(R5_UPTODATE, &dev->flags);
+ if (test_bit(STRIPE_EXPAND_READY, &sh->state))
+ set_bit(R5_Expanded, &dev->flags);
+ }
if (fua)
set_bit(R5_WantFUA, &dev->flags);
if (sync)
diff --git a/drivers/media/usb/dvb-usb/dibusb-common.c b/drivers/media/usb/dvb-usb/dibusb-common.c
index ef3a8f75f82e..7b15aea2723d 100644
--- a/drivers/media/usb/dvb-usb/dibusb-common.c
+++ b/drivers/media/usb/dvb-usb/dibusb-common.c
@@ -179,8 +179,20 @@ EXPORT_SYMBOL(dibusb_i2c_algo);
int dibusb_read_eeprom_byte(struct dvb_usb_device *d, u8 offs, u8 *val)
{
- u8 wbuf[1] = { offs };
- return dibusb_i2c_msg(d, 0x50, wbuf, 1, val, 1);
+ u8 *buf;
+ int rc;
+
+ buf = kmalloc(2, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ buf[0] = offs;
+
+ rc = dibusb_i2c_msg(d, 0x50, &buf[0], 1, &buf[1], 1);
+ *val = buf[1];
+ kfree(buf);
+
+ return rc;
}
EXPORT_SYMBOL(dibusb_read_eeprom_byte);
diff --git a/drivers/media/usb/usbvision/usbvision-video.c b/drivers/media/usb/usbvision/usbvision-video.c
index 91d709efef7a..cafc34938a79 100644
--- a/drivers/media/usb/usbvision/usbvision-video.c
+++ b/drivers/media/usb/usbvision/usbvision-video.c
@@ -1461,6 +1461,13 @@ static int usbvision_probe(struct usb_interface *intf,
printk(KERN_INFO "%s: %s found\n", __func__,
usbvision_device_data[model].model_string);
+ /*
+ * this is a security check.
+ * an exploit using an incorrect bInterfaceNumber is known
+ */
+ if (ifnum >= USB_MAXINTERFACES || !dev->actconfig->interface[ifnum])
+ return -ENODEV;
+
if (usbvision_device_data[model].interface >= 0)
interface = &dev->actconfig->interface[usbvision_device_data[model].interface]->altsetting[0];
else if (ifnum < dev->actconfig->desc.bNumInterfaces)
diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c
index 55cba89dbdb8..49691a8c74ee 100644
--- a/drivers/memory/omap-gpmc.c
+++ b/drivers/memory/omap-gpmc.c
@@ -1890,9 +1890,7 @@ static int gpmc_probe_onenand_child(struct platform_device *pdev,
if (!of_property_read_u32(child, "dma-channel", &val))
gpmc_onenand_data->dma_channel = val;
- gpmc_onenand_init(gpmc_onenand_data);
-
- return 0;
+ return gpmc_onenand_init(gpmc_onenand_data);
}
#else
static int gpmc_probe_onenand_child(struct platform_device *pdev,
diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c
index 6a0f6ec67c6b..ee7847a1ca06 100644
--- a/drivers/mfd/cros_ec_spi.c
+++ b/drivers/mfd/cros_ec_spi.c
@@ -660,6 +660,7 @@ static int cros_ec_spi_probe(struct spi_device *spi)
sizeof(struct ec_response_get_protocol_info);
ec_dev->dout_size = sizeof(struct ec_host_request);
+ ec_spi->last_transfer_ns = ktime_get_ns();
err = cros_ec_register(ec_dev);
if (err) {
diff --git a/drivers/mfd/twl4030-audio.c b/drivers/mfd/twl4030-audio.c
index 0a1606480023..cc832d309599 100644
--- a/drivers/mfd/twl4030-audio.c
+++ b/drivers/mfd/twl4030-audio.c
@@ -159,13 +159,18 @@ unsigned int twl4030_audio_get_mclk(void)
EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk);
static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata,
- struct device_node *node)
+ struct device_node *parent)
{
+ struct device_node *node;
+
if (pdata && pdata->codec)
return true;
- if (of_find_node_by_name(node, "codec"))
+ node = of_get_child_by_name(parent, "codec");
+ if (node) {
+ of_node_put(node);
return true;
+ }
return false;
}
diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c
index 08a693cd38cc..72aab60ae846 100644
--- a/drivers/mfd/twl6040.c
+++ b/drivers/mfd/twl6040.c
@@ -97,12 +97,16 @@ static struct reg_sequence twl6040_patch[] = {
};
-static bool twl6040_has_vibra(struct device_node *node)
+static bool twl6040_has_vibra(struct device_node *parent)
{
-#ifdef CONFIG_OF
- if (of_find_node_by_name(node, "vibra"))
+ struct device_node *node;
+
+ node = of_get_child_by_name(parent, "vibra");
+ if (node) {
+ of_node_put(node);
return true;
-#endif
+ }
+
return false;
}
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 0c6c17a1c59e..ba2f6d1d7db7 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1329,6 +1329,9 @@ static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu,
/* There should only be one entry, but go through the list
* anyway
*/
+ if (afu->phb == NULL)
+ return result;
+
list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
if (!afu_dev->driver)
continue;
@@ -1369,6 +1372,10 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev,
*/
for (i = 0; i < adapter->slices; i++) {
afu = adapter->afu[i];
+ /*
+ * Tell the AFU drivers; but we don't care what they
+ * say, we're going away.
+ */
cxl_vphb_error_detected(afu, state);
}
return PCI_ERS_RESULT_DISCONNECT;
@@ -1492,6 +1499,9 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev)
if (cxl_afu_select_best_mode(afu))
goto err;
+ if (afu->phb == NULL)
+ continue;
+
cxl_pci_vphb_reconfigure(afu);
list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
@@ -1556,6 +1566,9 @@ static void cxl_pci_resume(struct pci_dev *pdev)
for (i = 0; i < adapter->slices; i++) {
afu = adapter->afu[i];
+ if (afu->phb == NULL)
+ continue;
+
list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
if (afu_dev->driver && afu_dev->driver->err_handler &&
afu_dev->driver->err_handler->resume)
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 5d7c0900fa1b..f112c5bc082a 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -257,6 +257,9 @@ static ssize_t at24_read(struct at24_data *at24,
if (unlikely(!count))
return count;
+ if (off + count > at24->chip.byte_len)
+ return -EINVAL;
+
/*
* Read data from chip, protecting against concurrent updates
* from this host, but not from other I2C masters.
@@ -311,6 +314,9 @@ static ssize_t at24_eeprom_write(struct at24_data *at24, const char *buf,
unsigned long timeout, write_time;
unsigned next_page;
+ if (offset + count > at24->chip.byte_len)
+ return -EINVAL;
+
/* Get corresponding I2C address and adjust offset */
client = at24_translate_offset(at24, &offset);
diff --git a/drivers/misc/uid_sys_stats.c b/drivers/misc/uid_sys_stats.c
index 031320e51522..0e6ab4e7c686 100644
--- a/drivers/misc/uid_sys_stats.c
+++ b/drivers/misc/uid_sys_stats.c
@@ -1,4 +1,4 @@
-/* drivers/misc/uid_cputime.c
+/* drivers/misc/uid_sys_stats.c
*
* Copyright (C) 2014 - 2015 Google, Inc.
*
diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index c40d700d424d..dd1b2c70128c 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -155,6 +155,9 @@ static int mmc_bus_suspend(struct device *dev)
return ret;
ret = host->bus_ops->suspend(host);
+ if (ret)
+ pm_generic_resume(dev);
+
return ret;
}
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index a68f956548e4..fec3c96c1546 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -184,9 +184,10 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
completion = ktime_get();
delta_us = ktime_us_delta(completion,
mrq->io_start);
- blk_update_latency_hist(&host->io_lat_s,
- (mrq->data->flags & MMC_DATA_READ),
- delta_us);
+ blk_update_latency_hist(
+ (mrq->data->flags & MMC_DATA_READ) ?
+ &host->io_lat_read :
+ &host->io_lat_write, delta_us);
}
#endif
trace_mmc_blk_rw_end(cmd->opcode, cmd->arg, mrq->data);
@@ -2943,8 +2944,14 @@ static ssize_t
latency_hist_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct mmc_host *host = cls_dev_to_mmc_host(dev);
+ size_t written_bytes;
- return blk_latency_hist_show(&host->io_lat_s, buf);
+ written_bytes = blk_latency_hist_show("Read", &host->io_lat_read,
+ buf, PAGE_SIZE);
+ written_bytes += blk_latency_hist_show("Write", &host->io_lat_write,
+ buf + written_bytes, PAGE_SIZE - written_bytes);
+
+ return written_bytes;
}
/*
@@ -2962,9 +2969,10 @@ latency_hist_store(struct device *dev, struct device_attribute *attr,
if (kstrtol(buf, 0, &value))
return -EINVAL;
- if (value == BLK_IO_LAT_HIST_ZERO)
- blk_zero_latency_hist(&host->io_lat_s);
- else if (value == BLK_IO_LAT_HIST_ENABLE ||
+ if (value == BLK_IO_LAT_HIST_ZERO) {
+ memset(&host->io_lat_read, 0, sizeof(host->io_lat_read));
+ memset(&host->io_lat_write, 0, sizeof(host->io_lat_write));
+ } else if (value == BLK_IO_LAT_HIST_ENABLE ||
value == BLK_IO_LAT_HIST_DISABLE)
host->latency_hist_enabled = value;
return count;
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index 33dfd7e72516..0bf0d0e9dbdb 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -570,7 +570,7 @@ static void msdc_set_mclk(struct msdc_host *host, unsigned char timing, u32 hz)
}
}
sdr_set_field(host->base + MSDC_CFG, MSDC_CFG_CKMOD | MSDC_CFG_CKDIV,
- (mode << 8) | (div % 0xff));
+ (mode << 8) | div);
sdr_set_bits(host->base + MSDC_CFG, MSDC_CFG_CKPDN);
while (!(readl(host->base + MSDC_CFG) & MSDC_CFG_CKSTB))
cpu_relax();
@@ -1540,7 +1540,7 @@ static int msdc_drv_probe(struct platform_device *pdev)
host->src_clk_freq = clk_get_rate(host->src_clk);
/* Set host parameters to mmc */
mmc->ops = &mt_msdc_ops;
- mmc->f_min = host->src_clk_freq / (4 * 255);
+ mmc->f_min = DIV_ROUND_UP(host->src_clk_freq, 4 * 255);
mmc->caps |= MMC_CAP_ERASE | MMC_CAP_CMD23;
mmc->caps |= MMC_CAP_RUNTIME_RESUME;
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 54ab48827258..7ba109e8cf88 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2663,15 +2663,18 @@ static int panic_nand_write(struct mtd_info *mtd, loff_t to, size_t len,
size_t *retlen, const uint8_t *buf)
{
struct nand_chip *chip = mtd->priv;
+ int chipnr = (int)(to >> chip->chip_shift);
struct mtd_oob_ops ops;
int ret;
- /* Wait for the device to get ready */
- panic_nand_wait(mtd, chip, 400);
-
/* Grab the device */
panic_nand_get_device(chip, mtd, FL_WRITING);
+ chip->select_chip(mtd, chipnr);
+
+ /* Wait for the device to get ready */
+ panic_nand_wait(mtd, chip, 400);
+
memset(&ops, 0, sizeof(ops));
ops.len = len;
ops.datbuf = (uint8_t *)buf;
diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c
index e90c6a7333d7..2e4649655181 100644
--- a/drivers/net/appletalk/ipddp.c
+++ b/drivers/net/appletalk/ipddp.c
@@ -191,7 +191,7 @@ static netdev_tx_t ipddp_xmit(struct sk_buff *skb, struct net_device *dev)
*/
static int ipddp_create(struct ipddp_route *new_rt)
{
- struct ipddp_route *rt = kmalloc(sizeof(*rt), GFP_KERNEL);
+ struct ipddp_route *rt = kzalloc(sizeof(*rt), GFP_KERNEL);
if (rt == NULL)
return -ENOMEM;
diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index 6749b1829469..4d01d7bc24ef 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -652,6 +652,9 @@ static int ti_hecc_rx_poll(struct napi_struct *napi, int quota)
mbx_mask = hecc_read(priv, HECC_CANMIM);
mbx_mask |= HECC_TX_MBOX_MASK;
hecc_write(priv, HECC_CANMIM, mbx_mask);
+ } else {
+ /* repoll is done only if whole budget is used */
+ num_pkts = quota;
}
return num_pkts;
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index eb7192fab593..357c9e89fdf9 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -290,6 +290,8 @@ static void ems_usb_read_interrupt_callback(struct urb *urb)
case -ECONNRESET: /* unlink */
case -ENOENT:
+ case -EPIPE:
+ case -EPROTO:
case -ESHUTDOWN:
return;
diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c
index 4c6707ecc619..afa5b4a7a4a2 100644
--- a/drivers/net/can/usb/esd_usb2.c
+++ b/drivers/net/can/usb/esd_usb2.c
@@ -393,6 +393,8 @@ static void esd_usb2_read_bulk_callback(struct urb *urb)
break;
case -ENOENT:
+ case -EPIPE:
+ case -EPROTO:
case -ESHUTDOWN:
return;
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 27e2352fcc42..b227f81e4a7e 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -430,7 +430,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev)
dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)",
rc);
- return rc;
+ return (rc > 0) ? 0 : rc;
}
static void gs_usb_xmit_callback(struct urb *urb)
diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
index c2e2821a3346..db1855b0e08f 100644
--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c
@@ -603,8 +603,8 @@ static int kvaser_usb_wait_msg(const struct kvaser_usb *dev, u8 id,
}
if (pos + tmp->len > actual_len) {
- dev_err(dev->udev->dev.parent,
- "Format error\n");
+ dev_err_ratelimited(dev->udev->dev.parent,
+ "Format error\n");
break;
}
@@ -809,6 +809,7 @@ static int kvaser_usb_simple_msg_async(struct kvaser_usb_net_priv *priv,
if (err) {
netdev_err(netdev, "Error transmitting URB\n");
usb_unanchor_urb(urb);
+ kfree(buf);
usb_free_urb(urb);
return err;
}
@@ -1321,6 +1322,8 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb)
case 0:
break;
case -ENOENT:
+ case -EPIPE:
+ case -EPROTO:
case -ESHUTDOWN:
return;
default:
@@ -1329,7 +1332,7 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb)
goto resubmit_urb;
}
- while (pos <= urb->actual_length - MSG_HEADER_LEN) {
+ while (pos <= (int)(urb->actual_length - MSG_HEADER_LEN)) {
msg = urb->transfer_buffer + pos;
/* The Kvaser firmware can only read and write messages that
@@ -1348,7 +1351,8 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb)
}
if (pos + msg->len > urb->actual_length) {
- dev_err(dev->udev->dev.parent, "Format error\n");
+ dev_err_ratelimited(dev->udev->dev.parent,
+ "Format error\n");
break;
}
@@ -1767,6 +1771,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
spin_unlock_irqrestore(&priv->tx_contexts_lock, flags);
usb_unanchor_urb(urb);
+ kfree(buf);
stats->tx_dropped++;
diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c
index 449b2a47f9a8..522286cc0f9c 100644
--- a/drivers/net/can/usb/usb_8dev.c
+++ b/drivers/net/can/usb/usb_8dev.c
@@ -524,6 +524,8 @@ static void usb_8dev_read_bulk_callback(struct urb *urb)
break;
case -ENOENT:
+ case -EPIPE:
+ case -EPROTO:
case -ESHUTDOWN:
return;
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 8860e74aa28f..027705117086 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -1045,15 +1045,6 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb,
goto out;
}
- /* Insert TSB and checksum infos */
- if (priv->tsb_en) {
- skb = bcm_sysport_insert_tsb(skb, dev);
- if (!skb) {
- ret = NETDEV_TX_OK;
- goto out;
- }
- }
-
/* The Ethernet switch we are interfaced with needs packets to be at
* least 64 bytes (including FCS) otherwise they will be discarded when
* they enter the switch port logic. When Broadcom tags are enabled, we
@@ -1061,13 +1052,21 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb,
* (including FCS and tag) because the length verification is done after
* the Broadcom tag is stripped off the ingress packet.
*/
- if (skb_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) {
+ if (skb_put_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) {
ret = NETDEV_TX_OK;
goto out;
}
- skb_len = skb->len < ETH_ZLEN + ENET_BRCM_TAG_LEN ?
- ETH_ZLEN + ENET_BRCM_TAG_LEN : skb->len;
+ /* Insert TSB and checksum infos */
+ if (priv->tsb_en) {
+ skb = bcm_sysport_insert_tsb(skb, dev);
+ if (!skb) {
+ ret = NETDEV_TX_OK;
+ goto out;
+ }
+ }
+
+ skb_len = skb->len;
mapping = dma_map_single(kdev, skb->data, skb_len, DMA_TO_DEVICE);
if (dma_mapping_error(kdev, mapping)) {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 1c8123816745..abb3ff6498dc 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -13646,7 +13646,7 @@ static int bnx2x_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
if (!netif_running(bp->dev)) {
DP(BNX2X_MSG_PTP,
"PTP adjfreq called while the interface is down\n");
- return -EFAULT;
+ return -ENETDOWN;
}
if (ppb < 0) {
@@ -13705,6 +13705,12 @@ static int bnx2x_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
{
struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
+ if (!netif_running(bp->dev)) {
+ DP(BNX2X_MSG_PTP,
+ "PTP adjtime called while the interface is down\n");
+ return -ENETDOWN;
+ }
+
DP(BNX2X_MSG_PTP, "PTP adjtime called, delta = %llx\n", delta);
timecounter_adjtime(&bp->timecounter, delta);
@@ -13717,6 +13723,12 @@ static int bnx2x_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
u64 ns;
+ if (!netif_running(bp->dev)) {
+ DP(BNX2X_MSG_PTP,
+ "PTP gettime called while the interface is down\n");
+ return -ENETDOWN;
+ }
+
ns = timecounter_read(&bp->timecounter);
DP(BNX2X_MSG_PTP, "PTP gettime called, ns = %llu\n", ns);
@@ -13732,6 +13744,12 @@ static int bnx2x_ptp_settime(struct ptp_clock_info *ptp,
struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
u64 ns;
+ if (!netif_running(bp->dev)) {
+ DP(BNX2X_MSG_PTP,
+ "PTP settime called while the interface is down\n");
+ return -ENETDOWN;
+ }
+
ns = timespec64_to_ns(ts);
DP(BNX2X_MSG_PTP, "PTP settime called, ns = %llu\n", ns);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 9d027348cd09..5780830f78ad 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -434,7 +434,9 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp,
/* Add/Remove the filter */
rc = bnx2x_config_vlan_mac(bp, &ramrod);
- if (rc && rc != -EEXIST) {
+ if (rc == -EEXIST)
+ return 0;
+ if (rc) {
BNX2X_ERR("Failed to %s %s\n",
filter->add ? "add" : "delete",
(filter->type == BNX2X_VF_FILTER_VLAN_MAC) ?
@@ -444,6 +446,8 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp,
return rc;
}
+ filter->applied = true;
+
return 0;
}
@@ -471,6 +475,8 @@ int bnx2x_vf_mac_vlan_config_list(struct bnx2x *bp, struct bnx2x_virtf *vf,
BNX2X_ERR("Managed only %d/%d filters - rolling back\n",
i, filters->count + 1);
while (--i >= 0) {
+ if (!filters->filters[i].applied)
+ continue;
filters->filters[i].add = !filters->filters[i].add;
bnx2x_vf_mac_vlan_config(bp, vf, qid,
&filters->filters[i],
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index 670a581ffabc..6f6f13dc2be3 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -114,6 +114,7 @@ struct bnx2x_vf_mac_vlan_filter {
(BNX2X_VF_FILTER_MAC | BNX2X_VF_FILTER_VLAN) /*shortcut*/
bool add;
+ bool applied;
u8 *mac;
u16 vid;
};
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 1374e5394a79..a12a4236b143 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -868,7 +868,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
struct bnx2x *bp = netdev_priv(dev);
struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters;
struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp;
- int rc, i = 0;
+ int rc = 0, i = 0;
struct netdev_hw_addr *ha;
if (bp->state != BNX2X_STATE_OPEN) {
@@ -883,6 +883,15 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
/* Get Rx mode requested */
DP(NETIF_MSG_IFUP, "dev->flags = %x\n", dev->flags);
+ /* We support PFVF_MAX_MULTICAST_PER_VF mcast addresses tops */
+ if (netdev_mc_count(dev) > PFVF_MAX_MULTICAST_PER_VF) {
+ DP(NETIF_MSG_IFUP,
+ "VF supports not more than %d multicast MAC addresses\n",
+ PFVF_MAX_MULTICAST_PER_VF);
+ rc = -EINVAL;
+ goto out;
+ }
+
netdev_for_each_mc_addr(ha, dev) {
DP(NETIF_MSG_IFUP, "Adding mcast MAC: %pM\n",
bnx2x_mc_addr(ha));
@@ -890,16 +899,6 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
i++;
}
- /* We support four PFVF_MAX_MULTICAST_PER_VF mcast
- * addresses tops
- */
- if (i >= PFVF_MAX_MULTICAST_PER_VF) {
- DP(NETIF_MSG_IFUP,
- "VF supports not more than %d multicast MAC addresses\n",
- PFVF_MAX_MULTICAST_PER_VF);
- return -EINVAL;
- }
-
req->n_multicast = i;
req->flags |= VFPF_SET_Q_FILTERS_MULTICAST_CHANGED;
req->vf_qid = 0;
@@ -924,7 +923,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
out:
bnx2x_vfpf_finalize(bp, &req->first_tlv);
- return 0;
+ return rc;
}
/* request pf to add a vlan for the vf */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 4744919440e0..a38a9cb3d544 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2014,6 +2014,18 @@ static int bnxt_init_one_rx_ring(struct bnxt *bp, int ring_nr)
return 0;
}
+static void bnxt_init_cp_rings(struct bnxt *bp)
+{
+ int i;
+
+ for (i = 0; i < bp->cp_nr_rings; i++) {
+ struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring;
+ struct bnxt_ring_struct *ring = &cpr->cp_ring_struct;
+
+ ring->fw_ring_id = INVALID_HW_RING_ID;
+ }
+}
+
static int bnxt_init_rx_rings(struct bnxt *bp)
{
int i, rc = 0;
@@ -3977,6 +3989,7 @@ static int bnxt_shutdown_nic(struct bnxt *bp, bool irq_re_init)
static int bnxt_init_nic(struct bnxt *bp, bool irq_re_init)
{
+ bnxt_init_cp_rings(bp);
bnxt_init_rx_rings(bp);
bnxt_init_tx_rings(bp);
bnxt_init_ring_grps(bp, irq_re_init);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index f971d92f7b41..74dd48f2bd89 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1,7 +1,7 @@
/*
* Broadcom GENET (Gigabit Ethernet) controller driver
*
- * Copyright (c) 2014 Broadcom Corporation
+ * Copyright (c) 2014-2017 Broadcom
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -778,8 +778,9 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = {
STAT_GENET_RUNT("rx_runt_bytes", mib.rx_runt_bytes),
/* Misc UniMAC counters */
STAT_GENET_MISC("rbuf_ovflow_cnt", mib.rbuf_ovflow_cnt,
- UMAC_RBUF_OVFL_CNT),
- STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, UMAC_RBUF_ERR_CNT),
+ UMAC_RBUF_OVFL_CNT_V1),
+ STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt,
+ UMAC_RBUF_ERR_CNT_V1),
STAT_GENET_MISC("mdf_err_cnt", mib.mdf_err_cnt, UMAC_MDF_ERR_CNT),
STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed),
STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed),
@@ -821,6 +822,45 @@ static void bcmgenet_get_strings(struct net_device *dev, u32 stringset,
}
}
+static u32 bcmgenet_update_stat_misc(struct bcmgenet_priv *priv, u16 offset)
+{
+ u16 new_offset;
+ u32 val;
+
+ switch (offset) {
+ case UMAC_RBUF_OVFL_CNT_V1:
+ if (GENET_IS_V2(priv))
+ new_offset = RBUF_OVFL_CNT_V2;
+ else
+ new_offset = RBUF_OVFL_CNT_V3PLUS;
+
+ val = bcmgenet_rbuf_readl(priv, new_offset);
+ /* clear if overflowed */
+ if (val == ~0)
+ bcmgenet_rbuf_writel(priv, 0, new_offset);
+ break;
+ case UMAC_RBUF_ERR_CNT_V1:
+ if (GENET_IS_V2(priv))
+ new_offset = RBUF_ERR_CNT_V2;
+ else
+ new_offset = RBUF_ERR_CNT_V3PLUS;
+
+ val = bcmgenet_rbuf_readl(priv, new_offset);
+ /* clear if overflowed */
+ if (val == ~0)
+ bcmgenet_rbuf_writel(priv, 0, new_offset);
+ break;
+ default:
+ val = bcmgenet_umac_readl(priv, offset);
+ /* clear if overflowed */
+ if (val == ~0)
+ bcmgenet_umac_writel(priv, 0, offset);
+ break;
+ }
+
+ return val;
+}
+
static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv)
{
int i, j = 0;
@@ -836,19 +876,28 @@ static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv)
case BCMGENET_STAT_NETDEV:
case BCMGENET_STAT_SOFT:
continue;
- case BCMGENET_STAT_MIB_RX:
- case BCMGENET_STAT_MIB_TX:
case BCMGENET_STAT_RUNT:
- if (s->type != BCMGENET_STAT_MIB_RX)
- offset = BCMGENET_STAT_OFFSET;
+ offset += BCMGENET_STAT_OFFSET;
+ /* fall through */
+ case BCMGENET_STAT_MIB_TX:
+ offset += BCMGENET_STAT_OFFSET;
+ /* fall through */
+ case BCMGENET_STAT_MIB_RX:
val = bcmgenet_umac_readl(priv,
UMAC_MIB_START + j + offset);
+ offset = 0; /* Reset Offset */
break;
case BCMGENET_STAT_MISC:
- val = bcmgenet_umac_readl(priv, s->reg_offset);
- /* clear if overflowed */
- if (val == ~0)
- bcmgenet_umac_writel(priv, 0, s->reg_offset);
+ if (GENET_IS_V1(priv)) {
+ val = bcmgenet_umac_readl(priv, s->reg_offset);
+ /* clear if overflowed */
+ if (val == ~0)
+ bcmgenet_umac_writel(priv, 0,
+ s->reg_offset);
+ } else {
+ val = bcmgenet_update_stat_misc(priv,
+ s->reg_offset);
+ }
break;
}
@@ -2901,6 +2950,8 @@ err_irq0:
err_fini_dma:
bcmgenet_fini_dma(priv);
err_clk_disable:
+ if (priv->internal_phy)
+ bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
clk_disable_unprepare(priv->clk);
return ret;
}
@@ -3277,6 +3328,12 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv)
*/
gphy_rev = reg & 0xffff;
+ /* This is reserved so should require special treatment */
+ if (gphy_rev == 0 || gphy_rev == 0x01ff) {
+ pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev);
+ return;
+ }
+
/* This is the good old scheme, just GPHY major, no minor nor patch */
if ((gphy_rev & 0xf0) != 0)
priv->gphy_rev = gphy_rev << 8;
@@ -3285,12 +3342,6 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv)
else if ((gphy_rev & 0xff00) != 0)
priv->gphy_rev = gphy_rev;
- /* This is reserved so should require special treatment */
- else if (gphy_rev == 0 || gphy_rev == 0x01ff) {
- pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev);
- return;
- }
-
#ifdef CONFIG_PHYS_ADDR_T_64BIT
if (!(params->flags & GENET_HAS_40BITS))
pr_warn("GENET does not support 40-bits PA\n");
@@ -3333,6 +3384,7 @@ static int bcmgenet_probe(struct platform_device *pdev)
const void *macaddr;
struct resource *r;
int err = -EIO;
+ const char *phy_mode_str;
/* Up to GENET_MAX_MQ_CNT + 1 TX queues and RX queues */
dev = alloc_etherdev_mqs(sizeof(*priv), GENET_MAX_MQ_CNT + 1,
@@ -3438,6 +3490,13 @@ static int bcmgenet_probe(struct platform_device *pdev)
priv->clk_eee = NULL;
}
+ /* If this is an internal GPHY, power it on now, before UniMAC is
+ * brought out of reset as absolutely no UniMAC activity is allowed
+ */
+ if (dn && !of_property_read_string(dn, "phy-mode", &phy_mode_str) &&
+ !strcasecmp(phy_mode_str, "internal"))
+ bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
+
err = reset_umac(priv);
if (err)
goto err_clk_disable;
@@ -3604,6 +3663,8 @@ static int bcmgenet_resume(struct device *d)
return 0;
out_clk_disable:
+ if (priv->internal_phy)
+ bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
clk_disable_unprepare(priv->clk);
return ret;
}
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 967367557309..cef53f2d9854 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Broadcom Corporation
+ * Copyright (c) 2014-2017 Broadcom
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -214,7 +214,9 @@ struct bcmgenet_mib_counters {
#define MDIO_REG_SHIFT 16
#define MDIO_REG_MASK 0x1F
-#define UMAC_RBUF_OVFL_CNT 0x61C
+#define UMAC_RBUF_OVFL_CNT_V1 0x61C
+#define RBUF_OVFL_CNT_V2 0x80
+#define RBUF_OVFL_CNT_V3PLUS 0x94
#define UMAC_MPD_CTRL 0x620
#define MPD_EN (1 << 0)
@@ -224,7 +226,9 @@ struct bcmgenet_mib_counters {
#define UMAC_MPD_PW_MS 0x624
#define UMAC_MPD_PW_LS 0x628
-#define UMAC_RBUF_ERR_CNT 0x634
+#define UMAC_RBUF_ERR_CNT_V1 0x634
+#define RBUF_ERR_CNT_V2 0x84
+#define RBUF_ERR_CNT_V3PLUS 0x98
#define UMAC_MDF_ERR_CNT 0x638
#define UMAC_MDF_CTRL 0x650
#define UMAC_MDF_ADDR 0x654
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 3613469dc5c6..ab53e0cfb4dc 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -14228,7 +14228,9 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
/* Reset PHY, otherwise the read DMA engine will be in a mode that
* breaks all requests to 256 bytes.
*/
- if (tg3_asic_rev(tp) == ASIC_REV_57766)
+ if (tg3_asic_rev(tp) == ASIC_REV_57766 ||
+ tg3_asic_rev(tp) == ASIC_REV_5717 ||
+ tg3_asic_rev(tp) == ASIC_REV_5719)
reset_phy = true;
err = tg3_restart_hw(tp, reset_phy);
diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c
index 9e59663a6ead..0f6811860ad5 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c
+++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c
@@ -1930,13 +1930,13 @@ static void
bfa_ioc_send_enable(struct bfa_ioc *ioc)
{
struct bfi_ioc_ctrl_req enable_req;
- struct timeval tv;
bfi_h2i_set(enable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_ENABLE_REQ,
bfa_ioc_portid(ioc));
enable_req.clscode = htons(ioc->clscode);
- do_gettimeofday(&tv);
- enable_req.tv_sec = ntohl(tv.tv_sec);
+ enable_req.rsvd = htons(0);
+ /* overflow in 2106 */
+ enable_req.tv_sec = ntohl(ktime_get_real_seconds());
bfa_ioc_mbox_send(ioc, &enable_req, sizeof(struct bfi_ioc_ctrl_req));
}
@@ -1947,6 +1947,10 @@ bfa_ioc_send_disable(struct bfa_ioc *ioc)
bfi_h2i_set(disable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_DISABLE_REQ,
bfa_ioc_portid(ioc));
+ disable_req.clscode = htons(ioc->clscode);
+ disable_req.rsvd = htons(0);
+ /* overflow in 2106 */
+ disable_req.tv_sec = ntohl(ktime_get_real_seconds());
bfa_ioc_mbox_send(ioc, &disable_req, sizeof(struct bfi_ioc_ctrl_req));
}
diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
index 8fc246ea1fb8..a4ad782007ce 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
@@ -324,7 +324,7 @@ bnad_debugfs_write_regrd(struct file *file, const char __user *buf,
return PTR_ERR(kern_buf);
rc = sscanf(kern_buf, "%x:%x", &addr, &len);
- if (rc < 2) {
+ if (rc < 2 || len > UINT_MAX >> 2) {
netdev_warn(bnad->netdev, "failed to read user buffer\n");
kfree(kern_buf);
return -EINVAL;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index ab716042bdd2..458e2d97d096 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2968,6 +2968,7 @@ static void set_multicast_list(struct net_device *ndev)
struct netdev_hw_addr *ha;
unsigned int i, bit, data, crc, tmp;
unsigned char hash;
+ unsigned int hash_high = 0, hash_low = 0;
if (ndev->flags & IFF_PROMISC) {
tmp = readl(fep->hwp + FEC_R_CNTRL);
@@ -2990,11 +2991,7 @@ static void set_multicast_list(struct net_device *ndev)
return;
}
- /* Clear filter and add the addresses in hash register
- */
- writel(0, fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
- writel(0, fep->hwp + FEC_GRP_HASH_TABLE_LOW);
-
+ /* Add the addresses in hash register */
netdev_for_each_mc_addr(ha, ndev) {
/* calculate crc32 value of mac address */
crc = 0xffffffff;
@@ -3012,16 +3009,14 @@ static void set_multicast_list(struct net_device *ndev)
*/
hash = (crc >> (32 - HASH_BITS)) & 0x3f;
- if (hash > 31) {
- tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
- tmp |= 1 << (hash - 32);
- writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
- } else {
- tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_LOW);
- tmp |= 1 << hash;
- writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_LOW);
- }
+ if (hash > 31)
+ hash_high |= 1 << (hash - 32);
+ else
+ hash_low |= 1 << hash;
}
+
+ writel(hash_high, fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
+ writel(hash_low, fep->hwp + FEC_GRP_HASH_TABLE_LOW);
}
/* Set a MAC change in hardware. */
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 91a5a0ae9cd7..1908a38e7f31 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1362,6 +1362,9 @@ out:
* Checks to see of the link status of the hardware has changed. If a
* change in link status has been detected, then we read the PHY registers
* to get the current speed/duplex if link exists.
+ *
+ * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link
+ * up).
**/
static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
{
@@ -1377,7 +1380,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
* Change or Rx Sequence Error interrupt.
*/
if (!mac->get_link_status)
- return 0;
+ return 1;
/* First we want to see if the MII Status Register reports
* link. If so, then we want to get the current speed/duplex
@@ -1585,10 +1588,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
* different link partner.
*/
ret_val = e1000e_config_fc_after_link_up(hw);
- if (ret_val)
+ if (ret_val) {
e_dbg("Error configuring flow control\n");
+ return ret_val;
+ }
- return ret_val;
+ return 1;
}
static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
index acfb8b1f88a7..a8f9d0012d82 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
@@ -126,6 +126,9 @@ process_mbx:
struct fm10k_mbx_info *mbx = &vf_info->mbx;
u16 glort = vf_info->glort;
+ /* process the SM mailbox first to drain outgoing messages */
+ hw->mbx.ops.process(hw, &hw->mbx);
+
/* verify port mapping is valid, if not reset port */
if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort))
hw->iov.ops.reset_lport(hw, vf_info);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index b5b228c9a030..06b38f50980c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4201,8 +4201,12 @@ static void i40e_napi_enable_all(struct i40e_vsi *vsi)
if (!vsi->netdev)
return;
- for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
- napi_enable(&vsi->q_vectors[q_idx]->napi);
+ for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
+ struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+ if (q_vector->rx.ring || q_vector->tx.ring)
+ napi_enable(&q_vector->napi);
+ }
}
/**
@@ -4216,8 +4220,12 @@ static void i40e_napi_disable_all(struct i40e_vsi *vsi)
if (!vsi->netdev)
return;
- for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
- napi_disable(&vsi->q_vectors[q_idx]->napi);
+ for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
+ struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+ if (q_vector->rx.ring || q_vector->tx.ring)
+ napi_disable(&q_vector->napi);
+ }
}
/**
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index c55552c3d2f9..53803fd6350c 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -3005,6 +3005,8 @@ static int igb_sw_init(struct igb_adapter *adapter)
/* Setup and initialize a copy of the hw vlan table array */
adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32),
GFP_ATOMIC);
+ if (!adapter->shadow_vfta)
+ return -ENOMEM;
/* This call may decrease the number of queues */
if (igb_init_interrupt_scheme(adapter, true)) {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index ce61b36b94f1..105dd00ddc1a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -3620,10 +3620,10 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
fw_cmd.ver_build = build;
fw_cmd.ver_sub = sub;
fw_cmd.hdr.checksum = 0;
- fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
- (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
fw_cmd.pad = 0;
fw_cmd.pad2 = 0;
+ fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
+ (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
ret_val = ixgbe_host_interface_command(hw, (u32 *)&fw_cmd,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 31f864fb30c1..a75f2e3ce86f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -564,6 +564,8 @@ static s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
/* convert offset from words to bytes */
buffer.address = cpu_to_be32((offset + current_word) * 2);
buffer.length = cpu_to_be16(words_to_read * 2);
+ buffer.pad2 = 0;
+ buffer.pad3 = 0;
status = ixgbe_host_interface_command(hw, (u32 *)&buffer,
sizeof(buffer),
diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c
index fc2fb25343f4..c122b3b99cd8 100644
--- a/drivers/net/ethernet/marvell/mvmdio.c
+++ b/drivers/net/ethernet/marvell/mvmdio.c
@@ -241,7 +241,8 @@ static int orion_mdio_probe(struct platform_device *pdev)
dev->regs + MVMDIO_ERR_INT_MASK);
} else if (dev->err_interrupt == -EPROBE_DEFER) {
- return -EPROBE_DEFER;
+ ret = -EPROBE_DEFER;
+ goto out_mdio;
}
mutex_init(&dev->lock);
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 15056f06754a..7430dd44019e 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -914,6 +914,10 @@ static void mvneta_port_disable(struct mvneta_port *pp)
val &= ~MVNETA_GMAC0_PORT_ENABLE;
mvreg_write(pp, MVNETA_GMAC_CTRL_0, val);
+ pp->link = 0;
+ pp->duplex = -1;
+ pp->speed = 0;
+
udelay(200);
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index d48d5793407d..fc222df47aa9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2278,6 +2278,17 @@ static int sync_toggles(struct mlx4_dev *dev)
rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read));
if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) {
/* PCI might be offline */
+
+ /* If device removal has been requested,
+ * do not continue retrying.
+ */
+ if (dev->persist->interface_state &
+ MLX4_INTERFACE_STATE_NOWAIT) {
+ mlx4_warn(dev,
+ "communication channel is offline\n");
+ return -EIO;
+ }
+
msleep(100);
wr_toggle = swab32(readl(&priv->mfunc.comm->
slave_write));
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 99361352ed0d..a7d3144c2388 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1763,6 +1763,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev)
(u32)(1 << COMM_CHAN_OFFLINE_OFFSET));
if (!offline_bit)
return 0;
+
+ /* If device removal has been requested,
+ * do not continue retrying.
+ */
+ if (dev->persist->interface_state &
+ MLX4_INTERFACE_STATE_NOWAIT)
+ break;
+
/* There are cases as part of AER/Reset flow that PF needs
* around 100 msec to load. We therefore sleep for 100 msec
* to allow other tasks to make use of that CPU during this
@@ -3690,6 +3698,9 @@ static void mlx4_remove_one(struct pci_dev *pdev)
struct mlx4_priv *priv = mlx4_priv(dev);
int active_vfs = 0;
+ if (mlx4_is_slave(dev))
+ persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT;
+
mutex_lock(&persist->interface_state_mutex);
persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
mutex_unlock(&persist->interface_state_mutex);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 236fb5d2ad69..c7fe61f1f89f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -599,7 +599,7 @@ static inline void mlxsw_reg_spvid_pack(char *payload, u8 local_port, u16 pvid)
#define MLXSW_REG_SPVM_ID 0x200F
#define MLXSW_REG_SPVM_BASE_LEN 0x04 /* base length, without records */
#define MLXSW_REG_SPVM_REC_LEN 0x04 /* record length */
-#define MLXSW_REG_SPVM_REC_MAX_COUNT 256
+#define MLXSW_REG_SPVM_REC_MAX_COUNT 255
#define MLXSW_REG_SPVM_LEN (MLXSW_REG_SPVM_BASE_LEN + \
MLXSW_REG_SPVM_REC_LEN * MLXSW_REG_SPVM_REC_MAX_COUNT)
@@ -1139,7 +1139,7 @@ static inline void mlxsw_reg_sfmr_pack(char *payload,
#define MLXSW_REG_SPVMLR_ID 0x2020
#define MLXSW_REG_SPVMLR_BASE_LEN 0x04 /* base length, without records */
#define MLXSW_REG_SPVMLR_REC_LEN 0x04 /* record length */
-#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 256
+#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 255
#define MLXSW_REG_SPVMLR_LEN (MLXSW_REG_SPVMLR_BASE_LEN + \
MLXSW_REG_SPVMLR_REC_LEN * \
MLXSW_REG_SPVMLR_REC_MAX_COUNT)
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 585e90f8341d..f735dfcb64ae 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -831,14 +831,10 @@ static int ravb_poll(struct napi_struct *napi, int budget)
/* Receive error message handling */
priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors;
priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors;
- if (priv->rx_over_errors != ndev->stats.rx_over_errors) {
+ if (priv->rx_over_errors != ndev->stats.rx_over_errors)
ndev->stats.rx_over_errors = priv->rx_over_errors;
- netif_err(priv, rx_err, ndev, "Receive Descriptor Empty\n");
- }
- if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) {
+ if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors)
ndev->stats.rx_fifo_errors = priv->rx_fifo_errors;
- netif_err(priv, rx_err, ndev, "Receive FIFO Overflow\n");
- }
out:
return budget - quota;
}
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 479af106aaeb..424d1dee55c9 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -3176,18 +3176,37 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
/* ioremap the TSU registers */
if (mdp->cd->tsu) {
struct resource *rtsu;
+
rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu);
- if (IS_ERR(mdp->tsu_addr)) {
- ret = PTR_ERR(mdp->tsu_addr);
+ if (!rtsu) {
+ dev_err(&pdev->dev, "no TSU resource\n");
+ ret = -ENODEV;
+ goto out_release;
+ }
+ /* We can only request the TSU region for the first port
+ * of the two sharing this TSU for the probe to succeed...
+ */
+ if (devno % 2 == 0 &&
+ !devm_request_mem_region(&pdev->dev, rtsu->start,
+ resource_size(rtsu),
+ dev_name(&pdev->dev))) {
+ dev_err(&pdev->dev, "can't request TSU resource.\n");
+ ret = -EBUSY;
+ goto out_release;
+ }
+ mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start,
+ resource_size(rtsu));
+ if (!mdp->tsu_addr) {
+ dev_err(&pdev->dev, "TSU region ioremap() failed.\n");
+ ret = -ENOMEM;
goto out_release;
}
mdp->port = devno % 2;
ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER;
}
- /* initialize first or needed device */
- if (!devno || pd->needs_init) {
+ /* Need to init only the first port of the two sharing a TSU */
+ if (devno % 2 == 0) {
if (mdp->cd->chip_reset)
mdp->cd->chip_reset(ndev);
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index cbe9a330117a..063aca17e698 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -4307,7 +4307,7 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx)
* MCFW do not support VFs.
*/
rc = efx_ef10_vport_set_mac_address(efx);
- } else {
+ } else if (rc) {
efx_mcdi_display_error(efx, MC_CMD_VADAPTOR_SET_MAC,
sizeof(inbuf), NULL, 0, rc);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index e9a9c7fd401d..a534672d0955 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -272,8 +272,14 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
{
char *phy_bus_name = priv->plat->phy_bus_name;
unsigned long flags;
+ int interface = priv->plat->interface;
bool ret = false;
+ if ((interface != PHY_INTERFACE_MODE_MII) &&
+ (interface != PHY_INTERFACE_MODE_GMII) &&
+ !phy_interface_mode_is_rgmii(interface))
+ goto out;
+
/* Using PCS we cannot dial with the phy registers at this stage
* so we do not support extra feature like EEE.
*/
diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c
index 0ddb54fe3d91..a539e831b4b1 100644
--- a/drivers/net/fjes/fjes_main.c
+++ b/drivers/net/fjes/fjes_main.c
@@ -1205,7 +1205,7 @@ static void fjes_netdev_setup(struct net_device *netdev)
fjes_set_ethtool_ops(netdev);
netdev->mtu = fjes_support_mtu[0];
netdev->flags |= IFF_BROADCAST;
- netdev->features |= NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_FILTER;
+ netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
}
static void fjes_irq_watch_task(struct work_struct *work)
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 8c48bb2a94ea..af827faec7fe 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -388,7 +388,7 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
struct dst_entry *dst;
int err, ret = NET_XMIT_DROP;
struct flowi6 fl6 = {
- .flowi6_iif = dev->ifindex,
+ .flowi6_oif = dev->ifindex,
.daddr = ip6h->daddr,
.saddr = ip6h->saddr,
.flowi6_flags = FLOWI_FLAG_ANYSRC,
diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c
index a0849f49bbec..c0192f97ecc8 100644
--- a/drivers/net/irda/vlsi_ir.c
+++ b/drivers/net/irda/vlsi_ir.c
@@ -418,8 +418,9 @@ static struct vlsi_ring *vlsi_alloc_ring(struct pci_dev *pdev, struct ring_descr
memset(rd, 0, sizeof(*rd));
rd->hw = hwmap + i;
rd->buf = kmalloc(len, GFP_KERNEL|GFP_DMA);
- if (rd->buf == NULL ||
- !(busaddr = pci_map_single(pdev, rd->buf, len, dir))) {
+ if (rd->buf)
+ busaddr = pci_map_single(pdev, rd->buf, len, dir);
+ if (rd->buf == NULL || pci_dma_mapping_error(pdev, busaddr)) {
if (rd->buf) {
net_err_ratelimited("%s: failed to create PCI-MAP for %p\n",
__func__, rd->buf);
@@ -430,8 +431,7 @@ static struct vlsi_ring *vlsi_alloc_ring(struct pci_dev *pdev, struct ring_descr
rd = r->rd + j;
busaddr = rd_get_addr(rd);
rd_set_addr_status(rd, 0, 0);
- if (busaddr)
- pci_unmap_single(pdev, busaddr, len, dir);
+ pci_unmap_single(pdev, busaddr, len, dir);
kfree(rd->buf);
rd->buf = NULL;
}
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 40cd86614677..9897cabec371 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -441,7 +441,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
struct macvlan_dev, list);
else
vlan = macvlan_hash_lookup(port, eth->h_dest);
- if (vlan == NULL)
+ if (!vlan || vlan->mode == MACVLAN_MODE_SOURCE)
return RX_HANDLER_PASS;
dev = vlan->dev;
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 62361f8af375..e7df73ba8d63 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -105,7 +105,7 @@ static int at803x_set_wol(struct phy_device *phydev,
mac = (const u8 *) ndev->dev_addr;
if (!is_valid_ether_addr(mac))
- return -EFAULT;
+ return -EINVAL;
for (i = 0; i < 3; i++) {
phy_write(phydev, AT803X_MMD_ACCESS_CONTROL,
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 66e7e85ecc7b..53af8e59b1a5 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -542,6 +542,7 @@ static int ksz9031_read_status(struct phy_device *phydev)
phydev->link = 0;
if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev))
phydev->drv->config_intr(phydev);
+ return genphy_config_aneg(phydev);
}
return 0;
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c
index c72c42206850..21d22f86134e 100644
--- a/drivers/net/phy/spi_ks8995.c
+++ b/drivers/net/phy/spi_ks8995.c
@@ -310,6 +310,7 @@ static int ks8995_probe(struct spi_device *spi)
if (err)
return err;
+ sysfs_attr_init(&ks->regs_attr.attr);
err = sysfs_create_bin_file(&spi->dev.kobj, &ks->regs_attr);
if (err) {
dev_err(&spi->dev, "unable to create sysfs file, err=%d\n",
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index dc454138d600..e2decf71c6d1 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -942,6 +942,7 @@ static __net_exit void ppp_exit_net(struct net *net)
unregister_netdevice_many(&list);
rtnl_unlock();
+ mutex_destroy(&pn->all_ppp_mutex);
idr_destroy(&pn->units_idr);
}
diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c
index e221bfcee76b..947bea81d924 100644
--- a/drivers/net/usb/cx82310_eth.c
+++ b/drivers/net/usb/cx82310_eth.c
@@ -293,12 +293,9 @@ static struct sk_buff *cx82310_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
{
int len = skb->len;
- if (skb_headroom(skb) < 2) {
- struct sk_buff *skb2 = skb_copy_expand(skb, 2, 0, flags);
+ if (skb_cow_head(skb, 2)) {
dev_kfree_skb_any(skb);
- skb = skb2;
- if (!skb)
- return NULL;
+ return NULL;
}
skb_push(skb, 2);
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 226668ead0d8..41e9ebd7d0a6 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2050,14 +2050,9 @@ static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev,
{
u32 tx_cmd_a, tx_cmd_b;
- if (skb_headroom(skb) < TX_OVERHEAD) {
- struct sk_buff *skb2;
-
- skb2 = skb_copy_expand(skb, TX_OVERHEAD, 0, flags);
+ if (skb_cow_head(skb, TX_OVERHEAD)) {
dev_kfree_skb_any(skb);
- skb = skb2;
- if (!skb)
- return NULL;
+ return NULL;
}
if (lan78xx_linearize(skb) < 0)
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index e325ca3ad565..b0ea8dee5f06 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -410,6 +410,10 @@ static const struct usb_device_id products[] = {
USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 0x01, 0x69),
.driver_info = (unsigned long)&qmi_wwan_info,
},
+ { /* Motorola Mapphone devices with MDM6600 */
+ USB_VENDOR_AND_INTERFACE_INFO(0x22b8, USB_CLASS_VENDOR_SPEC, 0xfb, 0xff),
+ .driver_info = (unsigned long)&qmi_wwan_info,
+ },
/* 2. Combined interface devices matching on class+protocol */
{ /* Huawei E367 and possibly others in "Windows mode" */
@@ -733,6 +737,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x1199, 0x9079, 10)}, /* Sierra Wireless EM74xx */
{QMI_FIXED_INTF(0x1199, 0x907b, 8)}, /* Sierra Wireless EM74xx */
{QMI_FIXED_INTF(0x1199, 0x907b, 10)}, /* Sierra Wireless EM74xx */
+ {QMI_FIXED_INTF(0x1199, 0x9091, 8)}, /* Sierra Wireless EM7565 */
{QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
{QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */
{QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 1c27e6fb99f9..89950f5cea71 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -25,12 +25,13 @@
#include <uapi/linux/mdio.h>
#include <linux/mdio.h>
#include <linux/usb/cdc.h>
+#include <linux/suspend.h>
/* Information for net-next */
#define NETNEXT_VERSION "08"
/* Information for net */
-#define NET_VERSION "2"
+#define NET_VERSION "3"
#define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION
#define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
@@ -604,6 +605,9 @@ struct r8152 {
struct delayed_work schedule;
struct mii_if_info mii;
struct mutex control; /* use for hw setting */
+#ifdef CONFIG_PM_SLEEP
+ struct notifier_block pm_notifier;
+#endif
struct rtl_ops {
void (*init)(struct r8152 *);
@@ -1207,6 +1211,7 @@ static void intr_callback(struct urb *urb)
}
} else {
if (netif_carrier_ok(tp->netdev)) {
+ netif_stop_queue(tp->netdev);
set_bit(RTL8152_LINK_CHG, &tp->flags);
schedule_delayed_work(&tp->schedule, 0);
}
@@ -1277,6 +1282,7 @@ static int alloc_all_mem(struct r8152 *tp)
spin_lock_init(&tp->rx_lock);
spin_lock_init(&tp->tx_lock);
INIT_LIST_HEAD(&tp->tx_free);
+ INIT_LIST_HEAD(&tp->rx_done);
skb_queue_head_init(&tp->tx_queue);
skb_queue_head_init(&tp->rx_queue);
@@ -1941,7 +1947,6 @@ static void _rtl8152_set_rx_mode(struct net_device *netdev)
__le32 tmp[2];
u32 ocp_data;
- clear_bit(RTL8152_SET_RX_MODE, &tp->flags);
netif_stop_queue(netdev);
ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
ocp_data &= ~RCR_ACPT_ALL;
@@ -2427,8 +2432,6 @@ static void rtl_phy_reset(struct r8152 *tp)
u16 data;
int i;
- clear_bit(PHY_RESET, &tp->flags);
-
data = r8152_mdio_read(tp, MII_BMCR);
/* don't reset again before the previous one complete */
@@ -2458,23 +2461,23 @@ static void r8153_teredo_off(struct r8152 *tp)
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_TEREDO_TIMER, 0);
}
-static void r8152b_disable_aldps(struct r8152 *tp)
-{
- ocp_reg_write(tp, OCP_ALDPS_CONFIG, ENPDNPS | LINKENA | DIS_SDSAVE);
- msleep(20);
-}
-
-static inline void r8152b_enable_aldps(struct r8152 *tp)
+static void r8152_aldps_en(struct r8152 *tp, bool enable)
{
- ocp_reg_write(tp, OCP_ALDPS_CONFIG, ENPWRSAVE | ENPDNPS |
- LINKENA | DIS_SDSAVE);
+ if (enable) {
+ ocp_reg_write(tp, OCP_ALDPS_CONFIG, ENPWRSAVE | ENPDNPS |
+ LINKENA | DIS_SDSAVE);
+ } else {
+ ocp_reg_write(tp, OCP_ALDPS_CONFIG, ENPDNPS | LINKENA |
+ DIS_SDSAVE);
+ msleep(20);
+ }
}
static void rtl8152_disable(struct r8152 *tp)
{
- r8152b_disable_aldps(tp);
+ r8152_aldps_en(tp, false);
rtl_disable(tp);
- r8152b_enable_aldps(tp);
+ r8152_aldps_en(tp, true);
}
static void r8152b_hw_phy_cfg(struct r8152 *tp)
@@ -2786,30 +2789,26 @@ static void r8153_enter_oob(struct r8152 *tp)
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
}
-static void r8153_disable_aldps(struct r8152 *tp)
+static void r8153_aldps_en(struct r8152 *tp, bool enable)
{
u16 data;
data = ocp_reg_read(tp, OCP_POWER_CFG);
- data &= ~EN_ALDPS;
- ocp_reg_write(tp, OCP_POWER_CFG, data);
- msleep(20);
-}
-
-static void r8153_enable_aldps(struct r8152 *tp)
-{
- u16 data;
-
- data = ocp_reg_read(tp, OCP_POWER_CFG);
- data |= EN_ALDPS;
- ocp_reg_write(tp, OCP_POWER_CFG, data);
+ if (enable) {
+ data |= EN_ALDPS;
+ ocp_reg_write(tp, OCP_POWER_CFG, data);
+ } else {
+ data &= ~EN_ALDPS;
+ ocp_reg_write(tp, OCP_POWER_CFG, data);
+ msleep(20);
+ }
}
static void rtl8153_disable(struct r8152 *tp)
{
- r8153_disable_aldps(tp);
+ r8153_aldps_en(tp, false);
rtl_disable(tp);
- r8153_enable_aldps(tp);
+ r8153_aldps_en(tp, true);
usb_enable_lpm(tp->udev);
}
@@ -2887,10 +2886,9 @@ static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u16 speed, u8 duplex)
r8152_mdio_write(tp, MII_ADVERTISE, anar);
r8152_mdio_write(tp, MII_BMCR, bmcr);
- if (test_bit(PHY_RESET, &tp->flags)) {
+ if (test_and_clear_bit(PHY_RESET, &tp->flags)) {
int i;
- clear_bit(PHY_RESET, &tp->flags);
for (i = 0; i < 50; i++) {
msleep(20);
if ((r8152_mdio_read(tp, MII_BMCR) & BMCR_RESET) == 0)
@@ -2899,7 +2897,6 @@ static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u16 speed, u8 duplex)
}
out:
-
return ret;
}
@@ -2908,9 +2905,9 @@ static void rtl8152_up(struct r8152 *tp)
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return;
- r8152b_disable_aldps(tp);
+ r8152_aldps_en(tp, false);
r8152b_exit_oob(tp);
- r8152b_enable_aldps(tp);
+ r8152_aldps_en(tp, true);
}
static void rtl8152_down(struct r8152 *tp)
@@ -2921,9 +2918,9 @@ static void rtl8152_down(struct r8152 *tp)
}
r8152_power_cut_en(tp, false);
- r8152b_disable_aldps(tp);
+ r8152_aldps_en(tp, false);
r8152b_enter_oob(tp);
- r8152b_enable_aldps(tp);
+ r8152_aldps_en(tp, true);
}
static void rtl8153_up(struct r8152 *tp)
@@ -2932,9 +2929,9 @@ static void rtl8153_up(struct r8152 *tp)
return;
r8153_u1u2en(tp, false);
- r8153_disable_aldps(tp);
+ r8153_aldps_en(tp, false);
r8153_first_init(tp);
- r8153_enable_aldps(tp);
+ r8153_aldps_en(tp, true);
r8153_u2p3en(tp, true);
r8153_u1u2en(tp, true);
usb_enable_lpm(tp->udev);
@@ -2950,9 +2947,9 @@ static void rtl8153_down(struct r8152 *tp)
r8153_u1u2en(tp, false);
r8153_u2p3en(tp, false);
r8153_power_cut_en(tp, false);
- r8153_disable_aldps(tp);
+ r8153_aldps_en(tp, false);
r8153_enter_oob(tp);
- r8153_enable_aldps(tp);
+ r8153_aldps_en(tp, true);
}
static bool rtl8152_in_nway(struct r8152 *tp)
@@ -2986,7 +2983,6 @@ static void set_carrier(struct r8152 *tp)
struct net_device *netdev = tp->netdev;
u8 speed;
- clear_bit(RTL8152_LINK_CHG, &tp->flags);
speed = rtl8152_get_speed(tp);
if (speed & LINK_STATUS) {
@@ -3000,6 +2996,9 @@ static void set_carrier(struct r8152 *tp)
napi_enable(&tp->napi);
netif_wake_queue(netdev);
netif_info(tp, link, netdev, "carrier on\n");
+ } else if (netif_queue_stopped(netdev) &&
+ skb_queue_len(&tp->tx_queue) < tp->tx_qlen) {
+ netif_wake_queue(netdev);
}
} else {
if (netif_carrier_ok(netdev)) {
@@ -3033,20 +3032,18 @@ static void rtl_work_func_t(struct work_struct *work)
goto out1;
}
- if (test_bit(RTL8152_LINK_CHG, &tp->flags))
+ if (test_and_clear_bit(RTL8152_LINK_CHG, &tp->flags))
set_carrier(tp);
- if (test_bit(RTL8152_SET_RX_MODE, &tp->flags))
+ if (test_and_clear_bit(RTL8152_SET_RX_MODE, &tp->flags))
_rtl8152_set_rx_mode(tp->netdev);
/* don't schedule napi before linking */
- if (test_bit(SCHEDULE_NAPI, &tp->flags) &&
- netif_carrier_ok(tp->netdev)) {
- clear_bit(SCHEDULE_NAPI, &tp->flags);
+ if (test_and_clear_bit(SCHEDULE_NAPI, &tp->flags) &&
+ netif_carrier_ok(tp->netdev))
napi_schedule(&tp->napi);
- }
- if (test_bit(PHY_RESET, &tp->flags))
+ if (test_and_clear_bit(PHY_RESET, &tp->flags))
rtl_phy_reset(tp);
mutex_unlock(&tp->control);
@@ -3055,6 +3052,33 @@ out1:
usb_autopm_put_interface(tp->intf);
}
+#ifdef CONFIG_PM_SLEEP
+static int rtl_notifier(struct notifier_block *nb, unsigned long action,
+ void *data)
+{
+ struct r8152 *tp = container_of(nb, struct r8152, pm_notifier);
+
+ switch (action) {
+ case PM_HIBERNATION_PREPARE:
+ case PM_SUSPEND_PREPARE:
+ usb_autopm_get_interface(tp->intf);
+ break;
+
+ case PM_POST_HIBERNATION:
+ case PM_POST_SUSPEND:
+ usb_autopm_put_interface(tp->intf);
+ break;
+
+ case PM_POST_RESTORE:
+ case PM_RESTORE_PREPARE:
+ default:
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+#endif
+
static int rtl8152_open(struct net_device *netdev)
{
struct r8152 *tp = netdev_priv(netdev);
@@ -3097,6 +3121,10 @@ static int rtl8152_open(struct net_device *netdev)
mutex_unlock(&tp->control);
usb_autopm_put_interface(tp->intf);
+#ifdef CONFIG_PM_SLEEP
+ tp->pm_notifier.notifier_call = rtl_notifier;
+ register_pm_notifier(&tp->pm_notifier);
+#endif
out:
return res;
@@ -3107,6 +3135,9 @@ static int rtl8152_close(struct net_device *netdev)
struct r8152 *tp = netdev_priv(netdev);
int res = 0;
+#ifdef CONFIG_PM_SLEEP
+ unregister_pm_notifier(&tp->pm_notifier);
+#endif
napi_disable(&tp->napi);
clear_bit(WORK_ENABLE, &tp->flags);
usb_kill_urb(tp->intr_urb);
@@ -3245,7 +3276,7 @@ static void r8152b_init(struct r8152 *tp)
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return;
- r8152b_disable_aldps(tp);
+ r8152_aldps_en(tp, false);
if (tp->version == RTL_VER_01) {
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_LED_FEATURE);
@@ -3267,7 +3298,7 @@ static void r8152b_init(struct r8152 *tp)
ocp_write_word(tp, MCU_TYPE_PLA, PLA_GPHY_INTR_IMR, ocp_data);
r8152b_enable_eee(tp);
- r8152b_enable_aldps(tp);
+ r8152_aldps_en(tp, true);
r8152b_enable_fc(tp);
rtl_tally_reset(tp);
@@ -3285,7 +3316,7 @@ static void r8153_init(struct r8152 *tp)
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return;
- r8153_disable_aldps(tp);
+ r8153_aldps_en(tp, false);
r8153_u1u2en(tp, false);
for (i = 0; i < 500; i++) {
@@ -3374,7 +3405,7 @@ static void r8153_init(struct r8152 *tp)
EEE_SPDWN_EN);
r8153_enable_eee(tp);
- r8153_enable_aldps(tp);
+ r8153_aldps_en(tp, true);
r8152b_enable_fc(tp);
rtl_tally_reset(tp);
r8153_u2p3en(tp, true);
@@ -3560,8 +3591,18 @@ static int rtl8152_resume(struct usb_interface *intf)
clear_bit(SELECTIVE_SUSPEND, &tp->flags);
napi_disable(&tp->napi);
set_bit(WORK_ENABLE, &tp->flags);
- if (netif_carrier_ok(tp->netdev))
- rtl_start_rx(tp);
+
+ if (netif_carrier_ok(tp->netdev)) {
+ if (rtl8152_get_speed(tp) & LINK_STATUS) {
+ rtl_start_rx(tp);
+ } else {
+ netif_carrier_off(tp->netdev);
+ tp->rtl_ops.disable(tp);
+ netif_info(tp, link, tp->netdev,
+ "linking down\n");
+ }
+ }
+
napi_enable(&tp->napi);
} else {
tp->rtl_ops.up(tp);
diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 30033dbe6662..c5f375befd2f 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -2193,13 +2193,9 @@ static struct sk_buff *smsc75xx_tx_fixup(struct usbnet *dev,
{
u32 tx_cmd_a, tx_cmd_b;
- if (skb_headroom(skb) < SMSC75XX_TX_OVERHEAD) {
- struct sk_buff *skb2 =
- skb_copy_expand(skb, SMSC75XX_TX_OVERHEAD, 0, flags);
+ if (skb_cow_head(skb, SMSC75XX_TX_OVERHEAD)) {
dev_kfree_skb_any(skb);
- skb = skb2;
- if (!skb)
- return NULL;
+ return NULL;
}
tx_cmd_a = (u32)(skb->len & TX_CMD_A_LEN) | TX_CMD_A_FCS;
diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c
index 4a1e9c489f1f..aadfe1d1c37e 100644
--- a/drivers/net/usb/sr9700.c
+++ b/drivers/net/usb/sr9700.c
@@ -456,14 +456,9 @@ static struct sk_buff *sr9700_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
len = skb->len;
- if (skb_headroom(skb) < SR_TX_OVERHEAD) {
- struct sk_buff *skb2;
-
- skb2 = skb_copy_expand(skb, SR_TX_OVERHEAD, 0, flags);
+ if (skb_cow_head(skb, SR_TX_OVERHEAD)) {
dev_kfree_skb_any(skb);
- skb = skb2;
- if (!skb)
- return NULL;
+ return NULL;
}
__skb_push(skb, SR_TX_OVERHEAD);
diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c
index e7f5910a6519..f8eb66ef2944 100644
--- a/drivers/net/wimax/i2400m/usb.c
+++ b/drivers/net/wimax/i2400m/usb.c
@@ -467,6 +467,9 @@ int i2400mu_probe(struct usb_interface *iface,
struct i2400mu *i2400mu;
struct usb_device *usb_dev = interface_to_usbdev(iface);
+ if (iface->cur_altsetting->desc.bNumEndpoints < 4)
+ return -ENODEV;
+
if (usb_dev->speed != USB_SPEED_HIGH)
dev_err(dev, "device not connected as high speed\n");
diff --git a/drivers/net/wireless/ath/ath9k/tx99.c b/drivers/net/wireless/ath/ath9k/tx99.c
index b4e6304afd40..7ee1a3183a06 100644
--- a/drivers/net/wireless/ath/ath9k/tx99.c
+++ b/drivers/net/wireless/ath/ath9k/tx99.c
@@ -180,6 +180,9 @@ static ssize_t write_file_tx99(struct file *file, const char __user *user_buf,
ssize_t len;
int r;
+ if (count < 1)
+ return -EINVAL;
+
if (sc->cur_chan->nvifs > 1)
return -EOPNOTSUPP;
@@ -187,6 +190,8 @@ static ssize_t write_file_tx99(struct file *file, const char __user *user_buf,
if (copy_from_user(buf, user_buf, len))
return -EFAULT;
+ buf[len] = '\0';
+
if (strtobool(buf, &start))
return -EINVAL;
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 2a996a68fc2b..f877fbc7d7af 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2885,6 +2885,7 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info)
{
struct hwsim_new_radio_params param = { 0 };
const char *hwname = NULL;
+ int ret;
param.reg_strict = info->attrs[HWSIM_ATTR_REG_STRICT_REG];
param.p2p_device = info->attrs[HWSIM_ATTR_SUPPORT_P2P_DEVICE];
@@ -2924,7 +2925,9 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info)
param.regd = hwsim_world_regdom_custom[idx];
}
- return mac80211_hwsim_new_radio(info, &param);
+ ret = mac80211_hwsim_new_radio(info, &param);
+ kfree(hwname);
+ return ret;
}
static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info)
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 34a062ccb11d..fd221cc4cb79 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1840,27 +1840,19 @@ static int talk_to_netback(struct xenbus_device *dev,
xennet_destroy_queues(info);
err = xennet_create_queues(info, &num_queues);
- if (err < 0)
- goto destroy_ring;
+ if (err < 0) {
+ xenbus_dev_fatal(dev, err, "creating queues");
+ kfree(info->queues);
+ info->queues = NULL;
+ goto out;
+ }
/* Create shared ring, alloc event channel -- for each queue */
for (i = 0; i < num_queues; ++i) {
queue = &info->queues[i];
err = setup_netfront(dev, queue, feature_split_evtchn);
- if (err) {
- /* setup_netfront() will tidy up the current
- * queue on error, but we need to clean up
- * those already allocated.
- */
- if (i > 0) {
- rtnl_lock();
- netif_set_real_num_tx_queues(info->netdev, i);
- rtnl_unlock();
- goto destroy_ring;
- } else {
- goto out;
- }
- }
+ if (err)
+ goto destroy_ring;
}
again:
@@ -1950,9 +1942,9 @@ abort_transaction_no_dev_fatal:
xenbus_transaction_end(xbt, 1);
destroy_ring:
xennet_disconnect_backend(info);
- kfree(info->queues);
- info->queues = NULL;
+ xennet_destroy_queues(info);
out:
+ device_unregister(&dev->dev);
return err;
}
diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c
index d0c2759076a2..312cb5b74dec 100644
--- a/drivers/parisc/lba_pci.c
+++ b/drivers/parisc/lba_pci.c
@@ -1654,3 +1654,36 @@ void lba_set_iregs(struct parisc_device *lba, u32 ibase, u32 imask)
iounmap(base_addr);
}
+
+/*
+ * The design of the Diva management card in rp34x0 machines (rp3410, rp3440)
+ * seems rushed, so that many built-in components simply don't work.
+ * The following quirks disable the serial AUX port and the built-in ATI RV100
+ * Radeon 7000 graphics card which both don't have any external connectors and
+ * thus are useless, and even worse, e.g. the AUX port occupies ttyS0 and as
+ * such makes those machines the only PARISC machines on which we can't use
+ * ttyS0 as boot console.
+ */
+static void quirk_diva_ati_card(struct pci_dev *dev)
+{
+ if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
+ dev->subsystem_device != 0x1292)
+ return;
+
+ dev_info(&dev->dev, "Hiding Diva built-in ATI card");
+ dev->device = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_QY,
+ quirk_diva_ati_card);
+
+static void quirk_diva_aux_disable(struct pci_dev *dev)
+{
+ if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
+ dev->subsystem_device != 0x1291)
+ return;
+
+ dev_info(&dev->dev, "Hiding Diva built-in AUX serial device");
+ dev->device = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX,
+ quirk_diva_aux_disable);
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 357527712539..7680fc0349fc 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -161,7 +161,6 @@ static int virtfn_add(struct pci_dev *dev, int id, int reset)
pci_device_add(virtfn, virtfn->bus);
mutex_unlock(&iov->dev->sriov->lock);
- pci_bus_add_device(virtfn);
sprintf(buf, "virtfn%u", id);
rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
if (rc)
@@ -172,6 +171,8 @@ static int virtfn_add(struct pci_dev *dev, int id, int reset)
kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
+ pci_bus_add_device(virtfn);
+
return 0;
failed2:
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index fca925543fae..32bd8ab79d53 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -944,7 +944,12 @@ static int pci_pm_thaw_noirq(struct device *dev)
if (pci_has_legacy_pm_support(pci_dev))
return pci_legacy_resume_early(dev);
- pci_update_current_state(pci_dev, PCI_D0);
+ /*
+ * pci_restore_state() requires the device to be in D0 (because of MSI
+ * restoration among other things), so force it into D0 in case the
+ * driver's "freeze" callbacks put it into a low-power state directly.
+ */
+ pci_set_power_state(pci_dev, PCI_D0);
pci_restore_state(pci_dev);
if (drv && drv->pm && drv->pm->thaw_noirq)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 620b84a8a719..7290bd41be7b 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -3867,6 +3867,10 @@ static bool pci_bus_resetable(struct pci_bus *bus)
{
struct pci_dev *dev;
+
+ if (bus->self && (bus->self->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET))
+ return false;
+
list_for_each_entry(dev, &bus->devices, bus_list) {
if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET ||
(dev->subordinate && !pci_bus_resetable(dev->subordinate)))
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 4e14de0f0f98..ca5dbf03e388 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -388,7 +388,14 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
* If the error is reported by an end point, we think this
* error is related to the upstream link of the end point.
*/
- pci_walk_bus(dev->bus, cb, &result_data);
+ if (state == pci_channel_io_normal)
+ /*
+ * the error is non fatal so the bus is ok, just invoke
+ * the callback for the function that logged the error.
+ */
+ cb(dev, &result_data);
+ else
+ pci_walk_bus(dev->bus, cb, &result_data);
}
return result_data.result;
diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c
index 63fc63911295..deb903112974 100644
--- a/drivers/pci/pcie/pme.c
+++ b/drivers/pci/pcie/pme.c
@@ -233,6 +233,9 @@ static void pcie_pme_work_fn(struct work_struct *work)
break;
pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta);
+ if (rtsta == (u32) ~0)
+ break;
+
if (rtsta & PCI_EXP_RTSTA_PME) {
/*
* Clear PME status of the port. If there are other
@@ -280,7 +283,7 @@ static irqreturn_t pcie_pme_irq(int irq, void *context)
spin_lock_irqsave(&data->lock, flags);
pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta);
- if (!(rtsta & PCI_EXP_RTSTA_PME)) {
+ if (rtsta == (u32) ~0 || !(rtsta & PCI_EXP_RTSTA_PME)) {
spin_unlock_irqrestore(&data->lock, flags);
return IRQ_NONE;
}
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 8a280e9c2ad1..7e67af2bb366 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -20,9 +20,9 @@ static void pci_stop_dev(struct pci_dev *dev)
pci_pme_active(dev, false);
if (dev->is_added) {
+ device_release_driver(&dev->dev);
pci_proc_detach_device(dev);
pci_remove_sysfs_dev_files(dev);
- device_release_driver(&dev->dev);
dev->is_added = 0;
}
diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig
index 312c78b27a32..073b6d1e5efa 100644
--- a/drivers/pinctrl/Kconfig
+++ b/drivers/pinctrl/Kconfig
@@ -26,7 +26,8 @@ config DEBUG_PINCTRL
config PINCTRL_ADI2
bool "ADI pin controller driver"
- depends on BLACKFIN
+ depends on (BF54x || BF60x)
+ depends on !GPIO_ADI
select PINMUX
select IRQ_DOMAIN
help
diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c
index b58d3f29148a..6908b6ce2074 100644
--- a/drivers/pinctrl/pinctrl-st.c
+++ b/drivers/pinctrl/pinctrl-st.c
@@ -1338,6 +1338,22 @@ static void st_gpio_irq_unmask(struct irq_data *d)
writel(BIT(d->hwirq), bank->base + REG_PIO_SET_PMASK);
}
+static int st_gpio_irq_request_resources(struct irq_data *d)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+ st_gpio_direction_input(gc, d->hwirq);
+
+ return gpiochip_lock_as_irq(gc, d->hwirq);
+}
+
+static void st_gpio_irq_release_resources(struct irq_data *d)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+ gpiochip_unlock_as_irq(gc, d->hwirq);
+}
+
static int st_gpio_irq_set_type(struct irq_data *d, unsigned type)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
@@ -1493,12 +1509,14 @@ static struct gpio_chip st_gpio_template = {
};
static struct irq_chip st_gpio_irqchip = {
- .name = "GPIO",
- .irq_disable = st_gpio_irq_mask,
- .irq_mask = st_gpio_irq_mask,
- .irq_unmask = st_gpio_irq_unmask,
- .irq_set_type = st_gpio_irq_set_type,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .name = "GPIO",
+ .irq_request_resources = st_gpio_irq_request_resources,
+ .irq_release_resources = st_gpio_irq_release_resources,
+ .irq_disable = st_gpio_irq_mask,
+ .irq_mask = st_gpio_irq_mask,
+ .irq_unmask = st_gpio_irq_unmask,
+ .irq_set_type = st_gpio_irq_set_type,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
};
static int st_gpiolib_register_bank(struct st_pinctrl *info,
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 9bb934ed2a7a..dcfd3655ef0a 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -764,7 +764,7 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
}
timerqueue_add(&rtc->timerqueue, &timer->node);
- if (!next) {
+ if (!next || ktime_before(timer->node.expires, next->expires)) {
struct rtc_wkalrm alarm;
int err;
alarm.time = rtc_ktime_to_tm(timer->node.expires);
diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c
index c8f95b8e463a..45b5a3d47ccf 100644
--- a/drivers/rtc/rtc-pcf8563.c
+++ b/drivers/rtc/rtc-pcf8563.c
@@ -427,7 +427,7 @@ static unsigned long pcf8563_clkout_recalc_rate(struct clk_hw *hw,
return 0;
buf &= PCF8563_REG_CLKO_F_MASK;
- return clkout_rates[ret];
+ return clkout_rates[buf];
}
static long pcf8563_clkout_round_rate(struct clk_hw *hw, unsigned long rate,
diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index e1687e19c59f..a30f24cb6c83 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -308,7 +308,8 @@ static int pl031_remove(struct amba_device *adev)
dev_pm_clear_wake_irq(&adev->dev);
device_init_wakeup(&adev->dev, false);
- free_irq(adev->irq[0], ldata);
+ if (adev->irq[0])
+ free_irq(adev->irq[0], ldata);
rtc_device_unregister(ldata->rtc);
iounmap(ldata->base);
kfree(ldata);
@@ -381,12 +382,13 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id)
goto out_no_rtc;
}
- if (request_irq(adev->irq[0], pl031_interrupt,
- vendor->irqflags, "rtc-pl031", ldata)) {
- ret = -EIO;
- goto out_no_irq;
+ if (adev->irq[0]) {
+ ret = request_irq(adev->irq[0], pl031_interrupt,
+ vendor->irqflags, "rtc-pl031", ldata);
+ if (ret)
+ goto out_no_irq;
+ dev_pm_set_wake_irq(&adev->dev, adev->irq[0]);
}
- dev_pm_set_wake_irq(&adev->dev, adev->irq[0]);
return 0;
out_no_irq:
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index bf3c1b2301db..0d6888cbd96e 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2680,17 +2680,13 @@ static void qeth_l3_fill_af_iucv_hdr(struct qeth_card *card,
char daddr[16];
struct af_iucv_trans_hdr *iucv_hdr;
- skb_pull(skb, 14);
- card->dev->header_ops->create(skb, card->dev, 0,
- card->dev->dev_addr, card->dev->dev_addr,
- card->dev->addr_len);
- skb_pull(skb, 14);
- iucv_hdr = (struct af_iucv_trans_hdr *)skb->data;
memset(hdr, 0, sizeof(struct qeth_hdr));
hdr->hdr.l3.id = QETH_HEADER_TYPE_LAYER3;
hdr->hdr.l3.ext_flags = 0;
- hdr->hdr.l3.length = skb->len;
+ hdr->hdr.l3.length = skb->len - ETH_HLEN;
hdr->hdr.l3.flags = QETH_HDR_IPV6 | QETH_CAST_UNICAST;
+
+ iucv_hdr = (struct af_iucv_trans_hdr *) (skb->data + ETH_HLEN);
memset(daddr, 0, sizeof(daddr));
daddr[0] = 0xfe;
daddr[1] = 0x80;
@@ -2873,10 +2869,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
if ((card->info.type == QETH_CARD_TYPE_IQD) && (!large_send) &&
(skb_shinfo(skb)->nr_frags == 0)) {
new_skb = skb;
- if (new_skb->protocol == ETH_P_AF_IUCV)
- data_offset = 0;
- else
- data_offset = ETH_HLEN;
+ data_offset = ETH_HLEN;
hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC);
if (!hdr)
goto tx_drop;
diff --git a/drivers/scsi/bfa/bfad_debugfs.c b/drivers/scsi/bfa/bfad_debugfs.c
index 74a307c0a240..8f1c58d4d5b5 100644
--- a/drivers/scsi/bfa/bfad_debugfs.c
+++ b/drivers/scsi/bfa/bfad_debugfs.c
@@ -254,7 +254,8 @@ bfad_debugfs_write_regrd(struct file *file, const char __user *buf,
struct bfad_s *bfad = port->bfad;
struct bfa_s *bfa = &bfad->bfa;
struct bfa_ioc_s *ioc = &bfa->ioc;
- int addr, len, rc, i;
+ int addr, rc, i;
+ u32 len;
u32 *regbuf;
void __iomem *rb, *reg_addr;
unsigned long flags;
@@ -265,7 +266,7 @@ bfad_debugfs_write_regrd(struct file *file, const char __user *buf,
return PTR_ERR(kern_buf);
rc = sscanf(kern_buf, "%x:%x", &addr, &len);
- if (rc < 2) {
+ if (rc < 2 || len > (UINT_MAX >> 2)) {
printk(KERN_INFO
"bfad[%d]: %s failed to read user buf\n",
bfad->inst_no, __func__);
diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
index 804806e1cbb4..7a48905b8195 100644
--- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
+++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
@@ -1339,6 +1339,7 @@ static void release_offload_resources(struct cxgbi_sock *csk)
csk, csk->state, csk->flags, csk->tid);
cxgbi_sock_free_cpl_skbs(csk);
+ cxgbi_sock_purge_write_queue(csk);
if (csk->wr_cred != csk->wr_max_cred) {
cxgbi_sock_purge_wr_queue(csk);
cxgbi_sock_reset_wr_list(csk);
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index e9ce74afd13f..0c87f341fed4 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -3466,7 +3466,7 @@ exit_failed:
* # (integer code indicating one of several NOT READY states
* describing why a volume is to be kept offline)
*/
-static int hpsa_volume_offline(struct ctlr_info *h,
+static unsigned char hpsa_volume_offline(struct ctlr_info *h,
unsigned char scsi3addr[])
{
struct CommandList *c;
@@ -3486,7 +3486,7 @@ static int hpsa_volume_offline(struct ctlr_info *h,
rc = hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE, NO_TIMEOUT);
if (rc) {
cmd_free(h, c);
- return 0;
+ return HPSA_VPD_LV_STATUS_UNSUPPORTED;
}
sense = c->err_info->SenseInfo;
if (c->err_info->SenseLen > sizeof(c->err_info->SenseInfo))
@@ -3497,19 +3497,13 @@ static int hpsa_volume_offline(struct ctlr_info *h,
cmd_status = c->err_info->CommandStatus;
scsi_status = c->err_info->ScsiStatus;
cmd_free(h, c);
- /* Is the volume 'not ready'? */
- if (cmd_status != CMD_TARGET_STATUS ||
- scsi_status != SAM_STAT_CHECK_CONDITION ||
- sense_key != NOT_READY ||
- asc != ASC_LUN_NOT_READY) {
- return 0;
- }
/* Determine the reason for not ready state */
ldstat = hpsa_get_volume_status(h, scsi3addr);
/* Keep volume offline in certain cases: */
switch (ldstat) {
+ case HPSA_LV_FAILED:
case HPSA_LV_UNDERGOING_ERASE:
case HPSA_LV_NOT_AVAILABLE:
case HPSA_LV_UNDERGOING_RPI:
@@ -3531,7 +3525,7 @@ static int hpsa_volume_offline(struct ctlr_info *h,
default:
break;
}
- return 0;
+ return HPSA_LV_OK;
}
/*
@@ -3615,10 +3609,10 @@ static int hpsa_update_device_info(struct ctlr_info *h,
/* Do an inquiry to the device to see what it is. */
if (hpsa_scsi_do_inquiry(h, scsi3addr, 0, inq_buff,
(unsigned char) OBDR_TAPE_INQ_SIZE) != 0) {
- /* Inquiry failed (msg printed already) */
dev_err(&h->pdev->dev,
- "hpsa_update_device_info: inquiry failed\n");
- rc = -EIO;
+ "%s: inquiry failed, device will be skipped.\n",
+ __func__);
+ rc = HPSA_INQUIRY_FAILED;
goto bail_out;
}
@@ -3638,15 +3632,19 @@ static int hpsa_update_device_info(struct ctlr_info *h,
if (this_device->devtype == TYPE_DISK &&
is_logical_dev_addr_mode(scsi3addr)) {
- int volume_offline;
+ unsigned char volume_offline;
hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level);
if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC)
hpsa_get_ioaccel_status(h, scsi3addr, this_device);
volume_offline = hpsa_volume_offline(h, scsi3addr);
- if (volume_offline < 0 || volume_offline > 0xff)
- volume_offline = HPSA_VPD_LV_STATUS_UNSUPPORTED;
- this_device->volume_offline = volume_offline & 0xff;
+ if (volume_offline == HPSA_LV_FAILED) {
+ rc = HPSA_LV_FAILED;
+ dev_err(&h->pdev->dev,
+ "%s: LV failed, device will be skipped.\n",
+ __func__);
+ goto bail_out;
+ }
} else {
this_device->raid_level = RAID_UNKNOWN;
this_device->offload_config = 0;
@@ -4115,8 +4113,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h)
goto out;
}
if (rc) {
- dev_warn(&h->pdev->dev,
- "Inquiry failed, skipping device.\n");
+ h->drv_req_rescan = 1;
continue;
}
@@ -5257,7 +5254,7 @@ static void hpsa_scan_complete(struct ctlr_info *h)
spin_lock_irqsave(&h->scan_lock, flags);
h->scan_finished = 1;
- wake_up_all(&h->scan_wait_queue);
+ wake_up(&h->scan_wait_queue);
spin_unlock_irqrestore(&h->scan_lock, flags);
}
@@ -5275,11 +5272,23 @@ static void hpsa_scan_start(struct Scsi_Host *sh)
if (unlikely(lockup_detected(h)))
return hpsa_scan_complete(h);
+ /*
+ * If a scan is already waiting to run, no need to add another
+ */
+ spin_lock_irqsave(&h->scan_lock, flags);
+ if (h->scan_waiting) {
+ spin_unlock_irqrestore(&h->scan_lock, flags);
+ return;
+ }
+
+ spin_unlock_irqrestore(&h->scan_lock, flags);
+
/* wait until any scan already in progress is finished. */
while (1) {
spin_lock_irqsave(&h->scan_lock, flags);
if (h->scan_finished)
break;
+ h->scan_waiting = 1;
spin_unlock_irqrestore(&h->scan_lock, flags);
wait_event(h->scan_wait_queue, h->scan_finished);
/* Note: We don't need to worry about a race between this
@@ -5289,6 +5298,7 @@ static void hpsa_scan_start(struct Scsi_Host *sh)
*/
}
h->scan_finished = 0; /* mark scan as in progress */
+ h->scan_waiting = 0;
spin_unlock_irqrestore(&h->scan_lock, flags);
if (unlikely(lockup_detected(h)))
@@ -8505,6 +8515,7 @@ reinit_after_soft_reset:
init_waitqueue_head(&h->event_sync_wait_queue);
mutex_init(&h->reset_mutex);
h->scan_finished = 1; /* no scan currently in progress */
+ h->scan_waiting = 0;
pci_set_drvdata(pdev, h);
h->ndevices = 0;
@@ -8797,6 +8808,8 @@ static void hpsa_remove_one(struct pci_dev *pdev)
destroy_workqueue(h->rescan_ctlr_wq);
destroy_workqueue(h->resubmit_wq);
+ hpsa_delete_sas_host(h);
+
/*
* Call before disabling interrupts.
* scsi_remove_host can trigger I/O operations especially
@@ -8831,8 +8844,6 @@ static void hpsa_remove_one(struct pci_dev *pdev)
h->lockup_detected = NULL; /* init_one 2 */
/* (void) pci_disable_pcie_error_reporting(pdev); */ /* init_one 1 */
- hpsa_delete_sas_host(h);
-
kfree(h); /* init_one 1 */
}
@@ -9324,9 +9335,9 @@ static void hpsa_free_sas_phy(struct hpsa_sas_phy *hpsa_sas_phy)
struct sas_phy *phy = hpsa_sas_phy->phy;
sas_port_delete_phy(hpsa_sas_phy->parent_port->port, phy);
- sas_phy_free(phy);
if (hpsa_sas_phy->added_to_port)
list_del(&hpsa_sas_phy->phy_list_entry);
+ sas_phy_delete(phy);
kfree(hpsa_sas_phy);
}
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
index ae5beda1bdb5..0e602750487a 100644
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -200,6 +200,7 @@ struct ctlr_info {
dma_addr_t errinfo_pool_dhandle;
unsigned long *cmd_pool_bits;
int scan_finished;
+ u8 scan_waiting : 1;
spinlock_t scan_lock;
wait_queue_head_t scan_wait_queue;
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index d92ef0d352b5..26488e2a7f02 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -155,6 +155,7 @@
#define CFGTBL_BusType_Fibre2G 0x00000200l
/* VPD Inquiry types */
+#define HPSA_INQUIRY_FAILED 0x02
#define HPSA_VPD_SUPPORTED_PAGES 0x00
#define HPSA_VPD_LV_DEVICE_GEOMETRY 0xC1
#define HPSA_VPD_LV_IOACCEL_STATUS 0xC2
@@ -164,6 +165,7 @@
/* Logical volume states */
#define HPSA_VPD_LV_STATUS_UNSUPPORTED 0xff
#define HPSA_LV_OK 0x0
+#define HPSA_LV_FAILED 0x01
#define HPSA_LV_NOT_AVAILABLE 0x0b
#define HPSA_LV_UNDERGOING_ERASE 0x0F
#define HPSA_LV_UNDERGOING_RPI 0x12
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index d278362448ca..fd8fe1202dbe 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -7491,7 +7491,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
did, vport->port_state, ndlp->nlp_flag);
phba->fc_stat.elsRcvPRLI++;
- if (vport->port_state < LPFC_DISC_AUTH) {
+ if ((vport->port_state < LPFC_DISC_AUTH) &&
+ (vport->fc_flag & FC_FABRIC)) {
rjt_err = LSRJT_UNABLE_TPC;
rjt_exp = LSEXP_NOTHING_MORE;
break;
@@ -7887,11 +7888,17 @@ lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
spin_lock_irq(shost->host_lock);
vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
spin_unlock_irq(shost->host_lock);
- if (vport->port_type == LPFC_PHYSICAL_PORT
- && !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG))
- lpfc_issue_init_vfi(vport);
- else
+ if (mb->mbxStatus == MBX_NOT_FINISHED)
+ break;
+ if ((vport->port_type == LPFC_PHYSICAL_PORT) &&
+ !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) {
+ if (phba->sli_rev == LPFC_SLI_REV4)
+ lpfc_issue_init_vfi(vport);
+ else
+ lpfc_initial_flogi(vport);
+ } else {
lpfc_initial_fdisc(vport);
+ }
break;
}
} else {
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index d3668aa555d5..be901f6db6d3 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -4777,7 +4777,8 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
lpfc_cancel_retry_delay_tmo(vport, ndlp);
if ((ndlp->nlp_flag & NLP_DEFER_RM) &&
!(ndlp->nlp_flag & NLP_REG_LOGIN_SEND) &&
- !(ndlp->nlp_flag & NLP_RPI_REGISTERED)) {
+ !(ndlp->nlp_flag & NLP_RPI_REGISTERED) &&
+ phba->sli_rev != LPFC_SLI_REV4) {
/* For this case we need to cleanup the default rpi
* allocated by the firmware.
*/
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index f224cdb2fce4..507869bc0673 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -3180,7 +3180,7 @@ struct lpfc_mbx_get_port_name {
#define MB_CEQ_STATUS_QUEUE_FLUSHING 0x4
#define MB_CQE_STATUS_DMA_FAILED 0x5
-#define LPFC_MBX_WR_CONFIG_MAX_BDE 8
+#define LPFC_MBX_WR_CONFIG_MAX_BDE 1
struct lpfc_mbx_wr_object {
struct mbox_header header;
union {
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index e333029e4b6c..e111c3d8c5d6 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -4588,6 +4588,11 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
} else if (log_info == VIRTUAL_IO_FAILED_RETRY) {
scmd->result = DID_RESET << 16;
break;
+ } else if ((scmd->device->channel == RAID_CHANNEL) &&
+ (scsi_state == (MPI2_SCSI_STATE_TERMINATED |
+ MPI2_SCSI_STATE_NO_SCSI_STATUS))) {
+ scmd->result = DID_RESET << 16;
+ break;
}
scmd->result = DID_SOFT_ERROR << 16;
break;
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index 11cdb172cfaf..60720e5b1ebc 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -160,7 +160,7 @@ static struct {
{"DGC", "RAID", NULL, BLIST_SPARSELUN}, /* Dell PV 650F, storage on LUN 0 */
{"DGC", "DISK", NULL, BLIST_SPARSELUN}, /* Dell PV 650F, no storage on LUN 0 */
{"EMC", "Invista", "*", BLIST_SPARSELUN | BLIST_LARGELUN},
- {"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN | BLIST_FORCELUN},
+ {"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN | BLIST_REPORTLUN2},
{"EMULEX", "MD21/S2 ESDI", NULL, BLIST_SINGLELUN},
{"easyRAID", "16P", NULL, BLIST_NOREPORTLUN},
{"easyRAID", "X6P", NULL, BLIST_NOREPORTLUN},
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 7e1681cf287c..dd72205ba298 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -233,11 +233,15 @@ manage_start_stop_store(struct device *dev, struct device_attribute *attr,
{
struct scsi_disk *sdkp = to_scsi_disk(dev);
struct scsi_device *sdp = sdkp->device;
+ bool v;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- sdp->manage_start_stop = simple_strtoul(buf, NULL, 10);
+ if (kstrtobool(buf, &v))
+ return -EINVAL;
+
+ sdp->manage_start_stop = v;
return count;
}
@@ -255,6 +259,7 @@ static ssize_t
allow_restart_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
+ bool v;
struct scsi_disk *sdkp = to_scsi_disk(dev);
struct scsi_device *sdp = sdkp->device;
@@ -264,7 +269,10 @@ allow_restart_store(struct device *dev, struct device_attribute *attr,
if (sdp->type != TYPE_DISK)
return -EINVAL;
- sdp->allow_restart = simple_strtoul(buf, NULL, 10);
+ if (kstrtobool(buf, &v))
+ return -EINVAL;
+
+ sdp->allow_restart = v;
return count;
}
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 6df2841cb7f9..5e4e1ba96f10 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -379,8 +379,6 @@ MODULE_PARM_DESC(vcpus_per_sub_channel, "Ratio of VCPUs to subchannels");
*/
static int storvsc_timeout = 180;
-static int msft_blist_flags = BLIST_TRY_VPD_PAGES;
-
static void storvsc_on_channel_callback(void *context);
@@ -1241,6 +1239,22 @@ static int storvsc_do_io(struct hv_device *device,
return ret;
}
+static int storvsc_device_alloc(struct scsi_device *sdevice)
+{
+ /*
+ * Set blist flag to permit the reading of the VPD pages even when
+ * the target may claim SPC-2 compliance. MSFT targets currently
+ * claim SPC-2 compliance while they implement post SPC-2 features.
+ * With this flag we can correctly handle WRITE_SAME_16 issues.
+ *
+ * Hypervisor reports SCSI_UNKNOWN type for DVD ROM device but
+ * still supports REPORT LUN.
+ */
+ sdevice->sdev_bflags = BLIST_REPORTLUN2 | BLIST_TRY_VPD_PAGES;
+
+ return 0;
+}
+
static int storvsc_device_configure(struct scsi_device *sdevice)
{
@@ -1256,14 +1270,6 @@ static int storvsc_device_configure(struct scsi_device *sdevice)
sdevice->no_write_same = 1;
/*
- * Add blist flags to permit the reading of the VPD pages even when
- * the target may claim SPC-2 compliance. MSFT targets currently
- * claim SPC-2 compliance while they implement post SPC-2 features.
- * With this patch we can correctly handle WRITE_SAME_16 issues.
- */
- sdevice->sdev_bflags |= msft_blist_flags;
-
- /*
* If the host is WIN8 or WIN8 R2, claim conformance to SPC-3
* if the device is a MSFT virtual device. If the host is
* WIN10 or newer, allow write_same.
@@ -1529,6 +1535,7 @@ static struct scsi_host_template scsi_driver = {
.eh_host_reset_handler = storvsc_host_reset_handler,
.proc_name = "storvsc_host",
.eh_timed_out = storvsc_eh_timed_out,
+ .slave_alloc = storvsc_device_alloc,
.slave_configure = storvsc_device_configure,
.cmd_per_lun = 255,
.this_id = -1,
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 1b9008cab6eb..4bc2ee1f42bf 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -3207,10 +3207,10 @@ static void ufshcd_transfer_req_compl(struct ufs_hba *hba)
completion = ktime_get();
delta_us = ktime_us_delta(completion,
req->lat_hist_io_start);
- /* rq_data_dir() => true if WRITE */
- blk_update_latency_hist(&hba->io_lat_s,
- (rq_data_dir(req) == READ),
- delta_us);
+ blk_update_latency_hist(
+ (rq_data_dir(req) == READ) ?
+ &hba->io_lat_read :
+ &hba->io_lat_write, delta_us);
}
}
/* Do not touch lrbp after scsi done */
@@ -5382,9 +5382,10 @@ latency_hist_store(struct device *dev, struct device_attribute *attr,
if (kstrtol(buf, 0, &value))
return -EINVAL;
- if (value == BLK_IO_LAT_HIST_ZERO)
- blk_zero_latency_hist(&hba->io_lat_s);
- else if (value == BLK_IO_LAT_HIST_ENABLE ||
+ if (value == BLK_IO_LAT_HIST_ZERO) {
+ memset(&hba->io_lat_read, 0, sizeof(hba->io_lat_read));
+ memset(&hba->io_lat_write, 0, sizeof(hba->io_lat_write));
+ } else if (value == BLK_IO_LAT_HIST_ENABLE ||
value == BLK_IO_LAT_HIST_DISABLE)
hba->latency_hist_enabled = value;
return count;
@@ -5395,8 +5396,14 @@ latency_hist_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct ufs_hba *hba = dev_get_drvdata(dev);
+ size_t written_bytes;
- return blk_latency_hist_show(&hba->io_lat_s, buf);
+ written_bytes = blk_latency_hist_show("Read", &hba->io_lat_read,
+ buf, PAGE_SIZE);
+ written_bytes += blk_latency_hist_show("Write", &hba->io_lat_write,
+ buf + written_bytes, PAGE_SIZE - written_bytes);
+
+ return written_bytes;
}
static DEVICE_ATTR(latency_hist, S_IRUGO | S_IWUSR,
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index b74eedbd831f..c1310ead0c2a 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -542,7 +542,8 @@ struct ufs_hba {
bool is_sys_suspended;
int latency_hist_enabled;
- struct io_latency_state io_lat_s;
+ struct io_latency_state io_lat_read;
+ struct io_latency_state io_lat_write;
};
/* Returns true if clocks can be gated. Otherwise false */
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index b0a24dedd1ed..8b9c2a38d1cc 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -315,7 +315,6 @@ config SPI_FSL_SPI
config SPI_FSL_DSPI
tristate "Freescale DSPI controller"
select REGMAP_MMIO
- depends on HAS_DMA
depends on SOC_VF610 || SOC_LS1021A || ARCH_LAYERSCAPE || COMPILE_TEST
help
This enables support for the Freescale DSPI controller in master
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
index d22de4c8c399..3de39bd794b6 100644
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -863,7 +863,7 @@ static int sh_msiof_transfer_one(struct spi_master *master,
break;
copy32 = copy_bswap32;
} else if (bits <= 16) {
- if (l & 1)
+ if (l & 3)
break;
copy32 = copy_wswap32;
} else {
diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c
index 3009121173cd..3c6ea5c3ddd2 100644
--- a/drivers/spi/spi-xilinx.c
+++ b/drivers/spi/spi-xilinx.c
@@ -271,6 +271,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
while (remaining_words) {
int n_words, tx_words, rx_words;
u32 sr;
+ int stalled;
n_words = min(remaining_words, xspi->buffer_size);
@@ -299,7 +300,17 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
/* Read out all the data from the Rx FIFO */
rx_words = n_words;
+ stalled = 10;
while (rx_words) {
+ if (rx_words == n_words && !(stalled--) &&
+ !(sr & XSPI_SR_TX_EMPTY_MASK) &&
+ (sr & XSPI_SR_RX_EMPTY_MASK)) {
+ dev_err(&spi->dev,
+ "Detected stall. Check C_SPI_MODE and C_SPI_MEMORY\n");
+ xspi_init_hw(xspi);
+ return -EIO;
+ }
+
if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) {
xilinx_spi_rx(xspi);
rx_words--;
diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c
index 7df978371c9a..44fffbd1bc74 100644
--- a/drivers/staging/lustre/lustre/llite/llite_mmap.c
+++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c
@@ -402,15 +402,13 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
result = VM_FAULT_LOCKED;
break;
case -ENODATA:
+ case -EAGAIN:
case -EFAULT:
result = VM_FAULT_NOPAGE;
break;
case -ENOMEM:
result = VM_FAULT_OOM;
break;
- case -EAGAIN:
- result = VM_FAULT_RETRY;
- break;
default:
result = VM_FAULT_SIGBUS;
break;
diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c
index fefbf826c622..8fd8f3a2d1bf 100644
--- a/drivers/staging/vt6655/device_main.c
+++ b/drivers/staging/vt6655/device_main.c
@@ -1693,10 +1693,11 @@ static int vt6655_suspend(struct pci_dev *pcid, pm_message_t state)
MACbShutdown(priv->PortOffset);
pci_disable_device(pcid);
- pci_set_power_state(pcid, pci_choose_state(pcid, state));
spin_unlock_irqrestore(&priv->lock, flags);
+ pci_set_power_state(pcid, pci_choose_state(pcid, state));
+
return 0;
}
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index bb73401f5761..58fe27705b96 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -674,6 +674,7 @@ static int iscsit_add_reject_from_cmd(
unsigned char *buf)
{
struct iscsi_conn *conn;
+ const bool do_put = cmd->se_cmd.se_tfo != NULL;
if (!cmd->conn) {
pr_err("cmd->conn is NULL for ITT: 0x%08x\n",
@@ -704,7 +705,7 @@ static int iscsit_add_reject_from_cmd(
* Perform the kref_put now if se_cmd has already been setup by
* scsit_setup_scsi_cmd()
*/
- if (cmd->se_cmd.se_tfo != NULL) {
+ if (do_put) {
pr_debug("iscsi reject: calling target_put_sess_cmd >>>>>>\n");
target_put_sess_cmd(&cmd->se_cmd);
}
@@ -1758,7 +1759,6 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
struct iscsi_tmr_req *tmr_req;
struct iscsi_tm *hdr;
int out_of_order_cmdsn = 0, ret;
- bool sess_ref = false;
u8 function, tcm_function = TMR_UNKNOWN;
hdr = (struct iscsi_tm *) buf;
@@ -1800,18 +1800,17 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
buf);
}
+ transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops,
+ conn->sess->se_sess, 0, DMA_NONE,
+ TCM_SIMPLE_TAG, cmd->sense_buffer + 2);
+
+ target_get_sess_cmd(&cmd->se_cmd, true);
+
/*
* TASK_REASSIGN for ERL=2 / connection stays inside of
* LIO-Target $FABRIC_MOD
*/
if (function != ISCSI_TM_FUNC_TASK_REASSIGN) {
- transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops,
- conn->sess->se_sess, 0, DMA_NONE,
- TCM_SIMPLE_TAG, cmd->sense_buffer + 2);
-
- target_get_sess_cmd(&cmd->se_cmd, true);
- sess_ref = true;
-
switch (function) {
case ISCSI_TM_FUNC_ABORT_TASK:
tcm_function = TMR_ABORT_TASK;
@@ -1950,12 +1949,8 @@ attach:
* For connection recovery, this is also the default action for
* TMR TASK_REASSIGN.
*/
- if (sess_ref) {
- pr_debug("Handle TMR, using sess_ref=true check\n");
- target_put_sess_cmd(&cmd->se_cmd);
- }
-
iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+ target_put_sess_cmd(&cmd->se_cmd);
return 0;
}
EXPORT_SYMBOL(iscsit_handle_task_mgt_cmd);
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index 634ad3662ed6..8c49bc3dcc8c 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -1210,7 +1210,7 @@ static struct se_portal_group *lio_target_tiqn_addtpg(
ret = core_tpg_register(wwn, &tpg->tpg_se_tpg, SCSI_PROTOCOL_ISCSI);
if (ret < 0)
- return NULL;
+ goto free_out;
ret = iscsit_tpg_add_portal_group(tiqn, tpg);
if (ret != 0)
@@ -1222,6 +1222,7 @@ static struct se_portal_group *lio_target_tiqn_addtpg(
return &tpg->tpg_se_tpg;
out:
core_tpg_deregister(&tpg->tpg_se_tpg);
+free_out:
kfree(tpg);
return NULL;
}
diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index 49aba4a31747..1fe782f9ee81 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -1010,7 +1010,7 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp)
static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
{
struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work,
- struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work.work);
+ struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work);
struct se_device *dev = tg_pt_gp->tg_pt_gp_dev;
bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status ==
ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG);
@@ -1073,17 +1073,8 @@ static int core_alua_do_transition_tg_pt(
/*
* Flush any pending transitions
*/
- if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs &&
- atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) ==
- ALUA_ACCESS_STATE_TRANSITION) {
- /* Just in case */
- tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
- tg_pt_gp->tg_pt_gp_transition_complete = &wait;
- flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work);
- wait_for_completion(&wait);
- tg_pt_gp->tg_pt_gp_transition_complete = NULL;
- return 0;
- }
+ if (!explicit)
+ flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
/*
* Save the old primary ALUA access state, and set the current state
@@ -1114,17 +1105,9 @@ static int core_alua_do_transition_tg_pt(
atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
- if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs) {
- unsigned long transition_tmo;
-
- transition_tmo = tg_pt_gp->tg_pt_gp_implicit_trans_secs * HZ;
- queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq,
- &tg_pt_gp->tg_pt_gp_transition_work,
- transition_tmo);
- } else {
+ schedule_work(&tg_pt_gp->tg_pt_gp_transition_work);
+ if (explicit) {
tg_pt_gp->tg_pt_gp_transition_complete = &wait;
- queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq,
- &tg_pt_gp->tg_pt_gp_transition_work, 0);
wait_for_completion(&wait);
tg_pt_gp->tg_pt_gp_transition_complete = NULL;
}
@@ -1692,8 +1675,8 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev,
mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex);
spin_lock_init(&tg_pt_gp->tg_pt_gp_lock);
atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0);
- INIT_DELAYED_WORK(&tg_pt_gp->tg_pt_gp_transition_work,
- core_alua_do_transition_tg_pt_work);
+ INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work,
+ core_alua_do_transition_tg_pt_work);
tg_pt_gp->tg_pt_gp_dev = dev;
atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED);
@@ -1801,7 +1784,7 @@ void core_alua_free_tg_pt_gp(
dev->t10_alua.alua_tg_pt_gps_counter--;
spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
- flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work);
+ flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
/*
* Allow a struct t10_alua_tg_pt_gp_member * referenced by
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 041a56987845..2e35db7f4aac 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -466,6 +466,10 @@ fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb)
struct inode *inode = file->f_mapping->host;
int ret;
+ if (!nolb) {
+ return 0;
+ }
+
if (cmd->se_dev->dev_attrib.pi_prot_type) {
ret = fd_do_prot_unmap(cmd, lba, nolb);
if (ret)
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index e7933115087a..e38b4582d43e 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -56,8 +56,10 @@ void core_pr_dump_initiator_port(
char *buf,
u32 size)
{
- if (!pr_reg->isid_present_at_reg)
+ if (!pr_reg->isid_present_at_reg) {
buf[0] = '\0';
+ return;
+ }
snprintf(buf, size, ",i,0x%s", pr_reg->pr_reg_isid);
}
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index c9be953496ec..e926dd52b6b5 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -133,6 +133,15 @@ static bool __target_check_io_state(struct se_cmd *se_cmd,
spin_unlock(&se_cmd->t_state_lock);
return false;
}
+ if (se_cmd->transport_state & CMD_T_PRE_EXECUTE) {
+ if (se_cmd->scsi_status) {
+ pr_debug("Attempted to abort io tag: %llu early failure"
+ " status: 0x%02x\n", se_cmd->tag,
+ se_cmd->scsi_status);
+ spin_unlock(&se_cmd->t_state_lock);
+ return false;
+ }
+ }
if (sess->sess_tearing_down || se_cmd->cmd_wait_set) {
pr_debug("Attempted to abort io tag: %llu already shutdown,"
" skipping\n", se_cmd->tag);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 37abf881ca75..21f888ac550e 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1933,6 +1933,7 @@ void target_execute_cmd(struct se_cmd *cmd)
}
cmd->t_state = TRANSPORT_PROCESSING;
+ cmd->transport_state &= ~CMD_T_PRE_EXECUTE;
cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT;
spin_unlock_irq(&cmd->t_state_lock);
@@ -2572,6 +2573,7 @@ int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref)
ret = -ESHUTDOWN;
goto out;
}
+ se_cmd->transport_state |= CMD_T_PRE_EXECUTE;
list_add_tail(&se_cmd->se_cmd_list, &se_sess->sess_cmd_list);
out:
spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
diff --git a/drivers/tee/Kconfig b/drivers/tee/Kconfig
index 2330a4eb4e8b..a6df12d88f90 100644
--- a/drivers/tee/Kconfig
+++ b/drivers/tee/Kconfig
@@ -1,6 +1,7 @@
# Generic Trusted Execution Environment Configuration
config TEE
tristate "Trusted Execution Environment support"
+ depends on HAVE_ARM_SMCCC || COMPILE_TEST
select DMA_SHARED_BUFFER
select GENERIC_ALLOCATOR
help
diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c
index 18c8c0a50d37..edb6e4e9ef3a 100644
--- a/drivers/tee/optee/core.c
+++ b/drivers/tee/optee/core.c
@@ -224,13 +224,14 @@ static void optee_release(struct tee_context *ctx)
if (!IS_ERR(shm)) {
arg = tee_shm_get_va(shm, 0);
/*
- * If va2pa fails for some reason, we can't call
- * optee_close_session(), only free the memory. Secure OS
- * will leak sessions and finally refuse more sessions, but
- * we will at least let normal world reclaim its memory.
+ * If va2pa fails for some reason, we can't call into
+ * secure world, only free the memory. Secure OS will leak
+ * sessions and finally refuse more sessions, but we will
+ * at least let normal world reclaim its memory.
*/
if (!IS_ERR(arg))
- tee_shm_va2pa(shm, arg, &parg);
+ if (tee_shm_va2pa(shm, arg, &parg))
+ arg = NULL; /* prevent usage of parg below */
}
list_for_each_entry_safe(sess, sess_tmp, &ctxdata->sess_list,
@@ -258,7 +259,7 @@ static void optee_release(struct tee_context *ctx)
}
}
-static struct tee_driver_ops optee_ops = {
+static const struct tee_driver_ops optee_ops = {
.get_version = optee_get_version,
.open = optee_open,
.release = optee_release,
@@ -268,13 +269,13 @@ static struct tee_driver_ops optee_ops = {
.cancel_req = optee_cancel_req,
};
-static struct tee_desc optee_desc = {
+static const struct tee_desc optee_desc = {
.name = DRIVER_NAME "-clnt",
.ops = &optee_ops,
.owner = THIS_MODULE,
};
-static struct tee_driver_ops optee_supp_ops = {
+static const struct tee_driver_ops optee_supp_ops = {
.get_version = optee_get_version,
.open = optee_open,
.release = optee_release,
@@ -282,7 +283,7 @@ static struct tee_driver_ops optee_supp_ops = {
.supp_send = optee_supp_send,
};
-static struct tee_desc optee_supp_desc = {
+static const struct tee_desc optee_supp_desc = {
.name = DRIVER_NAME "-supp",
.ops = &optee_supp_ops,
.owner = THIS_MODULE,
diff --git a/drivers/tee/optee/optee_smc.h b/drivers/tee/optee/optee_smc.h
index 13b7c98cdf25..069c8e1429de 100644
--- a/drivers/tee/optee/optee_smc.h
+++ b/drivers/tee/optee/optee_smc.h
@@ -298,7 +298,7 @@ struct optee_smc_disable_shm_cache_result {
OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_ENABLE_SHM_CACHE)
/*
- * Resume from RPC (for example after processing an IRQ)
+ * Resume from RPC (for example after processing a foreign interrupt)
*
* Call register usage:
* a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC
@@ -383,19 +383,19 @@ struct optee_smc_disable_shm_cache_result {
OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_FREE)
/*
- * Deliver an IRQ in normal world.
+ * Deliver foreign interrupt to normal world.
*
* "Call" register usage:
- * a0 OPTEE_SMC_RETURN_RPC_IRQ
+ * a0 OPTEE_SMC_RETURN_RPC_FOREIGN_INTR
* a1-7 Resume information, must be preserved
*
* "Return" register usage:
* a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC.
* a1-7 Preserved
*/
-#define OPTEE_SMC_RPC_FUNC_IRQ 4
-#define OPTEE_SMC_RETURN_RPC_IRQ \
- OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_IRQ)
+#define OPTEE_SMC_RPC_FUNC_FOREIGN_INTR 4
+#define OPTEE_SMC_RETURN_RPC_FOREIGN_INTR \
+ OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_FOREIGN_INTR)
/*
* Do an RPC request. The supplied struct optee_msg_arg tells which
diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c
index 8c1ab49c83c3..9577e22994e3 100644
--- a/drivers/tee/optee/rpc.c
+++ b/drivers/tee/optee/rpc.c
@@ -141,11 +141,8 @@ static void handle_rpc_func_cmd_wait(struct optee_msg_arg *arg)
msec_to_wait = arg->params[0].u.value.a;
- /* set task's state to interruptible sleep */
- set_current_state(TASK_INTERRUPTIBLE);
-
- /* take a nap */
- msleep(msec_to_wait);
+ /* Go to interruptible sleep */
+ msleep_interruptible(msec_to_wait);
arg->ret = TEEC_SUCCESS;
return;
@@ -375,11 +372,11 @@ void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param)
shm = reg_pair_to_ptr(param->a1, param->a2);
tee_shm_free(shm);
break;
- case OPTEE_SMC_RPC_FUNC_IRQ:
+ case OPTEE_SMC_RPC_FUNC_FOREIGN_INTR:
/*
- * An IRQ was raised while secure world was executing,
- * since all IRQs are handled in Linux a dummy RPC is
- * performed to let Linux take the IRQ through the normal
+ * A foreign interrupt was raised while secure world was
+ * executing, since they are handled in Linux a dummy RPC is
+ * performed to let Linux take the interrupt through the normal
* vector.
*/
break;
diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c
index 5c60bf4423e6..58a5009eacc3 100644
--- a/drivers/tee/tee_core.c
+++ b/drivers/tee/tee_core.c
@@ -90,8 +90,13 @@ static int tee_ioctl_version(struct tee_context *ctx,
struct tee_ioctl_version_data vers;
ctx->teedev->desc->ops->get_version(ctx->teedev, &vers);
+
+ if (ctx->teedev->desc->flags & TEE_DESC_PRIVILEGED)
+ vers.gen_caps |= TEE_GEN_CAP_PRIVILEGED;
+
if (copy_to_user(uvers, &vers, sizeof(vers)))
return -EFAULT;
+
return 0;
}
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
index 0be1e3e93bee..37207ae2de7b 100644
--- a/drivers/tee/tee_shm.c
+++ b/drivers/tee/tee_shm.c
@@ -80,7 +80,7 @@ static int tee_shm_op_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
size, vma->vm_page_prot);
}
-static struct dma_buf_ops tee_shm_dma_buf_ops = {
+static const struct dma_buf_ops tee_shm_dma_buf_ops = {
.map_dma_buf = tee_shm_op_map_dma_buf,
.unmap_dma_buf = tee_shm_op_unmap_dma_buf,
.release = tee_shm_op_release,
diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index 36d07295f8e3..a56f6cac6fc5 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -389,8 +389,11 @@ static int hisi_thermal_suspend(struct device *dev)
static int hisi_thermal_resume(struct device *dev)
{
struct hisi_thermal_data *data = dev_get_drvdata(dev);
+ int ret;
- clk_prepare_enable(data->clk);
+ ret = clk_prepare_enable(data->clk);
+ if (ret)
+ return ret;
data->irq_enabled = true;
hisi_thermal_enable_bind_irq_sensor(data);
diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
index ea9366ad3e6b..7814d18e8940 100644
--- a/drivers/thermal/step_wise.c
+++ b/drivers/thermal/step_wise.c
@@ -31,8 +31,7 @@
* If the temperature is higher than a trip point,
* a. if the trend is THERMAL_TREND_RAISING, use higher cooling
* state for this trip point
- * b. if the trend is THERMAL_TREND_DROPPING, use lower cooling
- * state for this trip point
+ * b. if the trend is THERMAL_TREND_DROPPING, do nothing
* c. if the trend is THERMAL_TREND_RAISE_FULL, use upper limit
* for this trip point
* d. if the trend is THERMAL_TREND_DROP_FULL, use lower limit
@@ -94,9 +93,11 @@ static unsigned long get_target_state(struct thermal_instance *instance,
if (!throttle)
next_target = THERMAL_NO_TARGET;
} else {
- next_target = cur_state - 1;
- if (next_target > instance->upper)
- next_target = instance->upper;
+ if (!throttle) {
+ next_target = cur_state - 1;
+ if (next_target > instance->upper)
+ next_target = instance->upper;
+ }
}
break;
case THERMAL_TREND_DROP_FULL:
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 84e71bd19082..41dda25da049 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -1801,7 +1801,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
{
struct n_tty_data *ldata = tty->disc_data;
- if (!old || (old->c_lflag ^ tty->termios.c_lflag) & ICANON) {
+ if (!old || (old->c_lflag ^ tty->termios.c_lflag) & (ICANON | EXTPROC)) {
bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE);
ldata->line_start = ldata->read_tail;
if (!L_ICANON(tty) || !read_cnt(ldata)) {
@@ -2493,7 +2493,7 @@ static int n_tty_ioctl(struct tty_struct *tty, struct file *file,
return put_user(tty_chars_in_buffer(tty), (int __user *) arg);
case TIOCINQ:
down_write(&tty->termios_rwsem);
- if (L_ICANON(tty))
+ if (L_ICANON(tty) && !L_EXTPROC(tty))
retval = inq_canon(ldata);
else
retval = read_cnt(ldata);
diff --git a/drivers/tty/serial/8250/8250_fintek.c b/drivers/tty/serial/8250/8250_fintek.c
index 89474399ab89..1d5a9e5fb069 100644
--- a/drivers/tty/serial/8250/8250_fintek.c
+++ b/drivers/tty/serial/8250/8250_fintek.c
@@ -117,7 +117,7 @@ static int fintek_8250_rs485_config(struct uart_port *port,
if ((!!(rs485->flags & SER_RS485_RTS_ON_SEND)) ==
(!!(rs485->flags & SER_RS485_RTS_AFTER_SEND)))
- rs485->flags &= SER_RS485_ENABLED;
+ rs485->flags &= ~SER_RS485_ENABLED;
else
config |= RS485_URA;
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index cf3da51a3536..7025f47fa284 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -5797,6 +5797,9 @@ static struct pci_device_id serial_pci_tbl[] = {
{ PCI_DEVICE(0x1601, 0x0800), .driver_data = pbn_b0_4_1250000 },
{ PCI_DEVICE(0x1601, 0xa801), .driver_data = pbn_b0_4_1250000 },
+ /* Amazon PCI serial device */
+ { PCI_DEVICE(0x1d0f, 0x8250), .driver_data = pbn_b0_1_115200 },
+
/*
* These entries match devices with class COMMUNICATION_SERIAL,
* COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 43f49386cde4..56343cca2f38 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2223,8 +2223,11 @@ static void serial8250_set_divisor(struct uart_port *port, unsigned int baud,
serial_dl_write(up, quot);
/* XR17V35x UARTs have an extra fractional divisor register (DLD) */
- if (up->port.type == PORT_XR17V35X)
+ if (up->port.type == PORT_XR17V35X) {
+ /* Preserve bits not related to baudrate; DLD[7:4]. */
+ quot_frac |= serial_port_in(port, 0x2) & 0xf0;
serial_port_out(port, 0x2, quot_frac);
+ }
}
static unsigned int
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 1fa4128eb88e..ed27fda13387 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -133,6 +133,12 @@ static void sysrq_handle_crash(int key)
{
char *killer = NULL;
+ /* we need to release the RCU read lock here,
+ * otherwise we get an annoying
+ * 'BUG: sleeping function called from invalid context'
+ * complaint from the kernel before the panic.
+ */
+ rcu_read_unlock();
panic_on_oops = 1; /* force panic */
wmb();
*killer = 1;
@@ -237,8 +243,10 @@ static void sysrq_handle_showallcpus(int key)
* architecture has no support for it:
*/
if (!trigger_all_cpu_backtrace()) {
- struct pt_regs *regs = get_irq_regs();
+ struct pt_regs *regs = NULL;
+ if (in_irq())
+ regs = get_irq_regs();
if (regs) {
pr_info("CPU%d:\n", smp_processor_id());
show_regs(regs);
@@ -257,7 +265,10 @@ static struct sysrq_key_op sysrq_showallcpus_op = {
static void sysrq_handle_showregs(int key)
{
- struct pt_regs *regs = get_irq_regs();
+ struct pt_regs *regs = NULL;
+
+ if (in_irq())
+ regs = get_irq_regs();
if (regs)
show_regs(regs);
perf_event_print_debug();
diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index 5172bec612eb..22dcccf2d286 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -521,6 +521,9 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
unsigned iad_num = 0;
memcpy(&config->desc, buffer, USB_DT_CONFIG_SIZE);
+ nintf = nintf_orig = config->desc.bNumInterfaces;
+ config->desc.bNumInterfaces = 0; // Adjusted later
+
if (config->desc.bDescriptorType != USB_DT_CONFIG ||
config->desc.bLength < USB_DT_CONFIG_SIZE ||
config->desc.bLength > size) {
@@ -534,7 +537,6 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
buffer += config->desc.bLength;
size -= config->desc.bLength;
- nintf = nintf_orig = config->desc.bNumInterfaces;
if (nintf > USB_MAXINTERFACES) {
dev_warn(ddev, "config %d has too many interfaces: %d, "
"using maximum allowed: %d\n",
@@ -871,14 +873,25 @@ void usb_release_bos_descriptor(struct usb_device *dev)
}
}
+static const __u8 bos_desc_len[256] = {
+ [USB_CAP_TYPE_WIRELESS_USB] = USB_DT_USB_WIRELESS_CAP_SIZE,
+ [USB_CAP_TYPE_EXT] = USB_DT_USB_EXT_CAP_SIZE,
+ [USB_SS_CAP_TYPE] = USB_DT_USB_SS_CAP_SIZE,
+ [USB_SSP_CAP_TYPE] = USB_DT_USB_SSP_CAP_SIZE(1),
+ [CONTAINER_ID_TYPE] = USB_DT_USB_SS_CONTN_ID_SIZE,
+ [USB_PTM_CAP_TYPE] = USB_DT_USB_PTM_ID_SIZE,
+};
+
/* Get BOS descriptor set */
int usb_get_bos_descriptor(struct usb_device *dev)
{
struct device *ddev = &dev->dev;
struct usb_bos_descriptor *bos;
struct usb_dev_cap_header *cap;
+ struct usb_ssp_cap_descriptor *ssp_cap;
unsigned char *buffer;
- int length, total_len, num, i;
+ int length, total_len, num, i, ssac;
+ __u8 cap_type;
int ret;
bos = kzalloc(sizeof(struct usb_bos_descriptor), GFP_KERNEL);
@@ -931,7 +944,13 @@ int usb_get_bos_descriptor(struct usb_device *dev)
dev->bos->desc->bNumDeviceCaps = i;
break;
}
+ cap_type = cap->bDevCapabilityType;
length = cap->bLength;
+ if (bos_desc_len[cap_type] && length < bos_desc_len[cap_type]) {
+ dev->bos->desc->bNumDeviceCaps = i;
+ break;
+ }
+
total_len -= length;
if (cap->bDescriptorType != USB_DT_DEVICE_CAPABILITY) {
@@ -939,7 +958,7 @@ int usb_get_bos_descriptor(struct usb_device *dev)
continue;
}
- switch (cap->bDevCapabilityType) {
+ switch (cap_type) {
case USB_CAP_TYPE_WIRELESS_USB:
/* Wireless USB cap descriptor is handled by wusb */
break;
@@ -952,13 +971,19 @@ int usb_get_bos_descriptor(struct usb_device *dev)
(struct usb_ss_cap_descriptor *)buffer;
break;
case USB_SSP_CAP_TYPE:
- dev->bos->ssp_cap =
- (struct usb_ssp_cap_descriptor *)buffer;
+ ssp_cap = (struct usb_ssp_cap_descriptor *)buffer;
+ ssac = (le32_to_cpu(ssp_cap->bmAttributes) &
+ USB_SSP_SUBLINK_SPEED_ATTRIBS);
+ if (length >= USB_DT_USB_SSP_CAP_SIZE(ssac))
+ dev->bos->ssp_cap = ssp_cap;
break;
case CONTAINER_ID_TYPE:
dev->bos->ss_id =
(struct usb_ss_container_id_descriptor *)buffer;
break;
+ case USB_PTM_CAP_TYPE:
+ dev->bos->ptm_cap =
+ (struct usb_ptm_cap_descriptor *)buffer;
default:
break;
}
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index f4c3a37e00ba..ad2e6d235c30 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -113,42 +113,38 @@ enum snoop_when {
#define USB_DEVICE_DEV MKDEV(USB_DEVICE_MAJOR, 0)
/* Limit on the total amount of memory we can allocate for transfers */
-static unsigned usbfs_memory_mb = 16;
+static u32 usbfs_memory_mb = 16;
module_param(usbfs_memory_mb, uint, 0644);
MODULE_PARM_DESC(usbfs_memory_mb,
"maximum MB allowed for usbfs buffers (0 = no limit)");
/* Hard limit, necessary to avoid arithmetic overflow */
-#define USBFS_XFER_MAX (UINT_MAX / 2 - 1000000)
+#define USBFS_XFER_MAX (UINT_MAX / 2 - 1000000)
-static atomic_t usbfs_memory_usage; /* Total memory currently allocated */
+static atomic64_t usbfs_memory_usage; /* Total memory currently allocated */
/* Check whether it's okay to allocate more memory for a transfer */
-static int usbfs_increase_memory_usage(unsigned amount)
+static int usbfs_increase_memory_usage(u64 amount)
{
- unsigned lim;
+ u64 lim;
- /*
- * Convert usbfs_memory_mb to bytes, avoiding overflows.
- * 0 means use the hard limit (effectively unlimited).
- */
lim = ACCESS_ONCE(usbfs_memory_mb);
- if (lim == 0 || lim > (USBFS_XFER_MAX >> 20))
- lim = USBFS_XFER_MAX;
- else
- lim <<= 20;
+ lim <<= 20;
- atomic_add(amount, &usbfs_memory_usage);
- if (atomic_read(&usbfs_memory_usage) <= lim)
- return 0;
- atomic_sub(amount, &usbfs_memory_usage);
- return -ENOMEM;
+ atomic64_add(amount, &usbfs_memory_usage);
+
+ if (lim > 0 && atomic64_read(&usbfs_memory_usage) > lim) {
+ atomic64_sub(amount, &usbfs_memory_usage);
+ return -ENOMEM;
+ }
+
+ return 0;
}
/* Memory for a transfer is being deallocated */
-static void usbfs_decrease_memory_usage(unsigned amount)
+static void usbfs_decrease_memory_usage(u64 amount)
{
- atomic_sub(amount, &usbfs_memory_usage);
+ atomic64_sub(amount, &usbfs_memory_usage);
}
static int connected(struct usb_dev_state *ps)
@@ -1077,7 +1073,7 @@ static int proc_bulk(struct usb_dev_state *ps, void __user *arg)
if (!usb_maxpacket(dev, pipe, !(bulk.ep & USB_DIR_IN)))
return -EINVAL;
len1 = bulk.len;
- if (len1 >= USBFS_XFER_MAX)
+ if (len1 >= (INT_MAX - sizeof(struct urb)))
return -EINVAL;
ret = usbfs_increase_memory_usage(len1 + sizeof(struct urb));
if (ret)
@@ -1297,13 +1293,19 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb
int number_of_packets = 0;
unsigned int stream_id = 0;
void *buf;
-
- if (uurb->flags & ~(USBDEVFS_URB_ISO_ASAP |
- USBDEVFS_URB_SHORT_NOT_OK |
+ unsigned long mask = USBDEVFS_URB_SHORT_NOT_OK |
USBDEVFS_URB_BULK_CONTINUATION |
USBDEVFS_URB_NO_FSBR |
USBDEVFS_URB_ZERO_PACKET |
- USBDEVFS_URB_NO_INTERRUPT))
+ USBDEVFS_URB_NO_INTERRUPT;
+ /* USBDEVFS_URB_ISO_ASAP is a special case */
+ if (uurb->type == USBDEVFS_URB_TYPE_ISO)
+ mask |= USBDEVFS_URB_ISO_ASAP;
+
+ if (uurb->flags & ~mask)
+ return -EINVAL;
+
+ if ((unsigned int)uurb->buffer_length >= USBFS_XFER_MAX)
return -EINVAL;
if (uurb->buffer_length > 0 && !uurb->buffer)
return -EINVAL;
@@ -1424,10 +1426,6 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb
return -EINVAL;
}
- if (uurb->buffer_length >= USBFS_XFER_MAX) {
- ret = -EINVAL;
- goto error;
- }
if (uurb->buffer_length > 0 &&
!access_ok(is_in ? VERIFY_WRITE : VERIFY_READ,
uurb->buffer, uurb->buffer_length)) {
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 097a2ac9c2bb..6f96744267b6 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -4859,6 +4859,15 @@ loop:
usb_put_dev(udev);
if ((status == -ENOTCONN) || (status == -ENOTSUPP))
break;
+
+ /* When halfway through our retry count, power-cycle the port */
+ if (i == (SET_CONFIG_TRIES / 2) - 1) {
+ dev_info(&port_dev->dev, "attempt power cycle\n");
+ usb_hub_set_port_power(hdev, hub, port1, false);
+ msleep(2 * hub_power_on_good_delay(hub));
+ usb_hub_set_port_power(hdev, hub, port1, true);
+ msleep(hub_power_on_good_delay(hub));
+ }
}
if (hub->hdev->parent ||
!hcd->driver->port_handed_over ||
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 7699e4ab424d..db7b15b37c41 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -57,10 +57,11 @@ static const struct usb_device_id usb_quirk_list[] = {
/* Microsoft LifeCam-VX700 v2.0 */
{ USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME },
- /* Logitech HD Pro Webcams C920, C920-C and C930e */
+ /* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */
{ USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT },
{ USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT },
{ USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT },
+ { USB_DEVICE(0x046d, 0x085b), .driver_info = USB_QUIRK_DELAY_INIT },
/* Logitech ConferenceCam CC3000e */
{ USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT },
@@ -154,6 +155,12 @@ static const struct usb_device_id usb_quirk_list[] = {
/* appletouch */
{ USB_DEVICE(0x05ac, 0x021a), .driver_info = USB_QUIRK_RESET_RESUME },
+ /* Genesys Logic hub, internally used by KY-688 USB 3.1 Type-C Hub */
+ { USB_DEVICE(0x05e3, 0x0612), .driver_info = USB_QUIRK_NO_LPM },
+
+ /* ELSA MicroLink 56K */
+ { USB_DEVICE(0x05cc, 0x2267), .driver_info = USB_QUIRK_RESET_RESUME },
+
/* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */
{ USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM },
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index a0bd3c542d06..bccc2e0af58a 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -301,6 +301,7 @@ static ssize_t gadget_dev_desc_UDC_store(struct config_item *item,
ret = unregister_gadget(gi);
if (ret)
goto err;
+ kfree(name);
} else {
if (gi->udc_name) {
ret = -EBUSY;
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 22d092bc0890..598e73089da6 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -791,7 +791,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
}
if (io_data->aio) {
- req = usb_ep_alloc_request(ep->ep, GFP_KERNEL);
+ req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC);
if (unlikely(!req))
goto error_lock;
@@ -3496,7 +3496,8 @@ static void ffs_closed(struct ffs_data *ffs)
ci = opts->func_inst.group.cg_item.ci_parent->ci_parent;
ffs_dev_unlock();
- unregister_gadget_item(ci);
+ if (test_bit(FFS_FL_BOUND, &ffs->flags))
+ unregister_gadget_item(ci);
return;
done:
ffs_dev_unlock();
diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
index c7689d05356c..f8a1881609a2 100644
--- a/drivers/usb/gadget/function/f_uvc.c
+++ b/drivers/usb/gadget/function/f_uvc.c
@@ -594,6 +594,14 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f)
opts->streaming_maxpacket = clamp(opts->streaming_maxpacket, 1U, 3072U);
opts->streaming_maxburst = min(opts->streaming_maxburst, 15U);
+ /* For SS, wMaxPacketSize has to be 1024 if bMaxBurst is not 0 */
+ if (opts->streaming_maxburst &&
+ (opts->streaming_maxpacket % 1024) != 0) {
+ opts->streaming_maxpacket = roundup(opts->streaming_maxpacket, 1024);
+ INFO(cdev, "overriding streaming_maxpacket to %d\n",
+ opts->streaming_maxpacket);
+ }
+
/* Fill in the FS/HS/SS Video Streaming specific descriptors from the
* module parameters.
*
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index b6df47aa25af..81f3c9cb333c 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -1837,8 +1837,10 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
spin_lock_irq (&dev->lock);
value = -EINVAL;
- if (dev->buf)
+ if (dev->buf) {
+ kfree(kbuf);
goto fail;
+ }
dev->buf = kbuf;
/* full or low speed config */
diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c
index 7a04157ff579..2806457b4748 100644
--- a/drivers/usb/gadget/udc/pch_udc.c
+++ b/drivers/usb/gadget/udc/pch_udc.c
@@ -1534,7 +1534,6 @@ static void pch_udc_free_dma_chain(struct pch_udc_dev *dev,
td = phys_to_virt(addr);
addr2 = (dma_addr_t)td->next;
pci_pool_free(dev->data_requests, td, addr);
- td->next = 0x00;
addr = addr2;
}
req->chain_len = 1;
diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c
index b26b96e25a13..8e0b9377644b 100644
--- a/drivers/usb/host/ehci-dbg.c
+++ b/drivers/usb/host/ehci-dbg.c
@@ -851,7 +851,7 @@ static ssize_t fill_registers_buffer(struct debug_buffer *buf)
default: /* unknown */
break;
}
- temp = (cap >> 8) & 0xff;
+ offset = (cap >> 8) & 0xff;
}
}
#endif
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index cf6bbaff42d0..d9363713b7f1 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -981,6 +981,12 @@ void xhci_free_virt_devices_depth_first(struct xhci_hcd *xhci, int slot_id)
if (!vdev)
return;
+ if (vdev->real_port == 0 ||
+ vdev->real_port > HCS_MAX_PORTS(xhci->hcs_params1)) {
+ xhci_dbg(xhci, "Bad vdev->real_port.\n");
+ goto out;
+ }
+
tt_list_head = &(xhci->rh_bw[vdev->real_port - 1].tts);
list_for_each_entry_safe(tt_info, next, tt_list_head, tt_list) {
/* is this a hub device that added a tt_info to the tts list */
@@ -994,6 +1000,7 @@ void xhci_free_virt_devices_depth_first(struct xhci_hcd *xhci, int slot_id)
}
}
}
+out:
/* we are now at a leaf device */
xhci_free_virt_device(xhci, slot_id);
}
@@ -1010,10 +1017,9 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
return 0;
}
- xhci->devs[slot_id] = kzalloc(sizeof(*xhci->devs[slot_id]), flags);
- if (!xhci->devs[slot_id])
+ dev = kzalloc(sizeof(*dev), flags);
+ if (!dev)
return 0;
- dev = xhci->devs[slot_id];
/* Allocate the (output) device context that will be used in the HC. */
dev->out_ctx = xhci_alloc_container_ctx(xhci, XHCI_CTX_TYPE_DEVICE, flags);
@@ -1061,9 +1067,18 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
&xhci->dcbaa->dev_context_ptrs[slot_id],
le64_to_cpu(xhci->dcbaa->dev_context_ptrs[slot_id]));
+ xhci->devs[slot_id] = dev;
+
return 1;
fail:
- xhci_free_virt_device(xhci, slot_id);
+ if (dev->eps[0].ring)
+ xhci_ring_free(xhci, dev->eps[0].ring);
+ if (dev->in_ctx)
+ xhci_free_container_ctx(xhci, dev->in_ctx);
+ if (dev->out_ctx)
+ xhci_free_container_ctx(xhci, dev->out_ctx);
+ kfree(dev);
+
return 0;
}
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index e8f990642281..cbf3be66f89c 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -185,6 +185,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
xhci->quirks |= XHCI_BROKEN_STREAMS;
}
if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
+ pdev->device == 0x0014)
+ xhci->quirks |= XHCI_TRUST_TX_LENGTH;
+ if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
pdev->device == 0x0015)
xhci->quirks |= XHCI_RESET_ON_RESUME;
if (pdev->vendor == PCI_VENDOR_ID_VIA)
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index 008def456f0a..203b90bf44d3 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -309,7 +309,7 @@ MODULE_DEVICE_TABLE(acpi, usb_xhci_acpi_match);
static struct platform_driver usb_xhci_driver = {
.probe = xhci_plat_probe,
.remove = xhci_plat_remove,
- .shutdown = usb_hcd_platform_shutdown,
+ .shutdown = usb_hcd_platform_shutdown,
.driver = {
.name = "xhci-hcd",
.pm = DEV_PM_OPS,
diff --git a/drivers/usb/misc/usb3503.c b/drivers/usb/misc/usb3503.c
index b45cb77c0744..9e8789877763 100644
--- a/drivers/usb/misc/usb3503.c
+++ b/drivers/usb/misc/usb3503.c
@@ -292,6 +292,8 @@ static int usb3503_probe(struct usb3503 *hub)
if (gpio_is_valid(hub->gpio_reset)) {
err = devm_gpio_request_one(dev, hub->gpio_reset,
GPIOF_OUT_INIT_LOW, "usb3503 reset");
+ /* Datasheet defines a hardware reset to be at least 100us */
+ usleep_range(100, 10000);
if (err) {
dev_err(dev,
"unable to request GPIO %d as reset pin (%d)\n",
diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
index 3598f1a62673..251d123d9046 100644
--- a/drivers/usb/mon/mon_bin.c
+++ b/drivers/usb/mon/mon_bin.c
@@ -1001,7 +1001,9 @@ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg
break;
case MON_IOCQ_RING_SIZE:
+ mutex_lock(&rp->fetch_lock);
ret = rp->b_size;
+ mutex_unlock(&rp->fetch_lock);
break;
case MON_IOCT_RING_SIZE:
@@ -1228,12 +1230,16 @@ static int mon_bin_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
unsigned long offset, chunk_idx;
struct page *pageptr;
+ mutex_lock(&rp->fetch_lock);
offset = vmf->pgoff << PAGE_SHIFT;
- if (offset >= rp->b_size)
+ if (offset >= rp->b_size) {
+ mutex_unlock(&rp->fetch_lock);
return VM_FAULT_SIGBUS;
+ }
chunk_idx = offset / CHUNK_SIZE;
pageptr = rp->b_vec[chunk_idx].pg;
get_page(pageptr);
+ mutex_unlock(&rp->fetch_lock);
vmf->page = pageptr;
return 0;
}
diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c
index 9a9c82a4d35d..d6a8e325950c 100644
--- a/drivers/usb/musb/da8xx.c
+++ b/drivers/usb/musb/da8xx.c
@@ -350,7 +350,15 @@ static irqreturn_t da8xx_musb_interrupt(int irq, void *hci)
musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
portstate(musb->port1_status |= USB_PORT_STAT_POWER);
del_timer(&otg_workaround);
- } else {
+ } else if (!(musb->int_usb & MUSB_INTR_BABBLE)){
+ /*
+ * When babble condition happens, drvvbus interrupt
+ * is also generated. Ignore this drvvbus interrupt
+ * and let babble interrupt handler recovers the
+ * controller; otherwise, the host-mode flag is lost
+ * due to the MUSB_DEV_MODE() call below and babble
+ * recovery logic will not called.
+ */
musb->is_active = 0;
MUSB_DEV_MODE(musb);
otg->default_a = 0;
diff --git a/drivers/usb/musb/ux500.c b/drivers/usb/musb/ux500.c
index b2685e75a683..3eaa4ba6867d 100644
--- a/drivers/usb/musb/ux500.c
+++ b/drivers/usb/musb/ux500.c
@@ -348,7 +348,9 @@ static int ux500_suspend(struct device *dev)
struct ux500_glue *glue = dev_get_drvdata(dev);
struct musb *musb = glue_to_musb(glue);
- usb_phy_set_suspend(musb->xceiv, 1);
+ if (musb)
+ usb_phy_set_suspend(musb->xceiv, 1);
+
clk_disable_unprepare(glue->clk);
return 0;
@@ -366,7 +368,8 @@ static int ux500_resume(struct device *dev)
return ret;
}
- usb_phy_set_suspend(musb->xceiv, 0);
+ if (musb)
+ usb_phy_set_suspend(musb->xceiv, 0);
return 0;
}
diff --git a/drivers/usb/phy/phy-isp1301.c b/drivers/usb/phy/phy-isp1301.c
index db68156568e6..b3b33cf7ddf6 100644
--- a/drivers/usb/phy/phy-isp1301.c
+++ b/drivers/usb/phy/phy-isp1301.c
@@ -33,6 +33,12 @@ static const struct i2c_device_id isp1301_id[] = {
};
MODULE_DEVICE_TABLE(i2c, isp1301_id);
+static const struct of_device_id isp1301_of_match[] = {
+ {.compatible = "nxp,isp1301" },
+ { },
+};
+MODULE_DEVICE_TABLE(of, isp1301_of_match);
+
static struct i2c_client *isp1301_i2c_client;
static int __isp1301_write(struct isp1301 *isp, u8 reg, u8 value, u8 clear)
@@ -130,6 +136,7 @@ static int isp1301_remove(struct i2c_client *client)
static struct i2c_driver isp1301_driver = {
.driver = {
.name = DRV_NAME,
+ .of_match_table = of_match_ptr(isp1301_of_match),
},
.probe = isp1301_probe,
.remove = isp1301_remove,
diff --git a/drivers/usb/phy/phy-tahvo.c b/drivers/usb/phy/phy-tahvo.c
index ab5d364f6e8c..335a1ef35224 100644
--- a/drivers/usb/phy/phy-tahvo.c
+++ b/drivers/usb/phy/phy-tahvo.c
@@ -368,7 +368,8 @@ static int tahvo_usb_probe(struct platform_device *pdev)
tu->extcon = devm_extcon_dev_allocate(&pdev->dev, tahvo_cable);
if (IS_ERR(tu->extcon)) {
dev_err(&pdev->dev, "failed to allocate memory for extcon\n");
- return -ENOMEM;
+ ret = PTR_ERR(tu->extcon);
+ goto err_disable_clk;
}
ret = devm_extcon_dev_register(&pdev->dev, tu->extcon);
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 1f5ecf905b7d..a4ab4fdf5ba3 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -120,6 +120,7 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */
{ USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
{ USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */
+ { USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */
{ USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */
{ USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */
{ USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
@@ -170,6 +171,7 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
{ USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
{ USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */
+ { USB_DEVICE(0x18EF, 0xE030) }, /* ELV ALC 8xxx Battery Charger */
{ USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */
{ USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */
{ USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 30344efc123f..64fe9dc25ed4 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1017,6 +1017,7 @@ static const struct usb_device_id id_table_combined[] = {
.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
{ USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) },
{ USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) },
+ { USB_DEVICE(AIRBUS_DS_VID, AIRBUS_DS_P8GR) },
{ } /* Terminating entry */
};
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index f9d15bd62785..543d2801632b 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -914,6 +914,12 @@
#define ICPDAS_I7563U_PID 0x0105
/*
+ * Airbus Defence and Space
+ */
+#define AIRBUS_DS_VID 0x1e8e /* Vendor ID */
+#define AIRBUS_DS_P8GR 0x6001 /* Tetra P8GR */
+
+/*
* RT Systems programming cables for various ham radios
*/
#define RTSYSTEMS_VID 0x2100 /* Vendor ID */
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index db3d34c2c82e..a818c43a02ec 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -236,11 +236,14 @@ static void option_instat_callback(struct urb *urb);
/* These Quectel products use Qualcomm's vendor ID */
#define QUECTEL_PRODUCT_UC20 0x9003
#define QUECTEL_PRODUCT_UC15 0x9090
+/* These Yuga products use Qualcomm's vendor ID */
+#define YUGA_PRODUCT_CLM920_NC5 0x9625
#define QUECTEL_VENDOR_ID 0x2c7c
/* These Quectel products use Quectel's vendor ID */
#define QUECTEL_PRODUCT_EC21 0x0121
#define QUECTEL_PRODUCT_EC25 0x0125
+#define QUECTEL_PRODUCT_BG96 0x0296
#define CMOTECH_VENDOR_ID 0x16d8
#define CMOTECH_PRODUCT_6001 0x6001
@@ -282,6 +285,7 @@ static void option_instat_callback(struct urb *urb);
#define TELIT_PRODUCT_LE922_USBCFG3 0x1043
#define TELIT_PRODUCT_LE922_USBCFG5 0x1045
#define TELIT_PRODUCT_ME910 0x1100
+#define TELIT_PRODUCT_ME910_DUAL_MODEM 0x1101
#define TELIT_PRODUCT_LE920 0x1200
#define TELIT_PRODUCT_LE910 0x1201
#define TELIT_PRODUCT_LE910_USBCFG4 0x1206
@@ -647,6 +651,11 @@ static const struct option_blacklist_info telit_me910_blacklist = {
.reserved = BIT(1) | BIT(3),
};
+static const struct option_blacklist_info telit_me910_dual_modem_blacklist = {
+ .sendsetup = BIT(0),
+ .reserved = BIT(3),
+};
+
static const struct option_blacklist_info telit_le910_blacklist = {
.sendsetup = BIT(0),
.reserved = BIT(1) | BIT(2),
@@ -676,6 +685,10 @@ static const struct option_blacklist_info cinterion_rmnet2_blacklist = {
.reserved = BIT(4) | BIT(5),
};
+static const struct option_blacklist_info yuga_clm920_nc5_blacklist = {
+ .reserved = BIT(1) | BIT(4),
+};
+
static const struct usb_device_id option_ids[] = {
{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) },
{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) },
@@ -1180,11 +1193,16 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)},
{ USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20),
.driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+ /* Yuga products use Qualcomm vendor ID */
+ { USB_DEVICE(QUALCOMM_VENDOR_ID, YUGA_PRODUCT_CLM920_NC5),
+ .driver_info = (kernel_ulong_t)&yuga_clm920_nc5_blacklist },
/* Quectel products using Quectel vendor ID */
{ USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21),
.driver_info = (kernel_ulong_t)&net_intf4_blacklist },
{ USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25),
.driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+ { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96),
+ .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003),
@@ -1244,6 +1262,8 @@ static const struct usb_device_id option_ids[] = {
.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
.driver_info = (kernel_ulong_t)&telit_me910_blacklist },
+ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM),
+ .driver_info = (kernel_ulong_t)&telit_me910_dual_modem_blacklist },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
.driver_info = (kernel_ulong_t)&telit_le910_blacklist },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4),
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index 4516291df1b8..fb6dc16c754a 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -166,6 +166,8 @@ static const struct usb_device_id id_table[] = {
{DEVICE_SWI(0x1199, 0x9079)}, /* Sierra Wireless EM74xx */
{DEVICE_SWI(0x1199, 0x907a)}, /* Sierra Wireless EM74xx QDL */
{DEVICE_SWI(0x1199, 0x907b)}, /* Sierra Wireless EM74xx */
+ {DEVICE_SWI(0x1199, 0x9090)}, /* Sierra Wireless EM7565 QDL */
+ {DEVICE_SWI(0x1199, 0x9091)}, /* Sierra Wireless EM7565 */
{DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
{DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
{DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
@@ -346,6 +348,7 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
break;
case 2:
dev_dbg(dev, "NMEA GPS interface found\n");
+ sendsetup = true;
break;
case 3:
dev_dbg(dev, "Modem port found\n");
diff --git a/drivers/usb/storage/uas-detect.h b/drivers/usb/storage/uas-detect.h
index a155cd02bce2..ecc83c405a8b 100644
--- a/drivers/usb/storage/uas-detect.h
+++ b/drivers/usb/storage/uas-detect.h
@@ -111,6 +111,10 @@ static int uas_use_uas_driver(struct usb_interface *intf,
}
}
+ /* All Seagate disk enclosures have broken ATA pass-through support */
+ if (le16_to_cpu(udev->descriptor.idVendor) == 0x0bc2)
+ flags |= US_FL_NO_ATA_1X;
+
usb_stor_adjust_quirks(udev, &flags);
if (flags & US_FL_IGNORE_UAS) {
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index fb96755550ec..c10eceb76c39 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -2149,6 +2149,13 @@ UNUSUAL_DEV(0x152d, 0x9561, 0x0000, 0x9999,
USB_SC_DEVICE, USB_PR_DEVICE, NULL,
US_FL_NO_REPORT_OPCODES),
+/* Reported by David Kozub <zub@linux.fjfi.cvut.cz> */
+UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999,
+ "JMicron",
+ "JMS567",
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_BROKEN_FUA),
+
/*
* Patch by Constantin Baranov <const@tltsu.ru>
* Report by Andreas Koenecke.
diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index a37ed1e59e99..8ed80f28416f 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -141,6 +141,13 @@ UNUSUAL_DEV(0x152d, 0x0567, 0x0000, 0x9999,
USB_SC_DEVICE, USB_PR_DEVICE, NULL,
US_FL_BROKEN_FUA | US_FL_NO_REPORT_OPCODES),
+/* Reported-by: David Kozub <zub@linux.fjfi.cvut.cz> */
+UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999,
+ "JMicron",
+ "JMS567",
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_BROKEN_FUA),
+
/* Reported-by: Hans de Goede <hdegoede@redhat.com> */
UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
"VIA",
@@ -148,6 +155,13 @@ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
USB_SC_DEVICE, USB_PR_DEVICE, NULL,
US_FL_NO_ATA_1X),
+/* Reported-by: Icenowy Zheng <icenowy@aosc.io> */
+UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999,
+ "Norelsys",
+ "NS1068X",
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_IGNORE_UAS),
+
/* Reported-by: Takeo Nakayama <javhera@gmx.com> */
UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999,
"JMicron",
diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c
index af10f7b131a4..325b4c05acdd 100644
--- a/drivers/usb/usbip/stub_main.c
+++ b/drivers/usb/usbip/stub_main.c
@@ -252,11 +252,12 @@ void stub_device_cleanup_urbs(struct stub_device *sdev)
struct stub_priv *priv;
struct urb *urb;
- dev_dbg(&sdev->udev->dev, "free sdev %p\n", sdev);
+ dev_dbg(&sdev->udev->dev, "Stub device cleaning up urbs\n");
while ((priv = stub_priv_pop(sdev))) {
urb = priv->urb;
- dev_dbg(&sdev->udev->dev, "free urb %p\n", urb);
+ dev_dbg(&sdev->udev->dev, "free urb seqnum %lu\n",
+ priv->seqnum);
usb_kill_urb(urb);
kmem_cache_free(stub_priv_cache, priv);
diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c
index 00e475c51a12..7de54a66044f 100644
--- a/drivers/usb/usbip/stub_rx.c
+++ b/drivers/usb/usbip/stub_rx.c
@@ -230,9 +230,6 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev,
if (priv->seqnum != pdu->u.cmd_unlink.seqnum)
continue;
- dev_info(&priv->urb->dev->dev, "unlink urb %p\n",
- priv->urb);
-
/*
* This matched urb is not completed yet (i.e., be in
* flight in usb hcd hardware/driver). Now we are
@@ -271,8 +268,8 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev,
ret = usb_unlink_urb(priv->urb);
if (ret != -EINPROGRESS)
dev_err(&priv->urb->dev->dev,
- "failed to unlink a urb %p, ret %d\n",
- priv->urb, ret);
+ "failed to unlink a urb # %lu, ret %d\n",
+ priv->seqnum, ret);
return 0;
}
diff --git a/drivers/usb/usbip/stub_tx.c b/drivers/usb/usbip/stub_tx.c
index 021003c4de53..f4dd30c56f36 100644
--- a/drivers/usb/usbip/stub_tx.c
+++ b/drivers/usb/usbip/stub_tx.c
@@ -178,6 +178,13 @@ static int stub_send_ret_submit(struct stub_device *sdev)
memset(&pdu_header, 0, sizeof(pdu_header));
memset(&msg, 0, sizeof(msg));
+ if (urb->actual_length > 0 && !urb->transfer_buffer) {
+ dev_err(&sdev->udev->dev,
+ "urb: actual_length %d transfer_buffer null\n",
+ urb->actual_length);
+ return -1;
+ }
+
if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS)
iovnum = 2 + urb->number_of_packets;
else
@@ -194,8 +201,8 @@ static int stub_send_ret_submit(struct stub_device *sdev)
/* 1. setup usbip_header */
setup_ret_submit_pdu(&pdu_header, urb);
- usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n",
- pdu_header.base.seqnum, urb);
+ usbip_dbg_stub_tx("setup txdata seqnum: %d\n",
+ pdu_header.base.seqnum);
usbip_header_correct_endian(&pdu_header, 1);
iov[iovnum].iov_base = &pdu_header;
diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
index e40da7759a0e..9752b93f754e 100644
--- a/drivers/usb/usbip/usbip_common.c
+++ b/drivers/usb/usbip/usbip_common.c
@@ -103,7 +103,7 @@ static void usbip_dump_usb_device(struct usb_device *udev)
dev_dbg(dev, " devnum(%d) devpath(%s) usb speed(%s)",
udev->devnum, udev->devpath, usb_speed_string(udev->speed));
- pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport);
+ pr_debug("tt hub ttport %d\n", udev->ttport);
dev_dbg(dev, " ");
for (i = 0; i < 16; i++)
@@ -136,12 +136,8 @@ static void usbip_dump_usb_device(struct usb_device *udev)
}
pr_debug("\n");
- dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus);
-
- dev_dbg(dev,
- "descriptor %p, config %p, actconfig %p, rawdescriptors %p\n",
- &udev->descriptor, udev->config,
- udev->actconfig, udev->rawdescriptors);
+ dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev),
+ udev->bus->bus_name);
dev_dbg(dev, "have_langid %d, string_langid %d\n",
udev->have_langid, udev->string_langid);
@@ -249,9 +245,6 @@ void usbip_dump_urb(struct urb *urb)
dev = &urb->dev->dev;
- dev_dbg(dev, " urb :%p\n", urb);
- dev_dbg(dev, " dev :%p\n", urb->dev);
-
usbip_dump_usb_device(urb->dev);
dev_dbg(dev, " pipe :%08x ", urb->pipe);
@@ -260,11 +253,9 @@ void usbip_dump_urb(struct urb *urb)
dev_dbg(dev, " status :%d\n", urb->status);
dev_dbg(dev, " transfer_flags :%08X\n", urb->transfer_flags);
- dev_dbg(dev, " transfer_buffer :%p\n", urb->transfer_buffer);
dev_dbg(dev, " transfer_buffer_length:%d\n",
urb->transfer_buffer_length);
dev_dbg(dev, " actual_length :%d\n", urb->actual_length);
- dev_dbg(dev, " setup_packet :%p\n", urb->setup_packet);
if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL)
usbip_dump_usb_ctrlrequest(
@@ -274,8 +265,6 @@ void usbip_dump_urb(struct urb *urb)
dev_dbg(dev, " number_of_packets :%d\n", urb->number_of_packets);
dev_dbg(dev, " interval :%d\n", urb->interval);
dev_dbg(dev, " error_count :%d\n", urb->error_count);
- dev_dbg(dev, " context :%p\n", urb->context);
- dev_dbg(dev, " complete :%p\n", urb->complete);
}
EXPORT_SYMBOL_GPL(usbip_dump_urb);
diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c
index 81b2b9f808b5..f9af04d7f02f 100644
--- a/drivers/usb/usbip/vhci_hcd.c
+++ b/drivers/usb/usbip/vhci_hcd.c
@@ -467,9 +467,6 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
int ret = 0;
struct vhci_device *vdev;
- usbip_dbg_vhci_hc("enter, usb_hcd %p urb %p mem_flags %d\n",
- hcd, urb, mem_flags);
-
/* patch to usb_sg_init() is in 2.5.60 */
BUG_ON(!urb->transfer_buffer && urb->transfer_buffer_length);
@@ -627,8 +624,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
struct vhci_priv *priv;
struct vhci_device *vdev;
- pr_info("dequeue a urb %p\n", urb);
-
spin_lock(&the_controller->lock);
priv = urb->hcpriv;
@@ -656,7 +651,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
/* tcp connection is closed */
spin_lock(&vdev->priv_lock);
- pr_info("device %p seems to be disconnected\n", vdev);
list_del(&priv->list);
kfree(priv);
urb->hcpriv = NULL;
@@ -668,8 +662,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
* vhci_rx will receive RET_UNLINK and give back the URB.
* Otherwise, we give back it here.
*/
- pr_info("gives back urb %p\n", urb);
-
usb_hcd_unlink_urb_from_ep(hcd, urb);
spin_unlock(&the_controller->lock);
@@ -698,8 +690,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
unlink->unlink_seqnum = priv->seqnum;
- pr_info("device %p seems to be still connected\n", vdev);
-
/* send cmd_unlink and try to cancel the pending URB in the
* peer */
list_add_tail(&unlink->list, &vdev->unlink_tx);
diff --git a/drivers/usb/usbip/vhci_rx.c b/drivers/usb/usbip/vhci_rx.c
index 00e4a54308e4..bc4eb0855314 100644
--- a/drivers/usb/usbip/vhci_rx.c
+++ b/drivers/usb/usbip/vhci_rx.c
@@ -37,24 +37,23 @@ struct urb *pickup_urb_and_free_priv(struct vhci_device *vdev, __u32 seqnum)
urb = priv->urb;
status = urb->status;
- usbip_dbg_vhci_rx("find urb %p vurb %p seqnum %u\n",
- urb, priv, seqnum);
+ usbip_dbg_vhci_rx("find urb seqnum %u\n", seqnum);
switch (status) {
case -ENOENT:
/* fall through */
case -ECONNRESET:
- dev_info(&urb->dev->dev,
- "urb %p was unlinked %ssynchronuously.\n", urb,
- status == -ENOENT ? "" : "a");
+ dev_dbg(&urb->dev->dev,
+ "urb seq# %u was unlinked %ssynchronuously\n",
+ seqnum, status == -ENOENT ? "" : "a");
break;
case -EINPROGRESS:
/* no info output */
break;
default:
- dev_info(&urb->dev->dev,
- "urb %p may be in a error, status %d\n", urb,
- status);
+ dev_dbg(&urb->dev->dev,
+ "urb seq# %u may be in a error, status %d\n",
+ seqnum, status);
}
list_del(&priv->list);
@@ -78,8 +77,8 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev,
spin_unlock(&vdev->priv_lock);
if (!urb) {
- pr_err("cannot find a urb of seqnum %u\n", pdu->base.seqnum);
- pr_info("max seqnum %d\n",
+ pr_err("cannot find a urb of seqnum %u max seqnum %d\n",
+ pdu->base.seqnum,
atomic_read(&the_controller->seqnum));
usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
return;
@@ -102,7 +101,7 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev,
if (usbip_dbg_flag_vhci_rx)
usbip_dump_urb(urb);
- usbip_dbg_vhci_rx("now giveback urb %p\n", urb);
+ usbip_dbg_vhci_rx("now giveback urb %u\n", pdu->base.seqnum);
spin_lock(&the_controller->lock);
usb_hcd_unlink_urb_from_ep(vhci_to_hcd(the_controller), urb);
@@ -165,7 +164,7 @@ static void vhci_recv_ret_unlink(struct vhci_device *vdev,
pr_info("the urb (seqnum %d) was already given back\n",
pdu->base.seqnum);
} else {
- usbip_dbg_vhci_rx("now giveback urb %p\n", urb);
+ usbip_dbg_vhci_rx("now giveback urb %d\n", pdu->base.seqnum);
/* If unlink is successful, status is -ECONNRESET */
urb->status = pdu->u.ret_unlink.status;
diff --git a/drivers/usb/usbip/vhci_tx.c b/drivers/usb/usbip/vhci_tx.c
index 409fd99f3257..3c5796c8633a 100644
--- a/drivers/usb/usbip/vhci_tx.c
+++ b/drivers/usb/usbip/vhci_tx.c
@@ -82,7 +82,8 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev)
memset(&msg, 0, sizeof(msg));
memset(&iov, 0, sizeof(iov));
- usbip_dbg_vhci_tx("setup txdata urb %p\n", urb);
+ usbip_dbg_vhci_tx("setup txdata urb seqnum %lu\n",
+ priv->seqnum);
/* 1. setup usbip_header */
setup_cmd_submit_pdu(&pdu_header, urb);
diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
index 4cee1fcd246d..e6c38dbcea41 100644
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -79,14 +79,17 @@ static void pwm_backlight_power_off(struct pwm_bl_data *pb)
static int compute_duty_cycle(struct pwm_bl_data *pb, int brightness)
{
unsigned int lth = pb->lth_brightness;
- int duty_cycle;
+ u64 duty_cycle;
if (pb->levels)
duty_cycle = pb->levels[brightness];
else
duty_cycle = brightness;
- return (duty_cycle * (pb->period - lth) / pb->scale) + lth;
+ duty_cycle *= pb->period - lth;
+ do_div(duty_cycle, pb->scale);
+
+ return duty_cycle + lth;
}
static int pwm_backlight_update_status(struct backlight_device *bl)
diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c
index f9507b1894df..789d3f16ff9f 100644
--- a/drivers/video/fbdev/au1200fb.c
+++ b/drivers/video/fbdev/au1200fb.c
@@ -1680,8 +1680,10 @@ static int au1200fb_drv_probe(struct platform_device *dev)
fbi = framebuffer_alloc(sizeof(struct au1200fb_device),
&dev->dev);
- if (!fbi)
+ if (!fbi) {
+ ret = -ENOMEM;
goto failed;
+ }
_au1200fb_infos[plane] = fbi;
fbdev = fbi->par;
@@ -1699,7 +1701,8 @@ static int au1200fb_drv_probe(struct platform_device *dev)
if (!fbdev->fb_mem) {
print_err("fail to allocate frambuffer (size: %dK))",
fbdev->fb_len / 1024);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto failed;
}
/*
diff --git a/drivers/video/fbdev/controlfb.h b/drivers/video/fbdev/controlfb.h
index 6026c60fc100..261522fabdac 100644
--- a/drivers/video/fbdev/controlfb.h
+++ b/drivers/video/fbdev/controlfb.h
@@ -141,5 +141,7 @@ static struct max_cmodes control_mac_modes[] = {
{{ 1, 2}}, /* 1152x870, 75Hz */
{{ 0, 1}}, /* 1280x960, 75Hz */
{{ 0, 1}}, /* 1280x1024, 75Hz */
+ {{ 1, 2}}, /* 1152x768, 60Hz */
+ {{ 0, 1}}, /* 1600x1024, 60Hz */
};
diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c
index e9c2f7ba3c8e..53326badfb61 100644
--- a/drivers/video/fbdev/udlfb.c
+++ b/drivers/video/fbdev/udlfb.c
@@ -769,11 +769,11 @@ static int dlfb_get_edid(struct dlfb_data *dev, char *edid, int len)
for (i = 0; i < len; i++) {
ret = usb_control_msg(dev->udev,
- usb_rcvctrlpipe(dev->udev, 0), (0x02),
- (0x80 | (0x02 << 5)), i << 8, 0xA1, rbuf, 2,
- HZ);
- if (ret < 1) {
- pr_err("Read EDID byte %d failed err %x\n", i, ret);
+ usb_rcvctrlpipe(dev->udev, 0), 0x02,
+ (0x80 | (0x02 << 5)), i << 8, 0xA1,
+ rbuf, 2, USB_CTRL_GET_TIMEOUT);
+ if (ret < 2) {
+ pr_err("Read EDID byte %d failed: %d\n", i, ret);
i--;
break;
}
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 7062bb0975a5..462e183609b6 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -323,6 +323,8 @@ int register_virtio_device(struct virtio_device *dev)
/* device_register() causes the bus infrastructure to look for a
* matching driver. */
err = device_register(&dev->dev);
+ if (err)
+ ida_simple_remove(&virtio_index_ida, dev->index);
out:
if (err)
add_status(dev, VIRTIO_CONFIG_S_FAILED);
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 7ef637d7f3a5..7d54efd73519 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -362,7 +362,7 @@ static void afs_callback_updater(struct work_struct *work)
{
struct afs_server *server;
struct afs_vnode *vnode, *xvnode;
- time_t now;
+ time64_t now;
long timeout;
int ret;
@@ -370,7 +370,7 @@ static void afs_callback_updater(struct work_struct *work)
_enter("");
- now = get_seconds();
+ now = ktime_get_real_seconds();
/* find the first vnode to update */
spin_lock(&server->cb_lock);
@@ -424,7 +424,8 @@ static void afs_callback_updater(struct work_struct *work)
/* and then reschedule */
_debug("reschedule");
- vnode->update_at = get_seconds() + afs_vnode_update_timeout;
+ vnode->update_at = ktime_get_real_seconds() +
+ afs_vnode_update_timeout;
spin_lock(&server->cb_lock);
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 4b0eff6da674..83a8a33a0d73 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -115,6 +115,9 @@ bool afs_cm_incoming_call(struct afs_call *call)
case CBProbe:
call->type = &afs_SRXCBProbe;
return true;
+ case CBProbeUuid:
+ call->type = &afs_SRXCBProbeUuid;
+ return true;
case CBTellMeAboutYourself:
call->type = &afs_SRXCBTellMeAboutYourself;
return true;
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 999bc3caec92..cf8a07e282a6 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -29,6 +29,7 @@ static int afs_readpages(struct file *filp, struct address_space *mapping,
const struct file_operations afs_file_operations = {
.open = afs_open,
+ .flush = afs_flush,
.release = afs_release,
.llseek = generic_file_llseek,
.read_iter = generic_file_read_iter,
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index c2e930ec2888..10ce44214005 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -105,7 +105,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
vnode->vfs_inode.i_mode = mode;
}
- vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server;
+ vnode->vfs_inode.i_ctime.tv_sec = status->mtime_client;
vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime;
vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime;
vnode->vfs_inode.i_version = data_version;
@@ -139,7 +139,7 @@ static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode)
vnode->cb_version = ntohl(*bp++);
vnode->cb_expiry = ntohl(*bp++);
vnode->cb_type = ntohl(*bp++);
- vnode->cb_expires = vnode->cb_expiry + get_seconds();
+ vnode->cb_expires = vnode->cb_expiry + ktime_get_real_seconds();
*_bp = bp;
}
@@ -703,8 +703,8 @@ int afs_fs_create(struct afs_server *server,
memset(bp, 0, padsz);
bp = (void *) bp + padsz;
}
- *bp++ = htonl(AFS_SET_MODE);
- *bp++ = 0; /* mtime */
+ *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+ *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = htonl(mode & S_IALLUGO); /* unix mode */
@@ -981,8 +981,8 @@ int afs_fs_symlink(struct afs_server *server,
memset(bp, 0, c_padsz);
bp = (void *) bp + c_padsz;
}
- *bp++ = htonl(AFS_SET_MODE);
- *bp++ = 0; /* mtime */
+ *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+ *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = htonl(S_IRWXUGO); /* unix mode */
@@ -1192,8 +1192,8 @@ static int afs_fs_store_data64(struct afs_server *server,
*bp++ = htonl(vnode->fid.vnode);
*bp++ = htonl(vnode->fid.unique);
- *bp++ = 0; /* mask */
- *bp++ = 0; /* mtime */
+ *bp++ = htonl(AFS_SET_MTIME); /* mask */
+ *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = 0; /* unix mode */
@@ -1225,7 +1225,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
_enter(",%x,{%x:%u},,",
key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
- size = to - offset;
+ size = (loff_t)to - (loff_t)offset;
if (first != last)
size += (loff_t)(last - first) << PAGE_SHIFT;
pos = (loff_t)first << PAGE_SHIFT;
@@ -1269,8 +1269,8 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
*bp++ = htonl(vnode->fid.vnode);
*bp++ = htonl(vnode->fid.unique);
- *bp++ = 0; /* mask */
- *bp++ = 0; /* mtime */
+ *bp++ = htonl(AFS_SET_MTIME); /* mask */
+ *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = 0; /* unix mode */
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index e06f5a23352a..f8fa92b1d43c 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -69,9 +69,9 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
set_nlink(inode, vnode->status.nlink);
inode->i_uid = vnode->status.owner;
- inode->i_gid = GLOBAL_ROOT_GID;
+ inode->i_gid = vnode->status.group;
inode->i_size = vnode->status.size;
- inode->i_ctime.tv_sec = vnode->status.mtime_server;
+ inode->i_ctime.tv_sec = vnode->status.mtime_client;
inode->i_ctime.tv_nsec = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime;
inode->i_blocks = 0;
@@ -244,12 +244,13 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
vnode->cb_version = 0;
vnode->cb_expiry = 0;
vnode->cb_type = 0;
- vnode->cb_expires = get_seconds();
+ vnode->cb_expires = ktime_get_real_seconds();
} else {
vnode->cb_version = cb->version;
vnode->cb_expiry = cb->expiry;
vnode->cb_type = cb->type;
- vnode->cb_expires = vnode->cb_expiry + get_seconds();
+ vnode->cb_expires = vnode->cb_expiry +
+ ktime_get_real_seconds();
}
}
@@ -322,7 +323,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
!test_bit(AFS_VNODE_MODIFIED, &vnode->flags) &&
!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
- if (vnode->cb_expires < get_seconds() + 10) {
+ if (vnode->cb_expires < ktime_get_real_seconds() + 10) {
_debug("callback expired");
set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
} else {
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 71d5982312f3..1330b2a695ff 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -11,6 +11,7 @@
#include <linux/compiler.h>
#include <linux/kernel.h>
+#include <linux/ktime.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/skbuff.h>
@@ -247,7 +248,7 @@ struct afs_cache_vhash {
*/
struct afs_vlocation {
atomic_t usage;
- time_t time_of_death; /* time at which put reduced usage to 0 */
+ time64_t time_of_death; /* time at which put reduced usage to 0 */
struct list_head link; /* link in cell volume location list */
struct list_head grave; /* link in master graveyard list */
struct list_head update; /* link in master update list */
@@ -258,7 +259,7 @@ struct afs_vlocation {
struct afs_cache_vlocation vldb; /* volume information DB record */
struct afs_volume *vols[3]; /* volume access record pointer (index by type) */
wait_queue_head_t waitq; /* status change waitqueue */
- time_t update_at; /* time at which record should be updated */
+ time64_t update_at; /* time at which record should be updated */
spinlock_t lock; /* access lock */
afs_vlocation_state_t state; /* volume location state */
unsigned short upd_rej_cnt; /* ENOMEDIUM count during update */
@@ -271,7 +272,7 @@ struct afs_vlocation {
*/
struct afs_server {
atomic_t usage;
- time_t time_of_death; /* time at which put reduced usage to 0 */
+ time64_t time_of_death; /* time at which put reduced usage to 0 */
struct in_addr addr; /* server address */
struct afs_cell *cell; /* cell in which server resides */
struct list_head link; /* link in cell's server list */
@@ -374,8 +375,8 @@ struct afs_vnode {
struct rb_node server_rb; /* link in server->fs_vnodes */
struct rb_node cb_promise; /* link in server->cb_promises */
struct work_struct cb_broken_work; /* work to be done on callback break */
- time_t cb_expires; /* time at which callback expires */
- time_t cb_expires_at; /* time used to order cb_promise */
+ time64_t cb_expires; /* time at which callback expires */
+ time64_t cb_expires_at; /* time used to order cb_promise */
unsigned cb_version; /* callback version */
unsigned cb_expiry; /* callback expiry time */
afs_callback_type_t cb_type; /* type of callback */
@@ -749,6 +750,7 @@ extern int afs_writepages(struct address_space *, struct writeback_control *);
extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
extern int afs_writeback_all(struct afs_vnode *);
+extern int afs_flush(struct file *, fl_owner_t);
extern int afs_fsync(struct file *, loff_t, loff_t, int);
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 8d010422dc89..bfa9d3428383 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -340,17 +340,22 @@ int afs_permission(struct inode *inode, int mask)
} else {
if (!(access & AFS_ACE_LOOKUP))
goto permission_denied;
+ if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR))
+ goto permission_denied;
if (mask & (MAY_EXEC | MAY_READ)) {
if (!(access & AFS_ACE_READ))
goto permission_denied;
+ if (!(inode->i_mode & S_IRUSR))
+ goto permission_denied;
} else if (mask & MAY_WRITE) {
if (!(access & AFS_ACE_WRITE))
goto permission_denied;
+ if (!(inode->i_mode & S_IWUSR))
+ goto permission_denied;
}
}
key_put(key);
- ret = generic_permission(inode, mask);
_leave(" = %d", ret);
return ret;
diff --git a/fs/afs/server.c b/fs/afs/server.c
index f342acf3547d..3bc1a46f0bd6 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -237,7 +237,7 @@ void afs_put_server(struct afs_server *server)
spin_lock(&afs_server_graveyard_lock);
if (atomic_read(&server->usage) == 0) {
list_move_tail(&server->grave, &afs_server_graveyard);
- server->time_of_death = get_seconds();
+ server->time_of_death = ktime_get_real_seconds();
queue_delayed_work(afs_wq, &afs_server_reaper,
afs_server_timeout * HZ);
}
@@ -272,9 +272,9 @@ static void afs_reap_server(struct work_struct *work)
LIST_HEAD(corpses);
struct afs_server *server;
unsigned long delay, expiry;
- time_t now;
+ time64_t now;
- now = get_seconds();
+ now = ktime_get_real_seconds();
spin_lock(&afs_server_graveyard_lock);
while (!list_empty(&afs_server_graveyard)) {
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 52976785a32c..ee9015c0db5a 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -340,7 +340,8 @@ static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
struct afs_vlocation *xvl;
/* wait at least 10 minutes before updating... */
- vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+ vl->update_at = ktime_get_real_seconds() +
+ afs_vlocation_update_timeout;
spin_lock(&afs_vlocation_updates_lock);
@@ -506,7 +507,7 @@ void afs_put_vlocation(struct afs_vlocation *vl)
if (atomic_read(&vl->usage) == 0) {
_debug("buried");
list_move_tail(&vl->grave, &afs_vlocation_graveyard);
- vl->time_of_death = get_seconds();
+ vl->time_of_death = ktime_get_real_seconds();
queue_delayed_work(afs_wq, &afs_vlocation_reap,
afs_vlocation_timeout * HZ);
@@ -543,11 +544,11 @@ static void afs_vlocation_reaper(struct work_struct *work)
LIST_HEAD(corpses);
struct afs_vlocation *vl;
unsigned long delay, expiry;
- time_t now;
+ time64_t now;
_enter("");
- now = get_seconds();
+ now = ktime_get_real_seconds();
spin_lock(&afs_vlocation_graveyard_lock);
while (!list_empty(&afs_vlocation_graveyard)) {
@@ -622,13 +623,13 @@ static void afs_vlocation_updater(struct work_struct *work)
{
struct afs_cache_vlocation vldb;
struct afs_vlocation *vl, *xvl;
- time_t now;
+ time64_t now;
long timeout;
int ret;
_enter("");
- now = get_seconds();
+ now = ktime_get_real_seconds();
/* find a record to update */
spin_lock(&afs_vlocation_updates_lock);
@@ -684,7 +685,8 @@ static void afs_vlocation_updater(struct work_struct *work)
/* and then reschedule */
_debug("reschedule");
- vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+ vl->update_at = ktime_get_real_seconds() +
+ afs_vlocation_update_timeout;
spin_lock(&afs_vlocation_updates_lock);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 0714abcd7f32..5cfc05ca184c 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -148,12 +148,12 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
kfree(candidate);
return -ENOMEM;
}
- *pagep = page;
- /* page won't leak in error case: it eventually gets cleaned off LRU */
if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) {
ret = afs_fill_page(vnode, key, index << PAGE_CACHE_SHIFT, page);
if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
kfree(candidate);
_leave(" = %d [prep]", ret);
return ret;
@@ -161,6 +161,9 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
SetPageUptodate(page);
}
+ /* page won't leak in error case: it eventually gets cleaned off LRU */
+ *pagep = page;
+
try_again:
spin_lock(&vnode->writeback_lock);
@@ -296,10 +299,14 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error,
ASSERTCMP(pv.nr, ==, count);
for (loop = 0; loop < count; loop++) {
- ClearPageUptodate(pv.pages[loop]);
+ struct page *page = pv.pages[loop];
+ ClearPageUptodate(page);
if (error)
- SetPageError(pv.pages[loop]);
- end_page_writeback(pv.pages[loop]);
+ SetPageError(page);
+ if (PageWriteback(page))
+ end_page_writeback(page);
+ if (page->index >= first)
+ first = page->index + 1;
}
__pagevec_release(&pv);
@@ -503,6 +510,7 @@ static int afs_writepages_region(struct address_space *mapping,
if (PageWriteback(page) || !PageDirty(page)) {
unlock_page(page);
+ put_page(page);
continue;
}
@@ -740,6 +748,20 @@ out:
}
/*
+ * Flush out all outstanding writes on a file opened for writing when it is
+ * closed.
+ */
+int afs_flush(struct file *file, fl_owner_t id)
+{
+ _enter("");
+
+ if ((file->f_mode & FMODE_WRITE) == 0)
+ return 0;
+
+ return vfs_fsync(file, 0);
+}
+
+/*
* notification that a previously read-only page is about to become writable
* - if it returns an error, the caller will deliver a bus error signal
*/
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index fe6e7050fe50..98198c57370b 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -174,7 +174,6 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
mutex_unlock(&sbi->wq_mutex);
- if (autofs4_write(sbi, pipe, &pkt, pktsz))
switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) {
case 0:
break;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c36a03fa7678..260f94b019c9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3361,13 +3361,6 @@ again:
goto again;
}
- /* We've already setup this transaction, go ahead and exit */
- if (block_group->cache_generation == trans->transid &&
- i_size_read(inode)) {
- dcs = BTRFS_DC_SETUP;
- goto out_put;
- }
-
/*
* We want to set the generation to 0, that way if anything goes wrong
* from here on out we know not to trust this cache when we load up next
@@ -3391,6 +3384,13 @@ again:
}
WARN_ON(ret);
+ /* We've already setup this transaction, go ahead and exit */
+ if (block_group->cache_generation == trans->transid &&
+ i_size_read(inode)) {
+ dcs = BTRFS_DC_SETUP;
+ goto out_put;
+ }
+
if (i_size_read(inode) > 0) {
ret = btrfs_check_trunc_cache_free_space(root,
&root->fs_info->global_block_rsv);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index bebd6517355d..af1da85da509 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6735,6 +6735,20 @@ static noinline int uncompress_inline(struct btrfs_path *path,
max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size);
ret = btrfs_decompress(compress_type, tmp, page,
extent_offset, inline_size, max_size);
+
+ /*
+ * decompression code contains a memset to fill in any space between the end
+ * of the uncompressed data and the end of max_size in case the decompressed
+ * data ends up shorter than ram_bytes. That doesn't cover the hole between
+ * the end of an inline extent and the beginning of the next block, so we
+ * cover that region here.
+ */
+
+ if (max_size + pg_offset < PAGE_SIZE) {
+ char *map = kmap(page);
+ memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset);
+ kunmap(page);
+ }
kfree(tmp);
return ret;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index ead89489ae71..35e6e0b2cf34 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1400,6 +1400,29 @@ static int __close_session(struct ceph_mds_client *mdsc,
return request_close_session(mdsc, session);
}
+static bool drop_negative_children(struct dentry *dentry)
+{
+ struct dentry *child;
+ bool all_negative = true;
+
+ if (!d_is_dir(dentry))
+ goto out;
+
+ spin_lock(&dentry->d_lock);
+ list_for_each_entry(child, &dentry->d_subdirs, d_child) {
+ if (d_really_is_positive(child)) {
+ all_negative = false;
+ break;
+ }
+ }
+ spin_unlock(&dentry->d_lock);
+
+ if (all_negative)
+ shrink_dcache_parent(dentry);
+out:
+ return all_negative;
+}
+
/*
* Trim old(er) caps.
*
@@ -1445,16 +1468,27 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
if ((used | wanted) & ~oissued & mine)
goto out; /* we need these caps */
- session->s_trim_caps--;
if (oissued) {
/* we aren't the only cap.. just remove us */
__ceph_remove_cap(cap, true);
+ session->s_trim_caps--;
} else {
+ struct dentry *dentry;
/* try dropping referring dentries */
spin_unlock(&ci->i_ceph_lock);
- d_prune_aliases(inode);
- dout("trim_caps_cb %p cap %p pruned, count now %d\n",
- inode, cap, atomic_read(&inode->i_count));
+ dentry = d_find_any_alias(inode);
+ if (dentry && drop_negative_children(dentry)) {
+ int count;
+ dput(dentry);
+ d_prune_aliases(inode);
+ count = atomic_read(&inode->i_count);
+ if (count == 1)
+ session->s_trim_caps--;
+ dout("trim_caps_cb %p cap %p pruned, count now %d\n",
+ inode, cap, count);
+ } else {
+ dput(dentry);
+ }
return 0;
}
diff --git a/fs/crypto/Makefile b/fs/crypto/Makefile
index 9f6607f17b53..cb496989a6b6 100644
--- a/fs/crypto/Makefile
+++ b/fs/crypto/Makefile
@@ -1,4 +1,4 @@
obj-$(CONFIG_FS_ENCRYPTION) += fscrypto.o
-fscrypto-y := crypto.o fname.o policy.o keyinfo.o
+fscrypto-y := crypto.o fname.o hooks.o keyinfo.o policy.o
fscrypto-$(CONFIG_BLOCK) += bio.o
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index c7835df7e7b8..732a786cce9d 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -126,21 +126,6 @@ struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags)
}
EXPORT_SYMBOL(fscrypt_get_ctx);
-/**
- * page_crypt_complete() - completion callback for page crypto
- * @req: The asynchronous cipher request context
- * @res: The result of the cipher operation
- */
-static void page_crypt_complete(struct crypto_async_request *req, int res)
-{
- struct fscrypt_completion_result *ecr = req->data;
-
- if (res == -EINPROGRESS)
- return;
- ecr->res = res;
- complete(&ecr->completion);
-}
-
int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
u64 lblk_num, struct page *src_page,
struct page *dest_page, unsigned int len,
@@ -151,7 +136,7 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
u8 padding[FS_IV_SIZE - sizeof(__le64)];
} iv;
struct skcipher_request *req = NULL;
- DECLARE_FS_COMPLETION_RESULT(ecr);
+ DECLARE_CRYPTO_WAIT(wait);
struct scatterlist dst, src;
struct fscrypt_info *ci = inode->i_crypt_info;
struct crypto_skcipher *tfm = ci->ci_ctfm;
@@ -179,7 +164,7 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
skcipher_request_set_callback(
req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- page_crypt_complete, &ecr);
+ crypto_req_done, &wait);
sg_init_table(&dst, 1);
sg_set_page(&dst, dest_page, len, offs);
@@ -187,14 +172,9 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
sg_set_page(&src, src_page, len, offs);
skcipher_request_set_crypt(req, &src, &dst, len, &iv);
if (rw == FS_DECRYPT)
- res = crypto_skcipher_decrypt(req);
+ res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait);
else
- res = crypto_skcipher_encrypt(req);
- if (res == -EINPROGRESS || res == -EBUSY) {
- BUG_ON(req->base.data != &ecr);
- wait_for_completion(&ecr.completion);
- res = ecr.res;
- }
+ res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
skcipher_request_free(req);
if (res) {
printk_ratelimited(KERN_ERR
@@ -340,7 +320,7 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
return -ECHILD;
dir = dget_parent(dentry);
- if (!d_inode(dir)->i_sb->s_cop->is_encrypted(d_inode(dir))) {
+ if (!IS_ENCRYPTED(d_inode(dir))) {
dput(dir);
return 0;
}
@@ -410,11 +390,8 @@ int fscrypt_initialize(unsigned int cop_flags)
{
int i, res = -ENOMEM;
- /*
- * No need to allocate a bounce page pool if there already is one or
- * this FS won't use it.
- */
- if (cop_flags & FS_CFLG_OWN_PAGES || fscrypt_bounce_page_pool)
+ /* No need to allocate a bounce page pool if this FS won't use it. */
+ if (cop_flags & FS_CFLG_OWN_PAGES)
return 0;
mutex_lock(&fscrypt_init_mutex);
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index ad9f814fdead..6eb434363ff2 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -15,21 +15,6 @@
#include "fscrypt_private.h"
/**
- * fname_crypt_complete() - completion callback for filename crypto
- * @req: The asynchronous cipher request context
- * @res: The result of the cipher operation
- */
-static void fname_crypt_complete(struct crypto_async_request *req, int res)
-{
- struct fscrypt_completion_result *ecr = req->data;
-
- if (res == -EINPROGRESS)
- return;
- ecr->res = res;
- complete(&ecr->completion);
-}
-
-/**
* fname_encrypt() - encrypt a filename
*
* The caller must have allocated sufficient memory for the @oname string.
@@ -40,7 +25,7 @@ static int fname_encrypt(struct inode *inode,
const struct qstr *iname, struct fscrypt_str *oname)
{
struct skcipher_request *req = NULL;
- DECLARE_FS_COMPLETION_RESULT(ecr);
+ DECLARE_CRYPTO_WAIT(wait);
struct fscrypt_info *ci = inode->i_crypt_info;
struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
@@ -76,17 +61,12 @@ static int fname_encrypt(struct inode *inode,
}
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- fname_crypt_complete, &ecr);
+ crypto_req_done, &wait);
sg_init_one(&sg, oname->name, cryptlen);
skcipher_request_set_crypt(req, &sg, &sg, cryptlen, iv);
/* Do the encryption */
- res = crypto_skcipher_encrypt(req);
- if (res == -EINPROGRESS || res == -EBUSY) {
- /* Request is being completed asynchronously; wait for it */
- wait_for_completion(&ecr.completion);
- res = ecr.res;
- }
+ res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
skcipher_request_free(req);
if (res < 0) {
printk_ratelimited(KERN_ERR
@@ -110,7 +90,7 @@ static int fname_decrypt(struct inode *inode,
struct fscrypt_str *oname)
{
struct skcipher_request *req = NULL;
- DECLARE_FS_COMPLETION_RESULT(ecr);
+ DECLARE_CRYPTO_WAIT(wait);
struct scatterlist src_sg, dst_sg;
struct fscrypt_info *ci = inode->i_crypt_info;
struct crypto_skcipher *tfm = ci->ci_ctfm;
@@ -131,7 +111,7 @@ static int fname_decrypt(struct inode *inode,
}
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- fname_crypt_complete, &ecr);
+ crypto_req_done, &wait);
/* Initialize IV */
memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
@@ -140,11 +120,7 @@ static int fname_decrypt(struct inode *inode,
sg_init_one(&src_sg, iname->name, iname->len);
sg_init_one(&dst_sg, oname->name, oname->len);
skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
- res = crypto_skcipher_decrypt(req);
- if (res == -EINPROGRESS || res == -EBUSY) {
- wait_for_completion(&ecr.completion);
- res = ecr.res;
- }
+ res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait);
skcipher_request_free(req);
if (res < 0) {
printk_ratelimited(KERN_ERR
@@ -382,8 +358,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
memset(fname, 0, sizeof(struct fscrypt_name));
fname->usr_fname = iname;
- if (!dir->i_sb->s_cop->is_encrypted(dir) ||
- fscrypt_is_dot_dotdot(iname)) {
+ if (!IS_ENCRYPTED(dir) || fscrypt_is_dot_dotdot(iname)) {
fname->disk_name.name = (unsigned char *)iname->name;
fname->disk_name.len = iname->len;
return 0;
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 79d79755d79b..c3ad415cd14f 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -11,7 +11,8 @@
#ifndef _FSCRYPT_PRIVATE_H
#define _FSCRYPT_PRIVATE_H
-#include <linux/fscrypt_supp.h>
+#define __FS_HAS_ENCRYPTION 1
+#include <linux/fscrypt.h>
#include <crypto/hash.h>
/* Encryption parameters */
@@ -69,15 +70,6 @@ typedef enum {
#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
#define FS_CTX_HAS_BOUNCE_BUFFER_FL 0x00000002
-struct fscrypt_completion_result {
- struct completion completion;
- int res;
-};
-
-#define DECLARE_FS_COMPLETION_RESULT(ecr) \
- struct fscrypt_completion_result ecr = { \
- COMPLETION_INITIALIZER_ONSTACK((ecr).completion), 0 }
-
/* bio stuffs */
#define REQ_OP_READ READ
#define REQ_OP_WRITE WRITE
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
new file mode 100644
index 000000000000..9f5fb2eb9cf7
--- /dev/null
+++ b/fs/crypto/hooks.c
@@ -0,0 +1,112 @@
+/*
+ * fs/crypto/hooks.c
+ *
+ * Encryption hooks for higher-level filesystem operations.
+ */
+
+#include <linux/ratelimit.h>
+#include "fscrypt_private.h"
+
+/**
+ * fscrypt_file_open - prepare to open a possibly-encrypted regular file
+ * @inode: the inode being opened
+ * @filp: the struct file being set up
+ *
+ * Currently, an encrypted regular file can only be opened if its encryption key
+ * is available; access to the raw encrypted contents is not supported.
+ * Therefore, we first set up the inode's encryption key (if not already done)
+ * and return an error if it's unavailable.
+ *
+ * We also verify that if the parent directory (from the path via which the file
+ * is being opened) is encrypted, then the inode being opened uses the same
+ * encryption policy. This is needed as part of the enforcement that all files
+ * in an encrypted directory tree use the same encryption policy, as a
+ * protection against certain types of offline attacks. Note that this check is
+ * needed even when opening an *unencrypted* file, since it's forbidden to have
+ * an unencrypted file in an encrypted directory.
+ *
+ * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code
+ */
+int fscrypt_file_open(struct inode *inode, struct file *filp)
+{
+ int err;
+ struct dentry *dir;
+
+ err = fscrypt_require_key(inode);
+ if (err)
+ return err;
+
+ dir = dget_parent(file_dentry(filp));
+ if (IS_ENCRYPTED(d_inode(dir)) &&
+ !fscrypt_has_permitted_context(d_inode(dir), inode)) {
+ pr_warn_ratelimited("fscrypt: inconsistent encryption contexts: %lu/%lu",
+ d_inode(dir)->i_ino, inode->i_ino);
+ err = -EPERM;
+ }
+ dput(dir);
+ return err;
+}
+EXPORT_SYMBOL_GPL(fscrypt_file_open);
+
+int __fscrypt_prepare_link(struct inode *inode, struct inode *dir)
+{
+ int err;
+
+ err = fscrypt_require_key(dir);
+ if (err)
+ return err;
+
+ if (!fscrypt_has_permitted_context(dir, inode))
+ return -EPERM;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__fscrypt_prepare_link);
+
+int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ int err;
+
+ err = fscrypt_require_key(old_dir);
+ if (err)
+ return err;
+
+ err = fscrypt_require_key(new_dir);
+ if (err)
+ return err;
+
+ if (old_dir != new_dir) {
+ if (IS_ENCRYPTED(new_dir) &&
+ !fscrypt_has_permitted_context(new_dir,
+ d_inode(old_dentry)))
+ return -EPERM;
+
+ if ((flags & RENAME_EXCHANGE) &&
+ IS_ENCRYPTED(old_dir) &&
+ !fscrypt_has_permitted_context(old_dir,
+ d_inode(new_dentry)))
+ return -EPERM;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__fscrypt_prepare_rename);
+
+int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry)
+{
+ int err = fscrypt_get_encryption_info(dir);
+
+ if (err)
+ return err;
+
+ if (fscrypt_has_encryption_key(dir)) {
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY;
+ spin_unlock(&dentry->d_lock);
+ }
+
+ d_set_d_op(dentry, &fscrypt_d_ops);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup);
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 66e0728e9bbe..444c65ed6db8 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -17,17 +17,6 @@
static struct crypto_shash *essiv_hash_tfm;
-static void derive_crypt_complete(struct crypto_async_request *req, int rc)
-{
- struct fscrypt_completion_result *ecr = req->data;
-
- if (rc == -EINPROGRESS)
- return;
-
- ecr->res = rc;
- complete(&ecr->completion);
-}
-
/**
* derive_key_aes() - Derive a key using AES-128-ECB
* @deriving_key: Encryption key used for derivation.
@@ -42,7 +31,7 @@ static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE],
{
int res = 0;
struct skcipher_request *req = NULL;
- DECLARE_FS_COMPLETION_RESULT(ecr);
+ DECLARE_CRYPTO_WAIT(wait);
struct scatterlist src_sg, dst_sg;
struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0);
@@ -59,7 +48,7 @@ static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE],
}
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- derive_crypt_complete, &ecr);
+ crypto_req_done, &wait);
res = crypto_skcipher_setkey(tfm, deriving_key,
FS_AES_128_ECB_KEY_SIZE);
if (res < 0)
@@ -69,11 +58,7 @@ static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE],
sg_init_one(&dst_sg, derived_raw_key, source_key->size);
skcipher_request_set_crypt(req, &src_sg, &dst_sg, source_key->size,
NULL);
- res = crypto_skcipher_encrypt(req);
- if (res == -EINPROGRESS || res == -EBUSY) {
- wait_for_completion(&ecr.completion);
- res = ecr.res;
- }
+ res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
out:
skcipher_request_free(req);
crypto_free_skcipher(tfm);
@@ -109,6 +94,11 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
goto out;
}
ukp = user_key_payload(keyring_key);
+ if (!ukp) {
+ /* key was revoked before we acquired its semaphore */
+ res = -EKEYREVOKED;
+ goto out;
+ }
if (ukp->datalen != sizeof(struct fscrypt_key)) {
res = -EINVAL;
goto out;
@@ -268,7 +258,7 @@ int fscrypt_get_encryption_info(struct inode *inode)
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (res < 0) {
if (!fscrypt_dummy_context_enabled(inode) ||
- inode->i_sb->s_cop->is_encrypted(inode))
+ IS_ENCRYPTED(inode))
return res;
/* Fake up a context for an unencrypted directory */
memset(&ctx, 0, sizeof(ctx));
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 9914d51dff86..2f2c53f2e136 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -109,7 +109,7 @@ int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg)
struct fscrypt_policy policy;
int res;
- if (!inode->i_sb->s_cop->is_encrypted(inode))
+ if (!IS_ENCRYPTED(inode))
return -ENODATA;
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
@@ -166,11 +166,11 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
return 1;
/* No restrictions if the parent directory is unencrypted */
- if (!cops->is_encrypted(parent))
+ if (!IS_ENCRYPTED(parent))
return 1;
/* Encrypted directories must not contain unencrypted files */
- if (!cops->is_encrypted(child))
+ if (!IS_ENCRYPTED(child))
return 0;
/*
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 31a3e480d484..403c4bae3e18 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4746,6 +4746,7 @@ retry:
EXT4_INODE_EOFBLOCKS);
}
ext4_mark_inode_dirty(handle, inode);
+ ext4_update_inode_fsync_trans(handle, inode, 1);
ret2 = ext4_journal_stop(handle);
if (ret2)
break;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 530e790f22e0..fbc4f31c87cc 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4194,8 +4194,11 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_DIRSYNC;
if (test_opt(inode->i_sb, DAX))
new_fl |= S_DAX;
+ if (flags & EXT4_ENCRYPT_FL)
+ new_fl |= S_ENCRYPTED;
inode_set_flags(inode, new_fl,
- S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
+ S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX|
+ S_ENCRYPTED);
}
/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 6445d84266fa..4c36dca486cc 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1403,6 +1403,10 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
"falling back\n"));
}
nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
+ if (!nblocks) {
+ ret = NULL;
+ goto cleanup_and_exit;
+ }
start = EXT4_I(dir)->i_dir_start_lookup;
if (start >= nblocks)
start = 0;
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 112f8e04c549..3f52efa0f94f 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -253,6 +253,9 @@ static int __f2fs_set_acl(struct inode *inode, int type,
int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
+ return -EIO;
+
return __f2fs_set_acl(inode, type, acl, NULL);
}
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index e86f67ac96c6..2eb778174a9b 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -29,7 +29,6 @@ struct kmem_cache *inode_entry_slab;
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
set_ckpt_flags(sbi, CP_ERROR_FLAG);
- sbi->sb->s_flags |= MS_RDONLY;
if (!end_io)
f2fs_flush_merged_writes(sbi);
}
@@ -402,24 +401,23 @@ const struct address_space_operations f2fs_meta_aops = {
#endif
};
-static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
+static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino,
+ unsigned int devidx, int type)
{
struct inode_management *im = &sbi->im[type];
struct ino_entry *e, *tmp;
tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);
-retry:
+
radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
spin_lock(&im->ino_lock);
e = radix_tree_lookup(&im->ino_root, ino);
if (!e) {
e = tmp;
- if (radix_tree_insert(&im->ino_root, ino, e)) {
- spin_unlock(&im->ino_lock);
- radix_tree_preload_end();
- goto retry;
- }
+ if (unlikely(radix_tree_insert(&im->ino_root, ino, e)))
+ f2fs_bug_on(sbi, 1);
+
memset(e, 0, sizeof(struct ino_entry));
e->ino = ino;
@@ -427,6 +425,10 @@ retry:
if (type != ORPHAN_INO)
im->ino_num++;
}
+
+ if (type == FLUSH_INO)
+ f2fs_set_bit(devidx, (char *)&e->dirty_device);
+
spin_unlock(&im->ino_lock);
radix_tree_preload_end();
@@ -455,7 +457,7 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
/* add new dirty ino entry into list */
- __add_ino_entry(sbi, ino, type);
+ __add_ino_entry(sbi, ino, 0, type);
}
void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
@@ -481,7 +483,7 @@ void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
struct ino_entry *e, *tmp;
int i;
- for (i = all ? ORPHAN_INO: APPEND_INO; i <= UPDATE_INO; i++) {
+ for (i = all ? ORPHAN_INO : APPEND_INO; i < MAX_INO_ENTRY; i++) {
struct inode_management *im = &sbi->im[i];
spin_lock(&im->ino_lock);
@@ -495,6 +497,27 @@ void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
}
}
+void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
+ unsigned int devidx, int type)
+{
+ __add_ino_entry(sbi, ino, devidx, type);
+}
+
+bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
+ unsigned int devidx, int type)
+{
+ struct inode_management *im = &sbi->im[type];
+ struct ino_entry *e;
+ bool is_dirty = false;
+
+ spin_lock(&im->ino_lock);
+ e = radix_tree_lookup(&im->ino_root, ino);
+ if (e && f2fs_test_bit(devidx, (char *)&e->dirty_device))
+ is_dirty = true;
+ spin_unlock(&im->ino_lock);
+ return is_dirty;
+}
+
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
struct inode_management *im = &sbi->im[ORPHAN_INO];
@@ -531,7 +554,7 @@ void release_orphan_inode(struct f2fs_sb_info *sbi)
void add_orphan_inode(struct inode *inode)
{
/* add new orphan ino entry into list */
- __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO);
+ __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, 0, ORPHAN_INO);
update_inode_page(inode);
}
@@ -555,7 +578,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
return err;
}
- __add_ino_entry(sbi, ino, ORPHAN_INO);
+ __add_ino_entry(sbi, ino, 0, ORPHAN_INO);
inode = f2fs_iget_retry(sbi->sb, ino);
if (IS_ERR(inode)) {
@@ -591,6 +614,9 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
block_t start_blk, orphan_blocks, i, j;
unsigned int s_flags = sbi->sb->s_flags;
int err = 0;
+#ifdef CONFIG_QUOTA
+ int quota_enabled;
+#endif
if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
return 0;
@@ -603,8 +629,9 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
#ifdef CONFIG_QUOTA
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= MS_ACTIVE;
+
/* Turn on quotas so that they are updated correctly */
- f2fs_enable_quota_files(sbi);
+ quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY);
#endif
start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
@@ -632,7 +659,8 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
out:
#ifdef CONFIG_QUOTA
/* Turn quotas off */
- f2fs_quota_off_umount(sbi->sb);
+ if (quota_enabled)
+ f2fs_quota_off_umount(sbi->sb);
#endif
sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
@@ -987,7 +1015,7 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
update_inode_page(inode);
iput(inode);
}
- };
+ }
return 0;
}
@@ -1147,6 +1175,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct super_block *sb = sbi->sb;
struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
u64 kbytes_written;
+ int err;
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META)) {
@@ -1240,6 +1269,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
+ /* flush all device cache */
+ err = f2fs_flush_device_cache(sbi);
+ if (err)
+ return err;
+
/* write out checkpoint buffer at block 0 */
update_meta_page(sbi, ckpt, start_blk++);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index c8583d7a1845..cdccc429325b 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -172,7 +172,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
{
struct bio *bio;
- bio = f2fs_bio_alloc(npages);
+ bio = f2fs_bio_alloc(sbi, npages, true);
f2fs_target_device(sbi, blk_addr, bio);
bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
@@ -417,8 +417,8 @@ next:
bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
- /* set submitted = 1 as a return value */
- fio->submitted = 1;
+ /* set submitted = true as a return value */
+ fio->submitted = true;
inc_page_count(sbi, WB_DATA_TYPE(bio_page));
@@ -472,7 +472,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
f2fs_wait_on_block_writeback(sbi, blkaddr);
}
- bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES));
+ bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
if (!bio) {
if (ctx)
fscrypt_release_ctx(ctx);
@@ -832,6 +832,13 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
struct f2fs_map_blocks map;
int err = 0;
+ /* convert inline data for Direct I/O*/
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+ }
+
if (is_inode_flag_set(inode, FI_NO_PREALLOC))
return 0;
@@ -844,15 +851,11 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
map.m_next_pgofs = NULL;
- if (iocb->ki_flags & IOCB_DIRECT) {
- err = f2fs_convert_inline_inode(inode);
- if (err)
- return err;
+ if (iocb->ki_flags & IOCB_DIRECT)
return f2fs_map_blocks(inode, &map, 1,
__force_buffered_io(inode, WRITE) ?
F2FS_GET_BLOCK_PRE_AIO :
F2FS_GET_BLOCK_PRE_DIO);
- }
if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
err = f2fs_convert_inline_inode(inode);
if (err)
@@ -1332,7 +1335,7 @@ static int f2fs_read_data_pages(struct file *file,
struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
- struct inode *inode = file->f_mapping->host;
+ struct inode *inode = mapping->host;
struct page *page = list_last_entry(pages, struct page, lru);
trace_f2fs_readpages(inode, page, nr_pages);
@@ -1493,6 +1496,7 @@ static int __write_data_page(struct page *page, bool *submitted,
int err = 0;
struct f2fs_io_info fio = {
.sbi = sbi,
+ .ino = inode->i_ino,
.type = DATA,
.op = REQ_OP_WRITE,
.op_flags = wbc_to_write_flags(wbc),
@@ -1564,8 +1568,11 @@ write:
err = do_write_data_page(&fio);
}
}
+
+ down_write(&F2FS_I(inode)->i_sem);
if (F2FS_I(inode)->last_disk_size < psize)
F2FS_I(inode)->last_disk_size = psize;
+ up_write(&F2FS_I(inode)->i_sem);
done:
if (err && err != -ENOENT)
@@ -1945,6 +1952,12 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
}
trace_f2fs_write_begin(inode, pos, len, flags);
+ if (f2fs_is_atomic_file(inode) &&
+ !available_free_memory(sbi, INMEM_PAGES)) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
/*
* We should check this at this moment to avoid deadlock on inode page
* and #0 page. The locking rule for inline_data conversion should be:
@@ -1960,7 +1973,8 @@ repeat:
* Do not use grab_cache_page_write_begin() to avoid deadlock due to
* wait_for_stable_page. Will wait that below with our IO control.
*/
- page = grab_cache_page(mapping, index);
+ page = f2fs_pagecache_get_page(mapping, index,
+ FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
if (!page) {
err = -ENOMEM;
goto fail;
@@ -2021,6 +2035,8 @@ repeat:
fail:
f2fs_put_page(page, 1);
f2fs_write_failed(mapping, pos + len);
+ if (f2fs_is_atomic_file(inode))
+ drop_inmem_pages_all(sbi);
return err;
}
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 87f449845f5f..ecada8425268 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -45,9 +45,18 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA);
+ si->ndirty_qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
+
+ si->nquota_files = 0;
+ if (f2fs_sb_has_quota_ino(sbi->sb)) {
+ for (i = 0; i < MAXQUOTAS; i++) {
+ if (f2fs_qf_ino(sbi->sb, i))
+ si->nquota_files++;
+ }
+ }
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
si->aw_cnt = atomic_read(&sbi->aw_cnt);
@@ -61,6 +70,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
atomic_read(&SM_I(sbi)->fcc_info->issued_flush);
si->nr_flushing =
atomic_read(&SM_I(sbi)->fcc_info->issing_flush);
+ si->flush_list_empty =
+ llist_empty(&SM_I(sbi)->fcc_info->issue_list);
}
if (SM_I(sbi) && SM_I(sbi)->dcc_info) {
si->nr_discarded =
@@ -96,9 +107,9 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
si->sits = MAIN_SEGS(sbi);
si->dirty_sits = SIT_I(sbi)->dirty_sentries;
- si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST];
+ si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID];
si->avail_nids = NM_I(sbi)->available_nids;
- si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST];
+ si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID];
si->bg_gc = sbi->bg_gc;
si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
@@ -231,14 +242,14 @@ get_cache:
}
/* free nids */
- si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] +
- NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]) *
+ si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID] +
+ NM_I(sbi)->nid_cnt[PREALLOC_NID]) *
sizeof(struct free_nid);
si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
sizeof(struct nat_entry_set);
si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
- for (i = 0; i <= ORPHAN_INO; i++)
+ for (i = 0; i < MAX_INO_ENTRY; i++)
si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
si->cache_mem += atomic_read(&sbi->total_ext_tree) *
sizeof(struct extent_tree);
@@ -262,9 +273,10 @@ static int stat_show(struct seq_file *s, void *v)
list_for_each_entry(si, &f2fs_stat_list, stat_list) {
update_general_status(si->sbi);
- seq_printf(s, "\n=====[ partition info(%pg). #%d, %s]=====\n",
+ seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n",
si->sbi->sb->s_bdev, i++,
- f2fs_readonly(si->sbi->sb) ? "RO": "RW");
+ f2fs_readonly(si->sbi->sb) ? "RO": "RW",
+ f2fs_cp_error(si->sbi) ? "Error": "Good");
seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
si->sit_area_segs, si->nat_area_segs);
seq_printf(s, "[SSA: %d] [MAIN: %d",
@@ -349,10 +361,11 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
- seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d), "
+ seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), "
"Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n",
si->nr_wb_cp_data, si->nr_wb_data,
si->nr_flushing, si->nr_flushed,
+ si->flush_list_empty,
si->nr_discarding, si->nr_discarded,
si->nr_discard_cmd, si->undiscard_blks);
seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), "
@@ -365,6 +378,8 @@ static int stat_show(struct seq_file *s, void *v)
si->ndirty_dent, si->ndirty_dirs, si->ndirty_all);
seq_printf(s, " - datas: %4d in files:%4d\n",
si->ndirty_data, si->ndirty_files);
+ seq_printf(s, " - quota datas: %4d in quota files:%4d\n",
+ si->ndirty_qdata, si->nquota_files);
seq_printf(s, " - meta: %4d in %4d\n",
si->ndirty_meta, si->meta_pages);
seq_printf(s, " - imeta: %4d\n",
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 4f2a8fedb313..1955707b138b 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -10,10 +10,12 @@
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
+#include <linux/sched.h>
#include "f2fs.h"
#include "node.h"
#include "acl.h"
#include "xattr.h"
+#include <trace/events/f2fs.h>
static unsigned long dir_blocks(struct inode *inode)
{
@@ -847,6 +849,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
struct f2fs_dentry_block *dentry_blk = NULL;
struct page *dentry_page = NULL;
struct file_ra_state *ra = &file->f_ra;
+ loff_t start_pos = ctx->pos;
unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK);
struct f2fs_dentry_ptr d;
struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
@@ -855,24 +858,32 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
if (f2fs_encrypted_inode(inode)) {
err = fscrypt_get_encryption_info(inode);
if (err && err != -ENOKEY)
- return err;
+ goto out;
err = fscrypt_fname_alloc_buffer(inode, F2FS_NAME_LEN, &fstr);
if (err < 0)
- return err;
+ goto out;
}
if (f2fs_has_inline_dentry(inode)) {
err = f2fs_read_inline_dir(file, ctx, &fstr);
- goto out;
+ goto out_free;
}
- /* readahead for multi pages of dir */
- if (npages - n > 1 && !ra_has_index(ra, n))
- page_cache_sync_readahead(inode->i_mapping, ra, file, n,
+ for (; n < npages; n++, ctx->pos = n * NR_DENTRY_IN_BLOCK) {
+
+ /* allow readdir() to be interrupted */
+ if (fatal_signal_pending(current)) {
+ err = -ERESTARTSYS;
+ goto out_free;
+ }
+ cond_resched();
+
+ /* readahead for multi pages of dir */
+ if (npages - n > 1 && !ra_has_index(ra, n))
+ page_cache_sync_readahead(inode->i_mapping, ra, file, n,
min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
- for (; n < npages; n++) {
dentry_page = get_lock_data_page(inode, n, false);
if (IS_ERR(dentry_page)) {
err = PTR_ERR(dentry_page);
@@ -880,7 +891,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
err = 0;
continue;
} else {
- goto out;
+ goto out_free;
}
}
@@ -896,12 +907,13 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
break;
}
- ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK;
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
}
-out:
+out_free:
fscrypt_fname_free_buffer(&fstr);
+out:
+ trace_f2fs_readdir(inode, start_pos, ctx->pos, err);
return err < 0 ? err : 0;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c1a0aef8efc6..983f4a7b505d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -23,14 +23,12 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/quotaops.h>
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
-#include <linux/fscrypt_supp.h>
-#else
-#include <linux/fscrypt_notsupp.h>
-#endif
#include <crypto/hash.h>
#include <linux/writeback.h>
+#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_F2FS_FS_ENCRYPTION)
+#include <linux/fscrypt.h>
+
#ifdef CONFIG_F2FS_CHECK_FS
#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
#else
@@ -47,6 +45,8 @@
enum {
FAULT_KMALLOC,
FAULT_PAGE_ALLOC,
+ FAULT_PAGE_GET,
+ FAULT_ALLOC_BIO,
FAULT_ALLOC_NID,
FAULT_ORPHAN,
FAULT_BLOCK,
@@ -94,6 +94,7 @@ extern char *fault_name[FAULT_MAX];
#define F2FS_MOUNT_GRPQUOTA 0x00100000
#define F2FS_MOUNT_PRJQUOTA 0x00200000
#define F2FS_MOUNT_QUOTA 0x00400000
+#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000
#define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -119,6 +120,8 @@ struct f2fs_mount_info {
#define F2FS_FEATURE_EXTRA_ATTR 0x0008
#define F2FS_FEATURE_PRJQUOTA 0x0010
#define F2FS_FEATURE_INODE_CHKSUM 0x0020
+#define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x0040
+#define F2FS_FEATURE_QUOTA_INO 0x0080
#define F2FS_HAS_FEATURE(sb, mask) \
((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
@@ -214,7 +217,7 @@ enum {
#define BATCHED_TRIM_BLOCKS(sbi) \
(BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
-#define DISCARD_ISSUE_RATE 8
+#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */
#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */
#define DEF_CP_INTERVAL 60 /* 60 secs */
@@ -225,7 +228,6 @@ struct cp_control {
__u64 trim_start;
__u64 trim_end;
__u64 trim_minlen;
- __u64 trimmed;
};
/*
@@ -244,12 +246,14 @@ enum {
ORPHAN_INO, /* for orphan ino list */
APPEND_INO, /* for append ino list */
UPDATE_INO, /* for update ino list */
+ FLUSH_INO, /* for multiple device flushing */
MAX_INO_ENTRY, /* max. list */
};
struct ino_entry {
- struct list_head list; /* list head */
- nid_t ino; /* inode number */
+ struct list_head list; /* list head */
+ nid_t ino; /* inode number */
+ unsigned int dirty_device; /* dirty device bitmap */
};
/* for the list of inodes to be GCed */
@@ -273,10 +277,6 @@ struct discard_entry {
#define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \
(MAX_PLIST_NUM - 1) : (blk_num - 1))
-#define P_ACTIVE 0x01
-#define P_TRIM 0x02
-#define plist_issue(tag) (((tag) & P_ACTIVE) || ((tag) & P_TRIM))
-
enum {
D_PREP,
D_SUBMIT,
@@ -308,12 +308,32 @@ struct discard_cmd {
int error; /* bio error */
};
+enum {
+ DPOLICY_BG,
+ DPOLICY_FORCE,
+ DPOLICY_FSTRIM,
+ DPOLICY_UMOUNT,
+ MAX_DPOLICY,
+};
+
+struct discard_policy {
+ int type; /* type of discard */
+ unsigned int min_interval; /* used for candidates exist */
+ unsigned int max_interval; /* used for candidates not exist */
+ unsigned int max_requests; /* # of discards issued per round */
+ unsigned int io_aware_gran; /* minimum granularity discard not be aware of I/O */
+ bool io_aware; /* issue discard in idle time */
+ bool sync; /* submit discard with REQ_SYNC flag */
+ unsigned int granularity; /* discard granularity */
+};
+
struct discard_cmd_control {
struct task_struct *f2fs_issue_discard; /* discard thread */
struct list_head entry_list; /* 4KB discard entry list */
struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */
unsigned char pend_list_tag[MAX_PLIST_NUM];/* tag for pending entries */
struct list_head wait_list; /* store on-flushing entries */
+ struct list_head fstrim_list; /* in-flight discard from fstrim */
wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */
unsigned int discard_wake; /* to wake up discard thread */
struct mutex cmd_lock;
@@ -443,11 +463,14 @@ struct f2fs_flush_device {
/* for inline stuff */
#define DEF_INLINE_RESERVED_SIZE 1
+#define DEF_MIN_INLINE_SIZE 1
static inline int get_extra_isize(struct inode *inode);
-#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \
- (CUR_ADDRS_PER_INODE(inode) - \
- DEF_INLINE_RESERVED_SIZE - \
- F2FS_INLINE_XATTR_ADDRS))
+static inline int get_inline_xattr_addrs(struct inode *inode);
+#define F2FS_INLINE_XATTR_ADDRS(inode) get_inline_xattr_addrs(inode)
+#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \
+ (CUR_ADDRS_PER_INODE(inode) - \
+ F2FS_INLINE_XATTR_ADDRS(inode) - \
+ DEF_INLINE_RESERVED_SIZE))
/* for inline dir */
#define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \
@@ -647,6 +670,7 @@ struct f2fs_inode_info {
#endif
struct list_head dirty_list; /* dirty list for dirs and files */
struct list_head gdirty_list; /* linked in global dirty list */
+ struct list_head inmem_ilist; /* list for inmem inodes */
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
struct task_struct *inmem_task; /* store inmemory task */
struct mutex inmem_lock; /* lock for inmemory pages */
@@ -657,6 +681,7 @@ struct f2fs_inode_info {
int i_extra_isize; /* size of extra space located in i_addr */
kprojid_t i_projid; /* id for project quota */
+ int i_inline_xattr_size; /* inline xattr size */
};
static inline void get_extent_info(struct extent_info *ext,
@@ -730,10 +755,13 @@ static inline void __try_update_largest_extent(struct inode *inode,
}
}
-enum nid_list {
- FREE_NID_LIST,
- ALLOC_NID_LIST,
- MAX_NID_LIST,
+/*
+ * For free nid management
+ */
+enum nid_state {
+ FREE_NID, /* newly added to free nid list */
+ PREALLOC_NID, /* it is preallocated */
+ MAX_NID_STATE,
};
struct f2fs_nm_info {
@@ -756,8 +784,8 @@ struct f2fs_nm_info {
/* free node ids management */
struct radix_tree_root free_nid_root;/* root of the free_nid cache */
- struct list_head nid_list[MAX_NID_LIST];/* lists for free nids */
- unsigned int nid_cnt[MAX_NID_LIST]; /* the number of free node id */
+ struct list_head free_nid_list; /* list for free nids excluding preallocated nids */
+ unsigned int nid_cnt[MAX_NID_STATE]; /* the number of free node id */
spinlock_t nid_list_lock; /* protect nid lists ops */
struct mutex build_lock; /* lock for build free nids */
unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
@@ -835,6 +863,7 @@ enum {
struct flush_cmd {
struct completion wait;
struct llist_node llnode;
+ nid_t ino;
int ret;
};
@@ -853,6 +882,8 @@ struct f2fs_sm_info {
struct dirty_seglist_info *dirty_info; /* dirty segment information */
struct curseg_info *curseg_array; /* active segment information */
+ struct rw_semaphore curseg_lock; /* for preventing curseg change */
+
block_t seg0_blkaddr; /* block address of 0'th segment */
block_t main_blkaddr; /* start block address of main area */
block_t ssa_blkaddr; /* start block address of SSA area */
@@ -874,6 +905,7 @@ struct f2fs_sm_info {
unsigned int min_ipu_util; /* in-place-update threshold */
unsigned int min_fsync_blocks; /* threshold for fsync */
unsigned int min_hot_blocks; /* threshold for hot block allocation */
+ unsigned int min_ssr_sections; /* threshold to trigger SSR allocation */
/* for flush command control */
struct flush_cmd_control *fcc_info;
@@ -895,6 +927,7 @@ struct f2fs_sm_info {
enum count_type {
F2FS_DIRTY_DENTS,
F2FS_DIRTY_DATA,
+ F2FS_DIRTY_QDATA,
F2FS_DIRTY_NODES,
F2FS_DIRTY_META,
F2FS_INMEM_PAGES,
@@ -943,6 +976,18 @@ enum need_lock_type {
LOCK_RETRY,
};
+enum cp_reason_type {
+ CP_NO_NEEDED,
+ CP_NON_REGULAR,
+ CP_HARDLINK,
+ CP_SB_NEED_CP,
+ CP_WRONG_PINO,
+ CP_NO_SPC_ROLL,
+ CP_NODE_NEED_CP,
+ CP_FASTBOOT_MODE,
+ CP_SPEC_LOG_NUM,
+};
+
enum iostat_type {
APP_DIRECT_IO, /* app direct IOs */
APP_BUFFERED_IO, /* app buffered IOs */
@@ -962,6 +1007,7 @@ enum iostat_type {
struct f2fs_io_info {
struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */
+ nid_t ino; /* inode number */
enum page_type type; /* contains DATA/NODE/META/META_FLUSH */
enum temp_type temp; /* contains HOT/WARM/COLD */
int op; /* contains REQ_OP_ */
@@ -1006,6 +1052,7 @@ enum inode_type {
DIR_INODE, /* for dirty dir inode */
FILE_INODE, /* for dirty regular/symlink inode */
DIRTY_META, /* for all dirtied inode metadata */
+ ATOMIC_FILE, /* for all atomic files */
NR_INODE_TYPE,
};
@@ -1108,12 +1155,15 @@ struct f2fs_sb_info {
loff_t max_file_blocks; /* max block index of file */
int active_logs; /* # of active logs */
int dir_level; /* directory level */
+ int inline_xattr_size; /* inline xattr size */
+ unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */
block_t user_block_count; /* # of user blocks */
block_t total_valid_block_count; /* # of valid blocks */
block_t discard_blks; /* discard command candidats */
block_t last_valid_block_count; /* for recovery */
block_t reserved_blocks; /* configurable reserved blocks */
+ block_t current_reserved_blocks; /* current reserved blocks */
u32 s_next_generation; /* for NFS support */
@@ -1179,6 +1229,8 @@ struct f2fs_sb_info {
struct list_head s_list;
int s_ndevs; /* number of devices */
struct f2fs_dev_info *devs; /* for device list */
+ unsigned int dirty_device; /* for checkpoint data flush */
+ spinlock_t dev_lock; /* protect dirty_device */
struct mutex umount_mutex;
unsigned int shrinker_run_no;
@@ -1242,8 +1294,7 @@ static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
{
- struct timespec ts = {sbi->interval_time[type], 0};
- unsigned long interval = timespec_to_jiffies(&ts);
+ unsigned long interval = sbi->interval_time[type] * HZ;
return time_after(jiffies, sbi->last_time[type] + interval);
}
@@ -1410,6 +1461,13 @@ static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp)
return le64_to_cpu(cp->checkpoint_ver);
}
+static inline unsigned long f2fs_qf_ino(struct super_block *sb, int type)
+{
+ if (type < F2FS_MAX_QUOTAS)
+ return le32_to_cpu(F2FS_SB(sb)->raw_super->qf_ino[type]);
+ return 0;
+}
+
static inline __u64 cur_cp_crc(struct f2fs_checkpoint *cp)
{
size_t crc_offset = le32_to_cpu(cp->checksum_offset);
@@ -1588,7 +1646,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
spin_lock(&sbi->stat_lock);
sbi->total_valid_block_count += (block_t)(*count);
- avail_user_block_count = sbi->user_block_count - sbi->reserved_blocks;
+ avail_user_block_count = sbi->user_block_count -
+ sbi->current_reserved_blocks;
if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
diff = sbi->total_valid_block_count - avail_user_block_count;
*count -= diff;
@@ -1622,6 +1681,10 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
f2fs_bug_on(sbi, inode->i_blocks < sectors);
sbi->total_valid_block_count -= (block_t)count;
+ if (sbi->reserved_blocks &&
+ sbi->current_reserved_blocks < sbi->reserved_blocks)
+ sbi->current_reserved_blocks = min(sbi->reserved_blocks,
+ sbi->current_reserved_blocks + count);
spin_unlock(&sbi->stat_lock);
f2fs_i_blocks_write(inode, count, false, true);
}
@@ -1642,6 +1705,8 @@ static inline void inode_inc_dirty_pages(struct inode *inode)
atomic_inc(&F2FS_I(inode)->dirty_pages);
inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
+ if (IS_NOQUOTA(inode))
+ inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA);
}
static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
@@ -1658,6 +1723,8 @@ static inline void inode_dec_dirty_pages(struct inode *inode)
atomic_dec(&F2FS_I(inode)->dirty_pages);
dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
+ if (IS_NOQUOTA(inode))
+ dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA);
}
static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type)
@@ -1765,10 +1832,17 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
return ret;
}
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ if (time_to_inject(sbi, FAULT_BLOCK)) {
+ f2fs_show_injection_info(FAULT_BLOCK);
+ goto enospc;
+ }
+#endif
+
spin_lock(&sbi->stat_lock);
valid_block_count = sbi->total_valid_block_count + 1;
- if (unlikely(valid_block_count + sbi->reserved_blocks >
+ if (unlikely(valid_block_count + sbi->current_reserved_blocks >
sbi->user_block_count)) {
spin_unlock(&sbi->stat_lock);
goto enospc;
@@ -1811,6 +1885,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
sbi->total_valid_node_count--;
sbi->total_valid_block_count--;
+ if (sbi->reserved_blocks &&
+ sbi->current_reserved_blocks < sbi->reserved_blocks)
+ sbi->current_reserved_blocks++;
spin_unlock(&sbi->stat_lock);
@@ -1857,6 +1934,19 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
}
+static inline struct page *f2fs_pagecache_get_page(
+ struct address_space *mapping, pgoff_t index,
+ int fgp_flags, gfp_t gfp_mask)
+{
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) {
+ f2fs_show_injection_info(FAULT_PAGE_GET);
+ return NULL;
+ }
+#endif
+ return pagecache_get_page(mapping, index, fgp_flags, gfp_mask);
+}
+
static inline void f2fs_copy_page(struct page *src, struct page *dst)
{
char *src_kaddr = kmap(src);
@@ -1906,15 +1996,25 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
return entry;
}
-static inline struct bio *f2fs_bio_alloc(int npages)
+static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi,
+ int npages, bool no_fail)
{
struct bio *bio;
- /* No failure on bio allocation */
- bio = bio_alloc(GFP_NOIO, npages);
- if (!bio)
- bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages);
- return bio;
+ if (no_fail) {
+ /* No failure on bio allocation */
+ bio = bio_alloc(GFP_NOIO, npages);
+ if (!bio)
+ bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages);
+ return bio;
+ }
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ if (time_to_inject(sbi, FAULT_ALLOC_BIO)) {
+ f2fs_show_injection_info(FAULT_ALLOC_BIO);
+ return NULL;
+ }
+#endif
+ return bio_alloc(GFP_KERNEL, npages);
}
static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
@@ -2224,25 +2324,20 @@ static inline int f2fs_has_inline_xattr(struct inode *inode)
static inline unsigned int addrs_per_inode(struct inode *inode)
{
- if (f2fs_has_inline_xattr(inode))
- return CUR_ADDRS_PER_INODE(inode) - F2FS_INLINE_XATTR_ADDRS;
- return CUR_ADDRS_PER_INODE(inode);
+ return CUR_ADDRS_PER_INODE(inode) - F2FS_INLINE_XATTR_ADDRS(inode);
}
-static inline void *inline_xattr_addr(struct page *page)
+static inline void *inline_xattr_addr(struct inode *inode, struct page *page)
{
struct f2fs_inode *ri = F2FS_INODE(page);
return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
- F2FS_INLINE_XATTR_ADDRS]);
+ F2FS_INLINE_XATTR_ADDRS(inode)]);
}
static inline int inline_xattr_size(struct inode *inode)
{
- if (f2fs_has_inline_xattr(inode))
- return F2FS_INLINE_XATTR_ADDRS << 2;
- else
- return 0;
+ return get_inline_xattr_addrs(inode) * sizeof(__le32);
}
static inline int f2fs_has_inline_data(struct inode *inode)
@@ -2323,9 +2418,10 @@ static inline void clear_file(struct inode *inode, int type)
static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
{
+ bool ret;
+
if (dsync) {
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- bool ret;
spin_lock(&sbi->inode_lock[DIRTY_META]);
ret = list_empty(&F2FS_I(inode)->gdirty_list);
@@ -2336,9 +2432,15 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
file_keep_isize(inode) ||
i_size_read(inode) & PAGE_MASK)
return false;
- return F2FS_I(inode)->last_disk_size == i_size_read(inode);
+
+ down_read(&F2FS_I(inode)->i_sem);
+ ret = F2FS_I(inode)->last_disk_size == i_size_read(inode);
+ up_read(&F2FS_I(inode)->i_sem);
+
+ return ret;
}
+#define sb_rdonly f2fs_readonly
static inline int f2fs_readonly(struct super_block *sb)
{
return sb->s_flags & MS_RDONLY;
@@ -2406,6 +2508,12 @@ static inline int get_extra_isize(struct inode *inode)
return F2FS_I(inode)->i_extra_isize / sizeof(__le32);
}
+static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb);
+static inline int get_inline_xattr_addrs(struct inode *inode)
+{
+ return F2FS_I(inode)->i_inline_xattr_size;
+}
+
#define get_inode_mode(i) \
((is_inode_flag_set(i, FI_ACL_MODE)) ? \
(F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
@@ -2534,7 +2642,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
*/
int f2fs_inode_dirtied(struct inode *inode, bool sync);
void f2fs_inode_synced(struct inode *inode);
-void f2fs_enable_quota_files(struct f2fs_sb_info *sbi);
+int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
void f2fs_quota_off_umount(struct super_block *sb);
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
int f2fs_sync_fs(struct super_block *sb, int sync);
@@ -2562,7 +2670,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni);
pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
int truncate_inode_blocks(struct inode *inode, pgoff_t from);
-int truncate_xattr_node(struct inode *inode, struct page *page);
+int truncate_xattr_node(struct inode *inode);
int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino);
int remove_inode_page(struct inode *inode);
struct page *new_inode_page(struct inode *inode);
@@ -2597,19 +2705,22 @@ void destroy_node_manager_caches(void);
*/
bool need_SSR(struct f2fs_sb_info *sbi);
void register_inmem_page(struct inode *inode, struct page *page);
+void drop_inmem_pages_all(struct f2fs_sb_info *sbi);
void drop_inmem_pages(struct inode *inode);
void drop_inmem_page(struct inode *inode, struct page *page);
int commit_inmem_pages(struct inode *inode);
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need);
void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi);
-int f2fs_issue_flush(struct f2fs_sb_info *sbi);
+int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino);
int create_flush_cmd_control(struct f2fs_sb_info *sbi);
+int f2fs_flush_device_cache(struct f2fs_sb_info *sbi);
void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
-void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new);
+void init_discard_policy(struct discard_policy *dpolicy, int discard_type,
+ unsigned int granularity);
void stop_discard_thread(struct f2fs_sb_info *sbi);
-void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount);
+bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc);
void release_discard_addrs(struct f2fs_sb_info *sbi);
int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
@@ -2664,6 +2775,10 @@ void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
void release_ino_entry(struct f2fs_sb_info *sbi, bool all);
bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode);
+void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
+ unsigned int devidx, int type);
+bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
+ unsigned int devidx, int type);
int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi);
int acquire_orphan_inode(struct f2fs_sb_info *sbi);
void release_orphan_inode(struct f2fs_sb_info *sbi);
@@ -2751,14 +2866,16 @@ struct f2fs_stat_info {
unsigned long long hit_largest, hit_cached, hit_rbtree;
unsigned long long hit_total, total_ext;
int ext_tree, zombie_tree, ext_node;
- int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta;
+ int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
+ int ndirty_data, ndirty_qdata;
int inmem_pages;
- unsigned int ndirty_dirs, ndirty_files, ndirty_all;
+ unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
int nats, dirty_nats, sits, dirty_sits;
int free_nids, avail_nids, alloc_nids;
int total_count, utilization;
int bg_gc, nr_wb_cp_data, nr_wb_data;
- int nr_flushing, nr_flushed, nr_discarding, nr_discarded;
+ int nr_flushing, nr_flushed, flush_list_empty;
+ int nr_discarding, nr_discarded;
int nr_discard_cmd;
unsigned int undiscard_blks;
int inline_xattr, inline_inode, inline_dir, append, update, orphans;
@@ -3033,6 +3150,7 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode)
{
#ifdef CONFIG_F2FS_FS_ENCRYPTION
file_set_encrypt(inode);
+ inode->i_flags |= S_ENCRYPTED;
#endif
}
@@ -3066,6 +3184,16 @@ static inline int f2fs_sb_has_inode_chksum(struct super_block *sb)
return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CHKSUM);
}
+static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb)
+{
+ return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_FLEXIBLE_INLINE_XATTR);
+}
+
+static inline int f2fs_sb_has_quota_ino(struct super_block *sb)
+{
+ return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_QUOTA_INO);
+}
+
#ifdef CONFIG_BLK_DEV_ZONED
static inline int get_blkz_type(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkaddr)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index a9e1655a6bf8..bfff53f658e1 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -56,6 +56,11 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
struct dnode_of_data dn;
int err;
+ if (unlikely(f2fs_cp_error(sbi))) {
+ err = -EIO;
+ goto err;
+ }
+
sb_start_pagefault(inode->i_sb);
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
@@ -117,6 +122,7 @@ out_sem:
out:
sb_end_pagefault(inode->i_sb);
f2fs_update_time(sbi, REQ_TIME);
+err:
return block_page_mkwrite_return(err);
}
@@ -141,27 +147,29 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
return 1;
}
-static inline bool need_do_checkpoint(struct inode *inode)
+static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- bool need_cp = false;
+ enum cp_reason_type cp_reason = CP_NO_NEEDED;
- if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
- need_cp = true;
+ if (!S_ISREG(inode->i_mode))
+ cp_reason = CP_NON_REGULAR;
+ else if (inode->i_nlink != 1)
+ cp_reason = CP_HARDLINK;
else if (is_sbi_flag_set(sbi, SBI_NEED_CP))
- need_cp = true;
+ cp_reason = CP_SB_NEED_CP;
else if (file_wrong_pino(inode))
- need_cp = true;
+ cp_reason = CP_WRONG_PINO;
else if (!space_for_roll_forward(sbi))
- need_cp = true;
+ cp_reason = CP_NO_SPC_ROLL;
else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
- need_cp = true;
+ cp_reason = CP_NODE_NEED_CP;
else if (test_opt(sbi, FASTBOOT))
- need_cp = true;
+ cp_reason = CP_FASTBOOT_MODE;
else if (sbi->active_logs == 2)
- need_cp = true;
+ cp_reason = CP_SPEC_LOG_NUM;
- return need_cp;
+ return cp_reason;
}
static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
@@ -196,7 +204,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
nid_t ino = inode->i_ino;
int ret = 0;
- bool need_cp = false;
+ enum cp_reason_type cp_reason = 0;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
@@ -215,7 +223,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
clear_inode_flag(inode, FI_NEED_IPU);
if (ret) {
- trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
+ trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
return ret;
}
@@ -246,10 +254,10 @@ go_write:
* sudden-power-off.
*/
down_read(&F2FS_I(inode)->i_sem);
- need_cp = need_do_checkpoint(inode);
+ cp_reason = need_do_checkpoint(inode);
up_read(&F2FS_I(inode)->i_sem);
- if (need_cp) {
+ if (cp_reason) {
/* all the dirty node pages should be flushed for POR */
ret = f2fs_sync_fs(inode->i_sb, 1);
@@ -297,19 +305,24 @@ sync_nodes:
remove_ino_entry(sbi, ino, APPEND_INO);
clear_inode_flag(inode, FI_APPEND_WRITE);
flush_out:
- remove_ino_entry(sbi, ino, UPDATE_INO);
- clear_inode_flag(inode, FI_UPDATE_WRITE);
if (!atomic)
- ret = f2fs_issue_flush(sbi);
+ ret = f2fs_issue_flush(sbi, inode->i_ino);
+ if (!ret) {
+ remove_ino_entry(sbi, ino, UPDATE_INO);
+ clear_inode_flag(inode, FI_UPDATE_WRITE);
+ remove_ino_entry(sbi, ino, FLUSH_INO);
+ }
f2fs_update_time(sbi, REQ_TIME);
out:
- trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
+ trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
f2fs_trace_ios(NULL, 1);
return ret;
}
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
+ return -EIO;
return f2fs_do_sync_file(file, start, end, datasync, false);
}
@@ -446,6 +459,9 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
struct inode *inode = file_inode(file);
int err;
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
+ return -EIO;
+
/* we don't need to use inline_data strictly */
err = f2fs_convert_inline_inode(inode);
if (err)
@@ -632,6 +648,9 @@ int f2fs_truncate(struct inode *inode)
{
int err;
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
+ return -EIO;
+
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))
return 0;
@@ -667,7 +686,8 @@ int f2fs_getattr(struct vfsmount *mnt,
generic_fillattr(inode, stat);
/* we need to show initial sectors used for inline_data/dentries */
- if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
+ if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
+ f2fs_has_inline_dentry(inode))
stat->blocks += (stat->size + 511) >> 9;
return 0;
@@ -709,6 +729,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
int err;
bool size_changed = false;
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
+ return -EIO;
+
err = inode_change_ok(inode, attr);
if (err)
return err;
@@ -761,6 +784,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
inode->i_mtime = inode->i_ctime = current_time(inode);
}
+ down_write(&F2FS_I(inode)->i_sem);
+ F2FS_I(inode)->last_disk_size = i_size_read(inode);
+ up_write(&F2FS_I(inode)->i_sem);
+
size_changed = true;
}
@@ -834,7 +861,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
err = get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
if (err) {
if (err == -ENOENT) {
- pg_start++;
+ pg_start = get_next_page_offset(&dn, pg_start);
continue;
}
return err;
@@ -1149,11 +1176,14 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
if (ret)
goto out;
+ /* avoid gc operation during block exchange */
+ down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+
truncate_pagecache(inode, offset);
ret = f2fs_do_collapse(inode, pg_start, pg_end);
if (ret)
- goto out;
+ goto out_unlock;
/* write out all moved pages, if possible */
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
@@ -1165,7 +1195,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
ret = truncate_blocks(inode, new_size, true);
if (!ret)
f2fs_i_size_write(inode, new_size);
-
+out_unlock:
+ up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
out:
up_write(&F2FS_I(inode)->i_mmap_sem);
return ret;
@@ -1348,6 +1379,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
if (ret)
goto out;
+ /* avoid gc operation during block exchange */
+ down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+
truncate_pagecache(inode, offset);
pg_start = offset >> PAGE_SHIFT;
@@ -1375,6 +1409,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
if (!ret)
f2fs_i_size_write(inode, new_size);
+
+ up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
out:
up_write(&F2FS_I(inode)->i_mmap_sem);
return ret;
@@ -1424,8 +1460,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end;
}
- if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size)
- f2fs_i_size_write(inode, new_size);
+ if (new_size > i_size_read(inode)) {
+ if (mode & FALLOC_FL_KEEP_SIZE)
+ file_set_keep_isize(inode);
+ else
+ f2fs_i_size_write(inode, new_size);
+ }
return err;
}
@@ -1436,6 +1476,9 @@ static long f2fs_fallocate(struct file *file, int mode,
struct inode *inode = file_inode(file);
long ret = 0;
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
+ return -EIO;
+
/* f2fs only support ->fallocate for regular file */
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@@ -1469,8 +1512,6 @@ static long f2fs_fallocate(struct file *file, int mode,
if (!ret) {
inode->i_mtime = inode->i_ctime = current_time(inode);
f2fs_mark_inode_dirty_sync(inode, false);
- if (mode & FALLOC_FL_KEEP_SIZE)
- file_set_keep_isize(inode);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
}
@@ -1864,6 +1905,9 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
+ if (!f2fs_sb_has_crypto(inode->i_sb))
+ return -EOPNOTSUPP;
+
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
@@ -1871,6 +1915,8 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
{
+ if (!f2fs_sb_has_crypto(file_inode(filp)->i_sb))
+ return -EOPNOTSUPP;
return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
}
@@ -2226,9 +2272,13 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
}
inode_lock(src);
+ down_write(&F2FS_I(src)->dio_rwsem[WRITE]);
if (src != dst) {
- if (!inode_trylock(dst)) {
- ret = -EBUSY;
+ ret = -EBUSY;
+ if (!inode_trylock(dst))
+ goto out;
+ if (!down_write_trylock(&F2FS_I(dst)->dio_rwsem[WRITE])) {
+ inode_unlock(dst);
goto out;
}
}
@@ -2288,9 +2338,12 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
}
f2fs_unlock_op(sbi);
out_unlock:
- if (src != dst)
+ if (src != dst) {
+ up_write(&F2FS_I(dst)->dio_rwsem[WRITE]);
inode_unlock(dst);
+ }
out:
+ up_write(&F2FS_I(src)->dio_rwsem[WRITE]);
inode_unlock(src);
return ret;
}
@@ -2412,6 +2465,9 @@ static int f2fs_ioc_get_features(struct file *filp, unsigned long arg)
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
+ return -EIO;
+
switch (cmd) {
case F2FS_IOC_GETFLAGS:
return f2fs_ioc_getflags(filp, arg);
@@ -2465,6 +2521,9 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct blk_plug plug;
ssize_t ret;
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
+ return -EIO;
+
inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret > 0) {
@@ -2475,6 +2534,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
err = f2fs_preallocate_blocks(iocb, from);
if (err) {
+ clear_inode_flag(inode, FI_NO_PREALLOC);
inode_unlock(inode);
return err;
}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index bd16e6631cf3..be9fd616736b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -267,16 +267,6 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
}
-static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi,
- unsigned int segno)
-{
- unsigned int valid_blocks =
- get_valid_blocks(sbi, segno, true);
-
- return IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
- valid_blocks * 2 : valid_blocks;
-}
-
static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
unsigned int segno, struct victim_sel_policy *p)
{
@@ -285,7 +275,7 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
/* alloc_mode == LFS */
if (p->gc_mode == GC_GREEDY)
- return get_greedy_cost(sbi, segno);
+ return get_valid_blocks(sbi, segno, true);
else
return get_cb_cost(sbi, segno);
}
@@ -466,10 +456,10 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
struct seg_entry *sentry;
int ret;
- mutex_lock(&sit_i->sentry_lock);
+ down_read(&sit_i->sentry_lock);
sentry = get_seg_entry(sbi, segno);
ret = f2fs_test_bit(offset, sentry->cur_valid_map);
- mutex_unlock(&sit_i->sentry_lock);
+ up_read(&sit_i->sentry_lock);
return ret;
}
@@ -608,6 +598,7 @@ static void move_data_block(struct inode *inode, block_t bidx,
{
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
+ .ino = inode->i_ino,
.type = DATA,
.temp = COLD,
.op = REQ_OP_READ,
@@ -659,8 +650,8 @@ static void move_data_block(struct inode *inode, block_t bidx,
allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
&sum, CURSEG_COLD_DATA, NULL, false);
- fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), newaddr,
- FGP_LOCK | FGP_CREAT, GFP_NOFS);
+ fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
+ newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
if (!fio.encrypted_page) {
err = -ENOMEM;
goto recover_block;
@@ -738,6 +729,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
} else {
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
+ .ino = inode->i_ino,
.type = DATA,
.temp = COLD,
.op = REQ_OP_WRITE,
@@ -840,10 +832,17 @@ next_step:
continue;
}
+ if (!down_write_trylock(
+ &F2FS_I(inode)->dio_rwsem[WRITE])) {
+ iput(inode);
+ continue;
+ }
+
start_bidx = start_bidx_of_node(nofs, inode);
data_page = get_read_data_page(inode,
start_bidx + ofs_in_node, REQ_RAHEAD,
true);
+ up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
if (IS_ERR(data_page)) {
iput(inode);
continue;
@@ -901,10 +900,10 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
struct sit_info *sit_i = SIT_I(sbi);
int ret;
- mutex_lock(&sit_i->sentry_lock);
+ down_write(&sit_i->sentry_lock);
ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
NO_CHECK_TYPE, LFS);
- mutex_unlock(&sit_i->sentry_lock);
+ up_write(&sit_i->sentry_lock);
return ret;
}
@@ -952,8 +951,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
/*
* this is to avoid deadlock:
* - lock_page(sum_page) - f2fs_replace_block
- * - check_valid_map() - mutex_lock(sentry_lock)
- * - mutex_lock(sentry_lock) - change_curseg()
+ * - check_valid_map() - down_write(sentry_lock)
+ * - down_read(sentry_lock) - change_curseg()
* - lock_page(sum_page)
*/
if (type == SUM_TYPE_NODE)
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index fbf22b0f667f..91d5d831be72 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -130,6 +130,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
{
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(dn->inode),
+ .ino = dn->inode->i_ino,
.type = DATA,
.op = REQ_OP_WRITE,
.op_flags = REQ_SYNC | REQ_NOIDLE | REQ_PRIO,
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 50c88e37ed66..b4c4f2b25304 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -43,8 +43,11 @@ void f2fs_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & FS_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
+ if (f2fs_encrypted_inode(inode))
+ new_fl |= S_ENCRYPTED;
inode_set_flags(inode, new_fl,
- S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+ S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|
+ S_ENCRYPTED);
}
static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -232,6 +235,23 @@ static int do_read_inode(struct inode *inode)
fi->i_extra_isize = f2fs_has_extra_attr(inode) ?
le16_to_cpu(ri->i_extra_isize) : 0;
+ if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
+ f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
+ fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size);
+ } else if (f2fs_has_inline_xattr(inode) ||
+ f2fs_has_inline_dentry(inode)) {
+ fi->i_inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
+ } else {
+
+ /*
+ * Previous inline data or directory always reserved 200 bytes
+ * in inode layout, even if inline_xattr is disabled. In order
+ * to keep inline_dentry's structure for backward compatibility,
+ * we get the space back only from inline_data.
+ */
+ fi->i_inline_xattr_size = 0;
+ }
+
/* check data exist */
if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
__recover_inline_status(inode, node_page);
@@ -384,6 +404,10 @@ int update_inode(struct inode *inode, struct page *node_page)
if (f2fs_has_extra_attr(inode)) {
ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize);
+ if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)->sb))
+ ri->i_inline_xattr_size =
+ cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size);
+
if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) &&
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
i_projid)) {
@@ -480,6 +504,7 @@ void f2fs_evict_inode(struct inode *inode)
remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
+ remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);
sb_start_intwrite(inode->i_sb);
set_inode_flag(inode, FI_NO_ALLOC);
@@ -519,8 +544,10 @@ no_delete:
stat_dec_inline_dir(inode);
stat_dec_inline_inode(inode);
- if (!is_set_ckpt_flags(sbi, CP_ERROR_FLAG))
+ if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)))
f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
+ else
+ f2fs_inode_synced(inode);
/* ino == 0, if f2fs_new_inode() was failed t*/
if (inode->i_ino)
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index d92b8e9064cb..cf8f4370d256 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -29,6 +29,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
nid_t ino;
struct inode *inode;
bool nid_free = false;
+ int xattr_size = 0;
int err;
inode = new_inode(dir->i_sb);
@@ -86,11 +87,23 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
if (test_opt(sbi, INLINE_XATTR))
set_inode_flag(inode, FI_INLINE_XATTR);
+
if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
set_inode_flag(inode, FI_INLINE_DATA);
if (f2fs_may_inline_dentry(inode))
set_inode_flag(inode, FI_INLINE_DENTRY);
+ if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
+ f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
+ if (f2fs_has_inline_xattr(inode))
+ xattr_size = sbi->inline_xattr_size;
+ /* Otherwise, will be 0 */
+ } else if (f2fs_has_inline_xattr(inode) ||
+ f2fs_has_inline_dentry(inode)) {
+ xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
+ }
+ F2FS_I(inode)->i_inline_xattr_size = xattr_size;
+
f2fs_init_extent_tree(inode, NULL);
stat_inc_inline_xattr(inode);
@@ -177,6 +190,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
nid_t ino = 0;
int err;
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
err = dquot_initialize(dir);
if (err)
return err;
@@ -221,6 +237,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
int err;
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
if (f2fs_encrypted_inode(dir) &&
!fscrypt_has_permitted_context(dir, inode))
return -EPERM;
@@ -331,12 +350,15 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
struct inode *inode = NULL;
struct f2fs_dir_entry *de;
struct page *page;
- nid_t ino;
+ struct dentry *new;
+ nid_t ino = -1;
int err = 0;
unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir));
+ trace_f2fs_lookup_start(dir, dentry, flags);
+
if (f2fs_encrypted_inode(dir)) {
- int res = fscrypt_get_encryption_info(dir);
+ err = fscrypt_get_encryption_info(dir);
/*
* DCACHE_ENCRYPTED_WITH_KEY is set if the dentry is
@@ -346,18 +368,22 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
if (fscrypt_has_encryption_key(dir))
fscrypt_set_encrypted_dentry(dentry);
fscrypt_set_d_op(dentry);
- if (res && res != -ENOKEY)
- return ERR_PTR(res);
+ if (err && err != -ENOKEY)
+ goto out;
}
- if (dentry->d_name.len > F2FS_NAME_LEN)
- return ERR_PTR(-ENAMETOOLONG);
+ if (dentry->d_name.len > F2FS_NAME_LEN) {
+ err = -ENAMETOOLONG;
+ goto out;
+ }
de = f2fs_find_entry(dir, &dentry->d_name, &page);
if (!de) {
- if (IS_ERR(page))
- return (struct dentry *)page;
- return d_splice_alias(inode, dentry);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto out;
+ }
+ goto out_splice;
}
ino = le32_to_cpu(de->ino);
@@ -365,19 +391,21 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
f2fs_put_page(page, 0);
inode = f2fs_iget(dir->i_sb, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto out;
+ }
if ((dir->i_ino == root_ino) && f2fs_has_inline_dots(dir)) {
err = __recover_dot_dentries(dir, root_ino);
if (err)
- goto err_out;
+ goto out_iput;
}
if (f2fs_has_inline_dots(inode)) {
err = __recover_dot_dentries(inode, dir->i_ino);
if (err)
- goto err_out;
+ goto out_iput;
}
if (f2fs_encrypted_inode(dir) &&
(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
@@ -386,12 +414,18 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
"Inconsistent encryption contexts: %lu/%lu",
dir->i_ino, inode->i_ino);
err = -EPERM;
- goto err_out;
+ goto out_iput;
}
- return d_splice_alias(inode, dentry);
-
-err_out:
+out_splice:
+ new = d_splice_alias(inode, dentry);
+ if (IS_ERR(new))
+ err = PTR_ERR(new);
+ trace_f2fs_lookup_end(dir, dentry, ino, err);
+ return new;
+out_iput:
iput(inode);
+out:
+ trace_f2fs_lookup_end(dir, dentry, ino, err);
return ERR_PTR(err);
}
@@ -405,9 +439,15 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
trace_f2fs_unlink_enter(dir, dentry);
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
err = dquot_initialize(dir);
if (err)
return err;
+ err = dquot_initialize(inode);
+ if (err)
+ return err;
de = f2fs_find_entry(dir, &dentry->d_name, &page);
if (!de) {
@@ -457,6 +497,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
struct fscrypt_symlink_data *sd = NULL;
int err;
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
if (f2fs_encrypted_inode(dir)) {
err = fscrypt_get_encryption_info(dir);
if (err)
@@ -563,6 +606,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct inode *inode;
int err;
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
err = dquot_initialize(dir);
if (err)
return err;
@@ -615,6 +661,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
struct inode *inode;
int err = 0;
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
err = dquot_initialize(dir);
if (err)
return err;
@@ -709,6 +758,9 @@ out:
static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
{
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(dir))))
+ return -EIO;
+
if (f2fs_encrypted_inode(dir)) {
int err = fscrypt_get_encryption_info(dir);
if (err)
@@ -720,6 +772,9 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
static int f2fs_create_whiteout(struct inode *dir, struct inode **whiteout)
{
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(dir))))
+ return -EIO;
+
return __f2fs_tmpfile(dir, NULL, S_IFCHR | WHITEOUT_MODE, whiteout);
}
@@ -739,6 +794,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
bool is_old_inline = f2fs_has_inline_dentry(old_dir);
int err = -ENOENT;
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
if ((f2fs_encrypted_inode(old_dir) &&
!fscrypt_has_encryption_key(old_dir)) ||
(f2fs_encrypted_inode(new_dir) &&
@@ -764,6 +822,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (err)
goto out;
+ if (new_inode) {
+ err = dquot_initialize(new_inode);
+ if (err)
+ goto out;
+ }
+
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry) {
if (IS_ERR(old_page))
@@ -932,6 +996,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
int old_nlink = 0, new_nlink = 0;
int err = -ENOENT;
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
if ((f2fs_encrypted_inode(old_dir) &&
!fscrypt_has_encryption_key(old_dir)) ||
(f2fs_encrypted_inode(new_dir) &&
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 32474db18ad9..964c99655942 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -46,7 +46,7 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
* give 25%, 25%, 50%, 50%, 50% memory for each components respectively
*/
if (type == FREE_NIDS) {
- mem_size = (nm_i->nid_cnt[FREE_NID_LIST] *
+ mem_size = (nm_i->nid_cnt[FREE_NID] *
sizeof(struct free_nid)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == NAT_ENTRIES) {
@@ -63,7 +63,7 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
} else if (type == INO_ENTRIES) {
int i;
- for (i = 0; i <= UPDATE_INO; i++)
+ for (i = 0; i < MAX_INO_ENTRY; i++)
mem_size += sbi->im[i].ino_num *
sizeof(struct ino_entry);
mem_size >>= PAGE_SHIFT;
@@ -74,6 +74,10 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
atomic_read(&sbi->total_ext_node) *
sizeof(struct extent_node)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
+ } else if (type == INMEM_PAGES) {
+ /* it allows 20% / total_ram for inmemory pages */
+ mem_size = get_pages(sbi, F2FS_INMEM_PAGES);
+ res = mem_size < (val.totalram / 5);
} else {
if (!sbi->sb->s_bdi->wb.dirty_exceeded)
return true;
@@ -134,6 +138,44 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
return dst_page;
}
+static struct nat_entry *__alloc_nat_entry(nid_t nid, bool no_fail)
+{
+ struct nat_entry *new;
+
+ if (no_fail)
+ new = f2fs_kmem_cache_alloc(nat_entry_slab,
+ GFP_NOFS | __GFP_ZERO);
+ else
+ new = kmem_cache_alloc(nat_entry_slab,
+ GFP_NOFS | __GFP_ZERO);
+ if (new) {
+ nat_set_nid(new, nid);
+ nat_reset_flag(new);
+ }
+ return new;
+}
+
+static void __free_nat_entry(struct nat_entry *e)
+{
+ kmem_cache_free(nat_entry_slab, e);
+}
+
+/* must be locked by nat_tree_lock */
+static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
+ struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail)
+{
+ if (no_fail)
+ f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne);
+ else if (radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne))
+ return NULL;
+
+ if (raw_ne)
+ node_info_from_raw_nat(&ne->ni, raw_ne);
+ list_add_tail(&ne->list, &nm_i->nat_entries);
+ nm_i->nat_cnt++;
+ return ne;
+}
+
static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
return radix_tree_lookup(&nm_i->nat_root, n);
@@ -150,7 +192,7 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
list_del(&e->list);
radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
nm_i->nat_cnt--;
- kmem_cache_free(nat_entry_slab, e);
+ __free_nat_entry(e);
}
static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
@@ -246,49 +288,29 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
return need_update;
}
-static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
- bool no_fail)
-{
- struct nat_entry *new;
-
- if (no_fail) {
- new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
- f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
- } else {
- new = kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
- if (!new)
- return NULL;
- if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
- kmem_cache_free(nat_entry_slab, new);
- return NULL;
- }
- }
-
- memset(new, 0, sizeof(struct nat_entry));
- nat_set_nid(new, nid);
- nat_reset_flag(new);
- list_add_tail(&new->list, &nm_i->nat_entries);
- nm_i->nat_cnt++;
- return new;
-}
-
+/* must be locked by nat_tree_lock */
static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
struct f2fs_nat_entry *ne)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- struct nat_entry *e;
+ struct nat_entry *new, *e;
+ new = __alloc_nat_entry(nid, false);
+ if (!new)
+ return;
+
+ down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
- if (!e) {
- e = grab_nat_entry(nm_i, nid, false);
- if (e)
- node_info_from_raw_nat(&e->ni, ne);
- } else {
+ if (!e)
+ e = __init_nat_entry(nm_i, new, ne, false);
+ else
f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
nat_get_blkaddr(e) !=
le32_to_cpu(ne->block_addr) ||
nat_get_version(e) != ne->version);
- }
+ up_write(&nm_i->nat_tree_lock);
+ if (e != new)
+ __free_nat_entry(new);
}
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
@@ -296,11 +318,12 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
+ struct nat_entry *new = __alloc_nat_entry(ni->nid, true);
down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ni->nid);
if (!e) {
- e = grab_nat_entry(nm_i, ni->nid, true);
+ e = __init_nat_entry(nm_i, new, NULL, true);
copy_node_info(&e->ni, ni);
f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
} else if (new_blkaddr == NEW_ADDR) {
@@ -312,6 +335,9 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
copy_node_info(&e->ni, ni);
f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
}
+ /* let's free early to reduce memory consumption */
+ if (e != new)
+ __free_nat_entry(new);
/* sanity check */
f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr);
@@ -327,10 +353,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
unsigned char version = nat_get_version(e);
nat_set_version(e, inc_node_version(version));
-
- /* in order to reuse the nid */
- if (nm_i->next_scan_nid > ni->nid)
- nm_i->next_scan_nid = ni->nid;
}
/* change address */
@@ -424,9 +446,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
f2fs_put_page(page, 1);
cache:
/* cache nat entry */
- down_write(&nm_i->nat_tree_lock);
cache_nat_entry(sbi, nid, &ne);
- up_write(&nm_i->nat_tree_lock);
}
/*
@@ -962,7 +982,8 @@ fail:
return err > 0 ? 0 : err;
}
-int truncate_xattr_node(struct inode *inode, struct page *page)
+/* caller must lock inode page */
+int truncate_xattr_node(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
nid_t nid = F2FS_I(inode)->i_xattr_nid;
@@ -978,10 +999,7 @@ int truncate_xattr_node(struct inode *inode, struct page *page)
f2fs_i_xnid_write(inode, 0);
- set_new_dnode(&dn, inode, page, npage, nid);
-
- if (page)
- dn.inode_page_locked = true;
+ set_new_dnode(&dn, inode, NULL, npage, nid);
truncate_node(&dn);
return 0;
}
@@ -1000,7 +1018,7 @@ int remove_inode_page(struct inode *inode)
if (err)
return err;
- err = truncate_xattr_node(inode, dn.inode_page);
+ err = truncate_xattr_node(inode);
if (err) {
f2fs_put_dnode(&dn);
return err;
@@ -1220,7 +1238,8 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
if (!inode)
return;
- page = pagecache_get_page(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0);
+ page = f2fs_pagecache_get_page(inode->i_mapping, 0,
+ FGP_LOCK|FGP_NOWAIT, 0);
if (!page)
goto iput_out;
@@ -1244,37 +1263,6 @@ iput_out:
iput(inode);
}
-void move_node_page(struct page *node_page, int gc_type)
-{
- if (gc_type == FG_GC) {
- struct f2fs_sb_info *sbi = F2FS_P_SB(node_page);
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = 1,
- .for_reclaim = 0,
- };
-
- set_page_dirty(node_page);
- f2fs_wait_on_page_writeback(node_page, NODE, true);
-
- f2fs_bug_on(sbi, PageWriteback(node_page));
- if (!clear_page_dirty_for_io(node_page))
- goto out_page;
-
- if (NODE_MAPPING(sbi)->a_ops->writepage(node_page, &wbc))
- unlock_page(node_page);
- goto release_page;
- } else {
- /* set page dirty and write it */
- if (!PageWriteback(node_page))
- set_page_dirty(node_page);
- }
-out_page:
- unlock_page(node_page);
-release_page:
- f2fs_put_page(node_page, 0);
-}
-
static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
{
pgoff_t index, end;
@@ -1344,6 +1332,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
struct node_info ni;
struct f2fs_io_info fio = {
.sbi = sbi,
+ .ino = ino_of_node(page),
.type = NODE,
.op = REQ_OP_WRITE,
.op_flags = wbc_to_write_flags(wbc),
@@ -1416,6 +1405,37 @@ redirty_out:
return AOP_WRITEPAGE_ACTIVATE;
}
+void move_node_page(struct page *node_page, int gc_type)
+{
+ if (gc_type == FG_GC) {
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = 1,
+ .for_reclaim = 0,
+ };
+
+ set_page_dirty(node_page);
+ f2fs_wait_on_page_writeback(node_page, NODE, true);
+
+ f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page));
+ if (!clear_page_dirty_for_io(node_page))
+ goto out_page;
+
+ if (__write_node_page(node_page, false, NULL,
+ &wbc, false, FS_GC_NODE_IO))
+ unlock_page(node_page);
+ goto release_page;
+ } else {
+ /* set page dirty and write it */
+ if (!PageWriteback(node_page))
+ set_page_dirty(node_page);
+ }
+out_page:
+ unlock_page(node_page);
+release_page:
+ f2fs_put_page(node_page, 0);
+}
+
static int f2fs_write_node_page(struct page *page,
struct writeback_control *wbc)
{
@@ -1764,35 +1784,54 @@ static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
return radix_tree_lookup(&nm_i->free_nid_root, n);
}
-static int __insert_nid_to_list(struct f2fs_sb_info *sbi,
- struct free_nid *i, enum nid_list list, bool new)
+static int __insert_free_nid(struct f2fs_sb_info *sbi,
+ struct free_nid *i, enum nid_state state)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- if (new) {
- int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
- if (err)
- return err;
- }
+ int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
+ if (err)
+ return err;
- f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW :
- i->state != NID_ALLOC);
- nm_i->nid_cnt[list]++;
- list_add_tail(&i->list, &nm_i->nid_list[list]);
+ f2fs_bug_on(sbi, state != i->state);
+ nm_i->nid_cnt[state]++;
+ if (state == FREE_NID)
+ list_add_tail(&i->list, &nm_i->free_nid_list);
return 0;
}
-static void __remove_nid_from_list(struct f2fs_sb_info *sbi,
- struct free_nid *i, enum nid_list list, bool reuse)
+static void __remove_free_nid(struct f2fs_sb_info *sbi,
+ struct free_nid *i, enum nid_state state)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+
+ f2fs_bug_on(sbi, state != i->state);
+ nm_i->nid_cnt[state]--;
+ if (state == FREE_NID)
+ list_del(&i->list);
+ radix_tree_delete(&nm_i->free_nid_root, i->nid);
+}
+
+static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i,
+ enum nid_state org_state, enum nid_state dst_state)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW :
- i->state != NID_ALLOC);
- nm_i->nid_cnt[list]--;
- list_del(&i->list);
- if (!reuse)
- radix_tree_delete(&nm_i->free_nid_root, i->nid);
+ f2fs_bug_on(sbi, org_state != i->state);
+ i->state = dst_state;
+ nm_i->nid_cnt[org_state]--;
+ nm_i->nid_cnt[dst_state]++;
+
+ switch (dst_state) {
+ case PREALLOC_NID:
+ list_del(&i->list);
+ break;
+ case FREE_NID:
+ list_add_tail(&i->list, &nm_i->free_nid_list);
+ break;
+ default:
+ BUG_ON(1);
+ }
}
/* return if the nid is recognized as free */
@@ -1810,7 +1849,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
i->nid = nid;
- i->state = NID_NEW;
+ i->state = FREE_NID;
if (radix_tree_preload(GFP_NOFS))
goto err;
@@ -1823,7 +1862,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
* - f2fs_create
* - f2fs_new_inode
* - alloc_nid
- * - __insert_nid_to_list(ALLOC_NID_LIST)
+ * - __insert_nid_to_list(PREALLOC_NID)
* - f2fs_balance_fs_bg
* - build_free_nids
* - __build_free_nids
@@ -1836,8 +1875,8 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
* - new_node_page
* - set_node_addr
* - alloc_nid_done
- * - __remove_nid_from_list(ALLOC_NID_LIST)
- * - __insert_nid_to_list(FREE_NID_LIST)
+ * - __remove_nid_from_list(PREALLOC_NID)
+ * - __insert_nid_to_list(FREE_NID)
*/
ne = __lookup_nat_cache(nm_i, nid);
if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
@@ -1846,13 +1885,13 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
e = __lookup_free_nid_list(nm_i, nid);
if (e) {
- if (e->state == NID_NEW)
+ if (e->state == FREE_NID)
ret = true;
goto err_out;
}
}
ret = true;
- err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true);
+ err = __insert_free_nid(sbi, i, FREE_NID);
err_out:
spin_unlock(&nm_i->nid_list_lock);
radix_tree_preload_end();
@@ -1870,8 +1909,8 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
spin_lock(&nm_i->nid_list_lock);
i = __lookup_free_nid_list(nm_i, nid);
- if (i && i->state == NID_NEW) {
- __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
+ if (i && i->state == FREE_NID) {
+ __remove_free_nid(sbi, i, FREE_NID);
need_free = true;
}
spin_unlock(&nm_i->nid_list_lock);
@@ -1890,15 +1929,18 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
return;
- if (set)
+ if (set) {
+ if (test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
+ return;
__set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
- else
- __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
-
- if (set)
nm_i->free_nid_count[nat_ofs]++;
- else if (!build)
- nm_i->free_nid_count[nat_ofs]--;
+ } else {
+ if (!test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
+ return;
+ __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+ if (!build)
+ nm_i->free_nid_count[nat_ofs]--;
+ }
}
static void scan_nat_page(struct f2fs_sb_info *sbi,
@@ -1933,12 +1975,32 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
}
}
-static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
+static void scan_curseg_cache(struct f2fs_sb_info *sbi)
{
- struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_journal *journal = curseg->journal;
+ int i;
+
+ down_read(&curseg->journal_rwsem);
+ for (i = 0; i < nats_in_cursum(journal); i++) {
+ block_t addr;
+ nid_t nid;
+
+ addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
+ nid = le32_to_cpu(nid_in_journal(journal, i));
+ if (addr == NULL_ADDR)
+ add_free_nid(sbi, nid, true);
+ else
+ remove_free_nid(sbi, nid);
+ }
+ up_read(&curseg->journal_rwsem);
+}
+
+static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned int i, idx;
+ nid_t nid;
down_read(&nm_i->nat_tree_lock);
@@ -1948,40 +2010,27 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
if (!nm_i->free_nid_count[i])
continue;
for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
- nid_t nid;
-
- if (!test_bit_le(idx, nm_i->free_nid_bitmap[i]))
- continue;
+ idx = find_next_bit_le(nm_i->free_nid_bitmap[i],
+ NAT_ENTRY_PER_BLOCK, idx);
+ if (idx >= NAT_ENTRY_PER_BLOCK)
+ break;
nid = i * NAT_ENTRY_PER_BLOCK + idx;
add_free_nid(sbi, nid, true);
- if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
+ if (nm_i->nid_cnt[FREE_NID] >= MAX_FREE_NIDS)
goto out;
}
}
out:
- down_read(&curseg->journal_rwsem);
- for (i = 0; i < nats_in_cursum(journal); i++) {
- block_t addr;
- nid_t nid;
+ scan_curseg_cache(sbi);
- addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
- nid = le32_to_cpu(nid_in_journal(journal, i));
- if (addr == NULL_ADDR)
- add_free_nid(sbi, nid, true);
- else
- remove_free_nid(sbi, nid);
- }
- up_read(&curseg->journal_rwsem);
up_read(&nm_i->nat_tree_lock);
}
static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
- struct f2fs_journal *journal = curseg->journal;
int i = 0;
nid_t nid = nm_i->next_scan_nid;
@@ -1989,7 +2038,7 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
nid = 0;
/* Enough entries */
- if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK)
+ if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
return;
if (!sync && !available_free_memory(sbi, FREE_NIDS))
@@ -1999,7 +2048,7 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
/* try to find free nids in free_nid_bitmap */
scan_free_nid_bits(sbi);
- if (nm_i->nid_cnt[FREE_NID_LIST])
+ if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
return;
}
@@ -2027,18 +2076,8 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
nm_i->next_scan_nid = nid;
/* find free nids from current sum_pages */
- down_read(&curseg->journal_rwsem);
- for (i = 0; i < nats_in_cursum(journal); i++) {
- block_t addr;
+ scan_curseg_cache(sbi);
- addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
- nid = le32_to_cpu(nid_in_journal(journal, i));
- if (addr == NULL_ADDR)
- add_free_nid(sbi, nid, true);
- else
- remove_free_nid(sbi, nid);
- }
- up_read(&curseg->journal_rwsem);
up_read(&nm_i->nat_tree_lock);
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
@@ -2076,15 +2115,13 @@ retry:
}
/* We should not use stale free nids created by build_free_nids */
- if (nm_i->nid_cnt[FREE_NID_LIST] && !on_build_free_nids(nm_i)) {
- f2fs_bug_on(sbi, list_empty(&nm_i->nid_list[FREE_NID_LIST]));
- i = list_first_entry(&nm_i->nid_list[FREE_NID_LIST],
+ if (nm_i->nid_cnt[FREE_NID] && !on_build_free_nids(nm_i)) {
+ f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
+ i = list_first_entry(&nm_i->free_nid_list,
struct free_nid, list);
*nid = i->nid;
- __remove_nid_from_list(sbi, i, FREE_NID_LIST, true);
- i->state = NID_ALLOC;
- __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
+ __move_free_nid(sbi, i, FREE_NID, PREALLOC_NID);
nm_i->available_nids--;
update_free_nid_bitmap(sbi, *nid, false, false);
@@ -2110,7 +2147,7 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
spin_lock(&nm_i->nid_list_lock);
i = __lookup_free_nid_list(nm_i, nid);
f2fs_bug_on(sbi, !i);
- __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false);
+ __remove_free_nid(sbi, i, PREALLOC_NID);
spin_unlock(&nm_i->nid_list_lock);
kmem_cache_free(free_nid_slab, i);
@@ -2133,12 +2170,10 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
f2fs_bug_on(sbi, !i);
if (!available_free_memory(sbi, FREE_NIDS)) {
- __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false);
+ __remove_free_nid(sbi, i, PREALLOC_NID);
need_free = true;
} else {
- __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, true);
- i->state = NID_NEW;
- __insert_nid_to_list(sbi, i, FREE_NID_LIST, false);
+ __move_free_nid(sbi, i, PREALLOC_NID, FREE_NID);
}
nm_i->available_nids++;
@@ -2157,20 +2192,19 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
struct free_nid *i, *next;
int nr = nr_shrink;
- if (nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS)
+ if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
return 0;
if (!mutex_trylock(&nm_i->build_lock))
return 0;
spin_lock(&nm_i->nid_list_lock);
- list_for_each_entry_safe(i, next, &nm_i->nid_list[FREE_NID_LIST],
- list) {
+ list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
if (nr_shrink <= 0 ||
- nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS)
+ nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
break;
- __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
+ __remove_free_nid(sbi, i, FREE_NID);
kmem_cache_free(free_nid_slab, i);
nr_shrink--;
}
@@ -2196,8 +2230,8 @@ void recover_inline_xattr(struct inode *inode, struct page *page)
goto update_inode;
}
- dst_addr = inline_xattr_addr(ipage);
- src_addr = inline_xattr_addr(page);
+ dst_addr = inline_xattr_addr(inode, ipage);
+ src_addr = inline_xattr_addr(inode, page);
inline_size = inline_xattr_size(inode);
f2fs_wait_on_page_writeback(ipage, NODE, true);
@@ -2286,6 +2320,12 @@ retry:
dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR);
if (dst->i_inline & F2FS_EXTRA_ATTR) {
dst->i_extra_isize = src->i_extra_isize;
+
+ if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) &&
+ F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
+ i_inline_xattr_size))
+ dst->i_inline_xattr_size = src->i_inline_xattr_size;
+
if (f2fs_sb_has_project_quota(sbi->sb) &&
F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
i_projid))
@@ -2357,8 +2397,8 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
ne = __lookup_nat_cache(nm_i, nid);
if (!ne) {
- ne = grab_nat_entry(nm_i, nid, true);
- node_info_from_raw_nat(&ne->ni, &raw_ne);
+ ne = __alloc_nat_entry(nid, true);
+ __init_nat_entry(nm_i, ne, &raw_ne, true);
}
/*
@@ -2404,15 +2444,17 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
struct f2fs_nat_block *nat_blk = page_address(page);
int valid = 0;
- int i;
+ int i = 0;
if (!enabled_nat_bits(sbi, NULL))
return;
- for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
- if (start_nid == 0 && i == 0)
- valid++;
- if (nat_blk->entries[i].block_addr)
+ if (nat_index == 0) {
+ valid = 1;
+ i = 1;
+ }
+ for (; i < NAT_ENTRY_PER_BLOCK; i++) {
+ if (nat_blk->entries[i].block_addr != NULL_ADDR)
valid++;
}
if (valid == 0) {
@@ -2607,7 +2649,7 @@ static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
__set_bit_le(i, nm_i->nat_block_bitmap);
nid = i * NAT_ENTRY_PER_BLOCK;
- last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK;
+ last_nid = nid + NAT_ENTRY_PER_BLOCK;
spin_lock(&NM_I(sbi)->nid_list_lock);
for (; nid < last_nid; nid++)
@@ -2642,16 +2684,15 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
/* not used nids: 0, node, meta, (and root counted as valid node) */
nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
F2FS_RESERVED_NODE_NUM;
- nm_i->nid_cnt[FREE_NID_LIST] = 0;
- nm_i->nid_cnt[ALLOC_NID_LIST] = 0;
+ nm_i->nid_cnt[FREE_NID] = 0;
+ nm_i->nid_cnt[PREALLOC_NID] = 0;
nm_i->nat_cnt = 0;
nm_i->ram_thresh = DEF_RAM_THRESHOLD;
nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
- INIT_LIST_HEAD(&nm_i->nid_list[FREE_NID_LIST]);
- INIT_LIST_HEAD(&nm_i->nid_list[ALLOC_NID_LIST]);
+ INIT_LIST_HEAD(&nm_i->free_nid_list);
INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
INIT_LIST_HEAD(&nm_i->nat_entries);
@@ -2743,16 +2784,15 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
/* destroy free nid list */
spin_lock(&nm_i->nid_list_lock);
- list_for_each_entry_safe(i, next_i, &nm_i->nid_list[FREE_NID_LIST],
- list) {
- __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
+ list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
+ __remove_free_nid(sbi, i, FREE_NID);
spin_unlock(&nm_i->nid_list_lock);
kmem_cache_free(free_nid_slab, i);
spin_lock(&nm_i->nid_list_lock);
}
- f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID_LIST]);
- f2fs_bug_on(sbi, nm_i->nid_cnt[ALLOC_NID_LIST]);
- f2fs_bug_on(sbi, !list_empty(&nm_i->nid_list[ALLOC_NID_LIST]));
+ f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID]);
+ f2fs_bug_on(sbi, nm_i->nid_cnt[PREALLOC_NID]);
+ f2fs_bug_on(sbi, !list_empty(&nm_i->free_nid_list));
spin_unlock(&nm_i->nid_list_lock);
/* destroy nat cache */
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index bb53e9955ff2..0ee3e5ff49a3 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -140,6 +140,7 @@ enum mem_type {
DIRTY_DENTS, /* indicates dirty dentry pages */
INO_ENTRIES, /* indicates inode entries */
EXTENT_CACHE, /* indicates extent cache */
+ INMEM_PAGES, /* indicates inmemory pages */
BASE_CHECK, /* check kernel status */
};
@@ -150,18 +151,10 @@ struct nat_entry_set {
unsigned int entry_cnt; /* the # of nat entries in set */
};
-/*
- * For free nid mangement
- */
-enum nid_state {
- NID_NEW, /* newly added to free nid list */
- NID_ALLOC /* it is allocated */
-};
-
struct free_nid {
struct list_head list; /* for free node id list */
nid_t nid; /* node id */
- int state; /* in use or not: NID_NEW or NID_ALLOC */
+ int state; /* in use or not: FREE_NID or PREALLOC_NID */
};
static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
@@ -170,12 +163,11 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
struct free_nid *fnid;
spin_lock(&nm_i->nid_list_lock);
- if (nm_i->nid_cnt[FREE_NID_LIST] <= 0) {
+ if (nm_i->nid_cnt[FREE_NID] <= 0) {
spin_unlock(&nm_i->nid_list_lock);
return;
}
- fnid = list_first_entry(&nm_i->nid_list[FREE_NID_LIST],
- struct free_nid, list);
+ fnid = list_first_entry(&nm_i->free_nid_list, struct free_nid, list);
*nid = fnid->nid;
spin_unlock(&nm_i->nid_list_lock);
}
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 9626758bc762..92c57ace1939 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -594,6 +594,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
int ret = 0;
unsigned long s_flags = sbi->sb->s_flags;
bool need_writecp = false;
+#ifdef CONFIG_QUOTA
+ int quota_enabled;
+#endif
if (s_flags & MS_RDONLY) {
f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
@@ -604,7 +607,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= MS_ACTIVE;
/* Turn on quotas so that they are updated correctly */
- f2fs_enable_quota_files(sbi);
+ quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY);
#endif
fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
@@ -665,7 +668,8 @@ skip:
out:
#ifdef CONFIG_QUOTA
/* Turn quotas off */
- f2fs_quota_off_umount(sbi->sb);
+ if (quota_enabled)
+ f2fs_quota_off_umount(sbi->sb);
#endif
sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f5c494389483..94939a5a96c8 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -181,11 +181,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
return true;
return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
- 2 * reserved_sections(sbi));
+ SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
}
void register_inmem_page(struct inode *inode, struct page *page)
{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct inmem_pages *new;
@@ -204,6 +205,10 @@ void register_inmem_page(struct inode *inode, struct page *page)
mutex_lock(&fi->inmem_lock);
get_page(page);
list_add_tail(&new->list, &fi->inmem_pages);
+ spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+ if (list_empty(&fi->inmem_ilist))
+ list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
+ spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
mutex_unlock(&fi->inmem_lock);
@@ -262,12 +267,41 @@ next:
return err;
}
+void drop_inmem_pages_all(struct f2fs_sb_info *sbi)
+{
+ struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
+ struct inode *inode;
+ struct f2fs_inode_info *fi;
+next:
+ spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+ if (list_empty(head)) {
+ spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+ return;
+ }
+ fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
+ inode = igrab(&fi->vfs_inode);
+ spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+
+ if (inode) {
+ drop_inmem_pages(inode);
+ iput(inode);
+ }
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ cond_resched();
+ goto next;
+}
+
void drop_inmem_pages(struct inode *inode)
{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
mutex_lock(&fi->inmem_lock);
__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
+ spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+ if (!list_empty(&fi->inmem_ilist))
+ list_del_init(&fi->inmem_ilist);
+ spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
mutex_unlock(&fi->inmem_lock);
clear_inode_flag(inode, FI_ATOMIC_FILE);
@@ -313,6 +347,7 @@ static int __commit_inmem_pages(struct inode *inode,
struct inmem_pages *cur, *tmp;
struct f2fs_io_info fio = {
.sbi = sbi,
+ .ino = inode->i_ino,
.type = DATA,
.op = REQ_OP_WRITE,
.op_flags = REQ_SYNC | REQ_PRIO,
@@ -398,6 +433,10 @@ int commit_inmem_pages(struct inode *inode)
/* drop all uncommitted pages */
__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
}
+ spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+ if (!list_empty(&fi->inmem_ilist))
+ list_del_init(&fi->inmem_ilist);
+ spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
mutex_unlock(&fi->inmem_lock);
clear_inode_flag(inode, FI_ATOMIC_COMMIT);
@@ -472,7 +511,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
static int __submit_flush_wait(struct f2fs_sb_info *sbi,
struct block_device *bdev)
{
- struct bio *bio = f2fs_bio_alloc(0);
+ struct bio *bio = f2fs_bio_alloc(sbi, 0, true);
int ret;
bio->bi_rw = REQ_OP_WRITE;
@@ -485,15 +524,17 @@ static int __submit_flush_wait(struct f2fs_sb_info *sbi,
return ret;
}
-static int submit_flush_wait(struct f2fs_sb_info *sbi)
+static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
{
- int ret = __submit_flush_wait(sbi, sbi->sb->s_bdev);
+ int ret = 0;
int i;
- if (!sbi->s_ndevs || ret)
- return ret;
+ if (!sbi->s_ndevs)
+ return __submit_flush_wait(sbi, sbi->sb->s_bdev);
- for (i = 1; i < sbi->s_ndevs; i++) {
+ for (i = 0; i < sbi->s_ndevs; i++) {
+ if (!is_dirty_device(sbi, ino, i, FLUSH_INO))
+ continue;
ret = __submit_flush_wait(sbi, FDEV(i).bdev);
if (ret)
break;
@@ -519,7 +560,9 @@ repeat:
fcc->dispatch_list = llist_del_all(&fcc->issue_list);
fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
- ret = submit_flush_wait(sbi);
+ cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
+
+ ret = submit_flush_wait(sbi, cmd->ino);
atomic_inc(&fcc->issued_flush);
llist_for_each_entry_safe(cmd, next,
@@ -537,7 +580,7 @@ repeat:
goto repeat;
}
-int f2fs_issue_flush(struct f2fs_sb_info *sbi)
+int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
{
struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
struct flush_cmd cmd;
@@ -547,19 +590,20 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
return 0;
if (!test_opt(sbi, FLUSH_MERGE)) {
- ret = submit_flush_wait(sbi);
+ ret = submit_flush_wait(sbi, ino);
atomic_inc(&fcc->issued_flush);
return ret;
}
- if (atomic_inc_return(&fcc->issing_flush) == 1) {
- ret = submit_flush_wait(sbi);
+ if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) {
+ ret = submit_flush_wait(sbi, ino);
atomic_dec(&fcc->issing_flush);
atomic_inc(&fcc->issued_flush);
return ret;
}
+ cmd.ino = ino;
init_completion(&cmd.wait);
llist_add(&cmd.llnode, &fcc->issue_list);
@@ -583,7 +627,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
} else {
struct flush_cmd *tmp, *next;
- ret = submit_flush_wait(sbi);
+ ret = submit_flush_wait(sbi, ino);
llist_for_each_entry_safe(tmp, next, list, llnode) {
if (tmp == &cmd) {
@@ -653,6 +697,28 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
}
}
+int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
+{
+ int ret = 0, i;
+
+ if (!sbi->s_ndevs)
+ return 0;
+
+ for (i = 1; i < sbi->s_ndevs; i++) {
+ if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
+ continue;
+ ret = __submit_flush_wait(sbi, FDEV(i).bdev);
+ if (ret)
+ break;
+
+ spin_lock(&sbi->dev_lock);
+ f2fs_clear_bit(i, (char *)&sbi->dirty_device);
+ spin_unlock(&sbi->dev_lock);
+ }
+
+ return ret;
+}
+
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
enum dirty_type dirty_type)
{
@@ -794,6 +860,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
+
f2fs_bug_on(sbi, dc->ref);
if (dc->error == -EOPNOTSUPP)
@@ -875,7 +943,7 @@ static int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
if (ret)
return ret;
}
- bio = f2fs_bio_alloc(1);
+ bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, 1);
bio->bi_iter.bi_sector = sector;
bio->bi_bdev = bdev;
bio_set_op_attrs(bio, op, 0);
@@ -926,10 +994,14 @@ void __check_sit_bitmap(struct f2fs_sb_info *sbi,
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
- struct discard_cmd *dc)
+ struct discard_policy *dpolicy,
+ struct discard_cmd *dc)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
+ &(dcc->fstrim_list) : &(dcc->wait_list);
struct bio *bio = NULL;
+ int flag = dpolicy->sync ? REQ_SYNC : 0;
if (dc->state != D_PREP)
return;
@@ -948,8 +1020,8 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
if (bio) {
bio->bi_private = dc;
bio->bi_end_io = f2fs_submit_discard_endio;
- submit_bio(REQ_SYNC, bio);
- list_move_tail(&dc->list, &dcc->wait_list);
+ submit_bio(flag, bio);
+ list_move_tail(&dc->list, wait_list);
__check_sit_bitmap(sbi, dc->start, dc->start + dc->len);
f2fs_update_iostat(sbi, FS_DISCARD, 1);
@@ -966,7 +1038,7 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
struct rb_node *insert_parent)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
- struct rb_node **p = &dcc->root.rb_node;
+ struct rb_node **p;
struct rb_node *parent = NULL;
struct discard_cmd *dc = NULL;
@@ -1134,58 +1206,107 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
return 0;
}
-static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond)
+static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
+ struct discard_policy *dpolicy,
+ unsigned int start, unsigned int end)
+{
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
+ struct rb_node **insert_p = NULL, *insert_parent = NULL;
+ struct discard_cmd *dc;
+ struct blk_plug plug;
+ int issued;
+
+next:
+ issued = 0;
+
+ mutex_lock(&dcc->cmd_lock);
+ f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
+
+ dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
+ NULL, start,
+ (struct rb_entry **)&prev_dc,
+ (struct rb_entry **)&next_dc,
+ &insert_p, &insert_parent, true);
+ if (!dc)
+ dc = next_dc;
+
+ blk_start_plug(&plug);
+
+ while (dc && dc->lstart <= end) {
+ struct rb_node *node;
+
+ if (dc->len < dpolicy->granularity)
+ goto skip;
+
+ if (dc->state != D_PREP) {
+ list_move_tail(&dc->list, &dcc->fstrim_list);
+ goto skip;
+ }
+
+ __submit_discard_cmd(sbi, dpolicy, dc);
+
+ if (++issued >= dpolicy->max_requests) {
+ start = dc->lstart + dc->len;
+
+ blk_finish_plug(&plug);
+ mutex_unlock(&dcc->cmd_lock);
+
+ schedule();
+
+ goto next;
+ }
+skip:
+ node = rb_next(&dc->rb_node);
+ dc = rb_entry_safe(node, struct discard_cmd, rb_node);
+
+ if (fatal_signal_pending(current))
+ break;
+ }
+
+ blk_finish_plug(&plug);
+ mutex_unlock(&dcc->cmd_lock);
+}
+
+static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
+ struct discard_policy *dpolicy)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *pend_list;
struct discard_cmd *dc, *tmp;
struct blk_plug plug;
- int iter = 0, issued = 0;
- int i;
+ int i, iter = 0, issued = 0;
bool io_interrupted = false;
- mutex_lock(&dcc->cmd_lock);
- f2fs_bug_on(sbi,
- !__check_rb_tree_consistence(sbi, &dcc->root));
- blk_start_plug(&plug);
- for (i = MAX_PLIST_NUM - 1;
- i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) {
+ for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
+ if (i + 1 < dpolicy->granularity)
+ break;
pend_list = &dcc->pend_list[i];
+
+ mutex_lock(&dcc->cmd_lock);
+ f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
+ blk_start_plug(&plug);
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
- /* Hurry up to finish fstrim */
- if (dcc->pend_list_tag[i] & P_TRIM) {
- __submit_discard_cmd(sbi, dc);
- issued++;
-
- if (fatal_signal_pending(current))
- break;
- continue;
- }
-
- if (!issue_cond) {
- __submit_discard_cmd(sbi, dc);
- issued++;
- continue;
- }
-
- if (is_idle(sbi)) {
- __submit_discard_cmd(sbi, dc);
- issued++;
- } else {
+ if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
+ !is_idle(sbi)) {
io_interrupted = true;
+ goto skip;
}
- if (++iter >= DISCARD_ISSUE_RATE)
- goto out;
+ __submit_discard_cmd(sbi, dpolicy, dc);
+ issued++;
+skip:
+ if (++iter >= dpolicy->max_requests)
+ break;
}
- if (list_empty(pend_list) && dcc->pend_list_tag[i] & P_TRIM)
- dcc->pend_list_tag[i] &= (~P_TRIM);
+ blk_finish_plug(&plug);
+ mutex_unlock(&dcc->cmd_lock);
+
+ if (iter >= dpolicy->max_requests)
+ break;
}
-out:
- blk_finish_plug(&plug);
- mutex_unlock(&dcc->cmd_lock);
if (!issued && io_interrupted)
issued = -1;
@@ -1193,12 +1314,13 @@ out:
return issued;
}
-static void __drop_discard_cmd(struct f2fs_sb_info *sbi)
+static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *pend_list;
struct discard_cmd *dc, *tmp;
int i;
+ bool dropped = false;
mutex_lock(&dcc->cmd_lock);
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
@@ -1206,39 +1328,58 @@ static void __drop_discard_cmd(struct f2fs_sb_info *sbi)
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
__remove_discard_cmd(sbi, dc);
+ dropped = true;
}
}
mutex_unlock(&dcc->cmd_lock);
+
+ return dropped;
}
-static void __wait_one_discard_bio(struct f2fs_sb_info *sbi,
+static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
struct discard_cmd *dc)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ unsigned int len = 0;
wait_for_completion_io(&dc->wait);
mutex_lock(&dcc->cmd_lock);
f2fs_bug_on(sbi, dc->state != D_DONE);
dc->ref--;
- if (!dc->ref)
+ if (!dc->ref) {
+ if (!dc->error)
+ len = dc->len;
__remove_discard_cmd(sbi, dc);
+ }
mutex_unlock(&dcc->cmd_lock);
+
+ return len;
}
-static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond)
+static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
+ struct discard_policy *dpolicy,
+ block_t start, block_t end)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
- struct list_head *wait_list = &(dcc->wait_list);
+ struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
+ &(dcc->fstrim_list) : &(dcc->wait_list);
struct discard_cmd *dc, *tmp;
bool need_wait;
+ unsigned int trimmed = 0;
next:
need_wait = false;
mutex_lock(&dcc->cmd_lock);
list_for_each_entry_safe(dc, tmp, wait_list, list) {
- if (!wait_cond || (dc->state == D_DONE && !dc->ref)) {
+ if (dc->lstart + dc->len <= start || end <= dc->lstart)
+ continue;
+ if (dc->len < dpolicy->granularity)
+ continue;
+ if (dc->state == D_DONE && !dc->ref) {
wait_for_completion_io(&dc->wait);
+ if (!dc->error)
+ trimmed += dc->len;
__remove_discard_cmd(sbi, dc);
} else {
dc->ref++;
@@ -1249,9 +1390,17 @@ next:
mutex_unlock(&dcc->cmd_lock);
if (need_wait) {
- __wait_one_discard_bio(sbi, dc);
+ trimmed += __wait_one_discard_bio(sbi, dc);
goto next;
}
+
+ return trimmed;
+}
+
+static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
+ struct discard_policy *dpolicy)
+{
+ __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
}
/* This should be covered by global mutex, &sit_i->sentry_lock */
@@ -1289,23 +1438,19 @@ void stop_discard_thread(struct f2fs_sb_info *sbi)
}
}
-/* This comes from f2fs_put_super and f2fs_trim_fs */
-void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount)
-{
- __issue_discard_cmd(sbi, false);
- __drop_discard_cmd(sbi);
- __wait_discard_cmd(sbi, !umount);
-}
-
-static void mark_discard_range_all(struct f2fs_sb_info *sbi)
+/* This comes from f2fs_put_super */
+bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
- int i;
+ struct discard_policy dpolicy;
+ bool dropped;
- mutex_lock(&dcc->cmd_lock);
- for (i = 0; i < MAX_PLIST_NUM; i++)
- dcc->pend_list_tag[i] |= P_TRIM;
- mutex_unlock(&dcc->cmd_lock);
+ init_discard_policy(&dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity);
+ __issue_discard_cmd(sbi, &dpolicy);
+ dropped = __drop_discard_cmd(sbi);
+ __wait_all_discard_cmd(sbi, &dpolicy);
+
+ return dropped;
}
static int issue_discard_thread(void *data)
@@ -1313,12 +1458,16 @@ static int issue_discard_thread(void *data)
struct f2fs_sb_info *sbi = data;
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
wait_queue_head_t *q = &dcc->discard_wait_queue;
+ struct discard_policy dpolicy;
unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
int issued;
set_freezable();
do {
+ init_discard_policy(&dpolicy, DPOLICY_BG,
+ dcc->discard_granularity);
+
wait_event_interruptible_timeout(*q,
kthread_should_stop() || freezing(current) ||
dcc->discard_wake,
@@ -1331,17 +1480,18 @@ static int issue_discard_thread(void *data)
if (dcc->discard_wake) {
dcc->discard_wake = 0;
if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
- mark_discard_range_all(sbi);
+ init_discard_policy(&dpolicy,
+ DPOLICY_FORCE, 1);
}
sb_start_intwrite(sbi->sb);
- issued = __issue_discard_cmd(sbi, true);
+ issued = __issue_discard_cmd(sbi, &dpolicy);
if (issued) {
- __wait_discard_cmd(sbi, true);
- wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
+ __wait_all_discard_cmd(sbi, &dpolicy);
+ wait_ms = dpolicy.min_interval;
} else {
- wait_ms = DEF_MAX_DISCARD_ISSUE_TIME;
+ wait_ms = dpolicy.max_interval;
}
sb_end_intwrite(sbi->sb);
@@ -1605,7 +1755,6 @@ find_next:
f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
len);
- cpc->trimmed += len;
total_len += len;
} else {
next_pos = find_next_bit_le(entry->discard_map,
@@ -1626,6 +1775,37 @@ skip:
wake_up_discard_thread(sbi, false);
}
+void init_discard_policy(struct discard_policy *dpolicy,
+ int discard_type, unsigned int granularity)
+{
+ /* common policy */
+ dpolicy->type = discard_type;
+ dpolicy->sync = true;
+ dpolicy->granularity = granularity;
+
+ if (discard_type == DPOLICY_BG) {
+ dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+ dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+ dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+ dpolicy->io_aware_gran = MAX_PLIST_NUM;
+ dpolicy->io_aware = true;
+ } else if (discard_type == DPOLICY_FORCE) {
+ dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+ dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+ dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+ dpolicy->io_aware_gran = MAX_PLIST_NUM;
+ dpolicy->io_aware = true;
+ } else if (discard_type == DPOLICY_FSTRIM) {
+ dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+ dpolicy->io_aware_gran = MAX_PLIST_NUM;
+ dpolicy->io_aware = false;
+ } else if (discard_type == DPOLICY_UMOUNT) {
+ dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+ dpolicy->io_aware_gran = MAX_PLIST_NUM;
+ dpolicy->io_aware = false;
+ }
+}
+
static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
{
dev_t dev = sbi->sb->s_bdev->bd_dev;
@@ -1643,12 +1823,10 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
INIT_LIST_HEAD(&dcc->entry_list);
- for (i = 0; i < MAX_PLIST_NUM; i++) {
+ for (i = 0; i < MAX_PLIST_NUM; i++)
INIT_LIST_HEAD(&dcc->pend_list[i]);
- if (i >= dcc->discard_granularity - 1)
- dcc->pend_list_tag[i] |= P_ACTIVE;
- }
INIT_LIST_HEAD(&dcc->wait_list);
+ INIT_LIST_HEAD(&dcc->fstrim_list);
mutex_init(&dcc->cmd_lock);
atomic_set(&dcc->issued_discard, 0);
atomic_set(&dcc->issing_discard, 0);
@@ -1796,16 +1974,6 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
get_sec_entry(sbi, segno)->valid_blocks += del;
}
-void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
-{
- update_sit_entry(sbi, new, 1);
- if (GET_SEGNO(sbi, old) != NULL_SEGNO)
- update_sit_entry(sbi, old, -1);
-
- locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
- locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
-}
-
void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
{
unsigned int segno = GET_SEGNO(sbi, addr);
@@ -1816,14 +1984,14 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
return;
/* add it into sit main buffer */
- mutex_lock(&sit_i->sentry_lock);
+ down_write(&sit_i->sentry_lock);
update_sit_entry(sbi, addr, -1);
/* add it into dirty seglist */
locate_dirty_segment(sbi, segno);
- mutex_unlock(&sit_i->sentry_lock);
+ up_write(&sit_i->sentry_lock);
}
bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
@@ -1836,7 +2004,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
return true;
- mutex_lock(&sit_i->sentry_lock);
+ down_read(&sit_i->sentry_lock);
segno = GET_SEGNO(sbi, blkaddr);
se = get_seg_entry(sbi, segno);
@@ -1845,7 +2013,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
if (f2fs_test_bit(offset, se->ckpt_valid_map))
is_cp = true;
- mutex_unlock(&sit_i->sentry_lock);
+ up_read(&sit_i->sentry_lock);
return is_cp;
}
@@ -1903,12 +2071,8 @@ struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
{
struct page *page = grab_meta_page(sbi, blk_addr);
- void *dst = page_address(page);
- if (src)
- memcpy(dst, src, PAGE_SIZE);
- else
- memset(dst, 0, PAGE_SIZE);
+ memcpy(page_address(page), src, PAGE_SIZE);
set_page_dirty(page);
f2fs_put_page(page, 1);
}
@@ -2007,7 +2171,6 @@ find_other_zone:
}
secno = left_start;
skip_left:
- hint = secno;
segno = GET_SEG_FROM_SEC(sbi, secno);
zoneno = GET_ZONE_FROM_SEC(sbi, secno);
@@ -2242,12 +2405,16 @@ void allocate_new_segments(struct f2fs_sb_info *sbi)
unsigned int old_segno;
int i;
+ down_write(&SIT_I(sbi)->sentry_lock);
+
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
curseg = CURSEG_I(sbi, i);
old_segno = curseg->segno;
SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
locate_dirty_segment(sbi, old_segno);
}
+
+ up_write(&SIT_I(sbi)->sentry_lock);
}
static const struct segment_allocation default_salloc_ops = {
@@ -2259,14 +2426,14 @@ bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
__u64 trim_start = cpc->trim_start;
bool has_candidate = false;
- mutex_lock(&SIT_I(sbi)->sentry_lock);
+ down_write(&SIT_I(sbi)->sentry_lock);
for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
if (add_discard_addrs(sbi, cpc, true)) {
has_candidate = true;
break;
}
}
- mutex_unlock(&SIT_I(sbi)->sentry_lock);
+ up_write(&SIT_I(sbi)->sentry_lock);
cpc->trim_start = trim_start;
return has_candidate;
@@ -2276,14 +2443,16 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
{
__u64 start = F2FS_BYTES_TO_BLK(range->start);
__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
- unsigned int start_segno, end_segno;
+ unsigned int start_segno, end_segno, cur_segno;
+ block_t start_block, end_block;
struct cp_control cpc;
+ struct discard_policy dpolicy;
+ unsigned long long trimmed = 0;
int err = 0;
if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
return -EINVAL;
- cpc.trimmed = 0;
if (end <= MAIN_BLKADDR(sbi))
goto out;
@@ -2297,12 +2466,14 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
GET_SEGNO(sbi, end);
+
cpc.reason = CP_DISCARD;
cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
/* do checkpoint to issue discard commands safely */
- for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
- cpc.trim_start = start_segno;
+ for (cur_segno = start_segno; cur_segno <= end_segno;
+ cur_segno = cpc.trim_end + 1) {
+ cpc.trim_start = cur_segno;
if (sbi->discard_blks == 0)
break;
@@ -2310,7 +2481,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
cpc.trim_end = end_segno;
else
cpc.trim_end = min_t(unsigned int,
- rounddown(start_segno +
+ rounddown(cur_segno +
BATCHED_TRIM_SEGMENTS(sbi),
sbi->segs_per_sec) - 1, end_segno);
@@ -2322,11 +2493,16 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
schedule();
}
- /* It's time to issue all the filed discards */
- mark_discard_range_all(sbi);
- f2fs_wait_discard_bios(sbi, false);
+
+ start_block = START_BLOCK(sbi, start_segno);
+ end_block = START_BLOCK(sbi, min(cur_segno, end_segno) + 1);
+
+ init_discard_policy(&dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
+ __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block);
+ trimmed = __wait_discard_cmd_range(sbi, &dpolicy,
+ start_block, end_block);
out:
- range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
+ range->len = F2FS_BLK_TO_BYTES(trimmed);
return err;
}
@@ -2338,6 +2514,20 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
return false;
}
+#if 0
+int rw_hint_to_seg_type(enum rw_hint hint)
+{
+ switch (hint) {
+ case WRITE_LIFE_SHORT:
+ return CURSEG_HOT_DATA;
+ case WRITE_LIFE_EXTREME:
+ return CURSEG_COLD_DATA;
+ default:
+ return CURSEG_WARM_DATA;
+ }
+}
+#endif
+
static int __get_segment_type_2(struct f2fs_io_info *fio)
{
if (fio->type == DATA)
@@ -2372,6 +2562,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
return CURSEG_COLD_DATA;
if (is_inode_flag_set(inode, FI_HOT_DATA))
return CURSEG_HOT_DATA;
+ /* rw_hint_to_seg_type(inode->i_write_hint); */
return CURSEG_WARM_DATA;
} else {
if (IS_DNODE(fio->page))
@@ -2416,8 +2607,10 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, type);
+ down_read(&SM_I(sbi)->curseg_lock);
+
mutex_lock(&curseg->curseg_mutex);
- mutex_lock(&sit_i->sentry_lock);
+ down_write(&sit_i->sentry_lock);
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
@@ -2434,15 +2627,26 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
stat_inc_block_count(sbi, curseg);
+ /*
+ * SIT information should be updated before segment allocation,
+ * since SSR needs latest valid block information.
+ */
+ update_sit_entry(sbi, *new_blkaddr, 1);
+ if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
+ update_sit_entry(sbi, old_blkaddr, -1);
+
if (!__has_curseg_space(sbi, type))
sit_i->s_ops->allocate_segment(sbi, type, false);
+
/*
- * SIT information should be updated after segment allocation,
- * since we need to keep dirty segments precisely under SSR.
+ * segment dirty status should be updated after segment allocation,
+ * so we just need to update status only one time after previous
+ * segment being closed.
*/
- refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
+ locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
+ locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
- mutex_unlock(&sit_i->sentry_lock);
+ up_write(&sit_i->sentry_lock);
if (page && IS_NODESEG(type)) {
fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
@@ -2462,6 +2666,29 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
}
mutex_unlock(&curseg->curseg_mutex);
+
+ up_read(&SM_I(sbi)->curseg_lock);
+}
+
+static void update_device_state(struct f2fs_io_info *fio)
+{
+ struct f2fs_sb_info *sbi = fio->sbi;
+ unsigned int devidx;
+
+ if (!sbi->s_ndevs)
+ return;
+
+ devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
+
+ /* update device state for fsync */
+ set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
+
+ /* update device state for checkpoint */
+ if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
+ spin_lock(&sbi->dev_lock);
+ f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
+ spin_unlock(&sbi->dev_lock);
+ }
}
static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
@@ -2478,6 +2705,8 @@ reallocate:
if (err == -EAGAIN) {
fio->old_blkaddr = fio->new_blkaddr;
goto reallocate;
+ } else if (!err) {
+ update_device_state(fio);
}
}
@@ -2538,12 +2767,26 @@ int rewrite_data_page(struct f2fs_io_info *fio)
stat_inc_inplace_blocks(fio->sbi);
err = f2fs_submit_page_bio(fio);
+ if (!err)
+ update_device_state(fio);
f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
return err;
}
+static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ int i;
+
+ for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
+ if (CURSEG_I(sbi, i)->segno == segno)
+ break;
+ }
+ return i;
+}
+
void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
bool recover_curseg, bool recover_newaddr)
@@ -2559,6 +2802,8 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
se = get_seg_entry(sbi, segno);
type = se->type;
+ down_write(&SM_I(sbi)->curseg_lock);
+
if (!recover_curseg) {
/* for recovery flow */
if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
@@ -2568,14 +2813,19 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
type = CURSEG_WARM_DATA;
}
} else {
- if (!IS_CURSEG(sbi, segno))
+ if (IS_CURSEG(sbi, segno)) {
+ /* se->type is volatile as SSR allocation */
+ type = __f2fs_get_curseg(sbi, segno);
+ f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
+ } else {
type = CURSEG_WARM_DATA;
+ }
}
curseg = CURSEG_I(sbi, type);
mutex_lock(&curseg->curseg_mutex);
- mutex_lock(&sit_i->sentry_lock);
+ down_write(&sit_i->sentry_lock);
old_cursegno = curseg->segno;
old_blkoff = curseg->next_blkoff;
@@ -2607,8 +2857,9 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
curseg->next_blkoff = old_blkoff;
}
- mutex_unlock(&sit_i->sentry_lock);
+ up_write(&sit_i->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
+ up_write(&SM_I(sbi)->curseg_lock);
}
void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
@@ -3062,7 +3313,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
bool to_journal = true;
struct seg_entry *se;
- mutex_lock(&sit_i->sentry_lock);
+ down_write(&sit_i->sentry_lock);
if (!sit_i->dirty_sentries)
goto out;
@@ -3156,7 +3407,7 @@ out:
cpc->trim_start = trim_start;
}
- mutex_unlock(&sit_i->sentry_lock);
+ up_write(&sit_i->sentry_lock);
set_prefree_as_free_segments(sbi);
}
@@ -3249,7 +3500,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
- mutex_init(&sit_i->sentry_lock);
+ init_rwsem(&sit_i->sentry_lock);
return 0;
}
@@ -3490,7 +3741,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
struct sit_info *sit_i = SIT_I(sbi);
unsigned int segno;
- mutex_lock(&sit_i->sentry_lock);
+ down_write(&sit_i->sentry_lock);
sit_i->min_mtime = LLONG_MAX;
@@ -3507,7 +3758,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
sit_i->min_mtime = mtime;
}
sit_i->max_mtime = get_mtime(sbi);
- mutex_unlock(&sit_i->sentry_lock);
+ up_write(&sit_i->sentry_lock);
}
int build_segment_manager(struct f2fs_sb_info *sbi)
@@ -3540,11 +3791,14 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
+ sm_info->min_ssr_sections = reserved_sections(sbi);
sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
INIT_LIST_HEAD(&sm_info->sit_entry_set);
+ init_rwsem(&sm_info->curseg_lock);
+
if (!f2fs_readonly(sbi->sb)) {
err = create_flush_cmd_control(sbi);
if (err)
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index ffa11274b0ce..5264b6ed120c 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -231,7 +231,7 @@ struct sit_info {
unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */
unsigned int dirty_sentries; /* # of dirty sentries */
unsigned int sents_per_block; /* # of SIT entries per block */
- struct mutex sentry_lock; /* to protect SIT cache */
+ struct rw_semaphore sentry_lock; /* to protect SIT cache */
struct seg_entry *sentries; /* SIT segment-level cache */
struct sec_entry *sec_entries; /* SIT section-level cache */
@@ -497,6 +497,33 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi)
return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi));
}
+static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi)
+{
+ unsigned int node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) +
+ get_pages(sbi, F2FS_DIRTY_DENTS);
+ unsigned int dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS);
+ unsigned int segno, left_blocks;
+ int i;
+
+ /* check current node segment */
+ for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) {
+ segno = CURSEG_I(sbi, i)->segno;
+ left_blocks = sbi->blocks_per_seg -
+ get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+
+ if (node_blocks > left_blocks)
+ return false;
+ }
+
+ /* check current data segment */
+ segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno;
+ left_blocks = sbi->blocks_per_seg -
+ get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+ if (dent_blocks > left_blocks)
+ return false;
+ return true;
+}
+
static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
int freed, int needed)
{
@@ -507,6 +534,9 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
return false;
+ if (free_sections(sbi) + freed == reserved_sections(sbi) + needed &&
+ has_curseg_enough_space(sbi))
+ return false;
return (free_sections(sbi) + freed) <=
(node_secs + 2 * dent_secs + imeta_secs +
reserved_sections(sbi) + needed);
@@ -730,7 +760,7 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi,
unsigned int secno)
{
- if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) >=
+ if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) >
sbi->fggc_threshold)
return true;
return false;
@@ -795,8 +825,9 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force)
goto wake_up;
mutex_lock(&dcc->cmd_lock);
- for (i = MAX_PLIST_NUM - 1;
- i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) {
+ for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
+ if (i + 1 < dcc->discard_granularity)
+ break;
if (!list_empty(&dcc->pend_list[i])) {
wakeup = true;
break;
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 5c60fc28ec75..0b5664a1a6cc 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -28,7 +28,7 @@ static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi)
static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
{
- long count = NM_I(sbi)->nid_cnt[FREE_NID_LIST] - MAX_FREE_NIDS;
+ long count = NM_I(sbi)->nid_cnt[FREE_NID] - MAX_FREE_NIDS;
return count > 0 ? count : 0;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 482bb0333806..187cead7bd37 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -44,6 +44,8 @@ static struct kmem_cache *f2fs_inode_cachep;
char *fault_name[FAULT_MAX] = {
[FAULT_KMALLOC] = "kmalloc",
[FAULT_PAGE_ALLOC] = "page alloc",
+ [FAULT_PAGE_GET] = "page get",
+ [FAULT_ALLOC_BIO] = "alloc bio",
[FAULT_ALLOC_NID] = "alloc nid",
[FAULT_ORPHAN] = "orphan",
[FAULT_BLOCK] = "no more block",
@@ -92,6 +94,7 @@ enum {
Opt_disable_ext_identify,
Opt_inline_xattr,
Opt_noinline_xattr,
+ Opt_inline_xattr_size,
Opt_inline_data,
Opt_inline_dentry,
Opt_noinline_dentry,
@@ -141,6 +144,7 @@ static match_table_t f2fs_tokens = {
{Opt_disable_ext_identify, "disable_ext_identify"},
{Opt_inline_xattr, "inline_xattr"},
{Opt_noinline_xattr, "noinline_xattr"},
+ {Opt_inline_xattr_size, "inline_xattr_size=%u"},
{Opt_inline_data, "inline_data"},
{Opt_inline_dentry, "inline_dentry"},
{Opt_noinline_dentry, "noinline_dentry"},
@@ -209,6 +213,12 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype,
"quota options when quota turned on");
return -EINVAL;
}
+ if (f2fs_sb_has_quota_ino(sb)) {
+ f2fs_msg(sb, KERN_INFO,
+ "QUOTA feature is enabled, so ignore qf_name");
+ return 0;
+ }
+
qname = match_strdup(args);
if (!qname) {
f2fs_msg(sb, KERN_ERR,
@@ -287,6 +297,18 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
return -1;
}
}
+
+ if (f2fs_sb_has_quota_ino(sbi->sb) && sbi->s_jquota_fmt) {
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "QUOTA feature is enabled, so ignore jquota_fmt");
+ sbi->s_jquota_fmt = 0;
+ }
+ if (f2fs_sb_has_quota_ino(sbi->sb) && sb_rdonly(sbi->sb)) {
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "Filesystem with quota feature cannot be mounted RDWR "
+ "without CONFIG_QUOTA");
+ return -1;
+ }
return 0;
}
#endif
@@ -383,6 +405,12 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_noinline_xattr:
clear_opt(sbi, INLINE_XATTR);
break;
+ case Opt_inline_xattr_size:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ set_opt(sbi, INLINE_XATTR_SIZE);
+ sbi->inline_xattr_size = arg;
+ break;
#else
case Opt_user_xattr:
f2fs_msg(sb, KERN_INFO,
@@ -604,6 +632,24 @@ static int parse_options(struct super_block *sb, char *options)
F2FS_IO_SIZE_KB(sbi));
return -EINVAL;
}
+
+ if (test_opt(sbi, INLINE_XATTR_SIZE)) {
+ if (!test_opt(sbi, INLINE_XATTR)) {
+ f2fs_msg(sb, KERN_ERR,
+ "inline_xattr_size option should be "
+ "set with inline_xattr option");
+ return -EINVAL;
+ }
+ if (!sbi->inline_xattr_size ||
+ sbi->inline_xattr_size >= DEF_ADDRS_PER_INODE -
+ F2FS_TOTAL_EXTRA_ATTR_SIZE -
+ DEF_INLINE_RESERVED_SIZE -
+ DEF_MIN_INLINE_SIZE) {
+ f2fs_msg(sb, KERN_ERR,
+ "inline xattr size is out of range");
+ return -EINVAL;
+ }
+ }
return 0;
}
@@ -618,13 +664,13 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
init_once((void *) fi);
/* Initialize f2fs-specific inode info */
- fi->vfs_inode.i_version = 1;
atomic_set(&fi->dirty_pages, 0);
fi->i_current_depth = 1;
fi->i_advise = 0;
init_rwsem(&fi->i_sem);
INIT_LIST_HEAD(&fi->dirty_list);
INIT_LIST_HEAD(&fi->gdirty_list);
+ INIT_LIST_HEAD(&fi->inmem_ilist);
INIT_LIST_HEAD(&fi->inmem_pages);
mutex_init(&fi->inmem_lock);
init_rwsem(&fi->dio_rwsem[READ]);
@@ -673,7 +719,6 @@ static int f2fs_drop_inode(struct inode *inode)
sb_end_intwrite(inode->i_sb);
- fscrypt_put_encryption_info(inode, NULL);
spin_lock(&inode->i_lock);
atomic_dec(&inode->i_count);
}
@@ -781,6 +826,7 @@ static void f2fs_put_super(struct super_block *sb)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
int i;
+ bool dropped;
f2fs_quota_off_umount(sb);
@@ -801,9 +847,9 @@ static void f2fs_put_super(struct super_block *sb)
}
/* be sure to wait for any on-going discard commands */
- f2fs_wait_discard_bios(sbi, true);
+ dropped = f2fs_wait_discard_bios(sbi);
- if (f2fs_discard_en(sbi) && !sbi->discard_blks) {
+ if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped) {
struct cp_control cpc = {
.reason = CP_UMOUNT | CP_TRIMMED,
};
@@ -859,6 +905,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
struct f2fs_sb_info *sbi = F2FS_SB(sb);
int err = 0;
+ if (unlikely(f2fs_cp_error(sbi)))
+ return 0;
+
trace_f2fs_sync_fs(sb, sync);
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
@@ -958,7 +1007,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_blocks = total_count - start_count;
buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count;
buf->f_bavail = user_block_count - valid_user_blocks(sbi) -
- sbi->reserved_blocks;
+ sbi->current_reserved_blocks;
avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
@@ -1047,6 +1096,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",inline_xattr");
else
seq_puts(seq, ",noinline_xattr");
+ if (test_opt(sbi, INLINE_XATTR_SIZE))
+ seq_printf(seq, ",inline_xattr_size=%u",
+ sbi->inline_xattr_size);
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
if (test_opt(sbi, POSIX_ACL))
@@ -1109,6 +1161,7 @@ static void default_options(struct f2fs_sb_info *sbi)
{
/* init some FS parameters */
sbi->active_logs = NR_CURSEG_TYPE;
+ sbi->inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
set_opt(sbi, BG_GC);
set_opt(sbi, INLINE_XATTR);
@@ -1137,6 +1190,9 @@ static void default_options(struct f2fs_sb_info *sbi)
#endif
}
+#ifdef CONFIG_QUOTA
+static int f2fs_enable_quotas(struct super_block *sb);
+#endif
static int f2fs_remount(struct super_block *sb, int *flags, char *data)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -1203,6 +1259,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
goto skip;
+#ifdef CONFIG_QUOTA
if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) {
err = dquot_suspend(sb, -1);
if (err < 0)
@@ -1210,9 +1267,15 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
} else {
/* dquot_resume needs RW */
sb->s_flags &= ~MS_RDONLY;
- dquot_resume(sb, -1);
+ if (sb_any_quota_suspended(sb)) {
+ dquot_resume(sb, -1);
+ } else if (f2fs_sb_has_quota_ino(sb)) {
+ err = f2fs_enable_quotas(sb);
+ if (err)
+ goto restore_opts;
+ }
}
-
+#endif
/* disallow enable/disable extent_cache dynamically */
if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
err = -EINVAL;
@@ -1321,8 +1384,13 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread);
repeat:
page = read_mapping_page(mapping, blkidx, NULL);
- if (IS_ERR(page))
+ if (IS_ERR(page)) {
+ if (PTR_ERR(page) == -ENOMEM) {
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto repeat;
+ }
return PTR_ERR(page);
+ }
lock_page(page);
@@ -1365,11 +1433,16 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
while (towrite > 0) {
tocopy = min_t(unsigned long, sb->s_blocksize - offset,
towrite);
-
+retry:
err = a_ops->write_begin(NULL, mapping, off, tocopy, 0,
&page, NULL);
- if (unlikely(err))
+ if (unlikely(err)) {
+ if (err == -ENOMEM) {
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto retry;
+ }
break;
+ }
kaddr = kmap_atomic(page);
memcpy(kaddr + offset, data, tocopy);
@@ -1386,8 +1459,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
}
if (len == towrite)
- return 0;
- inode->i_version++;
+ return err;
inode->i_mtime = inode->i_ctime = current_time(inode);
f2fs_mark_inode_dirty_sync(inode, false);
return len - towrite;
@@ -1409,19 +1481,91 @@ static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
sbi->s_jquota_fmt, type);
}
-void f2fs_enable_quota_files(struct f2fs_sb_info *sbi)
+int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly)
{
- int i, ret;
+ int enabled = 0;
+ int i, err;
+
+ if (f2fs_sb_has_quota_ino(sbi->sb) && rdonly) {
+ err = f2fs_enable_quotas(sbi->sb);
+ if (err) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Cannot turn on quota_ino: %d", err);
+ return 0;
+ }
+ return 1;
+ }
for (i = 0; i < MAXQUOTAS; i++) {
if (sbi->s_qf_names[i]) {
- ret = f2fs_quota_on_mount(sbi, i);
- if (ret < 0)
- f2fs_msg(sbi->sb, KERN_ERR,
- "Cannot turn on journaled "
- "quota: error %d", ret);
+ err = f2fs_quota_on_mount(sbi, i);
+ if (!err) {
+ enabled = 1;
+ continue;
+ }
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Cannot turn on quotas: %d on %d", err, i);
+ }
+ }
+ return enabled;
+}
+
+static int f2fs_quota_enable(struct super_block *sb, int type, int format_id,
+ unsigned int flags)
+{
+ struct inode *qf_inode;
+ unsigned long qf_inum;
+ int err;
+
+ BUG_ON(!f2fs_sb_has_quota_ino(sb));
+
+ qf_inum = f2fs_qf_ino(sb, type);
+ if (!qf_inum)
+ return -EPERM;
+
+ qf_inode = f2fs_iget(sb, qf_inum);
+ if (IS_ERR(qf_inode)) {
+ f2fs_msg(sb, KERN_ERR,
+ "Bad quota inode %u:%lu", type, qf_inum);
+ return PTR_ERR(qf_inode);
+ }
+
+ /* Don't account quota for quota files to avoid recursion */
+ qf_inode->i_flags |= S_NOQUOTA;
+ err = dquot_enable(qf_inode, type, format_id, flags);
+ iput(qf_inode);
+ return err;
+}
+
+static int f2fs_enable_quotas(struct super_block *sb)
+{
+ int type, err = 0;
+ unsigned long qf_inum;
+ bool quota_mopt[MAXQUOTAS] = {
+ test_opt(F2FS_SB(sb), USRQUOTA),
+ test_opt(F2FS_SB(sb), GRPQUOTA),
+ test_opt(F2FS_SB(sb), PRJQUOTA),
+ };
+
+ sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+ for (type = 0; type < MAXQUOTAS; type++) {
+ qf_inum = f2fs_qf_ino(sb, type);
+ if (qf_inum) {
+ err = f2fs_quota_enable(sb, type, QFMT_VFS_V1,
+ DQUOT_USAGE_ENABLED |
+ (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
+ if (err) {
+ f2fs_msg(sb, KERN_ERR,
+ "Failed to enable quota tracking "
+ "(type=%d, err=%d). Please run "
+ "fsck to fix.", type, err);
+ for (type--; type >= 0; type--)
+ dquot_quota_off(sb, type);
+ return err;
+ }
}
}
+ return 0;
}
static int f2fs_quota_sync(struct super_block *sb, int type)
@@ -1492,7 +1636,7 @@ static int f2fs_quota_off(struct super_block *sb, int type)
f2fs_quota_sync(sb, type);
err = dquot_quota_off(sb, type);
- if (err)
+ if (err || f2fs_sb_has_quota_ino(sb))
goto out_put;
inode_lock(inode);
@@ -1598,14 +1742,9 @@ static const struct fscrypt_operations f2fs_cryptops = {
.key_prefix = "f2fs:",
.get_context = f2fs_get_context,
.set_context = f2fs_set_context,
- .is_encrypted = f2fs_encrypted_inode,
.empty_dir = f2fs_empty_dir,
.max_namelen = f2fs_max_namelen,
};
-#else
-static const struct fscrypt_operations f2fs_cryptops = {
- .is_encrypted = f2fs_encrypted_inode,
-};
#endif
static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
@@ -1660,7 +1799,7 @@ static loff_t max_file_blocks(void)
/*
* note: previously, result is equal to (DEF_ADDRS_PER_INODE -
- * F2FS_INLINE_XATTR_ADDRS), but now f2fs try to reserve more
+ * DEFAULT_INLINE_XATTR_ADDRS), but now f2fs try to reserve more
* space in inode.i_addr, it will be more safe to reassign
* result as zero.
*/
@@ -1969,6 +2108,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
for (j = HOT; j < NR_TEMP_TYPE; j++)
mutex_init(&sbi->wio_mutex[i][j]);
spin_lock_init(&sbi->cp_lock);
+
+ sbi->dirty_device = 0;
+ spin_lock_init(&sbi->dev_lock);
}
static int init_percpu_info(struct f2fs_sb_info *sbi)
@@ -2323,12 +2465,17 @@ try_onemore:
#ifdef CONFIG_QUOTA
sb->dq_op = &f2fs_quota_operations;
- sb->s_qcop = &f2fs_quotactl_ops;
+ if (f2fs_sb_has_quota_ino(sb))
+ sb->s_qcop = &dquot_quotactl_sysfile_ops;
+ else
+ sb->s_qcop = &f2fs_quotactl_ops;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
sb->s_op = &f2fs_sops;
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
sb->s_cop = &f2fs_cryptops;
+#endif
sb->s_xattr = f2fs_xattr_handlers;
sb->s_export_op = &f2fs_export_ops;
sb->s_magic = F2FS_SUPER_MAGIC;
@@ -2419,6 +2566,7 @@ try_onemore:
le64_to_cpu(sbi->ckpt->valid_block_count);
sbi->last_valid_block_count = sbi->total_valid_block_count;
sbi->reserved_blocks = 0;
+ sbi->current_reserved_blocks = 0;
for (i = 0; i < NR_INODE_TYPE; i++) {
INIT_LIST_HEAD(&sbi->inode_list[i]);
@@ -2493,10 +2641,24 @@ try_onemore:
if (err)
goto free_root_inode;
+#ifdef CONFIG_QUOTA
+ /*
+ * Turn on quotas which were not enabled for read-only mounts if
+ * filesystem has quota feature, so that they are updated correctly.
+ */
+ if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb)) {
+ err = f2fs_enable_quotas(sb);
+ if (err) {
+ f2fs_msg(sb, KERN_ERR,
+ "Cannot turn on quotas: error %d", err);
+ goto free_sysfs;
+ }
+ }
+#endif
/* if there are nt orphan nodes free them */
err = recover_orphan_inodes(sbi);
if (err)
- goto free_sysfs;
+ goto free_meta;
/* recover fsynced data */
if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
@@ -2530,7 +2692,7 @@ try_onemore:
err = -EINVAL;
f2fs_msg(sb, KERN_ERR,
"Need to recover fsync data");
- goto free_sysfs;
+ goto free_meta;
}
}
skip_recovery:
@@ -2564,6 +2726,10 @@ skip_recovery:
return 0;
free_meta:
+#ifdef CONFIG_QUOTA
+ if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb))
+ f2fs_quota_off_umount(sbi->sb);
+#endif
f2fs_sync_inode_meta(sbi);
/*
* Some dirty meta pages can be produced by recover_orphan_inodes()
@@ -2572,7 +2738,9 @@ free_meta:
* falls into an infinite loop in sync_meta_pages().
*/
truncate_inode_pages_final(META_MAPPING(sbi));
+#ifdef CONFIG_QUOTA
free_sysfs:
+#endif
f2fs_unregister_sysfs(sbi);
free_root_inode:
dput(sb->s_root);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index e2c258f717cd..9835348b6e5d 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -30,7 +30,7 @@ enum {
FAULT_INFO_RATE, /* struct f2fs_fault_info */
FAULT_INFO_TYPE, /* struct f2fs_fault_info */
#endif
- RESERVED_BLOCKS,
+ RESERVED_BLOCKS, /* struct f2fs_sb_info */
};
struct f2fs_attr {
@@ -63,6 +63,13 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
return NULL;
}
+static ssize_t dirty_segments_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)(dirty_segments(sbi)));
+}
+
static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
@@ -100,10 +107,22 @@ static ssize_t features_show(struct f2fs_attr *a,
if (f2fs_sb_has_inode_chksum(sb))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "inode_checksum");
+ if (f2fs_sb_has_flexible_inline_xattr(sb))
+ len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "flexible_inline_xattr");
+ if (f2fs_sb_has_quota_ino(sb))
+ len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "quota_ino");
len += snprintf(buf + len, PAGE_SIZE - len, "\n");
return len;
}
+static ssize_t current_reserved_blocks_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", sbi->current_reserved_blocks);
+}
+
static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
@@ -143,34 +162,22 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
#endif
if (a->struct_type == RESERVED_BLOCKS) {
spin_lock(&sbi->stat_lock);
- if ((unsigned long)sbi->total_valid_block_count + t >
- (unsigned long)sbi->user_block_count) {
+ if (t > (unsigned long)sbi->user_block_count) {
spin_unlock(&sbi->stat_lock);
return -EINVAL;
}
*ui = t;
+ sbi->current_reserved_blocks = min(sbi->reserved_blocks,
+ sbi->user_block_count - valid_user_blocks(sbi));
spin_unlock(&sbi->stat_lock);
return count;
}
if (!strcmp(a->attr.name, "discard_granularity")) {
- struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
- int i;
-
if (t == 0 || t > MAX_PLIST_NUM)
return -EINVAL;
if (t == *ui)
return count;
-
- mutex_lock(&dcc->cmd_lock);
- for (i = 0; i < MAX_PLIST_NUM; i++) {
- if (i >= t - 1)
- dcc->pend_list_tag[i] |= P_ACTIVE;
- else
- dcc->pend_list_tag[i] &= (~P_ACTIVE);
- }
- mutex_unlock(&dcc->cmd_lock);
-
*ui = t;
return count;
}
@@ -222,6 +229,8 @@ enum feat_id {
FEAT_EXTRA_ATTR,
FEAT_PROJECT_QUOTA,
FEAT_INODE_CHECKSUM,
+ FEAT_FLEXIBLE_INLINE_XATTR,
+ FEAT_QUOTA_INO,
};
static ssize_t f2fs_feature_show(struct f2fs_attr *a,
@@ -234,6 +243,8 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a,
case FEAT_EXTRA_ATTR:
case FEAT_PROJECT_QUOTA:
case FEAT_INODE_CHECKSUM:
+ case FEAT_FLEXIBLE_INLINE_XATTR:
+ case FEAT_QUOTA_INO:
return snprintf(buf, PAGE_SIZE, "supported\n");
}
return 0;
@@ -279,6 +290,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
@@ -291,8 +303,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
#endif
+F2FS_GENERAL_RO_ATTR(dirty_segments);
F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
F2FS_GENERAL_RO_ATTR(features);
+F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
#ifdef CONFIG_F2FS_FS_ENCRYPTION
F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO);
@@ -304,6 +318,8 @@ F2FS_FEATURE_RO_ATTR(atomic_write, FEAT_ATOMIC_WRITE);
F2FS_FEATURE_RO_ATTR(extra_attr, FEAT_EXTRA_ATTR);
F2FS_FEATURE_RO_ATTR(project_quota, FEAT_PROJECT_QUOTA);
F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM);
+F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR);
+F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
@@ -321,6 +337,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(min_ipu_util),
ATTR_LIST(min_fsync_blocks),
ATTR_LIST(min_hot_blocks),
+ ATTR_LIST(min_ssr_sections),
ATTR_LIST(max_victim_search),
ATTR_LIST(dir_level),
ATTR_LIST(ram_thresh),
@@ -333,9 +350,11 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(inject_rate),
ATTR_LIST(inject_type),
#endif
+ ATTR_LIST(dirty_segments),
ATTR_LIST(lifetime_write_kbytes),
ATTR_LIST(features),
ATTR_LIST(reserved_blocks),
+ ATTR_LIST(current_reserved_blocks),
NULL,
};
@@ -350,6 +369,8 @@ static struct attribute *f2fs_feat_attrs[] = {
ATTR_LIST(extra_attr),
ATTR_LIST(project_quota),
ATTR_LIST(inode_checksum),
+ ATTR_LIST(flexible_inline_xattr),
+ ATTR_LIST(quota_ino),
NULL,
};
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index ab658419552b..7acf56ebda65 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -264,12 +264,12 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
return entry;
}
-static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr,
- void **last_addr, int index,
- size_t len, const char *name)
+static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode,
+ void *base_addr, void **last_addr, int index,
+ size_t len, const char *name)
{
struct f2fs_xattr_entry *entry;
- unsigned int inline_size = F2FS_INLINE_XATTR_ADDRS << 2;
+ unsigned int inline_size = inline_xattr_size(inode);
list_for_each_xattr(entry, base_addr) {
if ((void *)entry + sizeof(__u32) > base_addr + inline_size ||
@@ -288,12 +288,54 @@ static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr,
return entry;
}
+static int read_inline_xattr(struct inode *inode, struct page *ipage,
+ void *txattr_addr)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ unsigned int inline_size = inline_xattr_size(inode);
+ struct page *page = NULL;
+ void *inline_addr;
+
+ if (ipage) {
+ inline_addr = inline_xattr_addr(inode, ipage);
+ } else {
+ page = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ inline_addr = inline_xattr_addr(inode, page);
+ }
+ memcpy(txattr_addr, inline_addr, inline_size);
+ f2fs_put_page(page, 1);
+
+ return 0;
+}
+
+static int read_xattr_block(struct inode *inode, void *txattr_addr)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ nid_t xnid = F2FS_I(inode)->i_xattr_nid;
+ unsigned int inline_size = inline_xattr_size(inode);
+ struct page *xpage;
+ void *xattr_addr;
+
+ /* The inode already has an extended attribute block. */
+ xpage = get_node_page(sbi, xnid);
+ if (IS_ERR(xpage))
+ return PTR_ERR(xpage);
+
+ xattr_addr = page_address(xpage);
+ memcpy(txattr_addr + inline_size, xattr_addr, VALID_XATTR_BLOCK_SIZE);
+ f2fs_put_page(xpage, 1);
+
+ return 0;
+}
+
static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
unsigned int index, unsigned int len,
const char *name, struct f2fs_xattr_entry **xe,
void **base_addr)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
void *cur_addr, *txattr_addr, *last_addr = NULL;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0;
@@ -310,23 +352,11 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
/* read from inline xattr */
if (inline_size) {
- struct page *page = NULL;
- void *inline_addr;
-
- if (ipage) {
- inline_addr = inline_xattr_addr(ipage);
- } else {
- page = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
- goto out;
- }
- inline_addr = inline_xattr_addr(page);
- }
- memcpy(txattr_addr, inline_addr, inline_size);
- f2fs_put_page(page, 1);
+ err = read_inline_xattr(inode, ipage, txattr_addr);
+ if (err)
+ goto out;
- *xe = __find_inline_xattr(txattr_addr, &last_addr,
+ *xe = __find_inline_xattr(inode, txattr_addr, &last_addr,
index, len, name);
if (*xe)
goto check;
@@ -334,19 +364,9 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
/* read from xattr node block */
if (xnid) {
- struct page *xpage;
- void *xattr_addr;
-
- /* The inode already has an extended attribute block. */
- xpage = get_node_page(sbi, xnid);
- if (IS_ERR(xpage)) {
- err = PTR_ERR(xpage);
+ err = read_xattr_block(inode, txattr_addr);
+ if (err)
goto out;
- }
-
- xattr_addr = page_address(xpage);
- memcpy(txattr_addr + inline_size, xattr_addr, size);
- f2fs_put_page(xpage, 1);
}
if (last_addr)
@@ -371,7 +391,6 @@ out:
static int read_all_xattrs(struct inode *inode, struct page *ipage,
void **base_addr)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_xattr_header *header;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
unsigned int size = VALID_XATTR_BLOCK_SIZE;
@@ -386,38 +405,16 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage,
/* read from inline xattr */
if (inline_size) {
- struct page *page = NULL;
- void *inline_addr;
-
- if (ipage) {
- inline_addr = inline_xattr_addr(ipage);
- } else {
- page = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
- goto fail;
- }
- inline_addr = inline_xattr_addr(page);
- }
- memcpy(txattr_addr, inline_addr, inline_size);
- f2fs_put_page(page, 1);
+ err = read_inline_xattr(inode, ipage, txattr_addr);
+ if (err)
+ goto fail;
}
/* read from xattr node block */
if (xnid) {
- struct page *xpage;
- void *xattr_addr;
-
- /* The inode already has an extended attribute block. */
- xpage = get_node_page(sbi, xnid);
- if (IS_ERR(xpage)) {
- err = PTR_ERR(xpage);
+ err = read_xattr_block(inode, txattr_addr);
+ if (err)
goto fail;
- }
-
- xattr_addr = page_address(xpage);
- memcpy(txattr_addr + inline_size, xattr_addr, size);
- f2fs_put_page(xpage, 1);
}
header = XATTR_HDR(txattr_addr);
@@ -439,10 +436,12 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
size_t inline_size = inline_xattr_size(inode);
+ struct page *in_page = NULL;
void *xattr_addr;
+ void *inline_addr = NULL;
struct page *xpage;
nid_t new_nid = 0;
- int err;
+ int err = 0;
if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid)
if (!alloc_nid(sbi, &new_nid))
@@ -450,30 +449,30 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
/* write to inline xattr */
if (inline_size) {
- struct page *page = NULL;
- void *inline_addr;
-
if (ipage) {
- inline_addr = inline_xattr_addr(ipage);
- f2fs_wait_on_page_writeback(ipage, NODE, true);
- set_page_dirty(ipage);
+ inline_addr = inline_xattr_addr(inode, ipage);
} else {
- page = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(page)) {
+ in_page = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(in_page)) {
alloc_nid_failed(sbi, new_nid);
- return PTR_ERR(page);
+ return PTR_ERR(in_page);
}
- inline_addr = inline_xattr_addr(page);
- f2fs_wait_on_page_writeback(page, NODE, true);
+ inline_addr = inline_xattr_addr(inode, in_page);
}
- memcpy(inline_addr, txattr_addr, inline_size);
- f2fs_put_page(page, 1);
+ f2fs_wait_on_page_writeback(ipage ? ipage : in_page,
+ NODE, true);
/* no need to use xattr node block */
if (hsize <= inline_size) {
- err = truncate_xattr_node(inode, ipage);
+ err = truncate_xattr_node(inode);
alloc_nid_failed(sbi, new_nid);
- return err;
+ if (err) {
+ f2fs_put_page(in_page, 1);
+ return err;
+ }
+ memcpy(inline_addr, txattr_addr, inline_size);
+ set_page_dirty(ipage ? ipage : in_page);
+ goto in_page_out;
}
}
@@ -482,7 +481,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
if (IS_ERR(xpage)) {
alloc_nid_failed(sbi, new_nid);
- return PTR_ERR(xpage);
+ goto in_page_out;
}
f2fs_bug_on(sbi, new_nid);
f2fs_wait_on_page_writeback(xpage, NODE, true);
@@ -492,17 +491,24 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
xpage = new_node_page(&dn, XATTR_NODE_OFFSET);
if (IS_ERR(xpage)) {
alloc_nid_failed(sbi, new_nid);
- return PTR_ERR(xpage);
+ goto in_page_out;
}
alloc_nid_done(sbi, new_nid);
}
-
xattr_addr = page_address(xpage);
+
+ if (inline_size)
+ memcpy(inline_addr, txattr_addr, inline_size);
memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE);
+
+ if (inline_size)
+ set_page_dirty(ipage ? ipage : in_page);
set_page_dirty(xpage);
- f2fs_put_page(xpage, 1);
- return 0;
+ f2fs_put_page(xpage, 1);
+in_page_out:
+ f2fs_put_page(in_page, 1);
+ return err;
}
int f2fs_getxattr(struct inode *inode, int index, const char *name,
@@ -721,6 +727,10 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int err;
+ err = dquot_initialize(inode);
+ if (err)
+ return err;
+
/* this case is only from init_inode_metadata */
if (ipage)
return __f2fs_setxattr(inode, index, name, value,
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index de11206dda63..48fe91e86c2a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -173,19 +173,33 @@ static void wb_wakeup(struct bdi_writeback *wb)
spin_unlock_bh(&wb->work_lock);
}
+static void finish_writeback_work(struct bdi_writeback *wb,
+ struct wb_writeback_work *work)
+{
+ struct wb_completion *done = work->done;
+
+ if (work->auto_free)
+ kfree(work);
+ if (done && atomic_dec_and_test(&done->cnt))
+ wake_up_all(&wb->bdi->wb_waitq);
+}
+
static void wb_queue_work(struct bdi_writeback *wb,
struct wb_writeback_work *work)
{
trace_writeback_queue(wb, work);
- spin_lock_bh(&wb->work_lock);
- if (!test_bit(WB_registered, &wb->state))
- goto out_unlock;
if (work->done)
atomic_inc(&work->done->cnt);
- list_add_tail(&work->list, &wb->work_list);
- mod_delayed_work(bdi_wq, &wb->dwork, 0);
-out_unlock:
+
+ spin_lock_bh(&wb->work_lock);
+
+ if (test_bit(WB_registered, &wb->state)) {
+ list_add_tail(&work->list, &wb->work_list);
+ mod_delayed_work(bdi_wq, &wb->dwork, 0);
+ } else
+ finish_writeback_work(wb, work);
+
spin_unlock_bh(&wb->work_lock);
}
@@ -1839,16 +1853,9 @@ static long wb_do_writeback(struct bdi_writeback *wb)
set_bit(WB_writeback_running, &wb->state);
while ((work = get_next_work_item(wb)) != NULL) {
- struct wb_completion *done = work->done;
-
trace_writeback_exec(wb, work);
-
wrote += wb_writeback(wb, work);
-
- if (work->auto_free)
- kfree(work);
- if (done && atomic_dec_and_test(&done->cnt))
- wake_up_all(&wb->bdi->wb_waitq);
+ finish_writeback_work(wb, work);
}
/*
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 5e425469f0c2..1543aa1b2a93 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -255,7 +255,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
goto out;
}
if ((flags ^ new_flags) & GFS2_DIF_JDATA) {
- if (flags & GFS2_DIF_JDATA)
+ if (new_flags & GFS2_DIF_JDATA)
gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH);
error = filemap_fdatawrite(inode->i_mapping);
if (error)
@@ -263,6 +263,8 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
error = filemap_fdatawait(inode->i_mapping);
if (error)
goto out;
+ if (new_flags & GFS2_DIF_JDATA)
+ gfs2_ordered_del_inode(ip);
}
error = gfs2_trans_begin(sdp, RES_DINODE, 0);
if (error)
diff --git a/fs/locks.c b/fs/locks.c
index 8eddae23e10b..b515e65f1376 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2220,10 +2220,12 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
error = do_lock_file_wait(filp, cmd, file_lock);
/*
- * Attempt to detect a close/fcntl race and recover by
- * releasing the lock that was just acquired.
+ * Attempt to detect a close/fcntl race and recover by releasing the
+ * lock that was just acquired. There is no need to do that when we're
+ * unlocking though, or for OFD locks.
*/
- if (!error && file_lock->fl_type != F_UNLCK) {
+ if (!error && file_lock->fl_type != F_UNLCK &&
+ !(file_lock->fl_flags & FL_OFDLCK)) {
/*
* We need that spin_lock here - it prevents reordering between
* update of i_flctx->flc_posix and check for it done in
@@ -2362,10 +2364,12 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
error = do_lock_file_wait(filp, cmd, file_lock);
/*
- * Attempt to detect a close/fcntl race and recover by
- * releasing the lock that was just acquired.
+ * Attempt to detect a close/fcntl race and recover by releasing the
+ * lock that was just acquired. There is no need to do that when we're
+ * unlocking though, or for OFD locks.
*/
- if (!error && file_lock->fl_type != F_UNLCK) {
+ if (!error && file_lock->fl_type != F_UNLCK &&
+ !(file_lock->fl_flags & FL_OFDLCK)) {
/*
* We need that spin_lock here - it prevents reordering between
* update of i_flctx->flc_posix and check for it done in
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 348e0a05bd18..c690a1c0c4e5 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1260,7 +1260,7 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
return 0;
}
- error = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ error = nfs_lookup_verify_inode(inode, flags);
dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
__func__, inode->i_ino, error ? "invalid" : "valid");
return !error;
@@ -1420,6 +1420,7 @@ static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
const struct dentry_operations nfs4_dentry_operations = {
.d_revalidate = nfs4_lookup_revalidate,
+ .d_weak_revalidate = nfs_weak_revalidate,
.d_delete = nfs_dentry_delete,
.d_iput = nfs_dentry_iput,
.d_automount = nfs_d_automount,
@@ -2050,7 +2051,7 @@ out:
if (new_inode != NULL)
nfs_drop_nlink(new_inode);
d_move(old_dentry, new_dentry);
- nfs_set_verifier(new_dentry,
+ nfs_set_verifier(old_dentry,
nfs_save_change_attribute(new_dir));
} else if (error == -ENOENT)
nfs_dentry_handle_enoent(old_dentry);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 10410e8b5853..63498e1a542a 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -895,9 +895,9 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead;
server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead;
- if (server->rsize > server_resp_sz)
+ if (!server->rsize || server->rsize > server_resp_sz)
server->rsize = server_resp_sz;
- if (server->wsize > server_rqst_sz)
+ if (!server->wsize || server->wsize > server_rqst_sz)
server->wsize = server_rqst_sz;
#endif /* CONFIG_NFS_V4_1 */
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6fef53f18dcf..8ef6f70c9e25 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -38,7 +38,6 @@
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/errno.h>
-#include <linux/file.h>
#include <linux/string.h>
#include <linux/ratelimit.h>
#include <linux/printk.h>
@@ -5738,7 +5737,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
p->server = server;
atomic_inc(&lsp->ls_count);
p->ctx = get_nfs_open_context(ctx);
- get_file(fl->fl_file);
memcpy(&p->fl, fl, sizeof(p->fl));
return p;
out_free_seqid:
@@ -5851,7 +5849,6 @@ static void nfs4_lock_release(void *calldata)
nfs_free_seqid(data->arg.lock_seqid);
nfs4_put_lock_state(data->lsp);
put_nfs_open_context(data->ctx);
- fput(data->fl.fl_file);
kfree(data);
dprintk("%s: done!\n", __func__);
}
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e8d1d6c5000c..9a0b219ff74d 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1680,7 +1680,6 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
break;
case -NFS4ERR_STALE_CLIENTID:
set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
- nfs4_state_clear_reclaim_reboot(clp);
nfs4_state_start_reclaim_reboot(clp);
break;
case -NFS4ERR_EXPIRED:
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 9d46a0bdd9f9..a260060042ad 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -59,6 +59,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
GROUP_AT(gi, i) = exp->ex_anon_gid;
else
GROUP_AT(gi, i) = GROUP_AT(rqgi, i);
+
+ /* Each thread allocates its own gi, no race */
+ groups_sort(gi);
}
} else {
gi = get_group_info(rqgi);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 421935f3d909..11c67e8b939d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3379,7 +3379,9 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
/* ignore lock owners */
if (local->st_stateowner->so_is_open_owner == 0)
continue;
- if (local->st_stateowner == &oo->oo_owner) {
+ if (local->st_stateowner != &oo->oo_owner)
+ continue;
+ if (local->st_stid.sc_type == NFS4_OPEN_STID) {
ret = local;
atomic_inc(&ret->st_stid.sc_count);
break;
@@ -3388,6 +3390,52 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
return ret;
}
+static __be32
+nfsd4_verify_open_stid(struct nfs4_stid *s)
+{
+ __be32 ret = nfs_ok;
+
+ switch (s->sc_type) {
+ default:
+ break;
+ case NFS4_CLOSED_STID:
+ case NFS4_CLOSED_DELEG_STID:
+ ret = nfserr_bad_stateid;
+ break;
+ case NFS4_REVOKED_DELEG_STID:
+ ret = nfserr_deleg_revoked;
+ }
+ return ret;
+}
+
+/* Lock the stateid st_mutex, and deal with races with CLOSE */
+static __be32
+nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp)
+{
+ __be32 ret;
+
+ mutex_lock(&stp->st_mutex);
+ ret = nfsd4_verify_open_stid(&stp->st_stid);
+ if (ret != nfs_ok)
+ mutex_unlock(&stp->st_mutex);
+ return ret;
+}
+
+static struct nfs4_ol_stateid *
+nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
+{
+ struct nfs4_ol_stateid *stp;
+ for (;;) {
+ spin_lock(&fp->fi_lock);
+ stp = nfsd4_find_existing_open(fp, open);
+ spin_unlock(&fp->fi_lock);
+ if (!stp || nfsd4_lock_ol_stateid(stp) == nfs_ok)
+ break;
+ nfs4_put_stid(&stp->st_stid);
+ }
+ return stp;
+}
+
static struct nfs4_openowner *
alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
struct nfsd4_compound_state *cstate)
@@ -3420,23 +3468,27 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
}
static struct nfs4_ol_stateid *
-init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
- struct nfsd4_open *open)
+init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
{
struct nfs4_openowner *oo = open->op_openowner;
struct nfs4_ol_stateid *retstp = NULL;
+ struct nfs4_ol_stateid *stp;
+ stp = open->op_stp;
/* We are moving these outside of the spinlocks to avoid the warnings */
mutex_init(&stp->st_mutex);
mutex_lock(&stp->st_mutex);
+retry:
spin_lock(&oo->oo_owner.so_client->cl_lock);
spin_lock(&fp->fi_lock);
retstp = nfsd4_find_existing_open(fp, open);
if (retstp)
goto out_unlock;
+
+ open->op_stp = NULL;
atomic_inc(&stp->st_stid.sc_count);
stp->st_stid.sc_type = NFS4_OPEN_STID;
INIT_LIST_HEAD(&stp->st_locks);
@@ -3453,11 +3505,16 @@ out_unlock:
spin_unlock(&fp->fi_lock);
spin_unlock(&oo->oo_owner.so_client->cl_lock);
if (retstp) {
- mutex_lock(&retstp->st_mutex);
- /* Not that we need to, just for neatness */
+ /* Handle races with CLOSE */
+ if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) {
+ nfs4_put_stid(&retstp->st_stid);
+ goto retry;
+ }
+ /* To keep mutex tracking happy */
mutex_unlock(&stp->st_mutex);
+ stp = retstp;
}
- return retstp;
+ return stp;
}
/*
@@ -4260,9 +4317,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
struct nfs4_client *cl = open->op_openowner->oo_owner.so_client;
struct nfs4_file *fp = NULL;
struct nfs4_ol_stateid *stp = NULL;
- struct nfs4_ol_stateid *swapstp = NULL;
struct nfs4_delegation *dp = NULL;
__be32 status;
+ bool new_stp = false;
/*
* Lookup file; if found, lookup stateid and check open request,
@@ -4274,9 +4331,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
status = nfs4_check_deleg(cl, open, &dp);
if (status)
goto out;
- spin_lock(&fp->fi_lock);
- stp = nfsd4_find_existing_open(fp, open);
- spin_unlock(&fp->fi_lock);
+ stp = nfsd4_find_and_lock_existing_open(fp, open);
} else {
open->op_file = NULL;
status = nfserr_bad_stateid;
@@ -4284,41 +4339,31 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
goto out;
}
+ if (!stp) {
+ stp = init_open_stateid(fp, open);
+ if (!open->op_stp)
+ new_stp = true;
+ }
+
/*
* OPEN the file, or upgrade an existing OPEN.
* If truncate fails, the OPEN fails.
+ *
+ * stp is already locked.
*/
- if (stp) {
+ if (!new_stp) {
/* Stateid was found, this is an OPEN upgrade */
- mutex_lock(&stp->st_mutex);
status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
if (status) {
mutex_unlock(&stp->st_mutex);
goto out;
}
} else {
- stp = open->op_stp;
- open->op_stp = NULL;
- /*
- * init_open_stateid() either returns a locked stateid
- * it found, or initializes and locks the new one we passed in
- */
- swapstp = init_open_stateid(stp, fp, open);
- if (swapstp) {
- nfs4_put_stid(&stp->st_stid);
- stp = swapstp;
- status = nfs4_upgrade_open(rqstp, fp, current_fh,
- stp, open);
- if (status) {
- mutex_unlock(&stp->st_mutex);
- goto out;
- }
- goto upgrade_out;
- }
status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
if (status) {
- mutex_unlock(&stp->st_mutex);
+ stp->st_stid.sc_type = NFS4_CLOSED_STID;
release_open_stateid(stp);
+ mutex_unlock(&stp->st_mutex);
goto out;
}
@@ -4327,7 +4372,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
if (stp->st_clnt_odstate == open->op_odstate)
open->op_odstate = NULL;
}
-upgrade_out:
+
nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid);
mutex_unlock(&stp->st_mutex);
@@ -5153,7 +5198,6 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
bool unhashed;
LIST_HEAD(reaplist);
- s->st_stid.sc_type = NFS4_CLOSED_STID;
spin_lock(&clp->cl_lock);
unhashed = unhash_open_stateid(s, &reaplist);
@@ -5192,10 +5236,12 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfsd4_bump_seqid(cstate, status);
if (status)
goto out;
+
+ stp->st_stid.sc_type = NFS4_CLOSED_STID;
nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
- mutex_unlock(&stp->st_mutex);
nfsd4_close_open_stateid(stp);
+ mutex_unlock(&stp->st_mutex);
/* put reference from nfs4_preprocess_seqid_op */
nfs4_put_stid(&stp->st_stid);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 5be1fa6b676d..b6eb56d18568 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -151,7 +151,8 @@ int nfsd_vers(int vers, enum vers_op change)
int nfsd_minorversion(u32 minorversion, enum vers_op change)
{
- if (minorversion > NFSD_SUPPORTED_MINOR_VERSION)
+ if (minorversion > NFSD_SUPPORTED_MINOR_VERSION &&
+ change != NFSD_AVAIL)
return -1;
switch(change) {
case NFSD_SET:
@@ -329,23 +330,20 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
void nfsd_reset_versions(void)
{
- int found_one = 0;
int i;
- for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) {
- if (nfsd_program.pg_vers[i])
- found_one = 1;
- }
-
- if (!found_one) {
- for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++)
- nfsd_program.pg_vers[i] = nfsd_version[i];
-#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
- for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++)
- nfsd_acl_program.pg_vers[i] =
- nfsd_acl_version[i];
-#endif
- }
+ for (i = 0; i < NFSD_NRVERS; i++)
+ if (nfsd_vers(i, NFSD_TEST))
+ return;
+
+ for (i = 0; i < NFSD_NRVERS; i++)
+ if (i != 4)
+ nfsd_vers(i, NFSD_SET);
+ else {
+ int minor = 0;
+ while (nfsd_minorversion(minor, NFSD_SET) >= 0)
+ minor++;
+ }
}
/*
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 501ecc4a1ac4..1d738723a41a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1166,13 +1166,6 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
}
size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
if (size_change) {
- /*
- * Here we should wait dio to finish before inode lock
- * to avoid a deadlock between ocfs2_setattr() and
- * ocfs2_dio_end_io_write()
- */
- inode_dio_wait(inode);
-
status = ocfs2_rw_lock(inode, 1);
if (status < 0) {
mlog_errno(status);
@@ -1193,6 +1186,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
if (status)
goto bail_unlock;
+ inode_dio_wait(inode);
+
if (i_size_read(inode) >= attr->ia_size) {
if (ocfs2_should_order_data(inode)) {
status = ocfs2_begin_ordered_truncate(inode,
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 15f327bed8c6..7340c36978a3 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -14,6 +14,7 @@
#include <linux/tty.h>
#include <linux/seq_file.h>
#include <linux/bitops.h>
+#include "internal.h"
/*
* The /proc/tty directory inodes...
@@ -164,7 +165,7 @@ void proc_tty_unregister_driver(struct tty_driver *driver)
if (!ent)
return;
- remove_proc_entry(driver->driver_name, proc_tty_driver);
+ remove_proc_entry(ent->name, proc_tty_driver);
driver->proc_entry = NULL;
}
diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c
index 5ac0b0bbb0ec..dd76ecf33cf3 100644
--- a/fs/sdcardfs/file.c
+++ b/fs/sdcardfs/file.c
@@ -18,6 +18,7 @@
* General Public License.
*/
+#include <linux/fsnotify.h>
#include "sdcardfs.h"
#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE
#include <linux/backing-dev.h>
@@ -259,6 +260,7 @@ static int sdcardfs_open(struct inode *inode, struct file *file)
fput(lower_file); /* fput calls dput for lower_dentry */
}
} else {
+ fsnotify_open(lower_file);
sdcardfs_set_lower_file(file, lower_file);
}
diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c
index 85eb8ebb5372..36c36590bcea 100644
--- a/fs/sdcardfs/inode.c
+++ b/fs/sdcardfs/inode.c
@@ -641,7 +641,7 @@ static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int ma
*/
copy_attrs(&tmp, inode);
tmp.i_uid = make_kuid(&init_user_ns, top->d_uid);
- tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, top));
+ tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, inode->i_sb, top));
tmp.i_mode = (inode->i_mode & S_IFMT)
| get_mode(mnt, SDCARDFS_I(inode), top);
data_put(top);
@@ -718,7 +718,7 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct
*/
copy_attrs(&tmp, inode);
tmp.i_uid = make_kuid(&init_user_ns, top->d_uid);
- tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, top));
+ tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, dentry->d_sb, top));
tmp.i_mode = (inode->i_mode & S_IFMT)
| get_mode(mnt, SDCARDFS_I(inode), top);
tmp.i_size = i_size_read(inode);
@@ -819,6 +819,7 @@ static int sdcardfs_fillattr(struct vfsmount *mnt,
{
struct sdcardfs_inode_info *info = SDCARDFS_I(inode);
struct sdcardfs_inode_data *top = top_data_get(info);
+ struct super_block *sb = inode->i_sb;
if (!top)
return -EINVAL;
@@ -828,7 +829,7 @@ static int sdcardfs_fillattr(struct vfsmount *mnt,
stat->mode = (inode->i_mode & S_IFMT) | get_mode(mnt, info, top);
stat->nlink = inode->i_nlink;
stat->uid = make_kuid(&init_user_ns, top->d_uid);
- stat->gid = make_kgid(&init_user_ns, get_gid(mnt, top));
+ stat->gid = make_kgid(&init_user_ns, get_gid(mnt, sb, top));
stat->rdev = inode->i_rdev;
stat->size = i_size_read(inode);
stat->atime = inode->i_atime;
diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c
index 0a2b5167e9a2..ac27bb301c5e 100644
--- a/fs/sdcardfs/main.c
+++ b/fs/sdcardfs/main.c
@@ -33,6 +33,7 @@ enum {
Opt_userid,
Opt_reserved_mb,
Opt_gid_derivation,
+ Opt_default_normal,
Opt_err,
};
@@ -45,6 +46,7 @@ static const match_table_t sdcardfs_tokens = {
{Opt_userid, "userid=%d"},
{Opt_multiuser, "multiuser"},
{Opt_gid_derivation, "derive_gid"},
+ {Opt_default_normal, "default_normal"},
{Opt_reserved_mb, "reserved_mb=%u"},
{Opt_err, NULL}
};
@@ -68,6 +70,7 @@ static int parse_options(struct super_block *sb, char *options, int silent,
opts->reserved_mb = 0;
/* by default, gid derivation is off */
opts->gid_derivation = false;
+ opts->default_normal = false;
*debug = 0;
@@ -122,6 +125,9 @@ static int parse_options(struct super_block *sb, char *options, int silent,
case Opt_gid_derivation:
opts->gid_derivation = true;
break;
+ case Opt_default_normal:
+ opts->default_normal = true;
+ break;
/* unknown option */
default:
if (!silent)
@@ -175,6 +181,7 @@ int parse_options_remount(struct super_block *sb, char *options, int silent,
return 0;
vfsopts->mask = option;
break;
+ case Opt_default_normal:
case Opt_multiuser:
case Opt_userid:
case Opt_fsuid:
diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h
index 88b92b2f1872..c6c49ea182ec 100644
--- a/fs/sdcardfs/sdcardfs.h
+++ b/fs/sdcardfs/sdcardfs.h
@@ -220,6 +220,7 @@ struct sdcardfs_mount_options {
userid_t fs_user_id;
bool multiuser;
bool gid_derivation;
+ bool default_normal;
unsigned int reserved_mb;
};
@@ -413,11 +414,13 @@ static inline void set_top(struct sdcardfs_inode_info *info,
}
static inline int get_gid(struct vfsmount *mnt,
+ struct super_block *sb,
struct sdcardfs_inode_data *data)
{
- struct sdcardfs_vfsmount_options *opts = mnt->data;
+ struct sdcardfs_vfsmount_options *vfsopts = mnt->data;
+ struct sdcardfs_sb_info *sbi = SDCARDFS_SB(sb);
- if (opts->gid == AID_SDCARD_RW)
+ if (vfsopts->gid == AID_SDCARD_RW && !sbi->options.default_normal)
/* As an optimization, certain trusted system components only run
* as owner but operate across all users. Since we're now handing
* out the sdcard_rw GID only to trusted apps, we're okay relaxing
@@ -426,7 +429,7 @@ static inline int get_gid(struct vfsmount *mnt,
*/
return AID_SDCARD_RW;
else
- return multiuser_get_uid(data->userid, opts->gid);
+ return multiuser_get_uid(data->userid, vfsopts->gid);
}
static inline int get_mode(struct vfsmount *mnt,
diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c
index b89947d878e3..87d6f836592e 100644
--- a/fs/sdcardfs/super.c
+++ b/fs/sdcardfs/super.c
@@ -304,6 +304,8 @@ static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m,
seq_printf(m, ",userid=%u", opts->fs_user_id);
if (opts->gid_derivation)
seq_puts(m, ",derive_gid");
+ if (opts->default_normal)
+ seq_puts(m, ",default_normal");
if (opts->reserved_mb != 0)
seq_printf(m, ",reserved=%uMB", opts->reserved_mb);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 81155b9b445b..ee09c97f3ab2 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -705,7 +705,7 @@ static loff_t udf_check_vsd(struct super_block *sb)
else
sectorsize = sb->s_blocksize;
- sector += (sbi->s_session << sb->s_blocksize_bits);
+ sector += (((loff_t)sbi->s_session) << sb->s_blocksize_bits);
udf_debug("Starting at sector %u (%ld byte sectors)\n",
(unsigned int)(sector >> sb->s_blocksize_bits),
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index d473e6e07a7e..d859d8bd1f96 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -386,7 +386,7 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
* in such case.
*/
down_read(&mm->mmap_sem);
- ret = 0;
+ ret = VM_FAULT_NOPAGE;
}
}
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 75884aecf920..d98ba57ef01a 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -2670,7 +2670,7 @@ xfs_bmap_add_extent_unwritten_real(
&i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
- cur->bc_rec.b.br_state = XFS_EXT_NORM;
+ cur->bc_rec.b.br_state = new->br_state;
if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8cab78eeb0c2..b34d1685936d 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -738,7 +738,7 @@ xlog_find_head(
* in the in-core log. The following number can be made tighter if
* we actually look at the block size of the filesystem.
*/
- num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
+ num_scan_bblks = min_t(int, log_bbnum, XLOG_TOTAL_REC_SHIFT(log));
if (head_blk >= num_scan_bblks) {
/*
* We are guaranteed that the entire check can be performed
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index b58fd667f87b..598f8c7d719f 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -24,6 +24,8 @@
* __kprobes_text_start, __kprobes_text_end
* __entry_text_start, __entry_text_end
* __ctors_start, __ctors_end
+ * __irqentry_text_start, __irqentry_text_end
+ * __softirqentry_text_start, __softirqentry_text_end
*/
extern char _text[], _stext[], _etext[];
extern char _data[], _sdata[], _edata[];
@@ -35,6 +37,8 @@ extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
extern char __kprobes_text_start[], __kprobes_text_end[];
extern char __entry_text_start[], __entry_text_end[];
extern char __start_rodata[], __end_rodata[];
+extern char __irqentry_text_start[], __irqentry_text_end[];
+extern char __softirqentry_text_start[], __softirqentry_text_end[];
/* Start and end of .ctors section - used for constructor calls. */
extern char __ctors_start[], __ctors_end[];
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index a65eedc15e93..a97f28205dfa 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -465,15 +465,17 @@
*(.entry.text) \
VMLINUX_SYMBOL(__entry_text_end) = .;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
#define IRQENTRY_TEXT \
ALIGN_FUNCTION(); \
VMLINUX_SYMBOL(__irqentry_text_start) = .; \
*(.irqentry.text) \
VMLINUX_SYMBOL(__irqentry_text_end) = .;
-#else
-#define IRQENTRY_TEXT
-#endif
+
+#define SOFTIRQENTRY_TEXT \
+ ALIGN_FUNCTION(); \
+ VMLINUX_SYMBOL(__softirqentry_text_start) = .; \
+ *(.softirqentry.text) \
+ VMLINUX_SYMBOL(__softirqentry_text_end) = .;
/* Section used for early init (in .S files) */
#define HEAD_TEXT *(.head.text)
@@ -734,7 +736,14 @@
*/
#define PERCPU_INPUT(cacheline) \
VMLINUX_SYMBOL(__per_cpu_start) = .; \
+ VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \
*(.data..percpu..first) \
+ . = ALIGN(cacheline); \
+ *(.data..percpu..user_mapped) \
+ *(.data..percpu..user_mapped..shared_aligned) \
+ . = ALIGN(PAGE_SIZE); \
+ *(.data..percpu..user_mapped..page_aligned) \
+ VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \
. = ALIGN(PAGE_SIZE); \
*(.data..percpu..page_aligned) \
. = ALIGN(cacheline); \
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index 611b3d3bbab5..0ebdb4f2f0c8 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -83,6 +83,14 @@ int ahash_register_instance(struct crypto_template *tmpl,
struct ahash_instance *inst);
void ahash_free_instance(struct crypto_instance *inst);
+int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
+ unsigned int keylen);
+
+static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg)
+{
+ return alg->setkey != shash_no_setkey;
+}
+
int crypto_init_ahash_spawn(struct crypto_ahash_spawn *spawn,
struct hash_alg_common *alg,
struct crypto_instance *inst);
diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h
index c23ee1f7ee80..c2ff077168d3 100644
--- a/include/crypto/mcryptd.h
+++ b/include/crypto/mcryptd.h
@@ -26,6 +26,7 @@ static inline struct mcryptd_ahash *__mcryptd_ahash_cast(
struct mcryptd_cpu_queue {
struct crypto_queue queue;
+ spinlock_t q_lock;
struct work_struct work;
};
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 083cf0f2d729..04edcd32b409 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -163,6 +163,26 @@ void drm_printk(const char *level, unsigned int category,
/** \name Macros to make printk easier */
/*@{*/
+#define _DRM_PRINTK(once, level, fmt, ...) \
+ do { \
+ printk##once(KERN_##level "[" DRM_NAME "] " fmt, \
+ ##__VA_ARGS__); \
+ } while (0)
+
+#define DRM_INFO(fmt, ...) \
+ _DRM_PRINTK(, INFO, fmt, ##__VA_ARGS__)
+#define DRM_NOTE(fmt, ...) \
+ _DRM_PRINTK(, NOTICE, fmt, ##__VA_ARGS__)
+#define DRM_WARN(fmt, ...) \
+ _DRM_PRINTK(, WARNING, fmt, ##__VA_ARGS__)
+
+#define DRM_INFO_ONCE(fmt, ...) \
+ _DRM_PRINTK(_once, INFO, fmt, ##__VA_ARGS__)
+#define DRM_NOTE_ONCE(fmt, ...) \
+ _DRM_PRINTK(_once, NOTICE, fmt, ##__VA_ARGS__)
+#define DRM_WARN_ONCE(fmt, ...) \
+ _DRM_PRINTK(_once, WARNING, fmt, ##__VA_ARGS__)
+
/**
* Error output.
*
@@ -197,8 +217,6 @@ void drm_printk(const char *level, unsigned int category,
#define DRM_DEV_INFO(dev, fmt, ...) \
drm_dev_printk(dev, KERN_INFO, DRM_UT_NONE, __func__, "", fmt, \
##__VA_ARGS__)
-#define DRM_INFO(fmt, ...) \
- drm_printk(KERN_INFO, DRM_UT_NONE, __func__, "", fmt, ##__VA_ARGS__)
#define DRM_DEV_INFO_ONCE(dev, fmt, ...) \
({ \
@@ -208,7 +226,6 @@ void drm_printk(const char *level, unsigned int category,
DRM_DEV_INFO(dev, fmt, ##__VA_ARGS__); \
} \
})
-#define DRM_INFO_ONCE(fmt, ...) DRM_DEV_INFO_ONCE(NULL, fmt, ##__VA_ARGS__)
/**
* Debug output.
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9b9bbbdf84c9..e4c22be9108e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1695,43 +1695,26 @@ static const u_int64_t latency_x_axis_us[] = {
#define BLK_IO_LAT_HIST_ZERO 2
struct io_latency_state {
- u_int64_t latency_y_axis_read[ARRAY_SIZE(latency_x_axis_us) + 1];
- u_int64_t latency_reads_elems;
- u_int64_t latency_y_axis_write[ARRAY_SIZE(latency_x_axis_us) + 1];
- u_int64_t latency_writes_elems;
+ u_int64_t latency_y_axis[ARRAY_SIZE(latency_x_axis_us) + 1];
+ u_int64_t latency_elems;
+ u_int64_t latency_sum;
};
static inline void
-blk_update_latency_hist(struct io_latency_state *s,
- int read,
- u_int64_t delta_us)
+blk_update_latency_hist(struct io_latency_state *s, u_int64_t delta_us)
{
int i;
- for (i = 0; i < ARRAY_SIZE(latency_x_axis_us); i++) {
- if (delta_us < (u_int64_t)latency_x_axis_us[i]) {
- if (read)
- s->latency_y_axis_read[i]++;
- else
- s->latency_y_axis_write[i]++;
+ for (i = 0; i < ARRAY_SIZE(latency_x_axis_us); i++)
+ if (delta_us < (u_int64_t)latency_x_axis_us[i])
break;
- }
- }
- if (i == ARRAY_SIZE(latency_x_axis_us)) {
- /* Overflowed the histogram */
- if (read)
- s->latency_y_axis_read[i]++;
- else
- s->latency_y_axis_write[i]++;
- }
- if (read)
- s->latency_reads_elems++;
- else
- s->latency_writes_elems++;
+ s->latency_y_axis[i]++;
+ s->latency_elems++;
+ s->latency_sum += delta_us;
}
-void blk_zero_latency_hist(struct io_latency_state *s);
-ssize_t blk_latency_hist_show(struct io_latency_state *s, char *buf);
+ssize_t blk_latency_hist_show(char* name, struct io_latency_state *s,
+ char *buf, int buf_size);
#else /* CONFIG_BLOCK */
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4f6d29c8e3d8..f2157159b26f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -37,6 +37,7 @@ struct bpf_map {
u32 value_size;
u32 max_entries;
u32 pages;
+ bool unpriv_array;
struct user_struct *user;
const struct bpf_map_ops *ops;
struct work_struct work;
@@ -141,6 +142,7 @@ struct bpf_prog_aux {
struct bpf_array {
struct bpf_map map;
u32 elem_size;
+ u32 index_mask;
/* 'ownership' of prog_array is claimed by the first program that
* is going to use this map or by the first program which FD is stored
* in the map to make sure that all callers and callees have the same
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 89d9aa9e79bf..6fe974dbe741 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -234,12 +234,10 @@ static inline int block_page_mkwrite_return(int err)
{
if (err == 0)
return VM_FAULT_LOCKED;
- if (err == -EFAULT)
+ if (err == -EFAULT || err == -EAGAIN)
return VM_FAULT_NOPAGE;
if (err == -ENOMEM)
return VM_FAULT_OOM;
- if (err == -EAGAIN)
- return VM_FAULT_RETRY;
/* -ENOSPC, -EDQUOT, -EIO ... */
return VM_FAULT_SIGBUS;
}
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 1c95ef5a6ed9..99f7f5a13d92 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -40,6 +40,13 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr);
extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
+extern ssize_t cpu_show_meltdown(struct device *dev,
+ struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf);
+
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
const struct attribute_group **groups,
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 257db64562e5..9e120c92551b 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -87,6 +87,7 @@ extern int set_current_groups(struct group_info *);
extern void set_groups(struct cred *, struct group_info *);
extern int groups_search(const struct group_info *, kgid_t);
extern bool may_setgroups(void);
+extern void groups_sort(struct group_info *);
/* access the groups "array" with this macro */
#define GROUP_AT(gi, i) \
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index e71cb70a1ac2..b7c1e1a7ebac 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/uaccess.h>
+#include <linux/completion.h>
/*
* Autoloaded crypto modules should only use a prefixed name to avoid allowing
@@ -470,6 +471,45 @@ struct crypto_alg {
} CRYPTO_MINALIGN_ATTR;
/*
+ * A helper struct for waiting for completion of async crypto ops
+ */
+struct crypto_wait {
+ struct completion completion;
+ int err;
+};
+
+/*
+ * Macro for declaring a crypto op async wait object on stack
+ */
+#define DECLARE_CRYPTO_WAIT(_wait) \
+ struct crypto_wait _wait = { \
+ COMPLETION_INITIALIZER_ONSTACK((_wait).completion), 0 }
+
+/*
+ * Async ops completion helper functioons
+ */
+void crypto_req_done(struct crypto_async_request *req, int err);
+
+static inline int crypto_wait_req(int err, struct crypto_wait *wait)
+{
+ switch (err) {
+ case -EINPROGRESS:
+ case -EBUSY:
+ wait_for_completion(&wait->completion);
+ reinit_completion(&wait->completion);
+ err = wait->err;
+ break;
+ };
+
+ return err;
+}
+
+static inline void crypto_init_wait(struct crypto_wait *wait)
+{
+ init_completion(&wait->completion);
+}
+
+/*
* Algorithm registration interface.
*/
int crypto_register_alg(struct crypto_alg *alg);
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index c2a975e4a711..fef1caeddf54 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -36,6 +36,8 @@
#define F2FS_NODE_INO(sbi) (sbi->node_ino_num)
#define F2FS_META_INO(sbi) (sbi->meta_ino_num)
+#define F2FS_MAX_QUOTAS 3
+
#define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */
#define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */
#define F2FS_IO_SIZE_BYTES(sbi) (1 << ((sbi)->write_io_size_bits + 12)) /* B */
@@ -108,7 +110,8 @@ struct f2fs_super_block {
__u8 encryption_level; /* versioning level for encryption */
__u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */
struct f2fs_device devs[MAX_DEVICES]; /* device list */
- __u8 reserved[327]; /* valid reserved region */
+ __le32 qf_ino[F2FS_MAX_QUOTAS]; /* quota inode numbers */
+ __u8 reserved[315]; /* valid reserved region */
} __packed;
/*
@@ -184,7 +187,8 @@ struct f2fs_extent {
} __packed;
#define F2FS_NAME_LEN 255
-#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */
+/* 200 bytes for inline xattrs by default */
+#define DEFAULT_INLINE_XATTR_ADDRS 50
#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */
#define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \
get_extra_isize(inode))
@@ -238,7 +242,7 @@ struct f2fs_inode {
union {
struct {
__le16 i_extra_isize; /* extra inode attribute size */
- __le16 i_padding; /* padding */
+ __le16 i_inline_xattr_size; /* inline xattr size, unit: 4 bytes */
__le32 i_projid; /* project id */
__le32 i_inode_checksum;/* inode meta checksum */
__le32 i_extra_end[0]; /* for attribute size calculation */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index ccb98b459c59..677fa3b42194 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -466,6 +466,9 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
void bpf_int_jit_compile(struct bpf_prog *fp);
bool bpf_helper_changes_skb_data(void *func);
+struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
+ const struct bpf_insn *patch, u32 len);
+
#ifdef CONFIG_BPF_JIT
typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5b79adb9782e..19a5337f9dab 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1798,6 +1798,7 @@ struct super_operations {
#else
#define S_DAX 0 /* Make all the DAX code disappear */
#endif
+#define S_ENCRYPTED 16384 /* Encrypted file (using fs/crypto/) */
/*
* Note that nosuid etc flags are inode-specific: setting some file-system
@@ -1836,6 +1837,7 @@ struct super_operations {
#define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT)
#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC)
#define IS_DAX(inode) ((inode)->i_flags & S_DAX)
+#define IS_ENCRYPTED(inode) ((inode)->i_flags & S_ENCRYPTED)
#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \
(inode)->i_rdev == WHITEOUT_DEV)
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 115bb81912cc..94a8aae8f9e2 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -764,7 +764,7 @@ bool fscache_maybe_release_page(struct fscache_cookie *cookie,
{
if (fscache_cookie_valid(cookie) && PageFsCache(page))
return __fscache_maybe_release_page(cookie, page, gfp);
- return false;
+ return true;
}
/**
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
new file mode 100644
index 000000000000..8641e56b8f8a
--- /dev/null
+++ b/include/linux/fscrypt.h
@@ -0,0 +1,290 @@
+/*
+ * fscrypt.h: declarations for per-file encryption
+ *
+ * Filesystems that implement per-file encryption include this header
+ * file with the __FS_HAS_ENCRYPTION set according to whether that filesystem
+ * is being built with encryption support or not.
+ *
+ * Copyright (C) 2015, Google, Inc.
+ *
+ * Written by Michael Halcrow, 2015.
+ * Modified by Jaegeuk Kim, 2015.
+ */
+#ifndef _LINUX_FSCRYPT_H
+#define _LINUX_FSCRYPT_H
+
+#include <linux/key.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/bio.h>
+#include <linux/dcache.h>
+#include <crypto/skcipher.h>
+#include <uapi/linux/fs.h>
+
+#define FS_CRYPTO_BLOCK_SIZE 16
+
+struct fscrypt_info;
+
+struct fscrypt_ctx {
+ union {
+ struct {
+ struct page *bounce_page; /* Ciphertext page */
+ struct page *control_page; /* Original page */
+ } w;
+ struct {
+ struct bio *bio;
+ struct work_struct work;
+ } r;
+ struct list_head free_list; /* Free list */
+ };
+ u8 flags; /* Flags */
+};
+
+/**
+ * For encrypted symlinks, the ciphertext length is stored at the beginning
+ * of the string in little-endian format.
+ */
+struct fscrypt_symlink_data {
+ __le16 len;
+ char encrypted_path[1];
+} __packed;
+
+struct fscrypt_str {
+ unsigned char *name;
+ u32 len;
+};
+
+struct fscrypt_name {
+ const struct qstr *usr_fname;
+ struct fscrypt_str disk_name;
+ u32 hash;
+ u32 minor_hash;
+ struct fscrypt_str crypto_buf;
+};
+
+#define FSTR_INIT(n, l) { .name = n, .len = l }
+#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len)
+#define fname_name(p) ((p)->disk_name.name)
+#define fname_len(p) ((p)->disk_name.len)
+
+/*
+ * fscrypt superblock flags
+ */
+#define FS_CFLG_OWN_PAGES (1U << 1)
+
+/*
+ * crypto opertions for filesystems
+ */
+struct fscrypt_operations {
+ unsigned int flags;
+ const char *key_prefix;
+ int (*get_context)(struct inode *, void *, size_t);
+ int (*set_context)(struct inode *, const void *, size_t, void *);
+ bool (*dummy_context)(struct inode *);
+ bool (*empty_dir)(struct inode *);
+ unsigned (*max_namelen)(struct inode *);
+};
+
+static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
+{
+ if (inode->i_sb->s_cop->dummy_context &&
+ inode->i_sb->s_cop->dummy_context(inode))
+ return true;
+ return false;
+}
+
+static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
+ u32 filenames_mode)
+{
+ if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC &&
+ filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS)
+ return true;
+
+ if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS &&
+ filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS)
+ return true;
+
+ return false;
+}
+
+static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
+{
+ if (str->len == 1 && str->name[0] == '.')
+ return true;
+
+ if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.')
+ return true;
+
+ return false;
+}
+
+#if __FS_HAS_ENCRYPTION
+
+static inline struct page *fscrypt_control_page(struct page *page)
+{
+ return ((struct fscrypt_ctx *)page_private(page))->w.control_page;
+}
+
+static inline bool fscrypt_has_encryption_key(const struct inode *inode)
+{
+ return (inode->i_crypt_info != NULL);
+}
+
+#include <linux/fscrypt_supp.h>
+
+#else /* !__FS_HAS_ENCRYPTION */
+
+static inline struct page *fscrypt_control_page(struct page *page)
+{
+ WARN_ON_ONCE(1);
+ return ERR_PTR(-EINVAL);
+}
+
+static inline bool fscrypt_has_encryption_key(const struct inode *inode)
+{
+ return 0;
+}
+
+#include <linux/fscrypt_notsupp.h>
+#endif /* __FS_HAS_ENCRYPTION */
+
+/**
+ * fscrypt_require_key - require an inode's encryption key
+ * @inode: the inode we need the key for
+ *
+ * If the inode is encrypted, set up its encryption key if not already done.
+ * Then require that the key be present and return -ENOKEY otherwise.
+ *
+ * No locks are needed, and the key will live as long as the struct inode --- so
+ * it won't go away from under you.
+ *
+ * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code
+ * if a problem occurred while setting up the encryption key.
+ */
+static inline int fscrypt_require_key(struct inode *inode)
+{
+ if (IS_ENCRYPTED(inode)) {
+ int err = fscrypt_get_encryption_info(inode);
+
+ if (err)
+ return err;
+ if (!fscrypt_has_encryption_key(inode))
+ return -ENOKEY;
+ }
+ return 0;
+}
+
+/**
+ * fscrypt_prepare_link - prepare to link an inode into a possibly-encrypted directory
+ * @old_dentry: an existing dentry for the inode being linked
+ * @dir: the target directory
+ * @dentry: negative dentry for the target filename
+ *
+ * A new link can only be added to an encrypted directory if the directory's
+ * encryption key is available --- since otherwise we'd have no way to encrypt
+ * the filename. Therefore, we first set up the directory's encryption key (if
+ * not already done) and return an error if it's unavailable.
+ *
+ * We also verify that the link will not violate the constraint that all files
+ * in an encrypted directory tree use the same encryption policy.
+ *
+ * Return: 0 on success, -ENOKEY if the directory's encryption key is missing,
+ * -EPERM if the link would result in an inconsistent encryption policy, or
+ * another -errno code.
+ */
+static inline int fscrypt_prepare_link(struct dentry *old_dentry,
+ struct inode *dir,
+ struct dentry *dentry)
+{
+ if (IS_ENCRYPTED(dir))
+ return __fscrypt_prepare_link(d_inode(old_dentry), dir);
+ return 0;
+}
+
+/**
+ * fscrypt_prepare_rename - prepare for a rename between possibly-encrypted directories
+ * @old_dir: source directory
+ * @old_dentry: dentry for source file
+ * @new_dir: target directory
+ * @new_dentry: dentry for target location (may be negative unless exchanging)
+ * @flags: rename flags (we care at least about %RENAME_EXCHANGE)
+ *
+ * Prepare for ->rename() where the source and/or target directories may be
+ * encrypted. A new link can only be added to an encrypted directory if the
+ * directory's encryption key is available --- since otherwise we'd have no way
+ * to encrypt the filename. A rename to an existing name, on the other hand,
+ * *is* cryptographically possible without the key. However, we take the more
+ * conservative approach and just forbid all no-key renames.
+ *
+ * We also verify that the rename will not violate the constraint that all files
+ * in an encrypted directory tree use the same encryption policy.
+ *
+ * Return: 0 on success, -ENOKEY if an encryption key is missing, -EPERM if the
+ * rename would cause inconsistent encryption policies, or another -errno code.
+ */
+static inline int fscrypt_prepare_rename(struct inode *old_dir,
+ struct dentry *old_dentry,
+ struct inode *new_dir,
+ struct dentry *new_dentry,
+ unsigned int flags)
+{
+ if (IS_ENCRYPTED(old_dir) || IS_ENCRYPTED(new_dir))
+ return __fscrypt_prepare_rename(old_dir, old_dentry,
+ new_dir, new_dentry, flags);
+ return 0;
+}
+
+/**
+ * fscrypt_prepare_lookup - prepare to lookup a name in a possibly-encrypted directory
+ * @dir: directory being searched
+ * @dentry: filename being looked up
+ * @flags: lookup flags
+ *
+ * Prepare for ->lookup() in a directory which may be encrypted. Lookups can be
+ * done with or without the directory's encryption key; without the key,
+ * filenames are presented in encrypted form. Therefore, we'll try to set up
+ * the directory's encryption key, but even without it the lookup can continue.
+ *
+ * To allow invalidating stale dentries if the directory's encryption key is
+ * added later, we also install a custom ->d_revalidate() method and use the
+ * DCACHE_ENCRYPTED_WITH_KEY flag to indicate whether a given dentry is a
+ * plaintext name (flag set) or a ciphertext name (flag cleared).
+ *
+ * Return: 0 on success, -errno if a problem occurred while setting up the
+ * encryption key
+ */
+static inline int fscrypt_prepare_lookup(struct inode *dir,
+ struct dentry *dentry,
+ unsigned int flags)
+{
+ if (IS_ENCRYPTED(dir))
+ return __fscrypt_prepare_lookup(dir, dentry);
+ return 0;
+}
+
+/**
+ * fscrypt_prepare_setattr - prepare to change a possibly-encrypted inode's attributes
+ * @dentry: dentry through which the inode is being changed
+ * @attr: attributes to change
+ *
+ * Prepare for ->setattr() on a possibly-encrypted inode. On an encrypted file,
+ * most attribute changes are allowed even without the encryption key. However,
+ * without the encryption key we do have to forbid truncates. This is needed
+ * because the size being truncated to may not be a multiple of the filesystem
+ * block size, and in that case we'd have to decrypt the final block, zero the
+ * portion past i_size, and re-encrypt it. (We *could* allow truncating to a
+ * filesystem block boundary, but it's simpler to just forbid all truncates ---
+ * and we already forbid all other contents modifications without the key.)
+ *
+ * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code
+ * if a problem occurred while setting up the encryption key.
+ */
+static inline int fscrypt_prepare_setattr(struct dentry *dentry,
+ struct iattr *attr)
+{
+ if (attr->ia_valid & ATTR_SIZE)
+ return fscrypt_require_key(d_inode(dentry));
+ return 0;
+}
+
+#endif /* _LINUX_FSCRYPT_H */
diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h
deleted file mode 100644
index 4022c61f7e9b..000000000000
--- a/include/linux/fscrypt_common.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * fscrypt_common.h: common declarations for per-file encryption
- *
- * Copyright (C) 2015, Google, Inc.
- *
- * Written by Michael Halcrow, 2015.
- * Modified by Jaegeuk Kim, 2015.
- */
-
-#ifndef _LINUX_FSCRYPT_COMMON_H
-#define _LINUX_FSCRYPT_COMMON_H
-
-#include <linux/key.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/bio.h>
-#include <linux/dcache.h>
-#include <crypto/skcipher.h>
-#include <uapi/linux/fs.h>
-
-#define FS_CRYPTO_BLOCK_SIZE 16
-
-struct fscrypt_info;
-
-struct fscrypt_ctx {
- union {
- struct {
- struct page *bounce_page; /* Ciphertext page */
- struct page *control_page; /* Original page */
- } w;
- struct {
- struct bio *bio;
- struct work_struct work;
- } r;
- struct list_head free_list; /* Free list */
- };
- u8 flags; /* Flags */
-};
-
-/**
- * For encrypted symlinks, the ciphertext length is stored at the beginning
- * of the string in little-endian format.
- */
-struct fscrypt_symlink_data {
- __le16 len;
- char encrypted_path[1];
-} __packed;
-
-struct fscrypt_str {
- unsigned char *name;
- u32 len;
-};
-
-struct fscrypt_name {
- const struct qstr *usr_fname;
- struct fscrypt_str disk_name;
- u32 hash;
- u32 minor_hash;
- struct fscrypt_str crypto_buf;
-};
-
-#define FSTR_INIT(n, l) { .name = n, .len = l }
-#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len)
-#define fname_name(p) ((p)->disk_name.name)
-#define fname_len(p) ((p)->disk_name.len)
-
-/*
- * fscrypt superblock flags
- */
-#define FS_CFLG_OWN_PAGES (1U << 1)
-
-/*
- * crypto opertions for filesystems
- */
-struct fscrypt_operations {
- unsigned int flags;
- const char *key_prefix;
- int (*get_context)(struct inode *, void *, size_t);
- int (*set_context)(struct inode *, const void *, size_t, void *);
- int (*dummy_context)(struct inode *);
- bool (*is_encrypted)(struct inode *);
- bool (*empty_dir)(struct inode *);
- unsigned (*max_namelen)(struct inode *);
-};
-
-static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
-{
- if (inode->i_sb->s_cop->dummy_context &&
- inode->i_sb->s_cop->dummy_context(inode))
- return true;
- return false;
-}
-
-static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
- u32 filenames_mode)
-{
- if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC &&
- filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS)
- return true;
-
- if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS &&
- filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS)
- return true;
-
- return false;
-}
-
-static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
-{
- if (str->len == 1 && str->name[0] == '.')
- return true;
-
- if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.')
- return true;
-
- return false;
-}
-
-static inline struct page *fscrypt_control_page(struct page *page)
-{
-#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
- return ((struct fscrypt_ctx *)page_private(page))->w.control_page;
-#else
- WARN_ON_ONCE(1);
- return ERR_PTR(-EINVAL);
-#endif
-}
-
-static inline int fscrypt_has_encryption_key(const struct inode *inode)
-{
-#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
- return (inode->i_crypt_info != NULL);
-#else
- return 0;
-#endif
-}
-
-#endif /* _LINUX_FSCRYPT_COMMON_H */
diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h
index ec406aed2f2f..c4c6bf2c390e 100644
--- a/include/linux/fscrypt_notsupp.h
+++ b/include/linux/fscrypt_notsupp.h
@@ -3,13 +3,16 @@
*
* This stubs out the fscrypt functions for filesystems configured without
* encryption support.
+ *
+ * Do not include this file directly. Use fscrypt.h instead!
*/
+#ifndef _LINUX_FSCRYPT_H
+#error "Incorrect include of linux/fscrypt_notsupp.h!"
+#endif
#ifndef _LINUX_FSCRYPT_NOTSUPP_H
#define _LINUX_FSCRYPT_NOTSUPP_H
-#include <linux/fscrypt_common.h>
-
/* crypto.c */
static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode,
gfp_t gfp_flags)
@@ -97,7 +100,7 @@ static inline int fscrypt_setup_filename(struct inode *dir,
const struct qstr *iname,
int lookup, struct fscrypt_name *fname)
{
- if (dir->i_sb->s_cop->is_encrypted(dir))
+ if (IS_ENCRYPTED(dir))
return -EOPNOTSUPP;
memset(fname, 0, sizeof(struct fscrypt_name));
@@ -174,4 +177,34 @@ static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
return -EOPNOTSUPP;
}
+/* hooks.c */
+
+static inline int fscrypt_file_open(struct inode *inode, struct file *filp)
+{
+ if (IS_ENCRYPTED(inode))
+ return -EOPNOTSUPP;
+ return 0;
+}
+
+static inline int __fscrypt_prepare_link(struct inode *inode,
+ struct inode *dir)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int __fscrypt_prepare_rename(struct inode *old_dir,
+ struct dentry *old_dentry,
+ struct inode *new_dir,
+ struct dentry *new_dentry,
+ unsigned int flags)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int __fscrypt_prepare_lookup(struct inode *dir,
+ struct dentry *dentry)
+{
+ return -EOPNOTSUPP;
+}
+
#endif /* _LINUX_FSCRYPT_NOTSUPP_H */
diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h
index 32e2fcf13b01..2db5e9706f60 100644
--- a/include/linux/fscrypt_supp.h
+++ b/include/linux/fscrypt_supp.h
@@ -1,14 +1,15 @@
/*
* fscrypt_supp.h
*
- * This is included by filesystems configured with encryption support.
+ * Do not include this file directly. Use fscrypt.h instead!
*/
+#ifndef _LINUX_FSCRYPT_H
+#error "Incorrect include of linux/fscrypt_supp.h!"
+#endif
#ifndef _LINUX_FSCRYPT_SUPP_H
#define _LINUX_FSCRYPT_SUPP_H
-#include <linux/fscrypt_common.h>
-
/* crypto.c */
extern struct kmem_cache *fscrypt_info_cachep;
extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t);
@@ -142,4 +143,14 @@ extern void fscrypt_pullback_bio_page(struct page **, bool);
extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t,
unsigned int);
+/* hooks.c */
+extern int fscrypt_file_open(struct inode *inode, struct file *filp);
+extern int __fscrypt_prepare_link(struct inode *inode, struct inode *dir);
+extern int __fscrypt_prepare_rename(struct inode *old_dir,
+ struct dentry *old_dentry,
+ struct inode *new_dir,
+ struct dentry *new_dentry,
+ unsigned int flags);
+extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry);
+
#endif /* _LINUX_FSCRYPT_SUPP_H */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ed94cea9eaff..312a4ef093e3 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -781,16 +781,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
*/
#define __notrace_funcgraph notrace
-/*
- * We want to which function is an entrypoint of a hardirq.
- * That will help us to put a signal on output.
- */
-#define __irq_entry __attribute__((__section__(".irqentry.text")))
-
-/* Limits of hardirq entrypoints */
-extern char __irqentry_text_start[];
-extern char __irqentry_text_end[];
-
#define FTRACE_NOTRACE_DEPTH 65536
#define FTRACE_RETFUNC_DEPTH 50
#define FTRACE_RETSTACK_ALLOC_SIZE 32
@@ -827,7 +817,6 @@ static inline void unpause_graph_tracing(void)
#else /* !CONFIG_FUNCTION_GRAPH_TRACER */
#define __notrace_funcgraph
-#define __irq_entry
#define INIT_FTRACE_GRAPH
static inline void ftrace_graph_init_task(struct task_struct *t) { }
diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index 7ff168d06967..46156ff5b01d 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -31,6 +31,7 @@
#define __GENALLOC_H__
#include <linux/spinlock_types.h>
+#include <linux/atomic.h>
struct device;
struct device_node;
@@ -68,7 +69,7 @@ struct gen_pool {
*/
struct gen_pool_chunk {
struct list_head next_chunk; /* next chunk in pool */
- atomic_t avail;
+ atomic_long_t avail;
phys_addr_t phys_addr; /* physical starting address of memory chunk */
unsigned long start_addr; /* start address of memory chunk */
unsigned long end_addr; /* end address of memory chunk (inclusive) */
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index ad16809c8596..9ddd6a0ce3a2 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -18,6 +18,7 @@
#include <linux/atomic.h>
#include <asm/ptrace.h>
#include <asm/irq.h>
+#include <asm/sections.h>
/*
* These correspond to the IORESOURCE_IRQ_* defines in
@@ -672,4 +673,11 @@ extern int early_irq_init(void);
extern int arch_probe_nr_irqs(void);
extern int arch_early_irq_init(void);
+/*
+ * We want to know which function is an entrypoint of a hardirq or a softirq.
+ */
+#define __irq_entry __attribute__((__section__(".irqentry.text")))
+#define __softirq_entry \
+ __attribute__((__section__(".softirqentry.text")))
+
#endif
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index a0fc3cf932af..e4e22ed3fc0c 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -218,7 +218,8 @@ struct ipv6_pinfo {
* 100: prefer care-of address
*/
dontfrag:1,
- autoflowlabel:1;
+ autoflowlabel:1,
+ autoflowlabel_set:1;
__u8 min_hopcount;
__u8 tclass;
__be32 rcv_flowinfo;
diff --git a/include/linux/kaiser.h b/include/linux/kaiser.h
new file mode 100644
index 000000000000..58c55b1589d0
--- /dev/null
+++ b/include/linux/kaiser.h
@@ -0,0 +1,52 @@
+#ifndef _LINUX_KAISER_H
+#define _LINUX_KAISER_H
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#include <asm/kaiser.h>
+
+static inline int kaiser_map_thread_stack(void *stack)
+{
+ /*
+ * Map that page of kernel stack on which we enter from user context.
+ */
+ return kaiser_add_mapping((unsigned long)stack +
+ THREAD_SIZE - PAGE_SIZE, PAGE_SIZE, __PAGE_KERNEL);
+}
+
+static inline void kaiser_unmap_thread_stack(void *stack)
+{
+ /*
+ * Note: may be called even when kaiser_map_thread_stack() failed.
+ */
+ kaiser_remove_mapping((unsigned long)stack +
+ THREAD_SIZE - PAGE_SIZE, PAGE_SIZE);
+}
+#else
+
+/*
+ * These stubs are used whenever CONFIG_PAGE_TABLE_ISOLATION is off, which
+ * includes architectures that support KAISER, but have it disabled.
+ */
+
+static inline void kaiser_init(void)
+{
+}
+static inline int kaiser_add_mapping(unsigned long addr,
+ unsigned long size, unsigned long flags)
+{
+ return 0;
+}
+static inline void kaiser_remove_mapping(unsigned long start,
+ unsigned long size)
+{
+}
+static inline int kaiser_map_thread_stack(void *stack)
+{
+ return 0;
+}
+static inline void kaiser_unmap_thread_stack(void *stack)
+{
+}
+
+#endif /* !CONFIG_PAGE_TABLE_ISOLATION */
+#endif /* _LINUX_KAISER_H */
diff --git a/include/linux/kasan-checks.h b/include/linux/kasan-checks.h
new file mode 100644
index 000000000000..b7f8aced7870
--- /dev/null
+++ b/include/linux/kasan-checks.h
@@ -0,0 +1,12 @@
+#ifndef _LINUX_KASAN_CHECKS_H
+#define _LINUX_KASAN_CHECKS_H
+
+#ifdef CONFIG_KASAN
+void kasan_check_read(const void *p, unsigned int size);
+void kasan_check_write(const void *p, unsigned int size);
+#else
+static inline void kasan_check_read(const void *p, unsigned int size) { }
+static inline void kasan_check_write(const void *p, unsigned int size) { }
+#endif
+
+#endif
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 0fdc798e3ff7..b37afd197eed 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -30,45 +30,60 @@ static inline void *kasan_mem_to_shadow(const void *addr)
}
/* Enable reporting bugs after kasan_disable_current() */
-static inline void kasan_enable_current(void)
-{
- current->kasan_depth++;
-}
+extern void kasan_enable_current(void);
/* Disable reporting bugs for current task */
-static inline void kasan_disable_current(void)
-{
- current->kasan_depth--;
-}
+extern void kasan_disable_current(void);
void kasan_unpoison_shadow(const void *address, size_t size);
void kasan_unpoison_task_stack(struct task_struct *task);
+void kasan_unpoison_stack_above_sp_to(const void *watermark);
void kasan_alloc_pages(struct page *page, unsigned int order);
void kasan_free_pages(struct page *page, unsigned int order);
+void kasan_cache_create(struct kmem_cache *cache, size_t *size,
+ unsigned long *flags);
+void kasan_cache_shrink(struct kmem_cache *cache);
+void kasan_cache_shutdown(struct kmem_cache *cache);
+
void kasan_poison_slab(struct page *page);
void kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
void kasan_poison_object_data(struct kmem_cache *cache, void *object);
+void kasan_init_slab_obj(struct kmem_cache *cache, const void *object);
-void kasan_kmalloc_large(const void *ptr, size_t size);
+void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags);
void kasan_kfree_large(const void *ptr);
-void kasan_kfree(void *ptr);
-void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size);
-void kasan_krealloc(const void *object, size_t new_size);
+void kasan_poison_kfree(void *ptr);
+void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size,
+ gfp_t flags);
+void kasan_krealloc(const void *object, size_t new_size, gfp_t flags);
+
+void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags);
+bool kasan_slab_free(struct kmem_cache *s, void *object);
-void kasan_slab_alloc(struct kmem_cache *s, void *object);
-void kasan_slab_free(struct kmem_cache *s, void *object);
+struct kasan_cache {
+ int alloc_meta_offset;
+ int free_meta_offset;
+};
int kasan_module_alloc(void *addr, size_t size);
void kasan_free_shadow(const struct vm_struct *vm);
+size_t ksize(const void *);
+static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); }
+size_t kasan_metadata_size(struct kmem_cache *cache);
+
+bool kasan_save_enable_multi_shot(void);
+void kasan_restore_multi_shot(bool enabled);
+
#else /* CONFIG_KASAN */
static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
+static inline void kasan_unpoison_stack_above_sp_to(const void *watermark) {}
static inline void kasan_enable_current(void) {}
static inline void kasan_disable_current(void) {}
@@ -76,25 +91,41 @@ static inline void kasan_disable_current(void) {}
static inline void kasan_alloc_pages(struct page *page, unsigned int order) {}
static inline void kasan_free_pages(struct page *page, unsigned int order) {}
+static inline void kasan_cache_create(struct kmem_cache *cache,
+ size_t *size,
+ unsigned long *flags) {}
+static inline void kasan_cache_shrink(struct kmem_cache *cache) {}
+static inline void kasan_cache_shutdown(struct kmem_cache *cache) {}
+
static inline void kasan_poison_slab(struct page *page) {}
static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
void *object) {}
static inline void kasan_poison_object_data(struct kmem_cache *cache,
void *object) {}
+static inline void kasan_init_slab_obj(struct kmem_cache *cache,
+ const void *object) {}
-static inline void kasan_kmalloc_large(void *ptr, size_t size) {}
+static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {}
static inline void kasan_kfree_large(const void *ptr) {}
-static inline void kasan_kfree(void *ptr) {}
+static inline void kasan_poison_kfree(void *ptr) {}
static inline void kasan_kmalloc(struct kmem_cache *s, const void *object,
- size_t size) {}
-static inline void kasan_krealloc(const void *object, size_t new_size) {}
+ size_t size, gfp_t flags) {}
+static inline void kasan_krealloc(const void *object, size_t new_size,
+ gfp_t flags) {}
-static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {}
-static inline void kasan_slab_free(struct kmem_cache *s, void *object) {}
+static inline void kasan_slab_alloc(struct kmem_cache *s, void *object,
+ gfp_t flags) {}
+static inline bool kasan_slab_free(struct kmem_cache *s, void *object)
+{
+ return false;
+}
static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
static inline void kasan_free_shadow(const struct vm_struct *vm) {}
+static inline void kasan_unpoison_slab(const void *ptr) { }
+static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
+
#endif /* CONFIG_KASAN */
#endif /* LINUX_KASAN_H */
diff --git a/include/linux/kcov.h b/include/linux/kcov.h
new file mode 100644
index 000000000000..87e2a44f1bab
--- /dev/null
+++ b/include/linux/kcov.h
@@ -0,0 +1,33 @@
+#ifndef _LINUX_KCOV_H
+#define _LINUX_KCOV_H
+
+#include <uapi/linux/kcov.h>
+
+struct task_struct;
+
+#ifdef CONFIG_KCOV
+
+enum kcov_mode {
+ /* Coverage collection is not enabled yet. */
+ KCOV_MODE_DISABLED = 0,
+ /* KCOV was initialized, but tracing mode hasn't been chosen yet. */
+ KCOV_MODE_INIT = 1,
+ /*
+ * Tracing coverage collection mode.
+ * Covered PCs are collected in a per-task buffer.
+ */
+ KCOV_MODE_TRACE_PC = 2,
+ /* Collecting comparison operands mode. */
+ KCOV_MODE_TRACE_CMP = 3,
+};
+
+void kcov_task_init(struct task_struct *t);
+void kcov_task_exit(struct task_struct *t);
+
+#else
+
+static inline void kcov_task_init(struct task_struct *t) {}
+static inline void kcov_task_exit(struct task_struct *t) {}
+
+#endif /* CONFIG_KCOV */
+#endif /* _LINUX_KCOV_H */
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index d3133be12d92..7fde8af9b87e 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -460,6 +460,7 @@ enum {
enum {
MLX4_INTERFACE_STATE_UP = 1 << 0,
MLX4_INTERFACE_STATE_DELETION = 1 << 1,
+ MLX4_INTERFACE_STATE_NOWAIT = 1 << 2,
};
#define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f29e8aa76e39..652e73967707 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -70,6 +70,10 @@ extern int mmap_rnd_compat_bits __read_mostly;
#define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0))
#endif
+#ifndef lm_alias
+#define lm_alias(x) __va(__pa_symbol(x))
+#endif
+
/*
* To prevent common memory management code establishing
* a zero page mapping on a read fault.
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 16373c8f5f57..369bc3405a6d 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -63,8 +63,9 @@ static inline int arch_validate_prot(unsigned long prot)
* ("bit1" and "bit2" must be single bits)
*/
#define _calc_vm_trans(x, bit1, bit2) \
+ ((!(bit1) || !(bit2)) ? 0 : \
((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \
- : ((x) & (bit1)) / ((bit1) / (bit2)))
+ : ((x) & (bit1)) / ((bit1) / (bit2))))
/*
* Combine the mmap "prot" argument into "vm_flags" used internally.
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 3f29ba41c025..e45832f7b5d7 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -394,7 +394,8 @@ struct mmc_host {
#ifdef CONFIG_BLOCK
int latency_hist_enabled;
- struct io_latency_state io_lat_s;
+ struct io_latency_state io_lat_read;
+ struct io_latency_state io_lat_write;
#endif
unsigned long private[0] ____cacheline_aligned;
diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h
index 70fffeba7495..a4441784503b 100644
--- a/include/linux/mmu_context.h
+++ b/include/linux/mmu_context.h
@@ -1,9 +1,16 @@
#ifndef _LINUX_MMU_CONTEXT_H
#define _LINUX_MMU_CONTEXT_H
+#include <asm/mmu_context.h>
+
struct mm_struct;
void use_mm(struct mm_struct *mm);
void unuse_mm(struct mm_struct *mm);
+/* Architectures that care about IRQ state in switch_mm can override this. */
+#ifndef switch_mm_irqs_off
+# define switch_mm_irqs_off switch_mm
+#endif
+
#endif
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index a1a210d59961..38c5eb21883e 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -381,18 +381,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
___pmd; \
})
-#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd) \
-({ \
- unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \
- pmd_t ___pmd; \
- \
- ___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd); \
- mmu_notifier_invalidate_range(__mm, ___haddr, \
- ___haddr + HPAGE_PMD_SIZE); \
- \
- ___pmd; \
-})
-
/*
* set_pte_at_notify() sets the pte _after_ running the notifier.
* This is safe to start by updating the secondary MMUs, because the primary MMU
@@ -475,7 +463,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
#define pmdp_clear_young_notify pmdp_test_and_clear_young
#define ptep_clear_flush_notify ptep_clear_flush
#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
-#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
#define set_pte_at_notify set_pte_at
#endif /* CONFIG_MMU_NOTIFIER */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 5fab999b320b..5d8b0349fb26 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -133,8 +133,9 @@ enum zone_stat_item {
NR_SLAB_RECLAIMABLE,
NR_SLAB_UNRECLAIMABLE,
NR_PAGETABLE, /* used for pagetables */
- NR_KERNEL_STACK,
/* Second 128 byte cacheline */
+ NR_KERNEL_STACK,
+ NR_KAISERTABLE,
NR_UNSTABLE_NFS, /* NFS unstable pages */
NR_BOUNCE,
NR_VMSCAN_WRITE,
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 639e9b8b0e4d..0b41959aab9f 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -131,6 +131,7 @@ netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
struct netlink_callback {
struct sk_buff *skb;
const struct nlmsghdr *nlh;
+ int (*start)(struct netlink_callback *);
int (*dump)(struct sk_buff * skb,
struct netlink_callback *cb);
int (*done)(struct netlink_callback *cb);
@@ -153,6 +154,7 @@ struct nlmsghdr *
__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags);
struct netlink_dump_control {
+ int (*start)(struct netlink_callback *);
int (*dump)(struct sk_buff *skb, struct netlink_callback *);
int (*done)(struct netlink_callback *);
void *data;
diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h
index 7dee00143afd..c201e31e9d7e 100644
--- a/include/linux/omap-gpmc.h
+++ b/include/linux/omap-gpmc.h
@@ -191,10 +191,11 @@ static inline int gpmc_nand_init(struct omap_nand_platform_data *d,
#endif
#if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2)
-extern void gpmc_onenand_init(struct omap_onenand_platform_data *d);
+extern int gpmc_onenand_init(struct omap_onenand_platform_data *d);
#else
#define board_onenand_data NULL
-static inline void gpmc_onenand_init(struct omap_onenand_platform_data *d)
+static inline int gpmc_onenand_init(struct omap_onenand_platform_data *d)
{
+ return 0;
}
#endif
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 8f16299ca068..8902f23bb770 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -35,6 +35,12 @@
#endif
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#define USER_MAPPED_SECTION "..user_mapped"
+#else
+#define USER_MAPPED_SECTION ""
+#endif
+
/*
* Base implementations of per-CPU variable declarations and definitions, where
* the section in which the variable is to be placed is provided by the
@@ -115,6 +121,12 @@
#define DEFINE_PER_CPU(type, name) \
DEFINE_PER_CPU_SECTION(type, name, "")
+#define DECLARE_PER_CPU_USER_MAPPED(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
+
+#define DEFINE_PER_CPU_USER_MAPPED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
+
/*
* Declaration/definition used for per-CPU variables that must come first in
* the set of variables.
@@ -144,6 +156,14 @@
DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
____cacheline_aligned_in_smp
+#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
+ ____cacheline_aligned_in_smp
+
+#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
+ ____cacheline_aligned_in_smp
+
#define DECLARE_PER_CPU_ALIGNED(type, name) \
DECLARE_PER_CPU_SECTION(type, name, PER_CPU_ALIGNED_SECTION) \
____cacheline_aligned
@@ -162,11 +182,21 @@
#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \
__aligned(PAGE_SIZE)
+/*
+ * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode.
+ */
+#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
+ __aligned(PAGE_SIZE)
+
+#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
+ __aligned(PAGE_SIZE)
/*
* Declaration/definition used for per-CPU variables that must be read mostly.
*/
-#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \
+#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \
DECLARE_PER_CPU_SECTION(type, name, "..read_mostly")
#define DEFINE_PER_CPU_READ_MOSTLY(type, name) \
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 9d1db198509e..b0cef11769f3 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -681,6 +681,17 @@ static inline bool phy_is_internal(struct phy_device *phydev)
}
/**
+ * phy_interface_mode_is_rgmii - Convenience function for testing if a
+ * PHY interface mode is RGMII (all variants)
+ * @mode: the phy_interface_t enum
+ */
+static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode)
+{
+ return mode >= PHY_INTERFACE_MODE_RGMII &&
+ mode <= PHY_INTERFACE_MODE_RGMII_TXID;
+};
+
+/**
* phy_interface_is_rgmii - Convenience function for testing if a PHY interface
* is RGMII (all variants)
* @phydev: the phy_device struct
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 60af9d2fa922..de6b3edd835b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -51,6 +51,7 @@ struct sched_param {
#include <linux/resource.h>
#include <linux/timer.h>
#include <linux/hrtimer.h>
+#include <linux/kcov.h>
#include <linux/task_io_accounting.h>
#include <linux/latencytop.h>
#include <linux/cred.h>
@@ -1973,6 +1974,16 @@ struct task_struct {
/* bitmask and counter of trace recursion */
unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
+#ifdef CONFIG_KCOV
+ /* Coverage collection mode enabled for this task (0 if disabled). */
+ enum kcov_mode kcov_mode;
+ /* Size of the kcov_area. */
+ unsigned kcov_size;
+ /* Buffer for coverage collection. */
+ void *kcov_area;
+ /* kcov desciptor wired with this task or NULL. */
+ struct kcov *kcov;
+#endif
#ifdef CONFIG_MEMCG
struct mem_cgroup *memcg_in_oom;
gfp_t memcg_oom_gfp_mask;
diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h
index 8c9131db2b25..b050ef51e27e 100644
--- a/include/linux/sh_eth.h
+++ b/include/linux/sh_eth.h
@@ -16,7 +16,6 @@ struct sh_eth_plat_data {
unsigned char mac_addr[ETH_ALEN];
unsigned no_ether_link:1;
unsigned ether_link_active_low:1;
- unsigned needs_init:1;
};
#endif
diff --git a/include/linux/slab.h b/include/linux/slab.h
index b4e739f04ee6..16dc1e4a91f3 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -87,6 +87,12 @@
# define SLAB_FAILSLAB 0x00000000UL
#endif
+#ifdef CONFIG_KASAN
+#define SLAB_KASAN 0x08000000UL
+#else
+#define SLAB_KASAN 0x00000000UL
+#endif
+
/* The following flags affect the page allocator grouping pages by mobility */
#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */
#define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */
@@ -368,7 +374,7 @@ static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s,
{
void *ret = kmem_cache_alloc(s, flags);
- kasan_kmalloc(s, ret, size);
+ kasan_kmalloc(s, ret, size, flags);
return ret;
}
@@ -379,7 +385,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
{
void *ret = kmem_cache_alloc_node(s, gfpflags, node);
- kasan_kmalloc(s, ret, size);
+ kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
#endif /* CONFIG_TRACING */
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 33d049066c3d..bf16ba9f6fdb 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -60,6 +60,9 @@ struct kmem_cache {
atomic_t allocmiss;
atomic_t freehit;
atomic_t freemiss;
+#ifdef CONFIG_DEBUG_SLAB_LEAK
+ atomic_t store_user_clean;
+#endif
/*
* If debugging is enabled, then the allocator can add additional
@@ -72,8 +75,23 @@ struct kmem_cache {
#ifdef CONFIG_MEMCG_KMEM
struct memcg_cache_params memcg_params;
#endif
+#ifdef CONFIG_KASAN
+ struct kasan_cache kasan_info;
+#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
+static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
+ void *x)
+{
+ void *object = x - (x - page->s_mem) % cache->size;
+ void *last_object = page->s_mem + (cache->num - 1) * cache->size;
+
+ if (unlikely(object > last_object))
+ return last_object;
+ else
+ return object;
+}
+
#endif /* _LINUX_SLAB_DEF_H */
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index f4e857e920cd..fd720e3dd1b8 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -99,6 +99,11 @@ struct kmem_cache {
*/
int remote_node_defrag_ratio;
#endif
+
+#ifdef CONFIG_KASAN
+ struct kasan_cache kasan_info;
+#endif
+
struct kmem_cache_node *node[MAX_NUMNODES];
};
@@ -130,4 +135,15 @@ static inline void *virt_to_obj(struct kmem_cache *s,
void object_err(struct kmem_cache *s, struct page *page,
u8 *object, char *reason);
+static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
+ void *x) {
+ void *object = x - (x - page_address(page)) % cache->size;
+ void *last_object = page_address(page) +
+ (page->objects - 1) * cache->size;
+ if (unlikely(object > last_object))
+ return last_object;
+ else
+ return object;
+}
+
#endif /* _LINUX_SLUB_DEF_H */
diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
new file mode 100644
index 000000000000..7978b3e2c1e1
--- /dev/null
+++ b/include/linux/stackdepot.h
@@ -0,0 +1,32 @@
+/*
+ * A generic stack depot implementation
+ *
+ * Author: Alexander Potapenko <glider@google.com>
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * Based on code by Dmitry Chernenkov.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_STACKDEPOT_H
+#define _LINUX_STACKDEPOT_H
+
+typedef u32 depot_stack_handle_t;
+
+struct stack_trace;
+
+depot_stack_handle_t depot_save_stack(struct stack_trace *trace, gfp_t flags);
+
+void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace);
+
+#endif
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index c6f0f0d0e17e..00a1f330f93a 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -116,6 +116,12 @@ struct attribute_group {
.show = _name##_show, \
}
+#define __ATTR_RO_MODE(_name, _mode) { \
+ .attr = { .name = __stringify(_name), \
+ .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \
+ .show = _name##_show, \
+}
+
#define __ATTR_WO(_name) { \
.attr = { .name = __stringify(_name), .mode = S_IWUSR }, \
.store = _name##_store, \
diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
index 0f175b8f6456..cb889afe576b 100644
--- a/include/linux/tee_drv.h
+++ b/include/linux/tee_drv.h
@@ -28,6 +28,7 @@
#define TEE_SHM_MAPPED 0x1 /* Memory mapped by the kernel */
#define TEE_SHM_DMA_BUF 0x2 /* Memory with dma-buf handle */
+struct device;
struct tee_device;
struct tee_shm;
struct tee_shm_pool;
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 115216ec7cfe..3a5af09af18b 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -50,7 +50,7 @@ struct tk_read_base {
* @tai_offset: The current UTC to TAI offset in seconds
* @clock_was_set_seq: The sequence number of clock was set events
* @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
- * @raw_time: Monotonic raw base time in timespec64 format
+ * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
* @cycle_interval: Number of clock cycles in one NTP interval
* @xtime_interval: Number of clock shifted nano seconds in one NTP
* interval.
@@ -91,7 +91,7 @@ struct timekeeper {
s32 tai_offset;
unsigned int clock_was_set_seq;
ktime_t next_leap_ktime;
- struct timespec64 raw_time;
+ u64 raw_sec;
/* The following members are for timekeeping internal use */
cycle_t cycle_interval;
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 8c75af6b7d5b..092b5658b9c3 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -330,6 +330,7 @@ struct usb_host_bos {
struct usb_ss_cap_descriptor *ss_cap;
struct usb_ssp_cap_descriptor *ssp_cap;
struct usb_ss_container_id_descriptor *ss_id;
+ struct usb_ptm_cap_descriptor *ptm_cap;
};
int __usb_get_extra_descriptor(char *buffer, unsigned size,
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index e623d392db0c..8ef3a61fdc74 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -80,10 +80,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
#endif
#endif
#ifdef CONFIG_DEBUG_TLBFLUSH
-#ifdef CONFIG_SMP
NR_TLB_REMOTE_FLUSH, /* cpu tried to flush others' tlbs */
NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */
-#endif /* CONFIG_SMP */
NR_TLB_LOCAL_FLUSH_ALL,
NR_TLB_LOCAL_FLUSH_ONE,
#endif /* CONFIG_DEBUG_TLBFLUSH */
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 1b6b6dcb018d..43c0e771f417 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -114,6 +114,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net)
* @flags: flags
* @policy: attribute validation policy
* @doit: standard command callback
+ * @start: start callback for dumps
* @dumpit: callback for dumpers
* @done: completion callback for dumps
* @ops_list: operations list
@@ -122,6 +123,7 @@ struct genl_ops {
const struct nla_policy *policy;
int (*doit)(struct sk_buff *skb,
struct genl_info *info);
+ int (*start)(struct netlink_callback *cb);
int (*dumpit)(struct sk_buff *skb,
struct netlink_callback *cb);
int (*done)(struct netlink_callback *cb);
diff --git a/include/net/ip.h b/include/net/ip.h
index c5d8ee796b38..119c6cae8380 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -33,6 +33,8 @@
#include <net/flow.h>
#include <net/flow_dissector.h>
+#define IPV4_MIN_MTU 68 /* RFC 791 */
+
struct sock;
struct inet_skb_parm {
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index dae99d7d2bc0..706a7017885c 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -165,11 +165,11 @@ struct expander_device {
struct sata_device {
unsigned int class;
- struct smp_resp rps_resp; /* report_phy_sata_resp */
u8 port_no; /* port number, if this is a PM (Port) */
struct ata_port *ap;
struct ata_host ata_host;
+ struct smp_resp rps_resp ____cacheline_aligned; /* report_phy_sata_resp */
u8 fis[ATA_RESP_FIS_SIZE];
};
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 8555321306fb..0eed9fd79ea5 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -299,7 +299,7 @@ struct t10_alua_tg_pt_gp {
struct list_head tg_pt_gp_lun_list;
struct se_lun *tg_pt_gp_alua_lun;
struct se_node_acl *tg_pt_gp_alua_nacl;
- struct delayed_work tg_pt_gp_transition_work;
+ struct work_struct tg_pt_gp_transition_work;
struct completion *tg_pt_gp_transition_complete;
};
@@ -496,6 +496,7 @@ struct se_cmd {
#define CMD_T_BUSY (1 << 9)
#define CMD_T_TAS (1 << 10)
#define CMD_T_FABRIC_STOP (1 << 11)
+#define CMD_T_PRE_EXECUTE (1 << 12)
spinlock_t t_state_lock;
struct kref cmd_kref;
struct completion t_transport_stop_comp;
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 7063bbcca03b..589df6f73789 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -128,6 +128,18 @@ TRACE_DEFINE_ENUM(CP_TRIMMED);
{ CP_DISCARD, "Discard" }, \
{ CP_UMOUNT | CP_TRIMMED, "Umount,Trimmed" })
+#define show_fsync_cpreason(type) \
+ __print_symbolic(type, \
+ { CP_NO_NEEDED, "no needed" }, \
+ { CP_NON_REGULAR, "non regular" }, \
+ { CP_HARDLINK, "hardlink" }, \
+ { CP_SB_NEED_CP, "sb needs cp" }, \
+ { CP_WRONG_PINO, "wrong pino" }, \
+ { CP_NO_SPC_ROLL, "no space roll forward" }, \
+ { CP_NODE_NEED_CP, "node needs cp" }, \
+ { CP_FASTBOOT_MODE, "fastboot mode" }, \
+ { CP_SPEC_LOG_NUM, "log type is 2" })
+
struct victim_sel_policy;
struct f2fs_map_blocks;
@@ -202,14 +214,14 @@ DEFINE_EVENT(f2fs__inode, f2fs_sync_file_enter,
TRACE_EVENT(f2fs_sync_file_exit,
- TP_PROTO(struct inode *inode, int need_cp, int datasync, int ret),
+ TP_PROTO(struct inode *inode, int cp_reason, int datasync, int ret),
- TP_ARGS(inode, need_cp, datasync, ret),
+ TP_ARGS(inode, cp_reason, datasync, ret),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(ino_t, ino)
- __field(int, need_cp)
+ __field(int, cp_reason)
__field(int, datasync)
__field(int, ret)
),
@@ -217,15 +229,15 @@ TRACE_EVENT(f2fs_sync_file_exit,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
- __entry->need_cp = need_cp;
+ __entry->cp_reason = cp_reason;
__entry->datasync = datasync;
__entry->ret = ret;
),
- TP_printk("dev = (%d,%d), ino = %lu, checkpoint is %s, "
+ TP_printk("dev = (%d,%d), ino = %lu, cp_reason: %s, "
"datasync = %d, ret = %d",
show_dev_ino(__entry),
- __entry->need_cp ? "needed" : "not needed",
+ show_fsync_cpreason(__entry->cp_reason),
__entry->datasync,
__entry->ret)
);
@@ -716,6 +728,91 @@ TRACE_EVENT(f2fs_get_victim,
__entry->free)
);
+TRACE_EVENT(f2fs_lookup_start,
+
+ TP_PROTO(struct inode *dir, struct dentry *dentry, unsigned int flags),
+
+ TP_ARGS(dir, dentry, flags),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(const char *, name)
+ __field(unsigned int, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->ino = dir->i_ino;
+ __entry->name = dentry->d_name.name;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("dev = (%d,%d), pino = %lu, name:%s, flags:%u",
+ show_dev_ino(__entry),
+ __entry->name,
+ __entry->flags)
+);
+
+TRACE_EVENT(f2fs_lookup_end,
+
+ TP_PROTO(struct inode *dir, struct dentry *dentry, nid_t ino,
+ int err),
+
+ TP_ARGS(dir, dentry, ino, err),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(const char *, name)
+ __field(nid_t, cino)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->ino = dir->i_ino;
+ __entry->name = dentry->d_name.name;
+ __entry->cino = ino;
+ __entry->err = err;
+ ),
+
+ TP_printk("dev = (%d,%d), pino = %lu, name:%s, ino:%u, err:%d",
+ show_dev_ino(__entry),
+ __entry->name,
+ __entry->cino,
+ __entry->err)
+);
+
+TRACE_EVENT(f2fs_readdir,
+
+ TP_PROTO(struct inode *dir, loff_t start_pos, loff_t end_pos, int err),
+
+ TP_ARGS(dir, start_pos, end_pos, err),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(loff_t, start)
+ __field(loff_t, end)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->ino = dir->i_ino;
+ __entry->start = start_pos;
+ __entry->end = end_pos;
+ __entry->err = err;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, start_pos:%llu, end_pos:%llu, err:%d",
+ show_dev_ino(__entry),
+ __entry->start,
+ __entry->end,
+ __entry->err)
+);
+
TRACE_EVENT(f2fs_fallocate,
TP_PROTO(struct inode *inode, int mode,
@@ -1274,6 +1371,13 @@ DEFINE_EVENT(f2fs_discard, f2fs_issue_discard,
TP_ARGS(dev, blkstart, blklen)
);
+DEFINE_EVENT(f2fs_discard, f2fs_remove_discard,
+
+ TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen),
+
+ TP_ARGS(dev, blkstart, blklen)
+);
+
TRACE_EVENT(f2fs_issue_reset_zone,
TP_PROTO(struct block_device *dev, block_t blkstart),
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index d6f83222a6a1..67ff6555967f 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -204,7 +204,7 @@ TRACE_EVENT(kvm_ack_irq,
{ KVM_TRACE_MMIO_WRITE, "write" }
TRACE_EVENT(kvm_mmio,
- TP_PROTO(int type, int len, u64 gpa, u64 val),
+ TP_PROTO(int type, int len, u64 gpa, void *val),
TP_ARGS(type, len, gpa, val),
TP_STRUCT__entry(
@@ -218,7 +218,10 @@ TRACE_EVENT(kvm_mmio,
__entry->type = type;
__entry->len = len;
__entry->gpa = gpa;
- __entry->val = val;
+ __entry->val = 0;
+ if (val)
+ memcpy(&__entry->val, val,
+ min_t(u32, sizeof(__entry->val), len));
),
TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 22b6ad31c706..8562b1cb776b 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -90,7 +90,7 @@ PTR_FIELD(PTR_GEN, 0, 8)
#define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1)
-#define PTR(gen, offset, dev) \
+#define MAKE_PTR(gen, offset, dev) \
((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen)
/* Bkey utility code */
diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h
new file mode 100644
index 000000000000..33b826b9946e
--- /dev/null
+++ b/include/uapi/linux/kcov.h
@@ -0,0 +1,34 @@
+#ifndef _LINUX_KCOV_IOCTLS_H
+#define _LINUX_KCOV_IOCTLS_H
+
+#include <linux/types.h>
+
+#define KCOV_INIT_TRACE _IOR('c', 1, unsigned long)
+#define KCOV_ENABLE _IO('c', 100)
+#define KCOV_DISABLE _IO('c', 101)
+
+enum {
+ /*
+ * Tracing coverage collection mode.
+ * Covered PCs are collected in a per-task buffer.
+ * In new KCOV version the mode is chosen by calling
+ * ioctl(fd, KCOV_ENABLE, mode). In older versions the mode argument
+ * was supposed to be 0 in such a call. So, for reasons of backward
+ * compatibility, we have chosen the value KCOV_TRACE_PC to be 0.
+ */
+ KCOV_TRACE_PC = 0,
+ /* Collecting comparison operands mode. */
+ KCOV_TRACE_CMP = 1,
+};
+
+/*
+ * The format for the types of collected comparisons.
+ *
+ * Bit 0 shows whether one of the arguments is a compile-time constant.
+ * Bits 1 & 2 contain log2 of the argument size, up to 8 bytes.
+ */
+#define KCOV_CMP_CONST (1 << 0)
+#define KCOV_CMP_SIZE(n) ((n) << 1)
+#define KCOV_CMP_MASK KCOV_CMP_SIZE(3)
+
+#endif /* _LINUX_KCOV_IOCTLS_H */
diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h
index 370d8845ab21..688782e90140 100644
--- a/include/uapi/linux/tee.h
+++ b/include/uapi/linux/tee.h
@@ -49,6 +49,7 @@
#define TEE_MAX_ARG_SIZE 1024
#define TEE_GEN_CAP_GP (1 << 0)/* GlobalPlatform compliant TEE */
+#define TEE_GEN_CAP_PRIVILEGED (1 << 1)/* Privileged device (for supplicant) */
/*
* TEE Implementation ID
diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h
index 91ab75c1013c..ec6c8543732f 100644
--- a/include/uapi/linux/usb/ch9.h
+++ b/include/uapi/linux/usb/ch9.h
@@ -812,6 +812,8 @@ struct usb_wireless_cap_descriptor { /* Ultra Wide Band */
__u8 bReserved;
} __attribute__((packed));
+#define USB_DT_USB_WIRELESS_CAP_SIZE 11
+
/* USB 2.0 Extension descriptor */
#define USB_CAP_TYPE_EXT 2
@@ -895,6 +897,22 @@ struct usb_ssp_cap_descriptor {
#define USB_SSP_SUBLINK_SPEED_LSM (0xff << 16) /* Lanespeed mantissa */
} __attribute__((packed));
+/*
+ * Precision time measurement capability descriptor: advertised by devices and
+ * hubs that support PTM
+ */
+#define USB_PTM_CAP_TYPE 0xb
+struct usb_ptm_cap_descriptor {
+ __u8 bLength;
+ __u8 bDescriptorType;
+ __u8 bDevCapabilityType;
+} __attribute__((packed));
+
+/*
+ * The size of the descriptor for the Sublink Speed Attribute Count
+ * (SSAC) specified in bmAttributes[4:0].
+ */
+#define USB_DT_USB_SSP_CAP_SIZE(ssac) (16 + ssac * 4)
/*-------------------------------------------------------------------------*/
@@ -991,6 +1009,7 @@ enum usb3_link_state {
USB3_LPM_U3
};
+#define USB_DT_USB_PTM_ID_SIZE 3
/*
* A U1 timeout of 0x0 means the parent hub will reject any transitions to U1.
* 0xff means the parent hub will accept transitions to U1, but will not
diff --git a/init/initramfs.c b/init/initramfs.c
index 69805244732b..3340bf219a20 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -626,8 +626,11 @@ static int __init populate_rootfs(void)
{
char *err;
- if (do_skip_initramfs)
+ if (do_skip_initramfs) {
+ if (initrd_start)
+ free_initrd();
return default_rootfs();
+ }
err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
if (err)
diff --git a/init/main.c b/init/main.c
index 86f5ce9ede86..db6b6cbb846b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -81,6 +81,7 @@
#include <linux/integrity.h>
#include <linux/proc_ns.h>
#include <linux/io.h>
+#include <linux/kaiser.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -489,6 +490,7 @@ static void __init mm_init(void)
pgtable_init();
vmalloc_init();
ioremap_huge_init();
+ kaiser_init();
}
asmlinkage __visible void __init start_kernel(void)
diff --git a/kernel/Makefile b/kernel/Makefile
index 53abf008ecb3..2dea801370f2 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -19,6 +19,17 @@ CFLAGS_REMOVE_cgroup-debug.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_irq_work.o = $(CC_FLAGS_FTRACE)
endif
+# Prevents flicker of uninteresting __do_softirq()/__local_bh_disable_ip()
+# in coverage traces.
+KCOV_INSTRUMENT_softirq.o := n
+# These are called from save_stack_trace() on slub debug path,
+# and produce insane amounts of uninteresting coverage.
+KCOV_INSTRUMENT_module.o := n
+KCOV_INSTRUMENT_extable.o := n
+# Don't self-instrument.
+KCOV_INSTRUMENT_kcov.o := n
+KASAN_SANITIZE_kcov.o := n
+
# cond_syscall is currently not LTO compatible
CFLAGS_sys_ni.o = $(DISABLE_LTO)
@@ -69,6 +80,7 @@ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
obj-$(CONFIG_AUDIT_WATCH) += audit_watch.o audit_fsnotify.o
obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
obj-$(CONFIG_GCOV_KERNEL) += gcov/
+obj-$(CONFIG_KCOV) += kcov.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KGDB) += debug/
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
diff --git a/kernel/acct.c b/kernel/acct.c
index 74963d192c5d..37f1dc696fbd 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -99,7 +99,7 @@ static int check_free_space(struct bsd_acct_struct *acct)
{
struct kstatfs sbuf;
- if (time_is_before_jiffies(acct->needcheck))
+ if (time_is_after_jiffies(acct->needcheck))
goto out;
/* May block */
diff --git a/kernel/audit.c b/kernel/audit.c
index 34f690b9213a..e228b88dfd23 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -80,13 +80,13 @@ static int audit_initialized;
#define AUDIT_OFF 0
#define AUDIT_ON 1
#define AUDIT_LOCKED 2
-u32 audit_enabled;
-u32 audit_ever_enabled;
+u32 audit_enabled = AUDIT_OFF;
+u32 audit_ever_enabled = !!AUDIT_OFF;
EXPORT_SYMBOL_GPL(audit_enabled);
/* Default state when kernel boots without any parameters. */
-static u32 audit_default;
+static u32 audit_default = AUDIT_OFF;
/* If auditing cannot proceed, audit_failure selects what happens. */
static u32 audit_failure = AUDIT_FAIL_PRINTK;
@@ -1185,8 +1185,6 @@ static int __init audit_init(void)
skb_queue_head_init(&audit_skb_queue);
skb_queue_head_init(&audit_skb_hold_queue);
audit_initialized = AUDIT_INITIALIZED;
- audit_enabled = audit_default;
- audit_ever_enabled |= !!audit_default;
audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
@@ -1203,6 +1201,8 @@ static int __init audit_enable(char *str)
audit_default = !!simple_strtol(str, NULL, 0);
if (!audit_default)
audit_initialized = AUDIT_DISABLED;
+ audit_enabled = audit_default;
+ audit_ever_enabled = !!audit_enabled;
pr_info("%s\n", audit_default ?
"enabled (after initialization)" : "disabled (until reboot)");
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index b0799bced518..3608fa1aec8a 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -20,8 +20,10 @@
/* Called from syscall */
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
+ u32 elem_size, array_size, index_mask, max_entries;
+ bool unpriv = !capable(CAP_SYS_ADMIN);
struct bpf_array *array;
- u32 elem_size, array_size;
+ u64 mask64;
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -36,12 +38,33 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
elem_size = round_up(attr->value_size, 8);
+ max_entries = attr->max_entries;
+
+ /* On 32 bit archs roundup_pow_of_two() with max_entries that has
+ * upper most bit set in u32 space is undefined behavior due to
+ * resulting 1U << 32, so do it manually here in u64 space.
+ */
+ mask64 = fls_long(max_entries - 1);
+ mask64 = 1ULL << mask64;
+ mask64 -= 1;
+
+ index_mask = mask64;
+ if (unpriv) {
+ /* round up array size to nearest power of 2,
+ * since cpu will speculate within index_mask limits
+ */
+ max_entries = index_mask + 1;
+ /* Check for overflows. */
+ if (max_entries < attr->max_entries)
+ return ERR_PTR(-E2BIG);
+ }
+
/* check round_up into zero and u32 overflow */
if (elem_size == 0 ||
- attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size)
+ max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size)
return ERR_PTR(-ENOMEM);
- array_size = sizeof(*array) + attr->max_entries * elem_size;
+ array_size = sizeof(*array) + max_entries * elem_size;
/* allocate all map elements and zero-initialize them */
array = kzalloc(array_size, GFP_USER | __GFP_NOWARN);
@@ -50,6 +73,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
if (!array)
return ERR_PTR(-ENOMEM);
}
+ array->index_mask = index_mask;
+ array->map.unpriv_array = unpriv;
/* copy mandatory map attributes */
array->map.key_size = attr->key_size;
@@ -70,7 +95,7 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
if (index >= array->map.max_entries)
return NULL;
- return array->value + array->elem_size * index;
+ return array->value + array->elem_size * (index & array->index_mask);
}
/* Called from syscall */
@@ -111,7 +136,9 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
/* all elements already exist */
return -EEXIST;
- memcpy(array->value + array->elem_size * index, value, map->value_size);
+ memcpy(array->value +
+ array->elem_size * (index & array->index_mask),
+ value, map->value_size);
return 0;
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 334b1bdd572c..3fd76cf0c21e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -137,6 +137,77 @@ void __bpf_prog_free(struct bpf_prog *fp)
}
EXPORT_SYMBOL_GPL(__bpf_prog_free);
+static bool bpf_is_jmp_and_has_target(const struct bpf_insn *insn)
+{
+ return BPF_CLASS(insn->code) == BPF_JMP &&
+ /* Call and Exit are both special jumps with no
+ * target inside the BPF instruction image.
+ */
+ BPF_OP(insn->code) != BPF_CALL &&
+ BPF_OP(insn->code) != BPF_EXIT;
+}
+
+static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta)
+{
+ struct bpf_insn *insn = prog->insnsi;
+ u32 i, insn_cnt = prog->len;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ if (!bpf_is_jmp_and_has_target(insn))
+ continue;
+
+ /* Adjust offset of jmps if we cross boundaries. */
+ if (i < pos && i + insn->off + 1 > pos)
+ insn->off += delta;
+ else if (i > pos + delta && i + insn->off + 1 <= pos + delta)
+ insn->off -= delta;
+ }
+}
+
+struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
+ const struct bpf_insn *patch, u32 len)
+{
+ u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
+ struct bpf_prog *prog_adj;
+
+ /* Since our patchlet doesn't expand the image, we're done. */
+ if (insn_delta == 0) {
+ memcpy(prog->insnsi + off, patch, sizeof(*patch));
+ return prog;
+ }
+
+ insn_adj_cnt = prog->len + insn_delta;
+
+ /* Several new instructions need to be inserted. Make room
+ * for them. Likely, there's no need for a new allocation as
+ * last page could have large enough tailroom.
+ */
+ prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
+ GFP_USER);
+ if (!prog_adj)
+ return NULL;
+
+ prog_adj->len = insn_adj_cnt;
+
+ /* Patching happens in 3 steps:
+ *
+ * 1) Move over tail of insnsi from next instruction onwards,
+ * so we can patch the single target insn with one or more
+ * new ones (patching is always from 1 to n insns, n > 0).
+ * 2) Inject new instructions at the target location.
+ * 3) Adjust branch offsets if necessary.
+ */
+ insn_rest = insn_adj_cnt - off - len;
+
+ memmove(prog_adj->insnsi + off + len, prog_adj->insnsi + off + 1,
+ sizeof(*patch) * insn_rest);
+ memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len);
+
+ bpf_adj_branches(prog_adj, off, insn_delta);
+
+ return prog_adj;
+}
+
#ifdef CONFIG_BPF_JIT
struct bpf_binary_header *
bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4e32cc94edd9..424accd20c2d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -447,57 +447,6 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl)
list_add(&tl->list_node, &bpf_prog_types);
}
-/* fixup insn->imm field of bpf_call instructions:
- * if (insn->imm == BPF_FUNC_map_lookup_elem)
- * insn->imm = bpf_map_lookup_elem - __bpf_call_base;
- * else if (insn->imm == BPF_FUNC_map_update_elem)
- * insn->imm = bpf_map_update_elem - __bpf_call_base;
- * else ...
- *
- * this function is called after eBPF program passed verification
- */
-static void fixup_bpf_calls(struct bpf_prog *prog)
-{
- const struct bpf_func_proto *fn;
- int i;
-
- for (i = 0; i < prog->len; i++) {
- struct bpf_insn *insn = &prog->insnsi[i];
-
- if (insn->code == (BPF_JMP | BPF_CALL)) {
- /* we reach here when program has bpf_call instructions
- * and it passed bpf_check(), means that
- * ops->get_func_proto must have been supplied, check it
- */
- BUG_ON(!prog->aux->ops->get_func_proto);
-
- if (insn->imm == BPF_FUNC_get_route_realm)
- prog->dst_needed = 1;
- if (insn->imm == BPF_FUNC_get_prandom_u32)
- bpf_user_rnd_init_once();
- if (insn->imm == BPF_FUNC_tail_call) {
- /* mark bpf_tail_call as different opcode
- * to avoid conditional branch in
- * interpeter for every normal call
- * and to prevent accidental JITing by
- * JIT compiler that doesn't support
- * bpf_tail_call yet
- */
- insn->imm = 0;
- insn->code |= BPF_X;
- continue;
- }
-
- fn = prog->aux->ops->get_func_proto(insn->imm);
- /* all functions that have prototype and verifier allowed
- * programs to call them, must be real in-kernel functions
- */
- BUG_ON(!fn->func);
- insn->imm = fn->func - __bpf_call_base;
- }
- }
-}
-
/* drop refcnt on maps used by eBPF program and free auxilary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
@@ -680,9 +629,6 @@ static int bpf_prog_load(union bpf_attr *attr)
if (err < 0)
goto free_used_maps;
- /* fixup BPF_CALL->imm field */
- fixup_bpf_calls(prog);
-
/* eBPF program is ready to be JITed */
err = bpf_prog_select_runtime(prog);
if (err < 0)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index eb759f5008b8..014c2d759916 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -186,6 +186,13 @@ struct verifier_stack_elem {
struct verifier_stack_elem *next;
};
+struct bpf_insn_aux_data {
+ union {
+ enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
+ struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */
+ };
+};
+
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
/* single container for all structs
@@ -200,6 +207,7 @@ struct verifier_env {
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
u32 used_map_cnt; /* number of used maps */
bool allow_ptr_leaks;
+ struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
};
/* verbose verifier prints what it's seeing
@@ -945,7 +953,7 @@ error:
return -EINVAL;
}
-static int check_call(struct verifier_env *env, int func_id)
+static int check_call(struct verifier_env *env, int func_id, int insn_idx)
{
struct verifier_state *state = &env->cur_state;
const struct bpf_func_proto *fn = NULL;
@@ -981,6 +989,13 @@ static int check_call(struct verifier_env *env, int func_id)
err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &map);
if (err)
return err;
+ if (func_id == BPF_FUNC_tail_call) {
+ if (map == NULL) {
+ verbose("verifier bug\n");
+ return -EINVAL;
+ }
+ env->insn_aux_data[insn_idx].map_ptr = map;
+ }
err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &map);
if (err)
return err;
@@ -1784,7 +1799,7 @@ static int do_check(struct verifier_env *env)
return err;
} else if (class == BPF_LDX) {
- enum bpf_reg_type src_reg_type;
+ enum bpf_reg_type *prev_src_type, src_reg_type;
/* check for reserved fields is already done */
@@ -1813,16 +1828,18 @@ static int do_check(struct verifier_env *env)
continue;
}
- if (insn->imm == 0) {
+ prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
+
+ if (*prev_src_type == NOT_INIT) {
/* saw a valid insn
* dst_reg = *(u32 *)(src_reg + off)
- * use reserved 'imm' field to mark this insn
+ * save type to validate intersecting paths
*/
- insn->imm = src_reg_type;
+ *prev_src_type = src_reg_type;
- } else if (src_reg_type != insn->imm &&
+ } else if (src_reg_type != *prev_src_type &&
(src_reg_type == PTR_TO_CTX ||
- insn->imm == PTR_TO_CTX)) {
+ *prev_src_type == PTR_TO_CTX)) {
/* ABuser program is trying to use the same insn
* dst_reg = *(u32*) (src_reg + off)
* with different pointer types:
@@ -1835,7 +1852,7 @@ static int do_check(struct verifier_env *env)
}
} else if (class == BPF_STX) {
- enum bpf_reg_type dst_reg_type;
+ enum bpf_reg_type *prev_dst_type, dst_reg_type;
if (BPF_MODE(insn->code) == BPF_XADD) {
err = check_xadd(env, insn);
@@ -1863,11 +1880,13 @@ static int do_check(struct verifier_env *env)
if (err)
return err;
- if (insn->imm == 0) {
- insn->imm = dst_reg_type;
- } else if (dst_reg_type != insn->imm &&
+ prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type;
+
+ if (*prev_dst_type == NOT_INIT) {
+ *prev_dst_type = dst_reg_type;
+ } else if (dst_reg_type != *prev_dst_type &&
(dst_reg_type == PTR_TO_CTX ||
- insn->imm == PTR_TO_CTX)) {
+ *prev_dst_type == PTR_TO_CTX)) {
verbose("same insn cannot be used with different pointers\n");
return -EINVAL;
}
@@ -1902,7 +1921,7 @@ static int do_check(struct verifier_env *env)
return -EINVAL;
}
- err = check_call(env, insn->imm);
+ err = check_call(env, insn->imm, insn_idx);
if (err)
return err;
@@ -2098,24 +2117,39 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)
insn->src_reg = 0;
}
-static void adjust_branches(struct bpf_prog *prog, int pos, int delta)
+/* single env->prog->insni[off] instruction was replaced with the range
+ * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
+ * [0, off) and [off, end) to new locations, so the patched range stays zero
+ */
+static int adjust_insn_aux_data(struct verifier_env *env, u32 prog_len,
+ u32 off, u32 cnt)
{
- struct bpf_insn *insn = prog->insnsi;
- int insn_cnt = prog->len;
- int i;
+ struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
- for (i = 0; i < insn_cnt; i++, insn++) {
- if (BPF_CLASS(insn->code) != BPF_JMP ||
- BPF_OP(insn->code) == BPF_CALL ||
- BPF_OP(insn->code) == BPF_EXIT)
- continue;
+ if (cnt == 1)
+ return 0;
+ new_data = vzalloc(sizeof(struct bpf_insn_aux_data) * prog_len);
+ if (!new_data)
+ return -ENOMEM;
+ memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
+ memcpy(new_data + off + cnt - 1, old_data + off,
+ sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+ env->insn_aux_data = new_data;
+ vfree(old_data);
+ return 0;
+}
- /* adjust offset of jmps if necessary */
- if (i < pos && i + insn->off + 1 > pos)
- insn->off += delta;
- else if (i > pos + delta && i + insn->off + 1 <= pos + delta)
- insn->off -= delta;
- }
+static struct bpf_prog *bpf_patch_insn_data(struct verifier_env *env, u32 off,
+ const struct bpf_insn *patch, u32 len)
+{
+ struct bpf_prog *new_prog;
+
+ new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
+ if (!new_prog)
+ return NULL;
+ if (adjust_insn_aux_data(env, new_prog->len, off, len))
+ return NULL;
+ return new_prog;
}
/* convert load instructions that access fields of 'struct __sk_buff'
@@ -2124,17 +2158,18 @@ static void adjust_branches(struct bpf_prog *prog, int pos, int delta)
static int convert_ctx_accesses(struct verifier_env *env)
{
struct bpf_insn *insn = env->prog->insnsi;
- int insn_cnt = env->prog->len;
+ const int insn_cnt = env->prog->len;
struct bpf_insn insn_buf[16];
struct bpf_prog *new_prog;
- u32 cnt;
- int i;
enum bpf_access_type type;
+ int i, delta = 0;
if (!env->prog->aux->ops->convert_ctx_access)
return 0;
for (i = 0; i < insn_cnt; i++, insn++) {
+ u32 cnt;
+
if (insn->code == (BPF_LDX | BPF_MEM | BPF_W))
type = BPF_READ;
else if (insn->code == (BPF_STX | BPF_MEM | BPF_W))
@@ -2142,11 +2177,8 @@ static int convert_ctx_accesses(struct verifier_env *env)
else
continue;
- if (insn->imm != PTR_TO_CTX) {
- /* clear internal mark */
- insn->imm = 0;
+ if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
continue;
- }
cnt = env->prog->aux->ops->
convert_ctx_access(type, insn->dst_reg, insn->src_reg,
@@ -2156,34 +2188,89 @@ static int convert_ctx_accesses(struct verifier_env *env)
return -EINVAL;
}
- if (cnt == 1) {
- memcpy(insn, insn_buf, sizeof(*insn));
- continue;
- }
-
- /* several new insns need to be inserted. Make room for them */
- insn_cnt += cnt - 1;
- new_prog = bpf_prog_realloc(env->prog,
- bpf_prog_size(insn_cnt),
- GFP_USER);
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
- new_prog->len = insn_cnt;
+ delta += cnt - 1;
+
+ /* keep walking new program and skip insns we just inserted */
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ }
- memmove(new_prog->insnsi + i + cnt, new_prog->insns + i + 1,
- sizeof(*insn) * (insn_cnt - i - cnt));
+ return 0;
+}
- /* copy substitute insns in place of load instruction */
- memcpy(new_prog->insnsi + i, insn_buf, sizeof(*insn) * cnt);
+/* fixup insn->imm field of bpf_call instructions
+ *
+ * this function is called after eBPF program passed verification
+ */
+static int fixup_bpf_calls(struct verifier_env *env)
+{
+ struct bpf_prog *prog = env->prog;
+ struct bpf_insn *insn = prog->insnsi;
+ const struct bpf_func_proto *fn;
+ const int insn_cnt = prog->len;
+ struct bpf_insn insn_buf[16];
+ struct bpf_prog *new_prog;
+ struct bpf_map *map_ptr;
+ int i, cnt, delta = 0;
- /* adjust branches in the whole program */
- adjust_branches(new_prog, i, cnt - 1);
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ if (insn->code != (BPF_JMP | BPF_CALL))
+ continue;
- /* keep walking new program and skip insns we just inserted */
- env->prog = new_prog;
- insn = new_prog->insnsi + i + cnt - 1;
- i += cnt - 1;
+ if (insn->imm == BPF_FUNC_get_route_realm)
+ prog->dst_needed = 1;
+ if (insn->imm == BPF_FUNC_get_prandom_u32)
+ bpf_user_rnd_init_once();
+ if (insn->imm == BPF_FUNC_tail_call) {
+ /* mark bpf_tail_call as different opcode to avoid
+ * conditional branch in the interpeter for every normal
+ * call and to prevent accidental JITing by JIT compiler
+ * that doesn't support bpf_tail_call yet
+ */
+ insn->imm = 0;
+ insn->code |= BPF_X;
+
+ /* instead of changing every JIT dealing with tail_call
+ * emit two extra insns:
+ * if (index >= max_entries) goto out;
+ * index &= array->index_mask;
+ * to avoid out-of-bounds cpu speculation
+ */
+ map_ptr = env->insn_aux_data[i + delta].map_ptr;
+ if (!map_ptr->unpriv_array)
+ continue;
+ insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
+ map_ptr->max_entries, 2);
+ insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
+ container_of(map_ptr,
+ struct bpf_array,
+ map)->index_mask);
+ insn_buf[2] = *insn;
+ cnt = 3;
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ continue;
+ }
+
+ fn = prog->aux->ops->get_func_proto(insn->imm);
+ /* all functions that have prototype and verifier allowed
+ * programs to call them, must be real in-kernel functions
+ */
+ if (!fn->func) {
+ verbose("kernel subsystem misconfigured func %d\n",
+ insn->imm);
+ return -EFAULT;
+ }
+ insn->imm = fn->func - __bpf_call_base;
}
return 0;
@@ -2227,6 +2314,11 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
if (!env)
return -ENOMEM;
+ env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
+ (*prog)->len);
+ ret = -ENOMEM;
+ if (!env->insn_aux_data)
+ goto err_free_env;
env->prog = *prog;
/* grab the mutex to protect few globals used by verifier */
@@ -2245,12 +2337,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
/* log_* values have to be sane */
if (log_size < 128 || log_size > UINT_MAX >> 8 ||
log_level == 0 || log_ubuf == NULL)
- goto free_env;
+ goto err_unlock;
ret = -ENOMEM;
log_buf = vmalloc(log_size);
if (!log_buf)
- goto free_env;
+ goto err_unlock;
} else {
log_level = 0;
}
@@ -2282,6 +2374,9 @@ skip_full_check:
/* program is valid, convert *(u32*)(ctx + off) accesses */
ret = convert_ctx_accesses(env);
+ if (ret == 0)
+ ret = fixup_bpf_calls(env);
+
if (log_level && log_len >= log_size - 1) {
BUG_ON(log_len >= log_size);
/* verifier log exceeded user supplied buffer */
@@ -2319,14 +2414,16 @@ skip_full_check:
free_log_buf:
if (log_level)
vfree(log_buf);
-free_env:
if (!env->prog->aux->used_maps)
/* if we didn't copy map pointers into bpf_prog_info, release
* them now. Otherwise free_bpf_prog_info() will release them.
*/
release_maps(env);
*prog = env->prog;
- kfree(env);
+err_unlock:
mutex_unlock(&bpf_verifier_lock);
+ vfree(env->insn_aux_data);
+err_free_env:
+ kfree(env);
return ret;
}
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 0b891286a150..3990c1f73e45 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -357,7 +357,7 @@ poll_again:
}
kdb_printf("\n");
for (i = 0; i < count; i++) {
- if (kallsyms_symbol_next(p_tmp, i) < 0)
+ if (WARN_ON(!kallsyms_symbol_next(p_tmp, i)))
break;
kdb_printf("%s ", p_tmp);
*(p_tmp + len) = '\0';
diff --git a/kernel/exit.c b/kernel/exit.c
index 62c4bd4abd3a..4ceeac2e39f4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -53,6 +53,7 @@
#include <linux/oom.h>
#include <linux/writeback.h>
#include <linux/shm.h>
+#include <linux/kcov.h>
#include "sched/tune.h"
@@ -659,6 +660,7 @@ void do_exit(long code)
TASKS_RCU(int tasks_rcu_i);
profile_task_exit(tsk);
+ kcov_task_exit(tsk);
WARN_ON(blk_needs_flush_plug(tsk));
diff --git a/kernel/fork.c b/kernel/fork.c
index 2633d5f61d3c..a24b96015538 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -58,6 +58,7 @@
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/freezer.h>
+#include <linux/kaiser.h>
#include <linux/delayacct.h>
#include <linux/taskstats_kern.h>
#include <linux/random.h>
@@ -75,6 +76,7 @@
#include <linux/aio.h>
#include <linux/compiler.h>
#include <linux/sysctl.h>
+#include <linux/kcov.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -171,6 +173,7 @@ static inline void free_thread_stack(unsigned long *stack)
{
struct page *page = virt_to_page(stack);
+ kaiser_unmap_thread_stack(stack);
__free_kmem_pages(page, THREAD_SIZE_ORDER);
}
# else
@@ -354,6 +357,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
goto free_stack;
tsk->stack = stack;
+
+ err = kaiser_map_thread_stack(tsk->stack);
+ if (err)
+ goto free_stack;
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
@@ -387,6 +394,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
account_kernel_stack(stack, 1);
+ kcov_task_init(tsk);
+
return tsk;
free_stack:
diff --git a/kernel/futex.c b/kernel/futex.c
index af29863f3349..d3e7120f1c33 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1939,8 +1939,12 @@ static int unqueue_me(struct futex_q *q)
/* In the common case we don't take the spinlock, which is nice. */
retry:
- lock_ptr = q->lock_ptr;
- barrier();
+ /*
+ * q->lock_ptr can change between this read and the following spin_lock.
+ * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
+ * optimizing lock_ptr out of the logic below.
+ */
+ lock_ptr = READ_ONCE(q->lock_ptr);
if (lock_ptr != NULL) {
spin_lock(lock_ptr);
/*
diff --git a/kernel/groups.c b/kernel/groups.c
index 74d431d25251..5ea9847f172f 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -101,7 +101,7 @@ static int groups_from_user(struct group_info *group_info,
}
/* a simple Shell sort */
-static void groups_sort(struct group_info *group_info)
+void groups_sort(struct group_info *group_info)
{
int base, max, stride;
int gidsetsize = group_info->ngroups;
@@ -128,6 +128,7 @@ static void groups_sort(struct group_info *group_info)
stride /= 3;
}
}
+EXPORT_SYMBOL(groups_sort);
/* a simple bsearch */
int groups_search(const struct group_info *group_info, kgid_t grp)
@@ -159,7 +160,6 @@ int groups_search(const struct group_info *group_info, kgid_t grp)
void set_groups(struct cred *new, struct group_info *group_info)
{
put_group_info(new->group_info);
- groups_sort(group_info);
get_group_info(group_info);
new->group_info = group_info;
}
@@ -243,6 +243,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
return retval;
}
+ groups_sort(group_info);
retval = set_current_groups(group_info);
put_group_info(group_info);
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 453ec4232852..e863b2339174 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -553,7 +553,7 @@ static __init int jump_label_test(void)
return 0;
}
-late_initcall(jump_label_test);
+early_initcall(jump_label_test);
#endif /* STATIC_KEYS_SELFTEST */
#endif /* HAVE_JUMP_LABEL */
diff --git a/kernel/kcov.c b/kernel/kcov.c
new file mode 100644
index 000000000000..5813e9375a93
--- /dev/null
+++ b/kernel/kcov.c
@@ -0,0 +1,431 @@
+#define pr_fmt(fmt) "kcov: " fmt
+
+#define DISABLE_BRANCH_PROFILING
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/preempt.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/kcov.h>
+#include <asm/setup.h>
+
+/* Number of 64-bit words written per one comparison: */
+#define KCOV_WORDS_PER_CMP 4
+
+/*
+ * kcov descriptor (one per opened debugfs file).
+ * State transitions of the descriptor:
+ * - initial state after open()
+ * - then there must be a single ioctl(KCOV_INIT_TRACE) call
+ * - then, mmap() call (several calls are allowed but not useful)
+ * - then, ioctl(KCOV_ENABLE, arg), where arg is
+ * KCOV_TRACE_PC - to trace only the PCs
+ * or
+ * KCOV_TRACE_CMP - to trace only the comparison operands
+ * - then, ioctl(KCOV_DISABLE) to disable the task.
+ * Enabling/disabling ioctls can be repeated (only one task a time allowed).
+ */
+struct kcov {
+ /*
+ * Reference counter. We keep one for:
+ * - opened file descriptor
+ * - task with enabled coverage (we can't unwire it from another task)
+ */
+ atomic_t refcount;
+ /* The lock protects mode, size, area and t. */
+ spinlock_t lock;
+ enum kcov_mode mode;
+ /* Size of arena (in long's for KCOV_MODE_TRACE). */
+ unsigned size;
+ /* Coverage buffer shared with user space. */
+ void *area;
+ /* Task for which we collect coverage, or NULL. */
+ struct task_struct *t;
+};
+
+static bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t)
+{
+ enum kcov_mode mode;
+
+ /*
+ * We are interested in code coverage as a function of a syscall inputs,
+ * so we ignore code executed in interrupts.
+ */
+ if (!in_task())
+ return false;
+ mode = READ_ONCE(t->kcov_mode);
+ /*
+ * There is some code that runs in interrupts but for which
+ * in_interrupt() returns false (e.g. preempt_schedule_irq()).
+ * READ_ONCE()/barrier() effectively provides load-acquire wrt
+ * interrupts, there are paired barrier()/WRITE_ONCE() in
+ * kcov_ioctl_locked().
+ */
+ barrier();
+ return mode == needed_mode;
+}
+
+static unsigned long canonicalize_ip(unsigned long ip)
+{
+#ifdef CONFIG_RANDOMIZE_BASE
+ ip -= kaslr_offset();
+#endif
+ return ip;
+}
+
+/*
+ * Entry point from instrumented code.
+ * This is called once per basic-block/edge.
+ */
+void notrace __sanitizer_cov_trace_pc(void)
+{
+ struct task_struct *t;
+ unsigned long *area;
+ unsigned long ip = canonicalize_ip(_RET_IP_);
+ unsigned long pos;
+
+ t = current;
+ if (!check_kcov_mode(KCOV_MODE_TRACE_PC, t))
+ return;
+
+ area = t->kcov_area;
+ /* The first 64-bit word is the number of subsequent PCs. */
+ pos = READ_ONCE(area[0]) + 1;
+ if (likely(pos < t->kcov_size)) {
+ area[pos] = ip;
+ WRITE_ONCE(area[0], pos);
+ }
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_pc);
+
+#ifdef CONFIG_KCOV_ENABLE_COMPARISONS
+static void write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip)
+{
+ struct task_struct *t;
+ u64 *area;
+ u64 count, start_index, end_pos, max_pos;
+
+ t = current;
+ if (!check_kcov_mode(KCOV_MODE_TRACE_CMP, t))
+ return;
+
+ ip = canonicalize_ip(ip);
+
+ /*
+ * We write all comparison arguments and types as u64.
+ * The buffer was allocated for t->kcov_size unsigned longs.
+ */
+ area = (u64 *)t->kcov_area;
+ max_pos = t->kcov_size * sizeof(unsigned long);
+
+ count = READ_ONCE(area[0]);
+
+ /* Every record is KCOV_WORDS_PER_CMP 64-bit words. */
+ start_index = 1 + count * KCOV_WORDS_PER_CMP;
+ end_pos = (start_index + KCOV_WORDS_PER_CMP) * sizeof(u64);
+ if (likely(end_pos <= max_pos)) {
+ area[start_index] = type;
+ area[start_index + 1] = arg1;
+ area[start_index + 2] = arg2;
+ area[start_index + 3] = ip;
+ WRITE_ONCE(area[0], count + 1);
+ }
+}
+
+void notrace __sanitizer_cov_trace_cmp1(u8 arg1, u8 arg2)
+{
+ write_comp_data(KCOV_CMP_SIZE(0), arg1, arg2, _RET_IP_);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_cmp1);
+
+void notrace __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2)
+{
+ write_comp_data(KCOV_CMP_SIZE(1), arg1, arg2, _RET_IP_);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_cmp2);
+
+void notrace __sanitizer_cov_trace_cmp4(u32 arg1, u32 arg2)
+{
+ write_comp_data(KCOV_CMP_SIZE(2), arg1, arg2, _RET_IP_);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_cmp4);
+
+void notrace __sanitizer_cov_trace_cmp8(u64 arg1, u64 arg2)
+{
+ write_comp_data(KCOV_CMP_SIZE(3), arg1, arg2, _RET_IP_);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_cmp8);
+
+void notrace __sanitizer_cov_trace_const_cmp1(u8 arg1, u8 arg2)
+{
+ write_comp_data(KCOV_CMP_SIZE(0) | KCOV_CMP_CONST, arg1, arg2,
+ _RET_IP_);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp1);
+
+void notrace __sanitizer_cov_trace_const_cmp2(u16 arg1, u16 arg2)
+{
+ write_comp_data(KCOV_CMP_SIZE(1) | KCOV_CMP_CONST, arg1, arg2,
+ _RET_IP_);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp2);
+
+void notrace __sanitizer_cov_trace_const_cmp4(u32 arg1, u32 arg2)
+{
+ write_comp_data(KCOV_CMP_SIZE(2) | KCOV_CMP_CONST, arg1, arg2,
+ _RET_IP_);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp4);
+
+void notrace __sanitizer_cov_trace_const_cmp8(u64 arg1, u64 arg2)
+{
+ write_comp_data(KCOV_CMP_SIZE(3) | KCOV_CMP_CONST, arg1, arg2,
+ _RET_IP_);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp8);
+
+void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases)
+{
+ u64 i;
+ u64 count = cases[0];
+ u64 size = cases[1];
+ u64 type = KCOV_CMP_CONST;
+
+ switch (size) {
+ case 8:
+ type |= KCOV_CMP_SIZE(0);
+ break;
+ case 16:
+ type |= KCOV_CMP_SIZE(1);
+ break;
+ case 32:
+ type |= KCOV_CMP_SIZE(2);
+ break;
+ case 64:
+ type |= KCOV_CMP_SIZE(3);
+ break;
+ default:
+ return;
+ }
+ for (i = 0; i < count; i++)
+ write_comp_data(type, cases[i + 2], val, _RET_IP_);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_switch);
+#endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */
+
+static void kcov_get(struct kcov *kcov)
+{
+ atomic_inc(&kcov->refcount);
+}
+
+static void kcov_put(struct kcov *kcov)
+{
+ if (atomic_dec_and_test(&kcov->refcount)) {
+ vfree(kcov->area);
+ kfree(kcov);
+ }
+}
+
+void kcov_task_init(struct task_struct *t)
+{
+ t->kcov_mode = KCOV_MODE_DISABLED;
+ t->kcov_size = 0;
+ t->kcov_area = NULL;
+ t->kcov = NULL;
+}
+
+void kcov_task_exit(struct task_struct *t)
+{
+ struct kcov *kcov;
+
+ kcov = t->kcov;
+ if (kcov == NULL)
+ return;
+ spin_lock(&kcov->lock);
+ if (WARN_ON(kcov->t != t)) {
+ spin_unlock(&kcov->lock);
+ return;
+ }
+ /* Just to not leave dangling references behind. */
+ kcov_task_init(t);
+ kcov->t = NULL;
+ kcov->mode = KCOV_MODE_INIT;
+ spin_unlock(&kcov->lock);
+ kcov_put(kcov);
+}
+
+static int kcov_mmap(struct file *filep, struct vm_area_struct *vma)
+{
+ int res = 0;
+ void *area;
+ struct kcov *kcov = vma->vm_file->private_data;
+ unsigned long size, off;
+ struct page *page;
+
+ area = vmalloc_user(vma->vm_end - vma->vm_start);
+ if (!area)
+ return -ENOMEM;
+
+ spin_lock(&kcov->lock);
+ size = kcov->size * sizeof(unsigned long);
+ if (kcov->mode != KCOV_MODE_INIT || vma->vm_pgoff != 0 ||
+ vma->vm_end - vma->vm_start != size) {
+ res = -EINVAL;
+ goto exit;
+ }
+ if (!kcov->area) {
+ kcov->area = area;
+ vma->vm_flags |= VM_DONTEXPAND;
+ spin_unlock(&kcov->lock);
+ for (off = 0; off < size; off += PAGE_SIZE) {
+ page = vmalloc_to_page(kcov->area + off);
+ if (vm_insert_page(vma, vma->vm_start + off, page))
+ WARN_ONCE(1, "vm_insert_page() failed");
+ }
+ return 0;
+ }
+exit:
+ spin_unlock(&kcov->lock);
+ vfree(area);
+ return res;
+}
+
+static int kcov_open(struct inode *inode, struct file *filep)
+{
+ struct kcov *kcov;
+
+ kcov = kzalloc(sizeof(*kcov), GFP_KERNEL);
+ if (!kcov)
+ return -ENOMEM;
+ kcov->mode = KCOV_MODE_DISABLED;
+ atomic_set(&kcov->refcount, 1);
+ spin_lock_init(&kcov->lock);
+ filep->private_data = kcov;
+ return nonseekable_open(inode, filep);
+}
+
+static int kcov_close(struct inode *inode, struct file *filep)
+{
+ kcov_put(filep->private_data);
+ return 0;
+}
+
+static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
+ unsigned long arg)
+{
+ struct task_struct *t;
+ unsigned long size, unused;
+
+ switch (cmd) {
+ case KCOV_INIT_TRACE:
+ /*
+ * Enable kcov in trace mode and setup buffer size.
+ * Must happen before anything else.
+ */
+ if (kcov->mode != KCOV_MODE_DISABLED)
+ return -EBUSY;
+ /*
+ * Size must be at least 2 to hold current position and one PC.
+ * Later we allocate size * sizeof(unsigned long) memory,
+ * that must not overflow.
+ */
+ size = arg;
+ if (size < 2 || size > INT_MAX / sizeof(unsigned long))
+ return -EINVAL;
+ kcov->size = size;
+ kcov->mode = KCOV_MODE_INIT;
+ return 0;
+ case KCOV_ENABLE:
+ /*
+ * Enable coverage for the current task.
+ * At this point user must have been enabled trace mode,
+ * and mmapped the file. Coverage collection is disabled only
+ * at task exit or voluntary by KCOV_DISABLE. After that it can
+ * be enabled for another task.
+ */
+ if (kcov->mode != KCOV_MODE_INIT || !kcov->area)
+ return -EINVAL;
+ if (kcov->t != NULL)
+ return -EBUSY;
+ if (arg == KCOV_TRACE_PC)
+ kcov->mode = KCOV_MODE_TRACE_PC;
+ else if (arg == KCOV_TRACE_CMP)
+#ifdef CONFIG_KCOV_ENABLE_COMPARISONS
+ kcov->mode = KCOV_MODE_TRACE_CMP;
+#else
+ return -ENOTSUPP;
+#endif
+ else
+ return -EINVAL;
+ t = current;
+ /* Cache in task struct for performance. */
+ t->kcov_size = kcov->size;
+ t->kcov_area = kcov->area;
+ /* See comment in check_kcov_mode(). */
+ barrier();
+ WRITE_ONCE(t->kcov_mode, kcov->mode);
+ t->kcov = kcov;
+ kcov->t = t;
+ /* This is put either in kcov_task_exit() or in KCOV_DISABLE. */
+ kcov_get(kcov);
+ return 0;
+ case KCOV_DISABLE:
+ /* Disable coverage for the current task. */
+ unused = arg;
+ if (unused != 0 || current->kcov != kcov)
+ return -EINVAL;
+ t = current;
+ if (WARN_ON(kcov->t != t))
+ return -EINVAL;
+ kcov_task_init(t);
+ kcov->t = NULL;
+ kcov->mode = KCOV_MODE_INIT;
+ kcov_put(kcov);
+ return 0;
+ default:
+ return -ENOTTY;
+ }
+}
+
+static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+ struct kcov *kcov;
+ int res;
+
+ kcov = filep->private_data;
+ spin_lock(&kcov->lock);
+ res = kcov_ioctl_locked(kcov, cmd, arg);
+ spin_unlock(&kcov->lock);
+ return res;
+}
+
+static const struct file_operations kcov_fops = {
+ .open = kcov_open,
+ .unlocked_ioctl = kcov_ioctl,
+ .compat_ioctl = kcov_ioctl,
+ .mmap = kcov_mmap,
+ .release = kcov_close,
+};
+
+static int __init kcov_init(void)
+{
+ if (!debugfs_create_file("kcov", 0600, NULL, NULL, &kcov_fops)) {
+ pr_err("failed to create kcov in debugfs\n");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+device_initcall(kcov_init);
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 8e96f6cc2a4a..31322a4275cd 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -1,3 +1,6 @@
+# Any varying coverage in these files is non-deterministic
+# and is generally not a function of system call inputs.
+KCOV_INSTRUMENT := n
obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index f42f83a36506..a70b90db3909 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -719,6 +719,7 @@ static inline void
__mutex_unlock_common_slowpath(struct mutex *lock, int nested)
{
unsigned long flags;
+ WAKE_Q(wake_q);
/*
* As a performance measurement, release the lock before doing other
@@ -746,11 +747,11 @@ __mutex_unlock_common_slowpath(struct mutex *lock, int nested)
struct mutex_waiter, list);
debug_mutex_wake_waiter(lock, waiter);
-
- wake_up_process(waiter->task);
+ wake_q_add(&wake_q, waiter->task);
}
spin_unlock_mutex(&lock->wait_lock, flags);
+ wake_up_q(&wake_q);
}
/*
diff --git a/kernel/module.c b/kernel/module.c
index 63c54d644bd4..9d12218ddc47 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2870,8 +2870,12 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags)
return -ENOEXEC;
}
- if (!get_modinfo(info, "intree"))
+ if (!get_modinfo(info, "intree")) {
+ if (!test_taint(TAINT_OOT_MODULE))
+ pr_warn("%s: loading out-of-tree module taints kernel.\n",
+ mod->name);
add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
+ }
if (get_modinfo(info, "staging")) {
add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK);
@@ -3036,6 +3040,8 @@ static int move_module(struct module *mod, struct load_info *info)
static int check_module_license_and_versions(struct module *mod)
{
+ int prev_taint = test_taint(TAINT_PROPRIETARY_MODULE);
+
/*
* ndiswrapper is under GPL by itself, but loads proprietary modules.
* Don't use add_taint_module(), as it would prevent ndiswrapper from
@@ -3054,6 +3060,9 @@ static int check_module_license_and_versions(struct module *mod)
add_taint_module(mod, TAINT_PROPRIETARY_MODULE,
LOCKDEP_NOW_UNRELIABLE);
+ if (!prev_taint && test_taint(TAINT_PROPRIETARY_MODULE))
+ pr_warn("%s: module license taints kernel.\n", mod->name);
+
#ifdef CONFIG_MODVERSIONS
if ((mod->num_syms && !mod->crcs)
|| (mod->num_gpl_syms && !mod->gpl_crcs)
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 61a16569ffbf..032b2c015beb 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -1,3 +1,7 @@
+# Any varying coverage in these files is non-deterministic
+# and is generally not a function of system call inputs.
+KCOV_INSTRUMENT := n
+
obj-y += update.o sync.o
obj-$(CONFIG_SRCU) += srcu.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index d7ec4f7dd0d9..99378130a42f 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -2,6 +2,10 @@ ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_clock.o = $(CC_FLAGS_FTRACE)
endif
+# These files are disabled because they produce non-interesting flaky coverage
+# that is not a function of syscall inputs. E.g. involuntary context switches.
+KCOV_INSTRUMENT := n
+
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
# needed for x86 only. Why this used to be enabled for all architectures is beyond
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d3e77586657a..364ed6d775db 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -33,7 +33,7 @@
#include <linux/init.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
-#include <asm/mmu_context.h>
+#include <linux/mmu_context.h>
#include <linux/interrupt.h>
#include <linux/capability.h>
#include <linux/completion.h>
@@ -2782,7 +2782,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
atomic_inc(&oldmm->mm_count);
enter_lazy_tlb(oldmm, next);
} else
- switch_mm(oldmm, mm, next);
+ switch_mm_irqs_off(oldmm, mm, next);
if (!prev->mm) {
prev->active_mm = NULL;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index d3765f0cb699..6c84b4d28914 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -130,8 +130,11 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
{
struct cpufreq_policy *policy = sg_policy->policy;
- if (sugov_up_down_rate_limit(sg_policy, time, next_freq))
+ if (sugov_up_down_rate_limit(sg_policy, time, next_freq)) {
+ /* Reset cached freq as next_freq isn't changed */
+ sg_policy->cached_raw_freq = 0;
return;
+ }
if (sg_policy->next_freq == next_freq)
return;
@@ -317,8 +320,12 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
* Do not reduce the frequency if the CPU has not been idle
* recently, as the reduction is likely to be premature then.
*/
- if (busy && next_f < sg_policy->next_freq)
+ if (busy && next_f < sg_policy->next_freq) {
next_f = sg_policy->next_freq;
+
+ /* Reset cached freq as next_freq has changed */
+ sg_policy->cached_raw_freq = 0;
+ }
}
sugov_update_commit(sg_policy, time, next_f);
}
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index cdf2a0b97611..0d0c8e954861 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -461,13 +461,13 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
*
* This function returns true if:
*
- * runtime / (deadline - t) > dl_runtime / dl_period ,
+ * runtime / (deadline - t) > dl_runtime / dl_deadline ,
*
* IOW we can't recycle current parameters.
*
- * Notice that the bandwidth check is done against the period. For
+ * Notice that the bandwidth check is done against the deadline. For
* task with deadline equal to period this is the same of using
- * dl_deadline instead of dl_period in the equation above.
+ * dl_period instead of dl_deadline in the equation above.
*/
static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
struct sched_dl_entity *pi_se, u64 t)
@@ -492,7 +492,7 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
* of anything below microseconds resolution is actually fiction
* (but still we want to give the user that illusion >;).
*/
- left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
+ left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
right = ((dl_se->deadline - t) >> DL_SCALE) *
(pi_se->dl_runtime >> DL_SCALE);
@@ -533,10 +533,15 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
}
}
+static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
+{
+ return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
+}
+
/*
* If the entity depleted all its runtime, and if we want it to sleep
* while waiting for some new execution time to become available, we
- * set the bandwidth enforcement timer to the replenishment instant
+ * set the bandwidth replenishment timer to the replenishment instant
* and try to activate it.
*
* Notice that it is important for the caller to know if the timer
@@ -558,7 +563,7 @@ static int start_dl_timer(struct task_struct *p)
* that it is actually coming from rq->clock and not from
* hrtimer's time base reading.
*/
- act = ns_to_ktime(dl_se->deadline);
+ act = ns_to_ktime(dl_next_period(dl_se));
now = hrtimer_cb_get_time(timer);
delta = ktime_to_ns(now) - rq_clock(rq);
act = ktime_add_ns(act, delta);
@@ -722,6 +727,37 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
timer->function = dl_task_timer;
}
+/*
+ * During the activation, CBS checks if it can reuse the current task's
+ * runtime and period. If the deadline of the task is in the past, CBS
+ * cannot use the runtime, and so it replenishes the task. This rule
+ * works fine for implicit deadline tasks (deadline == period), and the
+ * CBS was designed for implicit deadline tasks. However, a task with
+ * constrained deadline (deadine < period) might be awakened after the
+ * deadline, but before the next period. In this case, replenishing the
+ * task would allow it to run for runtime / deadline. As in this case
+ * deadline < period, CBS enables a task to run for more than the
+ * runtime / period. In a very loaded system, this can cause a domino
+ * effect, making other tasks miss their deadlines.
+ *
+ * To avoid this problem, in the activation of a constrained deadline
+ * task after the deadline but before the next period, throttle the
+ * task and set the replenishing timer to the begin of the next period,
+ * unless it is boosted.
+ */
+static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
+{
+ struct task_struct *p = dl_task_of(dl_se);
+ struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
+
+ if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
+ dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
+ if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
+ return;
+ dl_se->dl_throttled = 1;
+ }
+}
+
static
int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
{
@@ -979,6 +1015,11 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
__dequeue_dl_entity(dl_se);
}
+static inline bool dl_is_constrained(struct sched_dl_entity *dl_se)
+{
+ return dl_se->dl_deadline < dl_se->dl_period;
+}
+
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
struct task_struct *pi_task = rt_mutex_get_top_task(p);
@@ -1005,6 +1046,15 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
}
/*
+ * Check if a constrained deadline task was activated
+ * after the deadline but before the next period.
+ * If that is the case, the task will be throttled and
+ * the replenishment timer will be set to the next period.
+ */
+ if (!p->dl.dl_throttled && dl_is_constrained(&p->dl))
+ dl_check_constrained_dl(&p->dl);
+
+ /*
* If p is throttled, we do nothing. In fact, if it exhausted
* its budget it needs a replenishment and, since it now is on
* its rq, the bandwidth timer callback (which clearly has not
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 98aaa95e5425..03303600ca2f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6662,7 +6662,8 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync
#ifdef CONFIG_SCHED_WALT
- if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
+ if (!walt_disabled && sysctl_sched_use_walt_cpu_util &&
+ p->state == TASK_WAKING)
delta = task_util(p);
#endif
/* Not enough spare capacity on previous cpu */
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 0af072f64e52..0f21b12332df 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2025,8 +2025,9 @@ static void pull_rt_task(struct rq *this_rq)
bool resched = false;
struct task_struct *p;
struct rq *src_rq;
+ int rt_overload_count = rt_overloaded(this_rq);
- if (likely(!rt_overloaded(this_rq)))
+ if (likely(!rt_overload_count))
return;
/*
@@ -2035,6 +2036,11 @@ static void pull_rt_task(struct rq *this_rq)
*/
smp_rmb();
+ /* If we are the only overloaded CPU do nothing */
+ if (rt_overload_count == 1 &&
+ cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
+ return;
+
#ifdef HAVE_RT_PUSH_IPI
if (sched_feat(RT_PUSH_IPI)) {
tell_cpu_to_push(this_rq);
diff --git a/kernel/signal.c b/kernel/signal.c
index 4671da7e2222..4f8855412c6e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -72,7 +72,7 @@ static int sig_task_ignored(struct task_struct *t, int sig, bool force)
handler = sig_handler(t, sig);
if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) &&
- handler == SIG_DFL && !force)
+ handler == SIG_DFL && !(force && sig_kernel_only(sig)))
return 1;
return sig_handler_ignored(handler, sig);
@@ -88,13 +88,15 @@ static int sig_ignored(struct task_struct *t, int sig, bool force)
if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig))
return 0;
- if (!sig_task_ignored(t, sig, force))
- return 0;
-
/*
- * Tracers may want to know about even ignored signals.
+ * Tracers may want to know about even ignored signal unless it
+ * is SIGKILL which can't be reported anyway but can be ignored
+ * by SIGNAL_UNKILLABLE task.
*/
- return !t->ptrace;
+ if (t->ptrace && sig != SIGKILL)
+ return 0;
+
+ return sig_task_ignored(t, sig, force);
}
/*
@@ -917,9 +919,9 @@ static void complete_signal(int sig, struct task_struct *p, int group)
* then start taking the whole group down immediately.
*/
if (sig_fatal(p, sig) &&
- !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) &&
+ !(signal->flags & SIGNAL_GROUP_EXIT) &&
!sigismember(&t->real_blocked, sig) &&
- (sig == SIGKILL || !t->ptrace)) {
+ (sig == SIGKILL || !p->ptrace)) {
/*
* This signal will be fatal to the whole group.
*/
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 479e4436f787..359be4f39986 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -227,7 +227,7 @@ static inline bool lockdep_softirq_start(void) { return false; }
static inline void lockdep_softirq_end(bool in_hardirq) { }
#endif
-asmlinkage __visible void __do_softirq(void)
+asmlinkage __visible void __softirq_entry __do_softirq(void)
{
unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
unsigned long old_flags = current->flags;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 34f9a9c417d9..a935cbdc55a4 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -568,6 +568,11 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
}
+static inline bool local_timer_softirq_pending(void)
+{
+ return local_softirq_pending() & TIMER_SOFTIRQ;
+}
+
static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
ktime_t now, int cpu)
{
@@ -584,8 +589,18 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
} while (read_seqretry(&jiffies_lock, seq));
ts->last_jiffies = basejiff;
- if (rcu_needs_cpu(basemono, &next_rcu) ||
- arch_needs_cpu() || irq_work_needs_cpu()) {
+ /*
+ * Keep the periodic tick, when RCU, architecture or irq_work
+ * requests it.
+ * Aside of that check whether the local timer softirq is
+ * pending. If so its a bad idea to call get_next_timer_interrupt()
+ * because there is an already expired timer, so it will request
+ * immeditate expiry, which rearms the hardware timer with a
+ * minimal delta which brings us back to this place
+ * immediately. Lather, rinse and repeat...
+ */
+ if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
+ irq_work_needs_cpu() || local_timer_softirq_pending()) {
next_tick = basemono + TICK_NSEC;
} else {
/*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 5a514e6002d2..7902ecbce8ec 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -70,6 +70,10 @@ static inline void tk_normalize_xtime(struct timekeeper *tk)
tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
tk->xtime_sec++;
}
+ while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) {
+ tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
+ tk->raw_sec++;
+ }
}
static inline struct timespec64 tk_xtime(struct timekeeper *tk)
@@ -282,12 +286,14 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
/* if changing clocks, convert xtime_nsec shift units */
if (old_clock) {
int shift_change = clock->shift - old_clock->shift;
- if (shift_change < 0)
+ if (shift_change < 0) {
tk->tkr_mono.xtime_nsec >>= -shift_change;
- else
+ tk->tkr_raw.xtime_nsec >>= -shift_change;
+ } else {
tk->tkr_mono.xtime_nsec <<= shift_change;
+ tk->tkr_raw.xtime_nsec <<= shift_change;
+ }
}
- tk->tkr_raw.xtime_nsec = 0;
tk->tkr_mono.shift = clock->shift;
tk->tkr_raw.shift = clock->shift;
@@ -616,9 +622,6 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
nsec = (u32) tk->wall_to_monotonic.tv_nsec;
tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
- /* Update the monotonic raw base */
- tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
-
/*
* The sum of the nanoseconds portions of xtime and
* wall_to_monotonic can be greater/equal one second. Take
@@ -628,6 +631,9 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
if (nsec >= NSEC_PER_SEC)
seconds++;
tk->ktime_sec = seconds;
+
+ /* Update the monotonic raw base */
+ tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
}
/* must hold timekeeper_lock */
@@ -669,7 +675,6 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
static void timekeeping_forward_now(struct timekeeper *tk)
{
cycle_t cycle_now, delta;
- s64 nsec;
cycle_now = tk_clock_read(&tk->tkr_mono);
delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
@@ -681,10 +686,13 @@ static void timekeeping_forward_now(struct timekeeper *tk)
/* If arch requires, add in get_arch_timeoffset() */
tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;
- tk_normalize_xtime(tk);
- nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
- timespec64_add_ns(&tk->raw_time, nsec);
+ tk->tkr_raw.xtime_nsec += delta * tk->tkr_raw.mult;
+
+ /* If arch requires, add in get_arch_timeoffset() */
+ tk->tkr_raw.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_raw.shift;
+
+ tk_normalize_xtime(tk);
}
/**
@@ -1178,19 +1186,18 @@ int timekeeping_notify(struct clocksource *clock)
void getrawmonotonic64(struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
- struct timespec64 ts64;
unsigned long seq;
s64 nsecs;
do {
seq = read_seqcount_begin(&tk_core.seq);
+ ts->tv_sec = tk->raw_sec;
nsecs = timekeeping_get_ns(&tk->tkr_raw);
- ts64 = tk->raw_time;
} while (read_seqcount_retry(&tk_core.seq, seq));
- timespec64_add_ns(&ts64, nsecs);
- *ts = ts64;
+ ts->tv_nsec = 0;
+ timespec64_add_ns(ts, nsecs);
}
EXPORT_SYMBOL(getrawmonotonic64);
@@ -1314,8 +1321,7 @@ void __init timekeeping_init(void)
tk_setup_internals(tk, clock);
tk_set_xtime(tk, &now);
- tk->raw_time.tv_sec = 0;
- tk->raw_time.tv_nsec = 0;
+ tk->raw_sec = 0;
if (boot.tv_sec == 0 && boot.tv_nsec == 0)
boot = tk_xtime(tk);
@@ -1810,15 +1816,12 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
*clock_set |= accumulate_nsecs_to_secs(tk);
/* Accumulate raw time */
- tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
tk->tkr_raw.xtime_nsec -= snsec_per_sec;
- tk->raw_time.tv_sec++;
+ tk->raw_sec++;
}
- tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift;
- tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
/* Accumulate error between NTP and clock interval */
tk->ntp_error += tk->ntp_tick << shift;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1275175b0946..d9cd6191760b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -280,6 +280,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
/* Missed count stored at end */
#define RB_MISSED_STORED (1 << 30)
+#define RB_MISSED_FLAGS (RB_MISSED_EVENTS|RB_MISSED_STORED)
+
struct buffer_data_page {
u64 time_stamp; /* page time stamp */
local_t commit; /* write committed index */
@@ -331,7 +333,9 @@ static void rb_init_page(struct buffer_data_page *bpage)
*/
size_t ring_buffer_page_len(void *page)
{
- return local_read(&((struct buffer_data_page *)page)->commit)
+ struct buffer_data_page *bpage = page;
+
+ return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS)
+ BUF_PAGE_HDR_SIZE;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 23de746b2195..c180e4dc0fb9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3430,37 +3430,30 @@ static const struct file_operations show_traces_fops = {
.llseek = seq_lseek,
};
-/*
- * The tracer itself will not take this lock, but still we want
- * to provide a consistent cpumask to user-space:
- */
-static DEFINE_MUTEX(tracing_cpumask_update_lock);
-
-/*
- * Temporary storage for the character representation of the
- * CPU bitmask (and one more byte for the newline):
- */
-static char mask_str[NR_CPUS + 1];
-
static ssize_t
tracing_cpumask_read(struct file *filp, char __user *ubuf,
size_t count, loff_t *ppos)
{
struct trace_array *tr = file_inode(filp)->i_private;
+ char *mask_str;
int len;
- mutex_lock(&tracing_cpumask_update_lock);
+ len = snprintf(NULL, 0, "%*pb\n",
+ cpumask_pr_args(tr->tracing_cpumask)) + 1;
+ mask_str = kmalloc(len, GFP_KERNEL);
+ if (!mask_str)
+ return -ENOMEM;
- len = snprintf(mask_str, count, "%*pb\n",
+ len = snprintf(mask_str, len, "%*pb\n",
cpumask_pr_args(tr->tracing_cpumask));
if (len >= count) {
count = -EINVAL;
goto out_err;
}
- count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
+ count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
out_err:
- mutex_unlock(&tracing_cpumask_update_lock);
+ kfree(mask_str);
return count;
}
@@ -3480,8 +3473,6 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
if (err)
goto err_unlock;
- mutex_lock(&tracing_cpumask_update_lock);
-
local_irq_disable();
arch_spin_lock(&tr->max_lock);
for_each_tracing_cpu(cpu) {
@@ -3504,8 +3495,6 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
local_irq_enable();
cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
-
- mutex_unlock(&tracing_cpumask_update_lock);
free_cpumask_var(tracing_cpumask_new);
return count;
@@ -5855,7 +5844,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
.spd_release = buffer_spd_release,
};
struct buffer_ref *ref;
- int entries, size, i;
+ int entries, i;
ssize_t ret = 0;
#ifdef CONFIG_TRACER_MAX_TRACE
@@ -5906,14 +5895,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
break;
}
- /*
- * zero out any left over data, this is going to
- * user land.
- */
- size = ring_buffer_page_len(ref->page);
- if (size < PAGE_SIZE)
- memset(ref->page + size, 0, PAGE_SIZE - size);
-
page = virt_to_page(ref->page);
spd.pages[i] = page;
@@ -6640,6 +6621,7 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size
buf->data = alloc_percpu(struct trace_array_cpu);
if (!buf->data) {
ring_buffer_free(buf->buffer);
+ buf->buffer = NULL;
return -ENOMEM;
}
@@ -6663,7 +6645,9 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
allocate_snapshot ? size : 1);
if (WARN_ON(ret)) {
ring_buffer_free(tr->trace_buffer.buffer);
+ tr->trace_buffer.buffer = NULL;
free_percpu(tr->trace_buffer.data);
+ tr->trace_buffer.data = NULL;
return -ENOMEM;
}
tr->allocated_snapshot = allocate_snapshot;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 96c75b0e9831..a804ee1b3ec6 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -8,6 +8,7 @@
*/
#include <linux/uaccess.h>
#include <linux/ftrace.h>
+#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/fs.h>
diff --git a/kernel/uid16.c b/kernel/uid16.c
index d58cc4d8f0d1..651aaa5221ec 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -190,6 +190,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
return retval;
}
+ groups_sort(group_info);
retval = set_current_groups(group_info);
put_group_info(group_info);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 95cc76785a12..85555eb4d3cb 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1479,6 +1479,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
struct timer_list *timer = &dwork->timer;
struct work_struct *work = &dwork->work;
+ WARN_ON_ONCE(!wq);
WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
timer->data != (unsigned long)dwork);
WARN_ON_ONCE(timer_pending(timer));
diff --git a/lib/Kconfig b/lib/Kconfig
index 1a48744253d7..48635688046f 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -531,4 +531,8 @@ config ARCH_HAS_PMEM_API
config ARCH_HAS_MMIO_FLUSH
bool
+config STACKDEPOT
+ bool
+ select STACKTRACE
+
endmenu
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c879d72bf9f7..6899c7cd7898 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -676,6 +676,27 @@ source "lib/Kconfig.kasan"
endmenu # "Memory Debugging"
+config ARCH_HAS_KCOV
+ bool
+ help
+ KCOV does not have any arch-specific code, but currently it is enabled
+ only for x86_64. KCOV requires testing on other archs, and most likely
+ disabling of instrumentation for some early boot code.
+
+config KCOV
+ bool "Code coverage for fuzzing"
+ depends on ARCH_HAS_KCOV
+ select DEBUG_FS
+ help
+ KCOV exposes kernel code coverage information in a form suitable
+ for coverage-guided fuzzing (randomized testing).
+
+ If RANDOMIZE_BASE is enabled, PC values will not be stable across
+ different machines and across reboots. If you need stable PC values,
+ disable RANDOMIZE_BASE.
+
+ For more details, see Documentation/kcov.txt.
+
config DEBUG_SHIRQ
bool "Debug shared IRQ handlers"
depends on DEBUG_KERNEL
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 0fee5acd5aa0..bd38aab05929 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -5,8 +5,9 @@ if HAVE_ARCH_KASAN
config KASAN
bool "KASan: runtime memory debugger"
- depends on SLUB_DEBUG
+ depends on SLUB || (SLAB && !DEBUG_SLAB)
select CONSTRUCTORS
+ select STACKDEPOT
help
Enables kernel address sanitizer - runtime memory debugger,
designed to find out-of-bounds accesses and use-after-free bugs.
@@ -16,6 +17,8 @@ config KASAN
This feature consumes about 1/8 of available memory and brings about
~x3 performance slowdown.
For better error detection enable CONFIG_STACKTRACE.
+ Currently CONFIG_KASAN doesn't work with CONFIG_DEBUG_SLAB
+ (the resulting kernel does not boot).
choice
prompt "Instrumentation type"
diff --git a/lib/Makefile b/lib/Makefile
index cb4f6aa95013..b9ad86add71d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -7,6 +7,18 @@ ORIG_CFLAGS := $(KBUILD_CFLAGS)
KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
endif
+# These files are disabled because they produce lots of non-interesting and/or
+# flaky coverage that is not a function of syscall inputs. For example,
+# rbtree can be global and individual rotations don't correlate with inputs.
+KCOV_INSTRUMENT_string.o := n
+KCOV_INSTRUMENT_rbtree.o := n
+KCOV_INSTRUMENT_list_debug.o := n
+KCOV_INSTRUMENT_debugobjects.o := n
+KCOV_INSTRUMENT_dynamic_debug.o := n
+# Kernel does not boot if we instrument this file as it uses custom calling
+# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS).
+KCOV_INSTRUMENT_hweight.o := n
+
lib-y := ctype.o string.o vsprintf.o cmdline.o \
rbtree.o radix-tree.o dump_stack.o timerqueue.o\
idr.o int_sqrt.o extable.o \
@@ -163,6 +175,9 @@ obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o
obj-$(CONFIG_SG_SPLIT) += sg_split.o
obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
+obj-$(CONFIG_STACKDEPOT) += stackdepot.o
+KASAN_SANITIZE_stackdepot.o := n
+
libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
fdt_empty_tree.o
$(foreach file, $(libfdt_files), \
diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c
index 4fa2e54b3f59..76d110301251 100644
--- a/lib/asn1_decoder.c
+++ b/lib/asn1_decoder.c
@@ -312,42 +312,47 @@ next_op:
/* Decide how to handle the operation */
switch (op) {
- case ASN1_OP_MATCH_ANY_ACT:
- case ASN1_OP_MATCH_ANY_ACT_OR_SKIP:
- case ASN1_OP_COND_MATCH_ANY_ACT:
- case ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP:
- ret = actions[machine[pc + 1]](context, hdr, tag, data + dp, len);
- if (ret < 0)
- return ret;
- goto skip_data;
-
- case ASN1_OP_MATCH_ACT:
- case ASN1_OP_MATCH_ACT_OR_SKIP:
- case ASN1_OP_COND_MATCH_ACT_OR_SKIP:
- ret = actions[machine[pc + 2]](context, hdr, tag, data + dp, len);
- if (ret < 0)
- return ret;
- goto skip_data;
-
case ASN1_OP_MATCH:
case ASN1_OP_MATCH_OR_SKIP:
+ case ASN1_OP_MATCH_ACT:
+ case ASN1_OP_MATCH_ACT_OR_SKIP:
case ASN1_OP_MATCH_ANY:
case ASN1_OP_MATCH_ANY_OR_SKIP:
+ case ASN1_OP_MATCH_ANY_ACT:
+ case ASN1_OP_MATCH_ANY_ACT_OR_SKIP:
case ASN1_OP_COND_MATCH_OR_SKIP:
+ case ASN1_OP_COND_MATCH_ACT_OR_SKIP:
case ASN1_OP_COND_MATCH_ANY:
case ASN1_OP_COND_MATCH_ANY_OR_SKIP:
- skip_data:
+ case ASN1_OP_COND_MATCH_ANY_ACT:
+ case ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP:
+
if (!(flags & FLAG_CONS)) {
if (flags & FLAG_INDEFINITE_LENGTH) {
+ size_t tmp = dp;
+
ret = asn1_find_indefinite_length(
- data, datalen, &dp, &len, &errmsg);
+ data, datalen, &tmp, &len, &errmsg);
if (ret < 0)
goto error;
- } else {
- dp += len;
}
pr_debug("- LEAF: %zu\n", len);
}
+
+ if (op & ASN1_OP_MATCH__ACT) {
+ unsigned char act;
+
+ if (op & ASN1_OP_MATCH__ANY)
+ act = machine[pc + 1];
+ else
+ act = machine[pc + 2];
+ ret = actions[act](context, hdr, tag, data + dp, len);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (!(flags & FLAG_CONS))
+ dp += len;
pc += asn1_op_lengths[op];
goto next_op;
@@ -433,6 +438,8 @@ next_op:
else
act = machine[pc + 1];
ret = actions[act](context, hdr, 0, data + tdp, len);
+ if (ret < 0)
+ return ret;
}
pc += asn1_op_lengths[op];
goto next_op;
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index e3952e9c8ec0..c6368ae93fe6 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -353,6 +353,10 @@ static int ddebug_parse_query(char *words[], int nwords,
if (parse_lineno(last, &query->last_lineno) < 0)
return -EINVAL;
+ /* special case for last lineno not specified */
+ if (query->last_lineno == 0)
+ query->last_lineno = UINT_MAX;
+
if (query->last_lineno < query->first_lineno) {
pr_err("last-line:%d < 1st-line:%d\n",
query->last_lineno,
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 27aa9c629d13..e4303fb2a7b2 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -194,7 +194,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy
chunk->phys_addr = phys;
chunk->start_addr = virt;
chunk->end_addr = virt + size - 1;
- atomic_set(&chunk->avail, size);
+ atomic_long_set(&chunk->avail, size);
spin_lock(&pool->lock);
list_add_rcu(&chunk->next_chunk, &pool->chunks);
@@ -285,7 +285,7 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
nbits = (size + (1UL << order) - 1) >> order;
rcu_read_lock();
list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
- if (size > atomic_read(&chunk->avail))
+ if (size > atomic_long_read(&chunk->avail))
continue;
start_bit = 0;
@@ -305,7 +305,7 @@ retry:
addr = chunk->start_addr + ((unsigned long)start_bit << order);
size = nbits << order;
- atomic_sub(size, &chunk->avail);
+ atomic_long_sub(size, &chunk->avail);
break;
}
rcu_read_unlock();
@@ -371,7 +371,7 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
remain = bitmap_clear_ll(chunk->bits, start_bit, nbits);
BUG_ON(remain);
size = nbits << order;
- atomic_add(size, &chunk->avail);
+ atomic_long_add(size, &chunk->avail);
rcu_read_unlock();
return;
}
@@ -445,7 +445,7 @@ size_t gen_pool_avail(struct gen_pool *pool)
rcu_read_lock();
list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk)
- avail += atomic_read(&chunk->avail);
+ avail += atomic_long_read(&chunk->avail);
rcu_read_unlock();
return avail;
}
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
new file mode 100644
index 000000000000..f87d138e9672
--- /dev/null
+++ b/lib/stackdepot.c
@@ -0,0 +1,287 @@
+/*
+ * Generic stack depot for storing stack traces.
+ *
+ * Some debugging tools need to save stack traces of certain events which can
+ * be later presented to the user. For example, KASAN needs to safe alloc and
+ * free stacks for each object, but storing two stack traces per object
+ * requires too much memory (e.g. SLUB_DEBUG needs 256 bytes per object for
+ * that).
+ *
+ * Instead, stack depot maintains a hashtable of unique stacktraces. Since alloc
+ * and free stacks repeat a lot, we save about 100x space.
+ * Stacks are never removed from depot, so we store them contiguously one after
+ * another in a contiguos memory allocation.
+ *
+ * Author: Alexander Potapenko <glider@google.com>
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * Based on code by Dmitry Chernenkov.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include <linux/gfp.h>
+#include <linux/jhash.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/percpu.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/stacktrace.h>
+#include <linux/stackdepot.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8)
+
+#define STACK_ALLOC_NULL_PROTECTION_BITS 1
+#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */
+#define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER))
+#define STACK_ALLOC_ALIGN 4
+#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \
+ STACK_ALLOC_ALIGN)
+#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \
+ STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS)
+#define STACK_ALLOC_SLABS_CAP 8192
+#define STACK_ALLOC_MAX_SLABS \
+ (((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \
+ (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP)
+
+/* The compact structure to store the reference to stacks. */
+union handle_parts {
+ depot_stack_handle_t handle;
+ struct {
+ u32 slabindex : STACK_ALLOC_INDEX_BITS;
+ u32 offset : STACK_ALLOC_OFFSET_BITS;
+ u32 valid : STACK_ALLOC_NULL_PROTECTION_BITS;
+ };
+};
+
+struct stack_record {
+ struct stack_record *next; /* Link in the hashtable */
+ u32 hash; /* Hash in the hastable */
+ u32 size; /* Number of frames in the stack */
+ union handle_parts handle;
+ unsigned long entries[1]; /* Variable-sized array of entries. */
+};
+
+static void *stack_slabs[STACK_ALLOC_MAX_SLABS];
+
+static int depot_index;
+static int next_slab_inited;
+static size_t depot_offset;
+static DEFINE_SPINLOCK(depot_lock);
+
+static bool init_stack_slab(void **prealloc)
+{
+ if (!*prealloc)
+ return false;
+ /*
+ * This smp_load_acquire() pairs with smp_store_release() to
+ * |next_slab_inited| below and in depot_alloc_stack().
+ */
+ if (smp_load_acquire(&next_slab_inited))
+ return true;
+ if (stack_slabs[depot_index] == NULL) {
+ stack_slabs[depot_index] = *prealloc;
+ } else {
+ stack_slabs[depot_index + 1] = *prealloc;
+ /*
+ * This smp_store_release pairs with smp_load_acquire() from
+ * |next_slab_inited| above and in depot_save_stack().
+ */
+ smp_store_release(&next_slab_inited, 1);
+ }
+ *prealloc = NULL;
+ return true;
+}
+
+/* Allocation of a new stack in raw storage */
+static struct stack_record *depot_alloc_stack(unsigned long *entries, int size,
+ u32 hash, void **prealloc, gfp_t alloc_flags)
+{
+ int required_size = offsetof(struct stack_record, entries) +
+ sizeof(unsigned long) * size;
+ struct stack_record *stack;
+
+ required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN);
+
+ if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) {
+ if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) {
+ WARN_ONCE(1, "Stack depot reached limit capacity");
+ return NULL;
+ }
+ depot_index++;
+ depot_offset = 0;
+ /*
+ * smp_store_release() here pairs with smp_load_acquire() from
+ * |next_slab_inited| in depot_save_stack() and
+ * init_stack_slab().
+ */
+ if (depot_index + 1 < STACK_ALLOC_MAX_SLABS)
+ smp_store_release(&next_slab_inited, 0);
+ }
+ init_stack_slab(prealloc);
+ if (stack_slabs[depot_index] == NULL)
+ return NULL;
+
+ stack = stack_slabs[depot_index] + depot_offset;
+
+ stack->hash = hash;
+ stack->size = size;
+ stack->handle.slabindex = depot_index;
+ stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN;
+ stack->handle.valid = 1;
+ memcpy(stack->entries, entries, size * sizeof(unsigned long));
+ depot_offset += required_size;
+
+ return stack;
+}
+
+#define STACK_HASH_ORDER 20
+#define STACK_HASH_SIZE (1L << STACK_HASH_ORDER)
+#define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
+#define STACK_HASH_SEED 0x9747b28c
+
+static struct stack_record *stack_table[STACK_HASH_SIZE] = {
+ [0 ... STACK_HASH_SIZE - 1] = NULL
+};
+
+/* Calculate hash for a stack */
+static inline u32 hash_stack(unsigned long *entries, unsigned int size)
+{
+ return jhash2((u32 *)entries,
+ size * sizeof(unsigned long) / sizeof(u32),
+ STACK_HASH_SEED);
+}
+
+/* Find a stack that is equal to the one stored in entries in the hash */
+static inline struct stack_record *find_stack(struct stack_record *bucket,
+ unsigned long *entries, int size,
+ u32 hash)
+{
+ struct stack_record *found;
+
+ for (found = bucket; found; found = found->next) {
+ if (found->hash == hash &&
+ found->size == size &&
+ !memcmp(entries, found->entries,
+ size * sizeof(unsigned long))) {
+ return found;
+ }
+ }
+ return NULL;
+}
+
+void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace)
+{
+ union handle_parts parts = { .handle = handle };
+ void *slab = stack_slabs[parts.slabindex];
+ size_t offset = parts.offset << STACK_ALLOC_ALIGN;
+ struct stack_record *stack = slab + offset;
+
+ trace->nr_entries = trace->max_entries = stack->size;
+ trace->entries = stack->entries;
+ trace->skip = 0;
+}
+EXPORT_SYMBOL_GPL(depot_fetch_stack);
+
+/**
+ * depot_save_stack - save stack in a stack depot.
+ * @trace - the stacktrace to save.
+ * @alloc_flags - flags for allocating additional memory if required.
+ *
+ * Returns the handle of the stack struct stored in depot.
+ */
+depot_stack_handle_t depot_save_stack(struct stack_trace *trace,
+ gfp_t alloc_flags)
+{
+ u32 hash;
+ depot_stack_handle_t retval = 0;
+ struct stack_record *found = NULL, **bucket;
+ unsigned long flags;
+ struct page *page = NULL;
+ void *prealloc = NULL;
+
+ if (unlikely(trace->nr_entries == 0))
+ goto fast_exit;
+
+ hash = hash_stack(trace->entries, trace->nr_entries);
+ bucket = &stack_table[hash & STACK_HASH_MASK];
+
+ /*
+ * Fast path: look the stack trace up without locking.
+ * The smp_load_acquire() here pairs with smp_store_release() to
+ * |bucket| below.
+ */
+ found = find_stack(smp_load_acquire(bucket), trace->entries,
+ trace->nr_entries, hash);
+ if (found)
+ goto exit;
+
+ /*
+ * Check if the current or the next stack slab need to be initialized.
+ * If so, allocate the memory - we won't be able to do that under the
+ * lock.
+ *
+ * The smp_load_acquire() here pairs with smp_store_release() to
+ * |next_slab_inited| in depot_alloc_stack() and init_stack_slab().
+ */
+ if (unlikely(!smp_load_acquire(&next_slab_inited))) {
+ /*
+ * Zero out zone modifiers, as we don't have specific zone
+ * requirements. Keep the flags related to allocation in atomic
+ * contexts and I/O.
+ */
+ alloc_flags &= ~GFP_ZONEMASK;
+ alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
+ alloc_flags |= __GFP_NOWARN;
+ page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER);
+ if (page)
+ prealloc = page_address(page);
+ }
+
+ spin_lock_irqsave(&depot_lock, flags);
+
+ found = find_stack(*bucket, trace->entries, trace->nr_entries, hash);
+ if (!found) {
+ struct stack_record *new =
+ depot_alloc_stack(trace->entries, trace->nr_entries,
+ hash, &prealloc, alloc_flags);
+ if (new) {
+ new->next = *bucket;
+ /*
+ * This smp_store_release() pairs with
+ * smp_load_acquire() from |bucket| above.
+ */
+ smp_store_release(bucket, new);
+ found = new;
+ }
+ } else if (prealloc) {
+ /*
+ * We didn't need to store this stack trace, but let's keep
+ * the preallocated memory for the future.
+ */
+ WARN_ON(!init_stack_slab(&prealloc));
+ }
+
+ spin_unlock_irqrestore(&depot_lock, flags);
+exit:
+ if (prealloc) {
+ /* Nobody used this memory, ok to free it. */
+ free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER);
+ }
+ if (found)
+ retval = found->handle.handle;
+fast_exit:
+ return retval;
+}
+EXPORT_SYMBOL_GPL(depot_save_stack);
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index c32f3b0048dc..0e70ecc12fe2 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -12,10 +12,19 @@
#define pr_fmt(fmt) "kasan test: %s " fmt, __func__
#include <linux/kernel.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/uaccess.h>
#include <linux/module.h>
+#include <linux/kasan.h>
+
+/*
+ * Note: test functions are marked noinline so that their names appear in
+ * reports.
+ */
static noinline void __init kmalloc_oob_right(void)
{
@@ -65,11 +74,34 @@ static noinline void __init kmalloc_node_oob_right(void)
kfree(ptr);
}
-static noinline void __init kmalloc_large_oob_right(void)
+#ifdef CONFIG_SLUB
+static noinline void __init kmalloc_pagealloc_oob_right(void)
{
char *ptr;
size_t size = KMALLOC_MAX_CACHE_SIZE + 10;
+ /* Allocate a chunk that does not fit into a SLUB cache to trigger
+ * the page allocator fallback.
+ */
+ pr_info("kmalloc pagealloc allocation: out-of-bounds to right\n");
+ ptr = kmalloc(size, GFP_KERNEL);
+ if (!ptr) {
+ pr_err("Allocation failed\n");
+ return;
+ }
+
+ ptr[size] = 0;
+ kfree(ptr);
+}
+#endif
+
+static noinline void __init kmalloc_large_oob_right(void)
+{
+ char *ptr;
+ size_t size = KMALLOC_MAX_CACHE_SIZE - 256;
+ /* Allocate a chunk that is large enough, but still fits into a slab
+ * and does not trigger the page allocator fallback in SLUB.
+ */
pr_info("kmalloc large allocation: out-of-bounds to right\n");
ptr = kmalloc(size, GFP_KERNEL);
if (!ptr) {
@@ -271,6 +303,8 @@ static noinline void __init kmalloc_uaf2(void)
}
ptr1[40] = 'x';
+ if (ptr1 == ptr2)
+ pr_err("Could not detect use-after-free: ptr1 == ptr2\n");
kfree(ptr2);
}
@@ -319,11 +353,107 @@ static noinline void __init kasan_stack_oob(void)
*(volatile char *)p;
}
+static noinline void __init ksize_unpoisons_memory(void)
+{
+ char *ptr;
+ size_t size = 123, real_size = size;
+
+ pr_info("ksize() unpoisons the whole allocated chunk\n");
+ ptr = kmalloc(size, GFP_KERNEL);
+ if (!ptr) {
+ pr_err("Allocation failed\n");
+ return;
+ }
+ real_size = ksize(ptr);
+ /* This access doesn't trigger an error. */
+ ptr[size] = 'x';
+ /* This one does. */
+ ptr[real_size] = 'y';
+ kfree(ptr);
+}
+
+static noinline void __init copy_user_test(void)
+{
+ char *kmem;
+ char __user *usermem;
+ size_t size = 10;
+ int unused;
+
+ kmem = kmalloc(size, GFP_KERNEL);
+ if (!kmem)
+ return;
+
+ usermem = (char __user *)vm_mmap(NULL, 0, PAGE_SIZE,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0);
+ if (IS_ERR(usermem)) {
+ pr_err("Failed to allocate user memory\n");
+ kfree(kmem);
+ return;
+ }
+
+ pr_info("out-of-bounds in copy_from_user()\n");
+ unused = copy_from_user(kmem, usermem, size + 1);
+
+ pr_info("out-of-bounds in copy_to_user()\n");
+ unused = copy_to_user(usermem, kmem, size + 1);
+
+ pr_info("out-of-bounds in __copy_from_user()\n");
+ unused = __copy_from_user(kmem, usermem, size + 1);
+
+ pr_info("out-of-bounds in __copy_to_user()\n");
+ unused = __copy_to_user(usermem, kmem, size + 1);
+
+ pr_info("out-of-bounds in __copy_from_user_inatomic()\n");
+ unused = __copy_from_user_inatomic(kmem, usermem, size + 1);
+
+ pr_info("out-of-bounds in __copy_to_user_inatomic()\n");
+ unused = __copy_to_user_inatomic(usermem, kmem, size + 1);
+
+ pr_info("out-of-bounds in strncpy_from_user()\n");
+ unused = strncpy_from_user(kmem, usermem, size + 1);
+
+ vm_munmap((unsigned long)usermem, PAGE_SIZE);
+ kfree(kmem);
+}
+
+static noinline void __init use_after_scope_test(void)
+{
+ volatile char *volatile p;
+
+ pr_info("use-after-scope on int\n");
+ {
+ int local = 0;
+
+ p = (char *)&local;
+ }
+ p[0] = 1;
+ p[3] = 1;
+
+ pr_info("use-after-scope on array\n");
+ {
+ char local[1024] = {0};
+
+ p = local;
+ }
+ p[0] = 1;
+ p[1023] = 1;
+}
+
static int __init kmalloc_tests_init(void)
{
+ /*
+ * Temporarily enable multi-shot mode. Otherwise, we'd only get a
+ * report for the first case.
+ */
+ bool multishot = kasan_save_enable_multi_shot();
+
kmalloc_oob_right();
kmalloc_oob_left();
kmalloc_node_oob_right();
+#ifdef CONFIG_SLUB
+ kmalloc_pagealloc_oob_right();
+#endif
kmalloc_large_oob_right();
kmalloc_oob_krealloc_more();
kmalloc_oob_krealloc_less();
@@ -339,6 +469,12 @@ static int __init kmalloc_tests_init(void)
kmem_cache_oob();
kasan_stack_oob();
kasan_global_oob();
+ ksize_unpoisons_memory();
+ copy_user_test();
+ use_after_scope_test();
+
+ kasan_restore_multi_shot(multishot);
+
return -EAGAIN;
}
diff --git a/mm/Makefile b/mm/Makefile
index 8b532c94008f..ec91e951da28 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -3,11 +3,27 @@
#
KASAN_SANITIZE_slab_common.o := n
+KASAN_SANITIZE_slab.o := n
KASAN_SANITIZE_slub.o := n
# Since __builtin_frame_address does work as used, disable the warning.
CFLAGS_usercopy.o += $(call cc-disable-warning, frame-address)
+# These files are disabled because they produce non-interesting and/or
+# flaky coverage that is not a function of syscall inputs. E.g. slab is out of
+# free pages, or a task is migrated between nodes.
+KCOV_INSTRUMENT_slab_common.o := n
+KCOV_INSTRUMENT_slob.o := n
+KCOV_INSTRUMENT_slab.o := n
+KCOV_INSTRUMENT_slub.o := n
+KCOV_INSTRUMENT_page_alloc.o := n
+KCOV_INSTRUMENT_debug-pagealloc.o := n
+KCOV_INSTRUMENT_kmemleak.o := n
+KCOV_INSTRUMENT_kmemcheck.o := n
+KCOV_INSTRUMENT_memcontrol.o := n
+KCOV_INSTRUMENT_mmzone.o := n
+KCOV_INSTRUMENT_vmstat.o := n
+
mmu-y := nommu.o
mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \
mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
diff --git a/mm/compaction.c b/mm/compaction.c
index dba02dec7195..b6f145ed7ae1 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -200,7 +200,8 @@ static void reset_cached_positions(struct zone *zone)
{
zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
- zone->compact_cached_free_pfn = zone_end_pfn(zone);
+ zone->compact_cached_free_pfn =
+ round_down(zone_end_pfn(zone) - 1, pageblock_nr_pages);
}
/*
@@ -552,13 +553,17 @@ unsigned long
isolate_freepages_range(struct compact_control *cc,
unsigned long start_pfn, unsigned long end_pfn)
{
- unsigned long isolated, pfn, block_end_pfn;
+ unsigned long isolated, pfn, block_start_pfn, block_end_pfn;
LIST_HEAD(freelist);
pfn = start_pfn;
+ block_start_pfn = pfn & ~(pageblock_nr_pages - 1);
+ if (block_start_pfn < cc->zone->zone_start_pfn)
+ block_start_pfn = cc->zone->zone_start_pfn;
block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
for (; pfn < end_pfn; pfn += isolated,
+ block_start_pfn = block_end_pfn,
block_end_pfn += pageblock_nr_pages) {
/* Protect pfn from changing by isolate_freepages_block */
unsigned long isolate_start_pfn = pfn;
@@ -571,11 +576,13 @@ isolate_freepages_range(struct compact_control *cc,
* scanning range to right one.
*/
if (pfn >= block_end_pfn) {
+ block_start_pfn = pfn & ~(pageblock_nr_pages - 1);
block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
block_end_pfn = min(block_end_pfn, end_pfn);
}
- if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone))
+ if (!pageblock_pfn_to_page(block_start_pfn,
+ block_end_pfn, cc->zone))
break;
isolated = isolate_freepages_block(cc, &isolate_start_pfn,
@@ -861,18 +868,23 @@ unsigned long
isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
unsigned long end_pfn)
{
- unsigned long pfn, block_end_pfn;
+ unsigned long pfn, block_start_pfn, block_end_pfn;
/* Scan block by block. First and last block may be incomplete */
pfn = start_pfn;
+ block_start_pfn = pfn & ~(pageblock_nr_pages - 1);
+ if (block_start_pfn < cc->zone->zone_start_pfn)
+ block_start_pfn = cc->zone->zone_start_pfn;
block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
for (; pfn < end_pfn; pfn = block_end_pfn,
+ block_start_pfn = block_end_pfn,
block_end_pfn += pageblock_nr_pages) {
block_end_pfn = min(block_end_pfn, end_pfn);
- if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone))
+ if (!pageblock_pfn_to_page(block_start_pfn,
+ block_end_pfn, cc->zone))
continue;
pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
@@ -1090,7 +1102,9 @@ int sysctl_compact_unevictable_allowed __read_mostly = 1;
static isolate_migrate_t isolate_migratepages(struct zone *zone,
struct compact_control *cc)
{
- unsigned long low_pfn, end_pfn;
+ unsigned long block_start_pfn;
+ unsigned long block_end_pfn;
+ unsigned long low_pfn;
unsigned long isolate_start_pfn;
struct page *page;
const isolate_mode_t isolate_mode =
@@ -1102,16 +1116,21 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
* initialized by compact_zone()
*/
low_pfn = cc->migrate_pfn;
+ block_start_pfn = cc->migrate_pfn & ~(pageblock_nr_pages - 1);
+ if (block_start_pfn < zone->zone_start_pfn)
+ block_start_pfn = zone->zone_start_pfn;
/* Only scan within a pageblock boundary */
- end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages);
+ block_end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages);
/*
* Iterate over whole pageblocks until we find the first suitable.
* Do not cross the free scanner.
*/
- for (; end_pfn <= cc->free_pfn;
- low_pfn = end_pfn, end_pfn += pageblock_nr_pages) {
+ for (; block_end_pfn <= cc->free_pfn;
+ low_pfn = block_end_pfn,
+ block_start_pfn = block_end_pfn,
+ block_end_pfn += pageblock_nr_pages) {
/*
* This can potentially iterate a massively long zone with
@@ -1122,7 +1141,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
&& compact_should_abort(cc))
break;
- page = pageblock_pfn_to_page(low_pfn, end_pfn, zone);
+ page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
+ zone);
if (!page)
continue;
@@ -1141,8 +1161,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
/* Perform the isolation */
isolate_start_pfn = low_pfn;
- low_pfn = isolate_migratepages_block(cc, low_pfn, end_pfn,
- isolate_mode);
+ low_pfn = isolate_migratepages_block(cc, low_pfn,
+ block_end_pfn, isolate_mode);
if (!low_pfn || cc->contended) {
acct_isolated(zone, cc);
@@ -1358,11 +1378,11 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
*/
cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync];
cc->free_pfn = zone->compact_cached_free_pfn;
- if (cc->free_pfn < start_pfn || cc->free_pfn > end_pfn) {
- cc->free_pfn = end_pfn & ~(pageblock_nr_pages-1);
+ if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
+ cc->free_pfn = round_down(end_pfn - 1, pageblock_nr_pages);
zone->compact_cached_free_pfn = cc->free_pfn;
}
- if (cc->migrate_pfn < start_pfn || cc->migrate_pfn > end_pfn) {
+ if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
cc->migrate_pfn = start_pfn;
zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
diff --git a/mm/dmapool.c b/mm/dmapool.c
index 57312b5d6e12..2821500e8123 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -452,13 +452,11 @@ void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma)
}
spin_unlock_irqrestore(&pool->lock, flags);
if (pool->dev)
- dev_err(pool->dev, "dma_pool_free %s, dma %Lx "
- "already free\n", pool->name,
- (unsigned long long)dma);
+ dev_err(pool->dev, "dma_pool_free %s, dma %Lx already free\n",
+ pool->name, (unsigned long long)dma);
else
- printk(KERN_ERR "dma_pool_free %s, dma %Lx "
- "already free\n", pool->name,
- (unsigned long long)dma);
+ printk(KERN_ERR "dma_pool_free %s, dma %Lx already free\n",
+ pool->name, (unsigned long long)dma);
return;
}
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 6c6f5ccfcda1..d64d48ca789c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -135,8 +135,7 @@ static void set_recommended_min_free_kbytes(void)
if (recommended_min > min_free_kbytes) {
if (user_min_free_kbytes >= 0)
- pr_info("raising min_free_kbytes from %d to %lu "
- "to help transparent hugepage allocations\n",
+ pr_info("raising min_free_kbytes from %d to %lu to help transparent hugepage allocations\n",
min_free_kbytes, recommended_min);
min_free_kbytes = recommended_min;
@@ -1304,17 +1303,11 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
VM_BUG_ON_PAGE(!PageHead(page), page);
if (flags & FOLL_TOUCH) {
pmd_t _pmd;
- /*
- * We should set the dirty bit only for FOLL_WRITE but
- * for now the dirty bit in the pmd is meaningless.
- * And if the dirty bit will become meaningful and
- * we'll only set it with FOLL_WRITE, an atomic
- * set_bit will be required on the pmd to set the
- * young bit, instead of the current set_pmd_at.
- */
- _pmd = pmd_mkyoung(pmd_mkdirty(*pmd));
+ _pmd = pmd_mkyoung(*pmd);
+ if (flags & FOLL_WRITE)
+ _pmd = pmd_mkdirty(_pmd);
if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK,
- pmd, _pmd, 1))
+ pmd, _pmd, flags & FOLL_WRITE))
update_mmu_cache_pmd(vma, addr, pmd);
}
if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
@@ -1572,35 +1565,69 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
{
struct mm_struct *mm = vma->vm_mm;
spinlock_t *ptl;
+ pmd_t entry;
+ bool preserve_write;
+
int ret = 0;
- if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
- pmd_t entry;
- bool preserve_write = prot_numa && pmd_write(*pmd);
- ret = 1;
+ if (__pmd_trans_huge_lock(pmd, vma, &ptl) != 1)
+ return 0;
- /*
- * Avoid trapping faults against the zero page. The read-only
- * data is likely to be read-cached on the local CPU and
- * local/remote hits to the zero page are not interesting.
- */
- if (prot_numa && is_huge_zero_pmd(*pmd)) {
- spin_unlock(ptl);
- return ret;
- }
+ preserve_write = prot_numa && pmd_write(*pmd);
+ ret = 1;
- if (!prot_numa || !pmd_protnone(*pmd)) {
- entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
- entry = pmd_modify(entry, newprot);
- if (preserve_write)
- entry = pmd_mkwrite(entry);
- ret = HPAGE_PMD_NR;
- set_pmd_at(mm, addr, pmd, entry);
- BUG_ON(!preserve_write && pmd_write(entry));
- }
- spin_unlock(ptl);
- }
+ /*
+ * Avoid trapping faults against the zero page. The read-only
+ * data is likely to be read-cached on the local CPU and
+ * local/remote hits to the zero page are not interesting.
+ */
+ if (prot_numa && is_huge_zero_pmd(*pmd))
+ goto unlock;
+
+ if (prot_numa && pmd_protnone(*pmd))
+ goto unlock;
+ /*
+ * In case prot_numa, we are under down_read(mmap_sem). It's critical
+ * to not clear pmd intermittently to avoid race with MADV_DONTNEED
+ * which is also under down_read(mmap_sem):
+ *
+ * CPU0: CPU1:
+ * change_huge_pmd(prot_numa=1)
+ * pmdp_huge_get_and_clear_notify()
+ * madvise_dontneed()
+ * zap_pmd_range()
+ * pmd_trans_huge(*pmd) == 0 (without ptl)
+ * // skip the pmd
+ * set_pmd_at();
+ * // pmd is re-established
+ *
+ * The race makes MADV_DONTNEED miss the huge pmd and don't clear it
+ * which may break userspace.
+ *
+ * pmdp_invalidate() is required to make sure we don't miss
+ * dirty/young flags set by hardware.
+ */
+ entry = *pmd;
+ pmdp_invalidate(vma, addr, pmd);
+
+ /*
+ * Recover dirty/young flags. It relies on pmdp_invalidate to not
+ * corrupt them.
+ */
+ if (pmd_dirty(*pmd))
+ entry = pmd_mkdirty(entry);
+ if (pmd_young(*pmd))
+ entry = pmd_mkyoung(entry);
+
+ entry = pmd_modify(entry, newprot);
+ if (preserve_write)
+ entry = pmd_mkwrite(entry);
+ ret = HPAGE_PMD_NR;
+ set_pmd_at(mm, addr, pmd, entry);
+ BUG_ON(!preserve_write && pmd_write(entry));
+unlock:
+ spin_unlock(ptl);
return ret;
}
diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
index 64710148941e..e1100433cefe 100644
--- a/mm/kasan/Makefile
+++ b/mm/kasan/Makefile
@@ -1,8 +1,9 @@
KASAN_SANITIZE := n
+KCOV_INSTRUMENT := n
CFLAGS_REMOVE_kasan.o = -pg
# Function splitter causes unnecessary splits in __asan_load1/__asan_store1
# see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533
CFLAGS_kasan.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
-obj-y := kasan.o report.o kasan_init.o
+obj-y := kasan.o report.o kasan_init.o quarantine.o
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 1ad20ade8c91..cfdbe1ce9ef8 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -17,7 +17,9 @@
#define DISABLE_BRANCH_PROFILING
#include <linux/export.h>
+#include <linux/interrupt.h>
#include <linux/init.h>
+#include <linux/kasan.h>
#include <linux/kernel.h>
#include <linux/kmemleak.h>
#include <linux/linkage.h>
@@ -32,11 +34,21 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/vmalloc.h>
-#include <linux/kasan.h>
+#include <linux/bug.h>
#include "kasan.h"
#include "../slab.h"
+void kasan_enable_current(void)
+{
+ current->kasan_depth++;
+}
+
+void kasan_disable_current(void)
+{
+ current->kasan_depth--;
+}
+
/*
* Poisons the shadow memory for 'size' bytes starting from 'addr'.
* Memory addresses should be aligned to KASAN_SHADOW_SCALE_SIZE.
@@ -61,7 +73,7 @@ void kasan_unpoison_shadow(const void *address, size_t size)
}
}
-static void __kasan_unpoison_stack(struct task_struct *task, void *sp)
+static void __kasan_unpoison_stack(struct task_struct *task, const void *sp)
{
void *base = task_stack_page(task);
size_t size = sp - base;
@@ -76,9 +88,31 @@ void kasan_unpoison_task_stack(struct task_struct *task)
}
/* Unpoison the stack for the current task beyond a watermark sp value. */
-asmlinkage void kasan_unpoison_remaining_stack(void *sp)
+asmlinkage void kasan_unpoison_task_stack_below(const void *watermark)
{
- __kasan_unpoison_stack(current, sp);
+ /*
+ * Calculate the task stack base address. Avoid using 'current'
+ * because this function is called by early resume code which hasn't
+ * yet set up the percpu register (%gs).
+ */
+ void *base = (void *)((unsigned long)watermark & ~(THREAD_SIZE - 1));
+
+ kasan_unpoison_shadow(base, watermark - base);
+}
+
+/*
+ * Clear all poison for the region between the current SP and a provided
+ * watermark value, as is sometimes required prior to hand-crafted asm function
+ * returns in the middle of functions.
+ */
+void kasan_unpoison_stack_above_sp_to(const void *watermark)
+{
+ const void *sp = __builtin_frame_address(0);
+ size_t size = watermark - sp;
+
+ if (WARN_ON(sp > watermark))
+ return;
+ kasan_unpoison_shadow(sp, size);
}
/*
@@ -272,32 +306,48 @@ static __always_inline bool memory_is_poisoned(unsigned long addr, size_t size)
return memory_is_poisoned_n(addr, size);
}
-
-static __always_inline void check_memory_region(unsigned long addr,
- size_t size, bool write)
+static __always_inline void check_memory_region_inline(unsigned long addr,
+ size_t size, bool write,
+ unsigned long ret_ip)
{
if (unlikely(size == 0))
return;
if (unlikely((void *)addr <
kasan_shadow_to_mem((void *)KASAN_SHADOW_START))) {
- kasan_report(addr, size, write, _RET_IP_);
+ kasan_report(addr, size, write, ret_ip);
return;
}
if (likely(!memory_is_poisoned(addr, size)))
return;
- kasan_report(addr, size, write, _RET_IP_);
+ kasan_report(addr, size, write, ret_ip);
+}
+
+static void check_memory_region(unsigned long addr,
+ size_t size, bool write,
+ unsigned long ret_ip)
+{
+ check_memory_region_inline(addr, size, write, ret_ip);
+}
+
+void kasan_check_read(const void *p, unsigned int size)
+{
+ check_memory_region((unsigned long)p, size, false, _RET_IP_);
}
+EXPORT_SYMBOL(kasan_check_read);
-void __asan_loadN(unsigned long addr, size_t size);
-void __asan_storeN(unsigned long addr, size_t size);
+void kasan_check_write(const void *p, unsigned int size)
+{
+ check_memory_region((unsigned long)p, size, true, _RET_IP_);
+}
+EXPORT_SYMBOL(kasan_check_write);
#undef memset
void *memset(void *addr, int c, size_t len)
{
- __asan_storeN((unsigned long)addr, len);
+ check_memory_region((unsigned long)addr, len, true, _RET_IP_);
return __memset(addr, c, len);
}
@@ -305,8 +355,8 @@ void *memset(void *addr, int c, size_t len)
#undef memmove
void *memmove(void *dest, const void *src, size_t len)
{
- __asan_loadN((unsigned long)src, len);
- __asan_storeN((unsigned long)dest, len);
+ check_memory_region((unsigned long)src, len, false, _RET_IP_);
+ check_memory_region((unsigned long)dest, len, true, _RET_IP_);
return __memmove(dest, src, len);
}
@@ -314,8 +364,8 @@ void *memmove(void *dest, const void *src, size_t len)
#undef memcpy
void *memcpy(void *dest, const void *src, size_t len)
{
- __asan_loadN((unsigned long)src, len);
- __asan_storeN((unsigned long)dest, len);
+ check_memory_region((unsigned long)src, len, false, _RET_IP_);
+ check_memory_region((unsigned long)dest, len, true, _RET_IP_);
return __memcpy(dest, src, len);
}
@@ -334,6 +384,80 @@ void kasan_free_pages(struct page *page, unsigned int order)
KASAN_FREE_PAGE);
}
+/*
+ * Adaptive redzone policy taken from the userspace AddressSanitizer runtime.
+ * For larger allocations larger redzones are used.
+ */
+static size_t optimal_redzone(size_t object_size)
+{
+ int rz =
+ object_size <= 64 - 16 ? 16 :
+ object_size <= 128 - 32 ? 32 :
+ object_size <= 512 - 64 ? 64 :
+ object_size <= 4096 - 128 ? 128 :
+ object_size <= (1 << 14) - 256 ? 256 :
+ object_size <= (1 << 15) - 512 ? 512 :
+ object_size <= (1 << 16) - 1024 ? 1024 : 2048;
+ return rz;
+}
+
+void kasan_cache_create(struct kmem_cache *cache, size_t *size,
+ unsigned long *flags)
+{
+ int redzone_adjust;
+ int orig_size = *size;
+
+ /* Add alloc meta. */
+ cache->kasan_info.alloc_meta_offset = *size;
+ *size += sizeof(struct kasan_alloc_meta);
+
+ /* Add free meta. */
+ if (cache->flags & SLAB_DESTROY_BY_RCU || cache->ctor ||
+ cache->object_size < sizeof(struct kasan_free_meta)) {
+ cache->kasan_info.free_meta_offset = *size;
+ *size += sizeof(struct kasan_free_meta);
+ }
+ redzone_adjust = optimal_redzone(cache->object_size) -
+ (*size - cache->object_size);
+
+ if (redzone_adjust > 0)
+ *size += redzone_adjust;
+
+ *size = min(KMALLOC_MAX_SIZE, max(*size, cache->object_size +
+ optimal_redzone(cache->object_size)));
+
+ /*
+ * If the metadata doesn't fit, don't enable KASAN at all.
+ */
+ if (*size <= cache->kasan_info.alloc_meta_offset ||
+ *size <= cache->kasan_info.free_meta_offset) {
+ cache->kasan_info.alloc_meta_offset = 0;
+ cache->kasan_info.free_meta_offset = 0;
+ *size = orig_size;
+ return;
+ }
+
+ *flags |= SLAB_KASAN;
+}
+
+void kasan_cache_shrink(struct kmem_cache *cache)
+{
+ quarantine_remove_cache(cache);
+}
+
+void kasan_cache_shutdown(struct kmem_cache *cache)
+{
+ quarantine_remove_cache(cache);
+}
+
+size_t kasan_metadata_size(struct kmem_cache *cache)
+{
+ return (cache->kasan_info.alloc_meta_offset ?
+ sizeof(struct kasan_alloc_meta) : 0) +
+ (cache->kasan_info.free_meta_offset ?
+ sizeof(struct kasan_free_meta) : 0);
+}
+
void kasan_poison_slab(struct page *page)
{
kasan_poison_shadow(page_address(page),
@@ -353,12 +477,84 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object)
KASAN_KMALLOC_REDZONE);
}
-void kasan_slab_alloc(struct kmem_cache *cache, void *object)
+static inline int in_irqentry_text(unsigned long ptr)
+{
+ return (ptr >= (unsigned long)&__irqentry_text_start &&
+ ptr < (unsigned long)&__irqentry_text_end) ||
+ (ptr >= (unsigned long)&__softirqentry_text_start &&
+ ptr < (unsigned long)&__softirqentry_text_end);
+}
+
+static inline void filter_irq_stacks(struct stack_trace *trace)
+{
+ int i;
+
+ if (!trace->nr_entries)
+ return;
+ for (i = 0; i < trace->nr_entries; i++)
+ if (in_irqentry_text(trace->entries[i])) {
+ /* Include the irqentry function into the stack. */
+ trace->nr_entries = i + 1;
+ break;
+ }
+}
+
+static inline depot_stack_handle_t save_stack(gfp_t flags)
+{
+ unsigned long entries[KASAN_STACK_DEPTH];
+ struct stack_trace trace = {
+ .nr_entries = 0,
+ .entries = entries,
+ .max_entries = KASAN_STACK_DEPTH,
+ .skip = 0
+ };
+
+ save_stack_trace(&trace);
+ filter_irq_stacks(&trace);
+ if (trace.nr_entries != 0 &&
+ trace.entries[trace.nr_entries-1] == ULONG_MAX)
+ trace.nr_entries--;
+
+ return depot_save_stack(&trace, flags);
+}
+
+static inline void set_track(struct kasan_track *track, gfp_t flags)
+{
+ track->pid = current->pid;
+ track->stack = save_stack(flags);
+}
+
+struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
+ const void *object)
+{
+ BUILD_BUG_ON(sizeof(struct kasan_alloc_meta) > 32);
+ return (void *)object + cache->kasan_info.alloc_meta_offset;
+}
+
+struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
+ const void *object)
+{
+ BUILD_BUG_ON(sizeof(struct kasan_free_meta) > 32);
+ return (void *)object + cache->kasan_info.free_meta_offset;
+}
+
+void kasan_init_slab_obj(struct kmem_cache *cache, const void *object)
+{
+ struct kasan_alloc_meta *alloc_info;
+
+ if (!(cache->flags & SLAB_KASAN))
+ return;
+
+ alloc_info = get_alloc_info(cache, object);
+ __memset(alloc_info, 0, sizeof(*alloc_info));
+}
+
+void kasan_slab_alloc(struct kmem_cache *cache, void *object, gfp_t flags)
{
- kasan_kmalloc(cache, object, cache->object_size);
+ kasan_kmalloc(cache, object, cache->object_size, flags);
}
-void kasan_slab_free(struct kmem_cache *cache, void *object)
+static void kasan_poison_slab_free(struct kmem_cache *cache, void *object)
{
unsigned long size = cache->object_size;
unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
@@ -370,11 +566,40 @@ void kasan_slab_free(struct kmem_cache *cache, void *object)
kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE);
}
-void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size)
+bool kasan_slab_free(struct kmem_cache *cache, void *object)
+{
+ s8 shadow_byte;
+
+ /* RCU slabs could be legally used after free within the RCU period */
+ if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
+ return false;
+
+ shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object));
+ if (shadow_byte < 0 || shadow_byte >= KASAN_SHADOW_SCALE_SIZE) {
+ kasan_report_double_free(cache, object,
+ __builtin_return_address(1));
+ return true;
+ }
+
+ kasan_poison_slab_free(cache, object);
+
+ if (unlikely(!(cache->flags & SLAB_KASAN)))
+ return false;
+
+ set_track(&get_alloc_info(cache, object)->free_track, GFP_NOWAIT);
+ quarantine_put(get_free_info(cache, object), cache);
+ return true;
+}
+
+void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size,
+ gfp_t flags)
{
unsigned long redzone_start;
unsigned long redzone_end;
+ if (gfpflags_allow_blocking(flags))
+ quarantine_reduce();
+
if (unlikely(object == NULL))
return;
@@ -386,15 +611,21 @@ void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size)
kasan_unpoison_shadow(object, size);
kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start,
KASAN_KMALLOC_REDZONE);
+
+ if (cache->flags & SLAB_KASAN)
+ set_track(&get_alloc_info(cache, object)->alloc_track, flags);
}
EXPORT_SYMBOL(kasan_kmalloc);
-void kasan_kmalloc_large(const void *ptr, size_t size)
+void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags)
{
struct page *page;
unsigned long redzone_start;
unsigned long redzone_end;
+ if (gfpflags_allow_blocking(flags))
+ quarantine_reduce();
+
if (unlikely(ptr == NULL))
return;
@@ -408,7 +639,7 @@ void kasan_kmalloc_large(const void *ptr, size_t size)
KASAN_PAGE_REDZONE);
}
-void kasan_krealloc(const void *object, size_t size)
+void kasan_krealloc(const void *object, size_t size, gfp_t flags)
{
struct page *page;
@@ -418,12 +649,12 @@ void kasan_krealloc(const void *object, size_t size)
page = virt_to_head_page(object);
if (unlikely(!PageSlab(page)))
- kasan_kmalloc_large(object, size);
+ kasan_kmalloc_large(object, size, flags);
else
- kasan_kmalloc(page->slab_cache, object, size);
+ kasan_kmalloc(page->slab_cache, object, size, flags);
}
-void kasan_kfree(void *ptr)
+void kasan_poison_kfree(void *ptr)
{
struct page *page;
@@ -433,7 +664,7 @@ void kasan_kfree(void *ptr)
kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page),
KASAN_FREE_PAGE);
else
- kasan_slab_free(page->slab_cache, ptr);
+ kasan_poison_slab_free(page->slab_cache, ptr);
}
void kasan_kfree_large(const void *ptr)
@@ -503,22 +734,22 @@ void __asan_unregister_globals(struct kasan_global *globals, size_t size)
}
EXPORT_SYMBOL(__asan_unregister_globals);
-#define DEFINE_ASAN_LOAD_STORE(size) \
- void __asan_load##size(unsigned long addr) \
- { \
- check_memory_region(addr, size, false); \
- } \
- EXPORT_SYMBOL(__asan_load##size); \
- __alias(__asan_load##size) \
- void __asan_load##size##_noabort(unsigned long); \
- EXPORT_SYMBOL(__asan_load##size##_noabort); \
- void __asan_store##size(unsigned long addr) \
- { \
- check_memory_region(addr, size, true); \
- } \
- EXPORT_SYMBOL(__asan_store##size); \
- __alias(__asan_store##size) \
- void __asan_store##size##_noabort(unsigned long); \
+#define DEFINE_ASAN_LOAD_STORE(size) \
+ void __asan_load##size(unsigned long addr) \
+ { \
+ check_memory_region_inline(addr, size, false, _RET_IP_);\
+ } \
+ EXPORT_SYMBOL(__asan_load##size); \
+ __alias(__asan_load##size) \
+ void __asan_load##size##_noabort(unsigned long); \
+ EXPORT_SYMBOL(__asan_load##size##_noabort); \
+ void __asan_store##size(unsigned long addr) \
+ { \
+ check_memory_region_inline(addr, size, true, _RET_IP_); \
+ } \
+ EXPORT_SYMBOL(__asan_store##size); \
+ __alias(__asan_store##size) \
+ void __asan_store##size##_noabort(unsigned long); \
EXPORT_SYMBOL(__asan_store##size##_noabort)
DEFINE_ASAN_LOAD_STORE(1);
@@ -529,7 +760,7 @@ DEFINE_ASAN_LOAD_STORE(16);
void __asan_loadN(unsigned long addr, size_t size)
{
- check_memory_region(addr, size, false);
+ check_memory_region(addr, size, false, _RET_IP_);
}
EXPORT_SYMBOL(__asan_loadN);
@@ -539,7 +770,7 @@ EXPORT_SYMBOL(__asan_loadN_noabort);
void __asan_storeN(unsigned long addr, size_t size)
{
- check_memory_region(addr, size, true);
+ check_memory_region(addr, size, true, _RET_IP_);
}
EXPORT_SYMBOL(__asan_storeN);
@@ -551,6 +782,25 @@ EXPORT_SYMBOL(__asan_storeN_noabort);
void __asan_handle_no_return(void) {}
EXPORT_SYMBOL(__asan_handle_no_return);
+/* Emitted by compiler to poison large objects when they go out of scope. */
+void __asan_poison_stack_memory(const void *addr, size_t size)
+{
+ /*
+ * Addr is KASAN_SHADOW_SCALE_SIZE-aligned and the object is surrounded
+ * by redzones, so we simply round up size to simplify logic.
+ */
+ kasan_poison_shadow(addr, round_up(size, KASAN_SHADOW_SCALE_SIZE),
+ KASAN_USE_AFTER_SCOPE);
+}
+EXPORT_SYMBOL(__asan_poison_stack_memory);
+
+/* Emitted by compiler to unpoison large objects when they go into scope. */
+void __asan_unpoison_stack_memory(const void *addr, size_t size)
+{
+ kasan_unpoison_shadow(addr, size);
+}
+EXPORT_SYMBOL(__asan_unpoison_stack_memory);
+
#ifdef CONFIG_MEMORY_HOTPLUG
static int kasan_mem_notifier(struct notifier_block *nb,
unsigned long action, void *data)
@@ -560,8 +810,8 @@ static int kasan_mem_notifier(struct notifier_block *nb,
static int __init kasan_memhotplug_init(void)
{
- pr_err("WARNING: KASAN doesn't support memory hot-add\n");
- pr_err("Memory hot-add will be disabled\n");
+ pr_info("WARNING: KASAN doesn't support memory hot-add\n");
+ pr_info("Memory hot-add will be disabled\n");
hotplug_memory_notifier(kasan_mem_notifier, 0);
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 37ff0ab6a8ff..1229298cce64 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -2,6 +2,7 @@
#define __MM_KASAN_KASAN_H
#include <linux/kasan.h>
+#include <linux/stackdepot.h>
#define KASAN_SHADOW_SCALE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT)
#define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1)
@@ -20,6 +21,7 @@
#define KASAN_STACK_MID 0xF2
#define KASAN_STACK_RIGHT 0xF3
#define KASAN_STACK_PARTIAL 0xF4
+#define KASAN_USE_AFTER_SCOPE 0xF8
/* Don't break randconfig/all*config builds */
#ifndef KASAN_ABI_VERSION
@@ -57,18 +59,57 @@ struct kasan_global {
#endif
};
+/**
+ * Structures to keep alloc and free tracks *
+ */
+
+#define KASAN_STACK_DEPTH 64
+
+struct kasan_track {
+ u32 pid;
+ depot_stack_handle_t stack;
+};
+
+struct kasan_alloc_meta {
+ struct kasan_track alloc_track;
+ struct kasan_track free_track;
+};
+
+struct qlist_node {
+ struct qlist_node *next;
+};
+struct kasan_free_meta {
+ /* This field is used while the object is in the quarantine.
+ * Otherwise it might be used for the allocator freelist.
+ */
+ struct qlist_node quarantine_link;
+};
+
+struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
+ const void *object);
+struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
+ const void *object);
+
static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
{
return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET)
<< KASAN_SHADOW_SCALE_SHIFT);
}
-static inline bool kasan_report_enabled(void)
-{
- return !current->kasan_depth;
-}
-
void kasan_report(unsigned long addr, size_t size,
bool is_write, unsigned long ip);
+void kasan_report_double_free(struct kmem_cache *cache, void *object,
+ void *ip);
+
+#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB)
+void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache);
+void quarantine_reduce(void);
+void quarantine_remove_cache(struct kmem_cache *cache);
+#else
+static inline void quarantine_put(struct kasan_free_meta *info,
+ struct kmem_cache *cache) { }
+static inline void quarantine_reduce(void) { }
+static inline void quarantine_remove_cache(struct kmem_cache *cache) { }
+#endif
#endif
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
new file mode 100644
index 000000000000..3a8ddf8baf7d
--- /dev/null
+++ b/mm/kasan/quarantine.c
@@ -0,0 +1,328 @@
+/*
+ * KASAN quarantine.
+ *
+ * Author: Alexander Potapenko <glider@google.com>
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * Based on code by Dmitry Chernenkov.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include <linux/gfp.h>
+#include <linux/hash.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/percpu.h>
+#include <linux/printk.h>
+#include <linux/shrinker.h>
+#include <linux/slab.h>
+#include <linux/srcu.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "../slab.h"
+#include "kasan.h"
+
+/* Data structure and operations for quarantine queues. */
+
+/*
+ * Each queue is a signle-linked list, which also stores the total size of
+ * objects inside of it.
+ */
+struct qlist_head {
+ struct qlist_node *head;
+ struct qlist_node *tail;
+ size_t bytes;
+};
+
+#define QLIST_INIT { NULL, NULL, 0 }
+
+static bool qlist_empty(struct qlist_head *q)
+{
+ return !q->head;
+}
+
+static void qlist_init(struct qlist_head *q)
+{
+ q->head = q->tail = NULL;
+ q->bytes = 0;
+}
+
+static void qlist_put(struct qlist_head *q, struct qlist_node *qlink,
+ size_t size)
+{
+ if (unlikely(qlist_empty(q)))
+ q->head = qlink;
+ else
+ q->tail->next = qlink;
+ q->tail = qlink;
+ qlink->next = NULL;
+ q->bytes += size;
+}
+
+static void qlist_move_all(struct qlist_head *from, struct qlist_head *to)
+{
+ if (unlikely(qlist_empty(from)))
+ return;
+
+ if (qlist_empty(to)) {
+ *to = *from;
+ qlist_init(from);
+ return;
+ }
+
+ to->tail->next = from->head;
+ to->tail = from->tail;
+ to->bytes += from->bytes;
+
+ qlist_init(from);
+}
+
+#define QUARANTINE_PERCPU_SIZE (1 << 20)
+#define QUARANTINE_BATCHES \
+ (1024 > 4 * CONFIG_NR_CPUS ? 1024 : 4 * CONFIG_NR_CPUS)
+
+/*
+ * The object quarantine consists of per-cpu queues and a global queue,
+ * guarded by quarantine_lock.
+ */
+static DEFINE_PER_CPU(struct qlist_head, cpu_quarantine);
+
+/* Round-robin FIFO array of batches. */
+static struct qlist_head global_quarantine[QUARANTINE_BATCHES];
+static int quarantine_head;
+static int quarantine_tail;
+/* Total size of all objects in global_quarantine across all batches. */
+static unsigned long quarantine_size;
+static DEFINE_SPINLOCK(quarantine_lock);
+DEFINE_STATIC_SRCU(remove_cache_srcu);
+
+/* Maximum size of the global queue. */
+static unsigned long quarantine_max_size;
+
+/*
+ * Target size of a batch in global_quarantine.
+ * Usually equal to QUARANTINE_PERCPU_SIZE unless we have too much RAM.
+ */
+static unsigned long quarantine_batch_size;
+
+/*
+ * The fraction of physical memory the quarantine is allowed to occupy.
+ * Quarantine doesn't support memory shrinker with SLAB allocator, so we keep
+ * the ratio low to avoid OOM.
+ */
+#define QUARANTINE_FRACTION 32
+
+static struct kmem_cache *qlink_to_cache(struct qlist_node *qlink)
+{
+ return virt_to_head_page(qlink)->slab_cache;
+}
+
+static void *qlink_to_object(struct qlist_node *qlink, struct kmem_cache *cache)
+{
+ struct kasan_free_meta *free_info =
+ container_of(qlink, struct kasan_free_meta,
+ quarantine_link);
+
+ return ((void *)free_info) - cache->kasan_info.free_meta_offset;
+}
+
+static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
+{
+ void *object = qlink_to_object(qlink, cache);
+ unsigned long flags;
+
+ if (IS_ENABLED(CONFIG_SLAB))
+ local_irq_save(flags);
+
+ ___cache_free(cache, object, _THIS_IP_);
+
+ if (IS_ENABLED(CONFIG_SLAB))
+ local_irq_restore(flags);
+}
+
+static void qlist_free_all(struct qlist_head *q, struct kmem_cache *cache)
+{
+ struct qlist_node *qlink;
+
+ if (unlikely(qlist_empty(q)))
+ return;
+
+ qlink = q->head;
+ while (qlink) {
+ struct kmem_cache *obj_cache =
+ cache ? cache : qlink_to_cache(qlink);
+ struct qlist_node *next = qlink->next;
+
+ qlink_free(qlink, obj_cache);
+ qlink = next;
+ }
+ qlist_init(q);
+}
+
+void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
+{
+ unsigned long flags;
+ struct qlist_head *q;
+ struct qlist_head temp = QLIST_INIT;
+
+ /*
+ * Note: irq must be disabled until after we move the batch to the
+ * global quarantine. Otherwise quarantine_remove_cache() can miss
+ * some objects belonging to the cache if they are in our local temp
+ * list. quarantine_remove_cache() executes on_each_cpu() at the
+ * beginning which ensures that it either sees the objects in per-cpu
+ * lists or in the global quarantine.
+ */
+ local_irq_save(flags);
+
+ q = this_cpu_ptr(&cpu_quarantine);
+ qlist_put(q, &info->quarantine_link, cache->size);
+ if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) {
+ qlist_move_all(q, &temp);
+
+ spin_lock(&quarantine_lock);
+ WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes);
+ qlist_move_all(&temp, &global_quarantine[quarantine_tail]);
+ if (global_quarantine[quarantine_tail].bytes >=
+ READ_ONCE(quarantine_batch_size)) {
+ int new_tail;
+
+ new_tail = quarantine_tail + 1;
+ if (new_tail == QUARANTINE_BATCHES)
+ new_tail = 0;
+ if (new_tail != quarantine_head)
+ quarantine_tail = new_tail;
+ }
+ spin_unlock(&quarantine_lock);
+ }
+
+ local_irq_restore(flags);
+}
+
+void quarantine_reduce(void)
+{
+ size_t total_size, new_quarantine_size, percpu_quarantines;
+ unsigned long flags;
+ int srcu_idx;
+ struct qlist_head to_free = QLIST_INIT;
+
+ if (likely(READ_ONCE(quarantine_size) <=
+ READ_ONCE(quarantine_max_size)))
+ return;
+
+ /*
+ * srcu critical section ensures that quarantine_remove_cache()
+ * will not miss objects belonging to the cache while they are in our
+ * local to_free list. srcu is chosen because (1) it gives us private
+ * grace period domain that does not interfere with anything else,
+ * and (2) it allows synchronize_srcu() to return without waiting
+ * if there are no pending read critical sections (which is the
+ * expected case).
+ */
+ srcu_idx = srcu_read_lock(&remove_cache_srcu);
+ spin_lock_irqsave(&quarantine_lock, flags);
+
+ /*
+ * Update quarantine size in case of hotplug. Allocate a fraction of
+ * the installed memory to quarantine minus per-cpu queue limits.
+ */
+ total_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) /
+ QUARANTINE_FRACTION;
+ percpu_quarantines = QUARANTINE_PERCPU_SIZE * num_online_cpus();
+ new_quarantine_size = (total_size < percpu_quarantines) ?
+ 0 : total_size - percpu_quarantines;
+ WRITE_ONCE(quarantine_max_size, new_quarantine_size);
+ /* Aim at consuming at most 1/2 of slots in quarantine. */
+ WRITE_ONCE(quarantine_batch_size, max((size_t)QUARANTINE_PERCPU_SIZE,
+ 2 * total_size / QUARANTINE_BATCHES));
+
+ if (likely(quarantine_size > quarantine_max_size)) {
+ qlist_move_all(&global_quarantine[quarantine_head], &to_free);
+ WRITE_ONCE(quarantine_size, quarantine_size - to_free.bytes);
+ quarantine_head++;
+ if (quarantine_head == QUARANTINE_BATCHES)
+ quarantine_head = 0;
+ }
+
+ spin_unlock_irqrestore(&quarantine_lock, flags);
+
+ qlist_free_all(&to_free, NULL);
+ srcu_read_unlock(&remove_cache_srcu, srcu_idx);
+}
+
+static void qlist_move_cache(struct qlist_head *from,
+ struct qlist_head *to,
+ struct kmem_cache *cache)
+{
+ struct qlist_node *curr;
+
+ if (unlikely(qlist_empty(from)))
+ return;
+
+ curr = from->head;
+ qlist_init(from);
+ while (curr) {
+ struct qlist_node *next = curr->next;
+ struct kmem_cache *obj_cache = qlink_to_cache(curr);
+
+ if (obj_cache == cache)
+ qlist_put(to, curr, obj_cache->size);
+ else
+ qlist_put(from, curr, obj_cache->size);
+
+ curr = next;
+ }
+}
+
+static void per_cpu_remove_cache(void *arg)
+{
+ struct kmem_cache *cache = arg;
+ struct qlist_head to_free = QLIST_INIT;
+ struct qlist_head *q;
+
+ q = this_cpu_ptr(&cpu_quarantine);
+ qlist_move_cache(q, &to_free, cache);
+ qlist_free_all(&to_free, cache);
+}
+
+/* Free all quarantined objects belonging to cache. */
+void quarantine_remove_cache(struct kmem_cache *cache)
+{
+ unsigned long flags, i;
+ struct qlist_head to_free = QLIST_INIT;
+
+ /*
+ * Must be careful to not miss any objects that are being moved from
+ * per-cpu list to the global quarantine in quarantine_put(),
+ * nor objects being freed in quarantine_reduce(). on_each_cpu()
+ * achieves the first goal, while synchronize_srcu() achieves the
+ * second.
+ */
+ on_each_cpu(per_cpu_remove_cache, cache, 1);
+
+ spin_lock_irqsave(&quarantine_lock, flags);
+ for (i = 0; i < QUARANTINE_BATCHES; i++) {
+ if (qlist_empty(&global_quarantine[i]))
+ continue;
+ qlist_move_cache(&global_quarantine[i], &to_free, cache);
+ /* Scanning whole quarantine can take a while. */
+ spin_unlock_irqrestore(&quarantine_lock, flags);
+ cond_resched();
+ spin_lock_irqsave(&quarantine_lock, flags);
+ }
+ spin_unlock_irqrestore(&quarantine_lock, flags);
+
+ qlist_free_all(&to_free, cache);
+
+ synchronize_srcu(&remove_cache_srcu);
+}
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index b4e31f78ae69..04bb1d3eb9ec 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -13,12 +13,15 @@
*
*/
+#include <linux/bitops.h>
#include <linux/ftrace.h>
+#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/printk.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/stackdepot.h>
#include <linux/stacktrace.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -48,7 +51,13 @@ static const void *find_first_bad_addr(const void *addr, size_t size)
return first_bad_addr;
}
-static void print_error_description(struct kasan_access_info *info)
+static bool addr_has_shadow(struct kasan_access_info *info)
+{
+ return (info->access_addr >=
+ kasan_shadow_to_mem((void *)KASAN_SHADOW_START));
+}
+
+static const char *get_shadow_bug_type(struct kasan_access_info *info)
{
const char *bug_type = "unknown-crash";
u8 *shadow_addr;
@@ -90,14 +99,44 @@ static void print_error_description(struct kasan_access_info *info)
case KASAN_KMALLOC_FREE:
bug_type = "use-after-free";
break;
+ case KASAN_USE_AFTER_SCOPE:
+ bug_type = "use-after-scope";
+ break;
}
- pr_err("BUG: KASAN: %s in %pS at addr %p\n",
- bug_type, (void *)info->ip,
- info->access_addr);
- pr_err("%s of size %zu by task %s/%d\n",
- info->is_write ? "Write" : "Read",
- info->access_size, current->comm, task_pid_nr(current));
+ return bug_type;
+}
+
+static const char *get_wild_bug_type(struct kasan_access_info *info)
+{
+ const char *bug_type = "unknown-crash";
+
+ if ((unsigned long)info->access_addr < PAGE_SIZE)
+ bug_type = "null-ptr-deref";
+ else if ((unsigned long)info->access_addr < TASK_SIZE)
+ bug_type = "user-memory-access";
+ else
+ bug_type = "wild-memory-access";
+
+ return bug_type;
+}
+
+static const char *get_bug_type(struct kasan_access_info *info)
+{
+ if (addr_has_shadow(info))
+ return get_shadow_bug_type(info);
+ return get_wild_bug_type(info);
+}
+
+static void print_error_description(struct kasan_access_info *info)
+{
+ const char *bug_type = get_bug_type(info);
+
+ pr_err("BUG: KASAN: %s in %pS\n",
+ bug_type, (void *)info->ip);
+ pr_err("%s of size %zu at addr %p by task %s/%d\n",
+ info->is_write ? "Write" : "Read", info->access_size,
+ info->access_addr, current->comm, task_pid_nr(current));
}
static inline bool kernel_or_module_addr(const void *addr)
@@ -116,39 +155,119 @@ static inline bool init_task_stack_addr(const void *addr)
sizeof(init_thread_union.stack));
}
-static void print_address_description(struct kasan_access_info *info)
+static DEFINE_SPINLOCK(report_lock);
+
+static void kasan_start_report(unsigned long *flags)
+{
+ /*
+ * Make sure we don't end up in loop.
+ */
+ kasan_disable_current();
+ spin_lock_irqsave(&report_lock, *flags);
+ pr_err("==================================================================\n");
+}
+
+static void kasan_end_report(unsigned long *flags)
+{
+ pr_err("==================================================================\n");
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
+ spin_unlock_irqrestore(&report_lock, *flags);
+ if (panic_on_warn)
+ panic("panic_on_warn set ...\n");
+ kasan_enable_current();
+}
+
+static void print_track(struct kasan_track *track, const char *prefix)
{
- const void *addr = info->access_addr;
+ pr_err("%s by task %u:\n", prefix, track->pid);
+ if (track->stack) {
+ struct stack_trace trace;
+ depot_fetch_stack(track->stack, &trace);
+ print_stack_trace(&trace, 0);
+ } else {
+ pr_err("(stack is not available)\n");
+ }
+}
+
+static struct page *addr_to_page(const void *addr)
+{
if ((addr >= (void *)PAGE_OFFSET) &&
- (addr < high_memory)) {
- struct page *page = virt_to_head_page(addr);
+ (addr < high_memory))
+ return virt_to_head_page(addr);
+ return NULL;
+}
- if (PageSlab(page)) {
- void *object;
- struct kmem_cache *cache = page->slab_cache;
- void *last_object;
+static void describe_object_addr(struct kmem_cache *cache, void *object,
+ const void *addr)
+{
+ unsigned long access_addr = (unsigned long)addr;
+ unsigned long object_addr = (unsigned long)object;
+ const char *rel_type;
+ int rel_bytes;
- object = virt_to_obj(cache, page_address(page), addr);
- last_object = page_address(page) +
- page->objects * cache->size;
+ pr_err("The buggy address belongs to the object at %p\n"
+ " which belongs to the cache %s of size %d\n",
+ object, cache->name, cache->object_size);
- if (unlikely(object > last_object))
- object = last_object; /* we hit into padding */
+ if (!addr)
+ return;
- object_err(cache, page, object,
- "kasan: bad access detected");
- return;
- }
- dump_page(page, "kasan: bad access detected");
+ if (access_addr < object_addr) {
+ rel_type = "to the left";
+ rel_bytes = object_addr - access_addr;
+ } else if (access_addr >= object_addr + cache->object_size) {
+ rel_type = "to the right";
+ rel_bytes = access_addr - (object_addr + cache->object_size);
+ } else {
+ rel_type = "inside";
+ rel_bytes = access_addr - object_addr;
}
- if (kernel_or_module_addr(addr)) {
- if (!init_task_stack_addr(addr))
- pr_err("Address belongs to variable %pS\n", addr);
+ pr_err("The buggy address is located %d bytes %s of\n"
+ " %d-byte region [%p, %p)\n",
+ rel_bytes, rel_type, cache->object_size, (void *)object_addr,
+ (void *)(object_addr + cache->object_size));
+}
+
+static void describe_object(struct kmem_cache *cache, void *object,
+ const void *addr)
+{
+ struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object);
+
+ if (cache->flags & SLAB_KASAN) {
+ print_track(&alloc_info->alloc_track, "Allocated");
+ pr_err("\n");
+ print_track(&alloc_info->free_track, "Freed");
+ pr_err("\n");
}
+ describe_object_addr(cache, object, addr);
+}
+
+static void print_address_description(void *addr)
+{
+ struct page *page = addr_to_page(addr);
+
dump_stack();
+ pr_err("\n");
+
+ if (page && PageSlab(page)) {
+ struct kmem_cache *cache = page->slab_cache;
+ void *object = nearest_obj(cache, page, addr);
+
+ describe_object(cache, object, addr);
+ }
+
+ if (kernel_or_module_addr(addr) && !init_task_stack_addr(addr)) {
+ pr_err("The buggy address belongs to the variable:\n");
+ pr_err(" %pS\n", addr);
+ }
+
+ if (page) {
+ pr_err("The buggy address belongs to the page:\n");
+ dump_page(page, "kasan: bad access detected");
+ }
}
static bool row_is_guilty(const void *row, const void *guilty)
@@ -203,45 +322,72 @@ static void print_shadow_for_address(const void *addr)
}
}
-static DEFINE_SPINLOCK(report_lock);
+void kasan_report_double_free(struct kmem_cache *cache, void *object,
+ void *ip)
+{
+ unsigned long flags;
+
+ kasan_start_report(&flags);
+ pr_err("BUG: KASAN: double-free or invalid-free in %pS\n", ip);
+ pr_err("\n");
+ print_address_description(object);
+ pr_err("\n");
+ print_shadow_for_address(object);
+ kasan_end_report(&flags);
+}
static void kasan_report_error(struct kasan_access_info *info)
{
unsigned long flags;
- const char *bug_type;
- /*
- * Make sure we don't end up in loop.
- */
- kasan_disable_current();
- spin_lock_irqsave(&report_lock, flags);
- pr_err("================================="
- "=================================\n");
- if (info->access_addr <
- kasan_shadow_to_mem((void *)KASAN_SHADOW_START)) {
- if ((unsigned long)info->access_addr < PAGE_SIZE)
- bug_type = "null-ptr-deref";
- else if ((unsigned long)info->access_addr < TASK_SIZE)
- bug_type = "user-memory-access";
- else
- bug_type = "wild-memory-access";
- pr_err("BUG: KASAN: %s on address %p\n",
- bug_type, info->access_addr);
- pr_err("%s of size %zu by task %s/%d\n",
- info->is_write ? "Write" : "Read",
- info->access_size, current->comm,
- task_pid_nr(current));
+ kasan_start_report(&flags);
+
+ print_error_description(info);
+ pr_err("\n");
+
+ if (!addr_has_shadow(info)) {
dump_stack();
} else {
- print_error_description(info);
- print_address_description(info);
+ print_address_description((void *)info->access_addr);
+ pr_err("\n");
print_shadow_for_address(info->first_bad_addr);
}
- pr_err("================================="
- "=================================\n");
- add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
- spin_unlock_irqrestore(&report_lock, flags);
- kasan_enable_current();
+
+ kasan_end_report(&flags);
+}
+
+static unsigned long kasan_flags;
+
+#define KASAN_BIT_REPORTED 0
+#define KASAN_BIT_MULTI_SHOT 1
+
+bool kasan_save_enable_multi_shot(void)
+{
+ return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
+}
+EXPORT_SYMBOL_GPL(kasan_save_enable_multi_shot);
+
+void kasan_restore_multi_shot(bool enabled)
+{
+ if (!enabled)
+ clear_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
+}
+EXPORT_SYMBOL_GPL(kasan_restore_multi_shot);
+
+static int __init kasan_set_multi_shot(char *str)
+{
+ set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
+ return 1;
+}
+__setup("kasan_multi_shot", kasan_set_multi_shot);
+
+static inline bool kasan_report_enabled(void)
+{
+ if (current->kasan_depth)
+ return false;
+ if (test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags))
+ return true;
+ return !test_and_set_bit(KASAN_BIT_REPORTED, &kasan_flags);
}
void kasan_report(unsigned long addr, size_t size,
diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c
index cab58bb592d8..e6347772bbda 100644
--- a/mm/kmemcheck.c
+++ b/mm/kmemcheck.c
@@ -20,8 +20,7 @@ void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node)
shadow = alloc_pages_node(node, flags | __GFP_NOTRACK, order);
if (!shadow) {
if (printk_ratelimit())
- printk(KERN_ERR "kmemcheck: failed to allocate "
- "shadow bitmap\n");
+ printk(KERN_ERR "kmemcheck: failed to allocate shadow bitmap\n");
return;
}
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 19423a45d7d7..f78217d6e46e 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -597,8 +597,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
else if (parent->pointer + parent->size <= ptr)
link = &parent->rb_node.rb_right;
else {
- kmemleak_stop("Cannot insert 0x%lx into the object "
- "search tree (overlaps existing)\n",
+ kmemleak_stop("Cannot insert 0x%lx into the object search tree (overlaps existing)\n",
ptr);
/*
* No need for parent->lock here since "parent" cannot
@@ -671,8 +670,8 @@ static void delete_object_part(unsigned long ptr, size_t size)
object = find_and_remove_object(ptr, 1);
if (!object) {
#ifdef DEBUG
- kmemleak_warn("Partially freeing unknown object at 0x%08lx "
- "(size %zu)\n", ptr, size);
+ kmemleak_warn("Partially freeing unknown object at 0x%08lx (size %zu)\n",
+ ptr, size);
#endif
return;
}
@@ -718,8 +717,8 @@ static void paint_ptr(unsigned long ptr, int color)
object = find_and_get_object(ptr, 0);
if (!object) {
- kmemleak_warn("Trying to color unknown object "
- "at 0x%08lx as %s\n", ptr,
+ kmemleak_warn("Trying to color unknown object at 0x%08lx as %s\n",
+ ptr,
(color == KMEMLEAK_GREY) ? "Grey" :
(color == KMEMLEAK_BLACK) ? "Black" : "Unknown");
return;
@@ -1464,8 +1463,8 @@ static void kmemleak_scan(void)
if (new_leaks) {
kmemleak_found_leaks = true;
- pr_info("%d new suspected memory leaks (see "
- "/sys/kernel/debug/kmemleak)\n", new_leaks);
+ pr_info("%d new suspected memory leaks (see /sys/kernel/debug/kmemleak)\n",
+ new_leaks);
}
}
@@ -1796,8 +1795,7 @@ static void kmemleak_do_cleanup(struct work_struct *work)
if (!kmemleak_found_leaks)
__kmemleak_do_cleanup();
else
- pr_info("Kmemleak disabled without freeing internal data. "
- "Reclaim the memory with \"echo clear > /sys/kernel/debug/kmemleak\"\n");
+ pr_info("Kmemleak disabled without freeing internal data. Reclaim the memory with \"echo clear > /sys/kernel/debug/kmemleak\".\n");
}
static DECLARE_WORK(cleanup_work, kmemleak_do_cleanup);
diff --git a/mm/madvise.c b/mm/madvise.c
index c154e1076303..b04f2d26cdb8 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -223,15 +223,14 @@ static long madvise_willneed(struct vm_area_struct *vma,
{
struct file *file = vma->vm_file;
+ *prev = vma;
#ifdef CONFIG_SWAP
if (!file) {
- *prev = vma;
force_swapin_readahead(vma, start, end);
return 0;
}
if (shmem_mapping(file->f_mapping)) {
- *prev = vma;
force_shm_swapin_readahead(vma, start, end,
file->f_mapping);
return 0;
@@ -246,7 +245,6 @@ static long madvise_willneed(struct vm_area_struct *vma,
return 0;
}
- *prev = vma;
start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
if (end > vma->vm_end)
end = vma->vm_end;
diff --git a/mm/memblock.c b/mm/memblock.c
index 843c7d15fe53..e799ac0f123f 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -241,8 +241,7 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
* so we use WARN_ONCE() here to see the stack trace if
* fail happens.
*/
- WARN_ONCE(1, "memblock: bottom-up allocation failed, "
- "memory hotunplug may be affected\n");
+ WARN_ONCE(1, "memblock: bottom-up allocation failed, memory hotunplug may be affected\n");
}
return __memblock_find_range_top_down(start, end, size, align, nid,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 484bedd8d811..e25b93a4267d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5334,7 +5334,6 @@ struct cgroup_subsys memory_cgrp_subsys = {
.css_reset = mem_cgroup_css_reset,
.can_attach = mem_cgroup_can_attach,
.cancel_attach = mem_cgroup_cancel_attach,
- .attach = mem_cgroup_move_task,
.post_attach = mem_cgroup_move_task,
.bind = mem_cgroup_bind,
.dfl_cftypes = memory_files,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a18923e4359d..4d91216dcab2 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1927,8 +1927,7 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
beginpa = PFN_PHYS(section_nr_to_pfn(mem->start_section_nr));
endpa = PFN_PHYS(section_nr_to_pfn(mem->end_section_nr + 1))-1;
- pr_warn("removing memory fails, because memory "
- "[%pa-%pa] is onlined\n",
+ pr_warn("removing memory fails, because memory [%pa-%pa] is onlined\n",
&beginpa, &endpa);
}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 177668a9c267..62e4af5b287f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2514,9 +2514,7 @@ static void __init check_numabalancing_enable(void)
set_numabalancing_state(numabalancing_override == 1);
if (num_online_nodes() > 1 && !numabalancing_override) {
- pr_info("%s automatic NUMA balancing. "
- "Configure with numa_balancing= or the "
- "kernel.numa_balancing sysctl",
+ pr_info("%s automatic NUMA balancing. Configure with numa_balancing= or the kernel.numa_balancing sysctl\n",
numabalancing_default ? "Enabling" : "Disabling");
set_numabalancing_state(numabalancing_default);
}
diff --git a/mm/mempool.c b/mm/mempool.c
index 7924f4f58a6d..5ba6c8b3b814 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -104,20 +104,16 @@ static inline void poison_element(mempool_t *pool, void *element)
static void kasan_poison_element(mempool_t *pool, void *element)
{
- if (pool->alloc == mempool_alloc_slab)
- kasan_slab_free(pool->pool_data, element);
- if (pool->alloc == mempool_kmalloc)
- kasan_kfree(element);
+ if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
+ kasan_poison_kfree(element);
if (pool->alloc == mempool_alloc_pages)
kasan_free_pages(element, (unsigned long)pool->pool_data);
}
-static void kasan_unpoison_element(mempool_t *pool, void *element)
+static void kasan_unpoison_element(mempool_t *pool, void *element, gfp_t flags)
{
- if (pool->alloc == mempool_alloc_slab)
- kasan_slab_alloc(pool->pool_data, element);
- if (pool->alloc == mempool_kmalloc)
- kasan_krealloc(element, (size_t)pool->pool_data);
+ if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
+ kasan_unpoison_slab(element);
if (pool->alloc == mempool_alloc_pages)
kasan_alloc_pages(element, (unsigned long)pool->pool_data);
}
@@ -130,12 +126,12 @@ static void add_element(mempool_t *pool, void *element)
pool->elements[pool->curr_nr++] = element;
}
-static void *remove_element(mempool_t *pool)
+static void *remove_element(mempool_t *pool, gfp_t flags)
{
void *element = pool->elements[--pool->curr_nr];
BUG_ON(pool->curr_nr < 0);
- kasan_unpoison_element(pool, element);
+ kasan_unpoison_element(pool, element, flags);
check_element(pool, element);
return element;
}
@@ -154,7 +150,7 @@ void mempool_destroy(mempool_t *pool)
return;
while (pool->curr_nr) {
- void *element = remove_element(pool);
+ void *element = remove_element(pool, GFP_KERNEL);
pool->free(element, pool->pool_data);
}
kfree(pool->elements);
@@ -250,7 +246,7 @@ int mempool_resize(mempool_t *pool, int new_min_nr)
spin_lock_irqsave(&pool->lock, flags);
if (new_min_nr <= pool->min_nr) {
while (new_min_nr < pool->curr_nr) {
- element = remove_element(pool);
+ element = remove_element(pool, GFP_KERNEL);
spin_unlock_irqrestore(&pool->lock, flags);
pool->free(element, pool->pool_data);
spin_lock_irqsave(&pool->lock, flags);
@@ -336,7 +332,7 @@ repeat_alloc:
spin_lock_irqsave(&pool->lock, flags);
if (likely(pool->curr_nr)) {
- element = remove_element(pool);
+ element = remove_element(pool, gfp_temp);
spin_unlock_irqrestore(&pool->lock, flags);
/* paired with rmb in mempool_free(), read comment there */
smp_wmb();
diff --git a/mm/mmap.c b/mm/mmap.c
index 19823fc3dcfa..a399f3056ad7 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2702,8 +2702,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
unsigned long ret = -EINVAL;
struct file *file;
- pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
- "See Documentation/vm/remap_file_pages.txt.\n",
+ pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.txt.\n",
current->comm, current->pid);
if (prot)
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index f802c2d216a7..6f4d27c5bb32 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -4,9 +4,9 @@
*/
#include <linux/mm.h>
+#include <linux/sched.h>
#include <linux/mmu_context.h>
#include <linux/export.h>
-#include <linux/sched.h>
#include <asm/mmu_context.h>
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index c12680993ff3..6c1b9c43cc23 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -386,8 +386,7 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
static void dump_header(struct oom_control *oc, struct task_struct *p,
struct mem_cgroup *memcg)
{
- pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
- "oom_score_adj=%hd\n",
+ pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, oom_score_adj=%hd\n",
current->comm, oc->gfp_mask, oc->order,
current->signal->oom_score_adj);
cpuset_print_current_mems_allowed();
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 1e6769449ac2..71b0f525180a 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1172,6 +1172,7 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
unsigned long balanced_dirty_ratelimit;
unsigned long step;
unsigned long x;
+ unsigned long shift;
/*
* The dirty rate will match the writeout rate in long term, except
@@ -1296,11 +1297,11 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
* rate itself is constantly fluctuating. So decrease the track speed
* when it gets close to the target. Helps eliminate pointless tremors.
*/
- step >>= dirty_ratelimit / (2 * step + 1);
- /*
- * Limit the tracking speed to avoid overshooting.
- */
- step = (step + 7) / 8;
+ shift = dirty_ratelimit / (2 * step + 1);
+ if (shift < BITS_PER_LONG)
+ step = DIV_ROUND_UP(step >> shift, 8);
+ else
+ step = 0;
if (dirty_ratelimit < balanced_dirty_ratelimit)
dirty_ratelimit += step;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 02c2f11c0e63..692c0377ac77 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3990,8 +3990,7 @@ static int __parse_numa_zonelist_order(char *s)
user_zonelist_order = ZONELIST_ORDER_ZONE;
} else {
printk(KERN_WARNING
- "Ignoring invalid numa_zonelist_order value: "
- "%s\n", s);
+ "Ignoring invalid numa_zonelist_order value: %s\n", s);
return -EINVAL;
}
return 0;
@@ -4456,12 +4455,11 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
else
page_group_by_mobility_disabled = 0;
- pr_info("Built %i zonelists in %s order, mobility grouping %s. "
- "Total pages: %ld\n",
- nr_online_nodes,
- zonelist_order_name[current_zonelist_order],
- page_group_by_mobility_disabled ? "off" : "on",
- vm_total_pages);
+ pr_info("Built %i zonelists in %s order, mobility grouping %s. Total pages: %ld\n",
+ nr_online_nodes,
+ zonelist_order_name[current_zonelist_order],
+ page_group_by_mobility_disabled ? "off" : "on",
+ vm_total_pages);
#ifdef CONFIG_NUMA
pr_info("Policy zone: %s\n", zone_names[policy_zone]);
#endif
@@ -5932,22 +5930,21 @@ void __init mem_init_print_info(const char *str)
#undef adj_init_size
- pr_info("Memory: %luK/%luK available "
- "(%luK kernel code, %luK rwdata, %luK rodata, "
- "%luK init, %luK bss, %luK reserved, %luK cma-reserved"
+ pr_info("Memory: %luK/%luK available (%luK kernel code, %luK rwdata, %luK rodata, %luK init, %luK bss, %luK reserved, %luK cma-reserved"
#ifdef CONFIG_HIGHMEM
- ", %luK highmem"
+ ", %luK highmem"
#endif
- "%s%s)\n",
- nr_free_pages() << (PAGE_SHIFT-10), physpages << (PAGE_SHIFT-10),
- codesize >> 10, datasize >> 10, rosize >> 10,
- (init_data_size + init_code_size) >> 10, bss_size >> 10,
- (physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT-10),
- totalcma_pages << (PAGE_SHIFT-10),
+ "%s%s)\n",
+ nr_free_pages() << (PAGE_SHIFT - 10),
+ physpages << (PAGE_SHIFT - 10),
+ codesize >> 10, datasize >> 10, rosize >> 10,
+ (init_data_size + init_code_size) >> 10, bss_size >> 10,
+ (physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT - 10),
+ totalcma_pages << (PAGE_SHIFT - 10),
#ifdef CONFIG_HIGHMEM
- totalhigh_pages << (PAGE_SHIFT-10),
+ totalhigh_pages << (PAGE_SHIFT - 10),
#endif
- str ? ", " : "", str ? str : "");
+ str ? ", " : "", str ? str : "");
}
/**
diff --git a/mm/percpu.c b/mm/percpu.c
index ef6353f0adbd..d9f91253953e 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -889,8 +889,8 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
size = ALIGN(size, 2);
if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
- WARN(true, "illegal size (%zu) or align (%zu) for "
- "percpu allocation\n", size, align);
+ WARN(true, "illegal size (%zu) or align (%zu) for percpu allocation\n",
+ size, align);
return NULL;
}
diff --git a/mm/rmap.c b/mm/rmap.c
index ede183c32f45..1bceb49aa214 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -587,19 +587,6 @@ vma_address(struct page *page, struct vm_area_struct *vma)
}
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
-static void percpu_flush_tlb_batch_pages(void *data)
-{
- /*
- * All TLB entries are flushed on the assumption that it is
- * cheaper to flush all TLBs and let them be refilled than
- * flushing individual PFNs. Note that we do not track mm's
- * to flush as that might simply be multiple full TLB flushes
- * for no gain.
- */
- count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
- flush_tlb_local();
-}
-
/*
* Flush TLB entries for recently unmapped pages from remote CPUs. It is
* important if a PTE was dirty when it was unmapped that it's flushed
@@ -616,15 +603,14 @@ void try_to_unmap_flush(void)
cpu = get_cpu();
- trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, -1UL);
-
- if (cpumask_test_cpu(cpu, &tlb_ubc->cpumask))
- percpu_flush_tlb_batch_pages(&tlb_ubc->cpumask);
-
- if (cpumask_any_but(&tlb_ubc->cpumask, cpu) < nr_cpu_ids) {
- smp_call_function_many(&tlb_ubc->cpumask,
- percpu_flush_tlb_batch_pages, (void *)tlb_ubc, true);
+ if (cpumask_test_cpu(cpu, &tlb_ubc->cpumask)) {
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ local_flush_tlb();
+ trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
}
+
+ if (cpumask_any_but(&tlb_ubc->cpumask, cpu) < nr_cpu_ids)
+ flush_tlb_others(&tlb_ubc->cpumask, NULL, 0, TLB_FLUSH_ALL);
cpumask_clear(&tlb_ubc->cpumask);
tlb_ubc->flush_required = false;
tlb_ubc->writable = false;
diff --git a/mm/slab.c b/mm/slab.c
index 462938fc7cb9..8fc762c178bd 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -390,36 +390,26 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
#endif
-#define OBJECT_FREE (0)
-#define OBJECT_ACTIVE (1)
-
#ifdef CONFIG_DEBUG_SLAB_LEAK
-static void set_obj_status(struct page *page, int idx, int val)
+static inline bool is_store_user_clean(struct kmem_cache *cachep)
{
- int freelist_size;
- char *status;
- struct kmem_cache *cachep = page->slab_cache;
-
- freelist_size = cachep->num * sizeof(freelist_idx_t);
- status = (char *)page->freelist + freelist_size;
- status[idx] = val;
+ return atomic_read(&cachep->store_user_clean) == 1;
}
-static inline unsigned int get_obj_status(struct page *page, int idx)
+static inline void set_store_user_clean(struct kmem_cache *cachep)
{
- int freelist_size;
- char *status;
- struct kmem_cache *cachep = page->slab_cache;
-
- freelist_size = cachep->num * sizeof(freelist_idx_t);
- status = (char *)page->freelist + freelist_size;
+ atomic_set(&cachep->store_user_clean, 1);
+}
- return status[idx];
+static inline void set_store_user_dirty(struct kmem_cache *cachep)
+{
+ if (is_store_user_clean(cachep))
+ atomic_set(&cachep->store_user_clean, 0);
}
#else
-static inline void set_obj_status(struct page *page, int idx, int val) {}
+static inline void set_store_user_dirty(struct kmem_cache *cachep) {}
#endif
@@ -480,9 +470,6 @@ static size_t calculate_freelist_size(int nr_objs, size_t align)
size_t freelist_size;
freelist_size = nr_objs * sizeof(freelist_idx_t);
- if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK))
- freelist_size += nr_objs * sizeof(char);
-
if (align)
freelist_size = ALIGN(freelist_size, align);
@@ -495,10 +482,7 @@ static int calculate_nr_objs(size_t slab_size, size_t buffer_size,
int nr_objs;
size_t remained_size;
size_t freelist_size;
- int extra_space = 0;
- if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK))
- extra_space = sizeof(char);
/*
* Ignore padding for the initial guess. The padding
* is at most @align-1 bytes, and @buffer_size is at
@@ -507,7 +491,7 @@ static int calculate_nr_objs(size_t slab_size, size_t buffer_size,
* into the memory allocation when taking the padding
* into account.
*/
- nr_objs = slab_size / (buffer_size + idx_size + extra_space);
+ nr_objs = slab_size / (buffer_size + idx_size);
/*
* This calculated number will be either the right
@@ -1758,11 +1742,9 @@ static void dump_line(char *data, int offset, int limit)
if (bad_count == 1) {
error ^= POISON_FREE;
if (!(error & (error - 1))) {
- printk(KERN_ERR "Single bit error detected. Probably "
- "bad RAM.\n");
+ printk(KERN_ERR "Single bit error detected. Probably bad RAM.\n");
#ifdef CONFIG_X86
- printk(KERN_ERR "Run memtest86+ or a similar memory "
- "test tool.\n");
+ printk(KERN_ERR "Run memtest86+ or a similar memory test tool.\n");
#else
printk(KERN_ERR "Run a memory test tool.\n");
#endif
@@ -1879,11 +1861,9 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
}
if (cachep->flags & SLAB_RED_ZONE) {
if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
- slab_error(cachep, "start of a freed object "
- "was overwritten");
+ slab_error(cachep, "start of a freed object was overwritten");
if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
- slab_error(cachep, "end of a freed object "
- "was overwritten");
+ slab_error(cachep, "end of a freed object was overwritten");
}
}
}
@@ -1965,16 +1945,13 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
break;
if (flags & CFLGS_OFF_SLAB) {
- size_t freelist_size_per_obj = sizeof(freelist_idx_t);
/*
* Max number of objs-per-slab for caches which
* use off-slab slabs. Needed to avoid a possible
* looping condition in cache_grow().
*/
- if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK))
- freelist_size_per_obj += sizeof(char);
offslab_limit = size;
- offslab_limit /= freelist_size_per_obj;
+ offslab_limit /= sizeof(freelist_idx_t);
if (num > offslab_limit)
break;
@@ -2199,6 +2176,19 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
else
size += BYTES_PER_WORD;
}
+#endif
+
+ kasan_cache_create(cachep, &size, &flags);
+
+ size = ALIGN(size, cachep->align);
+ /*
+ * We should restrict the number of objects in a slab to implement
+ * byte sized index. Refer comment on SLAB_OBJ_MIN_SIZE definition.
+ */
+ if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE)
+ size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align);
+
+#if DEBUG
/*
* To activate debug pagealloc, off-slab management is necessary
* requirement. In early phase of initialization, small sized slab
@@ -2209,8 +2199,8 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
if (debug_pagealloc_enabled() && (flags & SLAB_POISON) &&
!slab_early_init && size >= kmalloc_size(INDEX_NODE) &&
size >= 256 && cachep->object_size > cache_line_size() &&
- ALIGN(size, cachep->align) < PAGE_SIZE) {
- cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align);
+ size < PAGE_SIZE) {
+ cachep->obj_offset += PAGE_SIZE - size;
size = PAGE_SIZE;
}
#endif
@@ -2222,20 +2212,13 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
* SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak)
*/
if (size >= OFF_SLAB_MIN_SIZE && !slab_early_init &&
- !(flags & SLAB_NOLEAKTRACE))
+ !(flags & SLAB_NOLEAKTRACE)) {
/*
* Size is large, assume best to place the slab management obj
* off-slab (should allow better packing of objs).
*/
flags |= CFLGS_OFF_SLAB;
-
- size = ALIGN(size, cachep->align);
- /*
- * We should restrict the number of objects in a slab to implement
- * byte sized index. Refer comment on SLAB_OBJ_MIN_SIZE definition.
- */
- if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE)
- size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align);
+ }
left_over = calculate_slab_order(cachep, size, cachep->align, flags);
@@ -2502,14 +2485,14 @@ static inline void set_free_obj(struct page *page,
((freelist_idx_t *)(page->freelist))[idx] = val;
}
-static void cache_init_objs(struct kmem_cache *cachep,
- struct page *page)
+static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page)
{
+#if DEBUG
int i;
for (i = 0; i < cachep->num; i++) {
void *objp = index_to_obj(cachep, page, i);
-#if DEBUG
+ kasan_init_slab_obj(cachep, objp);
if (cachep->flags & SLAB_STORE_USER)
*dbg_userword(cachep, objp) = NULL;
@@ -2522,27 +2505,46 @@ static void cache_init_objs(struct kmem_cache *cachep,
* cache which they are a constructor for. Otherwise, deadlock.
* They must also be threaded.
*/
- if (cachep->ctor && !(cachep->flags & SLAB_POISON))
+ if (cachep->ctor && !(cachep->flags & SLAB_POISON)) {
+ kasan_unpoison_object_data(cachep,
+ objp + obj_offset(cachep));
cachep->ctor(objp + obj_offset(cachep));
+ kasan_poison_object_data(
+ cachep, objp + obj_offset(cachep));
+ }
if (cachep->flags & SLAB_RED_ZONE) {
if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
- slab_error(cachep, "constructor overwrote the"
- " end of an object");
+ slab_error(cachep, "constructor overwrote the end of an object");
if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
- slab_error(cachep, "constructor overwrote the"
- " start of an object");
+ slab_error(cachep, "constructor overwrote the start of an object");
}
/* need to poison the objs? */
if (cachep->flags & SLAB_POISON) {
poison_obj(cachep, objp, POISON_FREE);
slab_kernel_map(cachep, objp, 0, 0);
}
-#else
- if (cachep->ctor)
- cachep->ctor(objp);
+ }
#endif
- set_obj_status(page, i, OBJECT_FREE);
+}
+
+static void cache_init_objs(struct kmem_cache *cachep,
+ struct page *page)
+{
+ int i;
+ void *objp;
+
+ cache_init_objs_debug(cachep, page);
+
+ for (i = 0; i < cachep->num; i++) {
+ /* constructor could break poison info */
+ if (DEBUG == 0 && cachep->ctor) {
+ objp = index_to_obj(cachep, page, i);
+ kasan_unpoison_object_data(cachep, objp);
+ cachep->ctor(objp);
+ kasan_poison_object_data(cachep, objp);
+ }
+
set_free_obj(page, i, i);
}
}
@@ -2568,6 +2570,11 @@ static void *slab_get_obj(struct kmem_cache *cachep, struct page *page,
WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid);
#endif
+#if DEBUG
+ if (cachep->flags & SLAB_STORE_USER)
+ set_store_user_dirty(cachep);
+#endif
+
return objp;
}
@@ -2584,8 +2591,8 @@ static void slab_put_obj(struct kmem_cache *cachep, struct page *page,
/* Verify double free bug */
for (i = page->active; i < cachep->num; i++) {
if (get_free_obj(page, i) == objnr) {
- printk(KERN_ERR "slab: double free detected in cache "
- "'%s', objp %p\n", cachep->name, objp);
+ printk(KERN_ERR "slab: double free detected in cache '%s', objp %p\n",
+ cachep->name, objp);
BUG();
}
}
@@ -2670,6 +2677,7 @@ static int cache_grow(struct kmem_cache *cachep,
slab_map_pages(cachep, page, freelist);
+ kasan_poison_slab(page);
cache_init_objs(cachep, page);
if (gfpflags_allow_blocking(local_flags))
@@ -2746,15 +2754,16 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
*dbg_redzone1(cachep, objp) = RED_INACTIVE;
*dbg_redzone2(cachep, objp) = RED_INACTIVE;
}
- if (cachep->flags & SLAB_STORE_USER)
+ if (cachep->flags & SLAB_STORE_USER) {
+ set_store_user_dirty(cachep);
*dbg_userword(cachep, objp) = (void *)caller;
+ }
objnr = obj_to_index(cachep, page, objp);
BUG_ON(objnr >= cachep->num);
BUG_ON(objp != index_to_obj(cachep, page, objnr));
- set_obj_status(page, objnr, OBJECT_FREE);
if (cachep->flags & SLAB_POISON) {
poison_obj(cachep, objp, POISON_FREE);
slab_kernel_map(cachep, objp, 0, caller);
@@ -2879,8 +2888,6 @@ static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
gfp_t flags, void *objp, unsigned long caller)
{
- struct page *page;
-
if (!objp)
return objp;
if (cachep->flags & SLAB_POISON) {
@@ -2894,8 +2901,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
if (cachep->flags & SLAB_RED_ZONE) {
if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
*dbg_redzone2(cachep, objp) != RED_INACTIVE) {
- slab_error(cachep, "double free, or memory outside"
- " object was overwritten");
+ slab_error(cachep, "double free, or memory outside object was overwritten");
printk(KERN_ERR
"%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
objp, *dbg_redzone1(cachep, objp),
@@ -2905,8 +2911,6 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
*dbg_redzone2(cachep, objp) = RED_ACTIVE;
}
- page = virt_to_head_page(objp);
- set_obj_status(page, obj_to_index(cachep, page, objp), OBJECT_ACTIVE);
objp += obj_offset(cachep);
if (cachep->ctor && cachep->flags & SLAB_POISON)
cachep->ctor(objp);
@@ -3370,6 +3374,16 @@ free_done:
static inline void __cache_free(struct kmem_cache *cachep, void *objp,
unsigned long caller)
{
+ /* Put the object into the quarantine, don't touch it for now. */
+ if (kasan_slab_free(cachep, objp))
+ return;
+
+ ___cache_free(cachep, objp, caller);
+}
+
+void ___cache_free(struct kmem_cache *cachep, void *objp,
+ unsigned long caller)
+{
struct array_cache *ac = cpu_cache_get(cachep);
check_irq_off();
@@ -3410,6 +3424,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
void *ret = slab_alloc(cachep, flags, _RET_IP_);
+ kasan_slab_alloc(cachep, ret, flags);
trace_kmem_cache_alloc(_RET_IP_, ret,
cachep->object_size, cachep->size, flags);
@@ -3438,6 +3453,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
ret = slab_alloc(cachep, flags, _RET_IP_);
+ kasan_kmalloc(cachep, ret, size, flags);
trace_kmalloc(_RET_IP_, ret,
size, cachep->size, flags);
return ret;
@@ -3461,6 +3477,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
+ kasan_slab_alloc(cachep, ret, flags);
trace_kmem_cache_alloc_node(_RET_IP_, ret,
cachep->object_size, cachep->size,
flags, nodeid);
@@ -3479,6 +3496,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
+ kasan_kmalloc(cachep, ret, size, flags);
trace_kmalloc_node(_RET_IP_, ret,
size, cachep->size,
flags, nodeid);
@@ -3491,11 +3509,15 @@ static __always_inline void *
__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
{
struct kmem_cache *cachep;
+ void *ret;
cachep = kmalloc_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
- return kmem_cache_alloc_node_trace(cachep, flags, node, size);
+ ret = kmem_cache_alloc_node_trace(cachep, flags, node, size);
+ kasan_kmalloc(cachep, ret, size, flags);
+
+ return ret;
}
void *__kmalloc_node(size_t size, gfp_t flags, int node)
@@ -3529,6 +3551,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
return cachep;
ret = slab_alloc(cachep, flags, caller);
+ kasan_kmalloc(cachep, ret, size, flags);
trace_kmalloc(caller, ret,
size, cachep->size, flags);
@@ -4002,8 +4025,7 @@ void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *cachep)
unsigned long node_frees = cachep->node_frees;
unsigned long overflows = cachep->node_overflow;
- seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu "
- "%4lu %4lu %4lu %4lu %4lu",
+ seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu %4lu %4lu %4lu %4lu %4lu",
allocs, high, grown,
reaped, errors, max_freeable, node_allocs,
node_frees, overflows);
@@ -4108,15 +4130,34 @@ static void handle_slab(unsigned long *n, struct kmem_cache *c,
struct page *page)
{
void *p;
- int i;
+ int i, j;
+ unsigned long v;
if (n[0] == n[1])
return;
for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) {
- if (get_obj_status(page, i) != OBJECT_ACTIVE)
+ bool active = true;
+
+ for (j = page->active; j < c->num; j++) {
+ if (get_free_obj(page, j) == i) {
+ active = false;
+ break;
+ }
+ }
+
+ if (!active)
+ continue;
+
+ /*
+ * probe_kernel_read() is used for DEBUG_PAGEALLOC. page table
+ * mapping is established when actual object allocation and
+ * we could mistakenly access the unmapped object in the cpu
+ * cache.
+ */
+ if (probe_kernel_read(&v, dbg_userword(c, p), sizeof(v)))
continue;
- if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
+ if (!add_caller(n, v))
return;
}
}
@@ -4152,21 +4193,31 @@ static int leaks_show(struct seq_file *m, void *p)
if (!(cachep->flags & SLAB_RED_ZONE))
return 0;
- /* OK, we can do it */
+ /*
+ * Set store_user_clean and start to grab stored user information
+ * for all objects on this cache. If some alloc/free requests comes
+ * during the processing, information would be wrong so restart
+ * whole processing.
+ */
+ do {
+ set_store_user_clean(cachep);
+ drain_cpu_caches(cachep);
- x[1] = 0;
+ x[1] = 0;
- for_each_kmem_cache_node(cachep, node, n) {
+ for_each_kmem_cache_node(cachep, node, n) {
- check_irq_on();
- spin_lock_irq(&n->list_lock);
+ check_irq_on();
+ spin_lock_irq(&n->list_lock);
+
+ list_for_each_entry(page, &n->slabs_full, lru)
+ handle_slab(x, cachep, page);
+ list_for_each_entry(page, &n->slabs_partial, lru)
+ handle_slab(x, cachep, page);
+ spin_unlock_irq(&n->list_lock);
+ }
+ } while (!is_store_user_clean(cachep));
- list_for_each_entry(page, &n->slabs_full, lru)
- handle_slab(x, cachep, page);
- list_for_each_entry(page, &n->slabs_partial, lru)
- handle_slab(x, cachep, page);
- spin_unlock_irq(&n->list_lock);
- }
name = cachep->name;
if (x[0] == x[1]) {
/* Increase the buffer size */
@@ -4276,10 +4327,18 @@ const char *__check_heap_object(const void *ptr, unsigned long n,
*/
size_t ksize(const void *objp)
{
+ size_t size;
+
BUG_ON(!objp);
if (unlikely(objp == ZERO_SIZE_PTR))
return 0;
- return virt_to_cache(objp)->object_size;
+ size = virt_to_cache(objp)->object_size;
+ /* We assume that ksize callers could use the whole allocated area,
+ * so we need to unpoison this area.
+ */
+ kasan_krealloc(objp, size, GFP_NOWAIT);
+
+ return size;
}
EXPORT_SYMBOL(ksize);
diff --git a/mm/slab.h b/mm/slab.h
index 7b6087197997..66118e967e04 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -371,4 +371,6 @@ void *slab_next(struct seq_file *m, void *p, loff_t *pos);
void slab_stop(struct seq_file *m, void *p);
int memcg_slab_show(struct seq_file *m, void *p);
+void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr);
+
#endif /* MM_SLAB_H */
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 01e7246de8df..1577d113fac5 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -35,7 +35,7 @@ struct kmem_cache *kmem_cache;
*/
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
- SLAB_FAILSLAB)
+ SLAB_FAILSLAB | SLAB_KASAN)
#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | SLAB_NOTRACK)
@@ -453,6 +453,9 @@ EXPORT_SYMBOL(kmem_cache_create);
static int shutdown_cache(struct kmem_cache *s,
struct list_head *release, bool *need_rcu_barrier)
{
+ /* free asan quarantined objects */
+ kasan_cache_shutdown(s);
+
if (__kmem_cache_shutdown(s) != 0)
return -EBUSY;
@@ -723,8 +726,8 @@ void kmem_cache_destroy(struct kmem_cache *s)
err = shutdown_cache(s, &release, &need_rcu_barrier);
if (err) {
- pr_err("kmem_cache_destroy %s: "
- "Slab cache still has objects\n", s->name);
+ pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
+ s->name);
dump_stack();
}
out_unlock:
@@ -750,6 +753,7 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
get_online_cpus();
get_online_mems();
+ kasan_cache_shrink(cachep);
ret = __kmem_cache_shrink(cachep, false);
put_online_mems();
put_online_cpus();
@@ -1010,7 +1014,7 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
page = alloc_kmem_pages(flags, order);
ret = page ? page_address(page) : NULL;
kmemleak_alloc(ret, size, 1, flags);
- kasan_kmalloc_large(ret, size);
+ kasan_kmalloc_large(ret, size, flags);
return ret;
}
EXPORT_SYMBOL(kmalloc_order);
@@ -1044,13 +1048,11 @@ static void print_slabinfo_header(struct seq_file *m)
#else
seq_puts(m, "slabinfo - version: 2.1\n");
#endif
- seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
- "<objperslab> <pagesperslab>");
+ seq_puts(m, "# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
- seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
- "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
+ seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
seq_putc(m, '\n');
@@ -1191,7 +1193,7 @@ static __always_inline void *__do_krealloc(const void *p, size_t new_size,
ks = ksize(p);
if (ks >= new_size) {
- kasan_krealloc((void *)p, new_size);
+ kasan_krealloc((void *)p, new_size, flags);
return (void *)p;
}
diff --git a/mm/slub.c b/mm/slub.c
index d6fe997c0577..06bed117ce90 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -287,6 +287,9 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
return s->object_size;
#endif
+ if (s->flags & SLAB_KASAN)
+ return s->object_size;
+
/*
* If we have the need to store the freelist pointer
* back there or track user information then we can
@@ -469,8 +472,6 @@ static inline void *restore_red_left(struct kmem_cache *s, void *p)
*/
#if defined(CONFIG_SLUB_DEBUG_ON)
static int slub_debug = DEBUG_DEFAULT_FLAGS;
-#elif defined(CONFIG_KASAN)
-static int slub_debug = SLAB_STORE_USER;
#else
static int slub_debug;
#endif
@@ -675,6 +676,8 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
if (s->flags & SLAB_STORE_USER)
off += 2 * sizeof(struct track);
+ off += kasan_metadata_size(s);
+
if (off != size_from_object(s))
/* Beginning of the filler is the free pointer */
print_section("Padding ", p + off, size_from_object(s) - off);
@@ -802,6 +805,8 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
/* We also have user information there */
off += 2 * sizeof(struct track);
+ off += kasan_metadata_size(s);
+
if (size_from_object(s) == off)
return 1;
@@ -965,14 +970,14 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
max_objects = MAX_OBJS_PER_PAGE;
if (page->objects != max_objects) {
- slab_err(s, page, "Wrong number of objects. Found %d but "
- "should be %d", page->objects, max_objects);
+ slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
+ page->objects, max_objects);
page->objects = max_objects;
slab_fix(s, "Number of objects adjusted.");
}
if (page->inuse != page->objects - nr) {
- slab_err(s, page, "Wrong object count. Counter is %d but "
- "counted were %d", page->inuse, page->objects - nr);
+ slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
+ page->inuse, page->objects - nr);
page->inuse = page->objects - nr;
slab_fix(s, "Object count adjusted.");
}
@@ -1136,8 +1141,8 @@ next_object:
if (unlikely(s != page->slab_cache)) {
if (!PageSlab(page)) {
- slab_err(s, page, "Attempt to free object(0x%p) "
- "outside of slab", object);
+ slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
+ object);
} else if (!page->slab_cache) {
pr_err("SLUB <none>: no slab for object 0x%p.\n",
object);
@@ -1307,7 +1312,7 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node,
static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
{
kmemleak_alloc(ptr, size, 1, flags);
- kasan_kmalloc_large(ptr, size);
+ kasan_kmalloc_large(ptr, size, flags);
}
static inline void kfree_hook(const void *x)
@@ -1341,13 +1346,15 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
kmemleak_alloc_recursive(object, s->object_size, 1,
s->flags, flags);
- kasan_slab_alloc(s, object);
+ kasan_slab_alloc(s, object, flags);
}
memcg_kmem_put_cache(s);
}
-static inline void slab_free_hook(struct kmem_cache *s, void *x)
+static inline void *slab_free_hook(struct kmem_cache *s, void *x)
{
+ void *freeptr;
+
kmemleak_free_recursive(x, s->flags);
/*
@@ -1368,7 +1375,13 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
if (!(s->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(x, s->object_size);
+ freeptr = get_freepointer(s, x);
+ /*
+ * kasan_slab_free() may put x into memory quarantine, delaying its
+ * reuse. In this case the object's freelist pointer is changed.
+ */
kasan_slab_free(s, x);
+ return freeptr;
}
static inline void slab_free_freelist_hook(struct kmem_cache *s,
@@ -1386,11 +1399,11 @@ static inline void slab_free_freelist_hook(struct kmem_cache *s,
void *object = head;
void *tail_obj = tail ? : head;
+ void *freeptr;
do {
- slab_free_hook(s, object);
- } while ((object != tail_obj) &&
- (object = get_freepointer(s, object)));
+ freeptr = slab_free_hook(s, object);
+ } while ((object != tail_obj) && (object = freeptr));
#endif
}
@@ -1398,6 +1411,7 @@ static void setup_object(struct kmem_cache *s, struct page *page,
void *object)
{
setup_object_debug(s, page, object);
+ kasan_init_slab_obj(s, object);
if (unlikely(s->ctor)) {
kasan_unpoison_object_data(s, object);
s->ctor(object);
@@ -2625,7 +2639,7 @@ void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = slab_alloc(s, gfpflags, _RET_IP_);
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
- kasan_kmalloc(s, ret, size);
+ kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_trace);
@@ -2653,7 +2667,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
trace_kmalloc_node(_RET_IP_, ret,
size, s->size, gfpflags, node);
- kasan_kmalloc(s, ret, size);
+ kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
@@ -2798,16 +2812,13 @@ slab_empty:
* same page) possible by specifying head and tail ptr, plus objects
* count (cnt). Bulk free indicated by tail pointer being set.
*/
-static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
- void *head, void *tail, int cnt,
- unsigned long addr)
+static __always_inline void do_slab_free(struct kmem_cache *s,
+ struct page *page, void *head, void *tail,
+ int cnt, unsigned long addr)
{
void *tail_obj = tail ? : head;
struct kmem_cache_cpu *c;
unsigned long tid;
-
- slab_free_freelist_hook(s, head, tail);
-
redo:
/*
* Determine the currently cpus per cpu slab.
@@ -2841,6 +2852,27 @@ redo:
}
+static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
+ void *head, void *tail, int cnt,
+ unsigned long addr)
+{
+ slab_free_freelist_hook(s, head, tail);
+ /*
+ * slab_free_freelist_hook() could have put the items into quarantine.
+ * If so, no need to free them.
+ */
+ if (s->flags & SLAB_KASAN && !(s->flags & SLAB_DESTROY_BY_RCU))
+ return;
+ do_slab_free(s, page, head, tail, cnt, addr);
+}
+
+#ifdef CONFIG_KASAN
+void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
+{
+ do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
+}
+#endif
+
void kmem_cache_free(struct kmem_cache *s, void *x)
{
s = cache_from_obj(s, x);
@@ -3197,7 +3229,8 @@ static void early_kmem_cache_node_alloc(int node)
init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
init_tracking(kmem_cache_node, n);
#endif
- kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node));
+ kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node),
+ GFP_KERNEL);
init_kmem_cache_node(n);
inc_slabs_node(kmem_cache_node, node, page->objects);
@@ -3260,7 +3293,7 @@ static void set_min_partial(struct kmem_cache *s, unsigned long min)
static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
unsigned long flags = s->flags;
- unsigned long size = s->object_size;
+ size_t size = s->object_size;
int order;
/*
@@ -3319,7 +3352,10 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
* the object.
*/
size += 2 * sizeof(struct track);
+#endif
+ kasan_cache_create(s, &size, &s->flags);
+#ifdef CONFIG_SLUB_DEBUG
if (flags & SLAB_RED_ZONE) {
/*
* Add some empty padding so that we can catch
@@ -3448,10 +3484,9 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
free_kmem_cache_nodes(s);
error:
if (flags & SLAB_PANIC)
- panic("Cannot create slab %s size=%lu realsize=%u "
- "order=%u offset=%u flags=%lx\n",
- s->name, (unsigned long)s->size, s->size,
- oo_order(s->oo), s->offset, flags);
+ panic("Cannot create slab %s size=%lu realsize=%u order=%u offset=%u flags=%lx\n",
+ s->name, (unsigned long)s->size, s->size,
+ oo_order(s->oo), s->offset, flags);
return -EINVAL;
}
@@ -3575,7 +3610,7 @@ void *__kmalloc(size_t size, gfp_t flags)
trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
- kasan_kmalloc(s, ret, size);
+ kasan_kmalloc(s, ret, size, flags);
return ret;
}
@@ -3620,7 +3655,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
- kasan_kmalloc(s, ret, size);
+ kasan_kmalloc(s, ret, size, flags);
return ret;
}
@@ -3689,7 +3724,7 @@ size_t ksize(const void *object)
size_t size = __ksize(object);
/* We assume that ksize callers could use whole allocated area,
so we need unpoison this area. */
- kasan_krealloc(object, size);
+ kasan_krealloc(object, size, GFP_NOWAIT);
return size;
}
EXPORT_SYMBOL(ksize);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 4cba9c2783a1..c22065e02084 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -94,8 +94,8 @@ void __meminit vmemmap_verify(pte_t *pte, int node,
int actual_node = early_pfn_to_nid(pfn);
if (node_distance(actual_node, node) > LOCAL_DISTANCE)
- printk(KERN_WARNING "[%lx-%lx] potential offnode "
- "page_structs\n", start, end - 1);
+ printk(KERN_WARNING "[%lx-%lx] potential offnode page_structs\n",
+ start, end - 1);
}
pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
@@ -220,8 +220,8 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
if (map_map[pnum])
continue;
ms = __nr_to_section(pnum);
- printk(KERN_ERR "%s: sparsemem memory map backing failed "
- "some memory will not be available.\n", __func__);
+ printk(KERN_ERR "%s: sparsemem memory map backing failed some memory will not be available.\n",
+ __func__);
ms->section_mem_map = 0;
}
diff --git a/mm/sparse.c b/mm/sparse.c
index d1b48b691ac8..1b7543a775a4 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -428,8 +428,8 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
if (map_map[pnum])
continue;
ms = __nr_to_section(pnum);
- printk(KERN_ERR "%s: sparsemem memory map backing failed "
- "some memory will not be available.\n", __func__);
+ printk(KERN_ERR "%s: sparsemem memory map backing failed some memory will not be available.\n",
+ __func__);
ms->section_mem_map = 0;
}
}
@@ -456,8 +456,8 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
if (map)
return map;
- printk(KERN_ERR "%s: sparsemem memory map backing failed "
- "some memory will not be available.\n", __func__);
+ printk(KERN_ERR "%s: sparsemem memory map backing failed some memory will not be available.\n",
+ __func__);
ms->section_mem_map = 0;
return NULL;
}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index c1a0f3dea8b5..65e07eb6558b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2542,8 +2542,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
(swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
enable_swap_info(p, prio, swap_map, cluster_info, frontswap_map);
- pr_info("Adding %uk swap on %s. "
- "Priority:%d extents:%d across:%lluk %s%s%s%s%s\n",
+ pr_info("Adding %uk swap on %s. Priority:%d extents:%d across:%lluk %s%s%s%s%s\n",
p->pages<<(PAGE_SHIFT-10), name->name, p->prio,
nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
(p->flags & SWP_SOLIDSTATE) ? "SS" : "",
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 8e3c9c5a3042..77f2f0f501d1 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -470,8 +470,8 @@ overflow:
goto retry;
}
if (printk_ratelimit())
- pr_warn("vmap allocation for size %lu failed: "
- "use vmalloc=<size> to increase size.\n", size);
+ pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
+ size);
kfree(va);
return ERR_PTR(-EBUSY);
}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 44618235722c..8640a185dfc6 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -738,6 +738,7 @@ const char * const vmstat_text[] = {
"nr_slab_unreclaimable",
"nr_page_table_pages",
"nr_kernel_stack",
+ "nr_overhead",
"nr_unstable",
"nr_bounce",
"nr_vmscan_write",
@@ -1352,7 +1353,9 @@ static int vmstat_show(struct seq_file *m, void *arg)
unsigned long *l = arg;
unsigned long off = l - (unsigned long *)m->private;
- seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
+ seq_puts(m, vmstat_text[off]);
+ seq_put_decimal_ull(m, ' ', *l);
+ seq_putc(m, '\n');
return 0;
}
diff --git a/mm/zswap.c b/mm/zswap.c
index 45476f429789..568015e2fe7a 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -123,7 +123,7 @@ struct zswap_pool {
struct crypto_comp * __percpu *tfm;
struct kref kref;
struct list_head list;
- struct rcu_head rcu_head;
+ struct work_struct work;
struct notifier_block notifier;
char tfm_name[CRYPTO_MAX_ALG_NAME];
};
@@ -667,9 +667,11 @@ static int __must_check zswap_pool_get(struct zswap_pool *pool)
return kref_get_unless_zero(&pool->kref);
}
-static void __zswap_pool_release(struct rcu_head *head)
+static void __zswap_pool_release(struct work_struct *work)
{
- struct zswap_pool *pool = container_of(head, typeof(*pool), rcu_head);
+ struct zswap_pool *pool = container_of(work, typeof(*pool), work);
+
+ synchronize_rcu();
/* nobody should have been able to get a kref... */
WARN_ON(kref_get_unless_zero(&pool->kref));
@@ -689,7 +691,9 @@ static void __zswap_pool_empty(struct kref *kref)
WARN_ON(pool == zswap_pool_current());
list_del_rcu(&pool->list);
- call_rcu(&pool->rcu_head, __zswap_pool_release);
+
+ INIT_WORK(&pool->work, __zswap_pool_release);
+ schedule_work(&pool->work);
spin_unlock(&zswap_pools_lock);
}
@@ -748,18 +752,22 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp,
pool = zswap_pool_find_get(type, compressor);
if (pool) {
zswap_pool_debug("using existing", pool);
+ WARN_ON(pool == zswap_pool_current());
list_del_rcu(&pool->list);
- } else {
- spin_unlock(&zswap_pools_lock);
- pool = zswap_pool_create(type, compressor);
- spin_lock(&zswap_pools_lock);
}
+ spin_unlock(&zswap_pools_lock);
+
+ if (!pool)
+ pool = zswap_pool_create(type, compressor);
+
if (pool)
ret = param_set_charp(s, kp);
else
ret = -EINVAL;
+ spin_lock(&zswap_pools_lock);
+
if (!ret) {
put_pool = zswap_pool_current();
list_add_rcu(&pool->list, &zswap_pools);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 01abb6431fd9..e2713b0794ae 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
vlan_gvrp_uninit_applicant(real_dev);
}
- /* Take it out of our own structures, but be sure to interlock with
- * HW accelerating devices or SW vlan input packet processing if
- * VLAN is not 0 (leave it there for 802.1p).
- */
- if (vlan_id)
- vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
+ vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
/* Get rid of the vlan's reference to real_dev */
dev_put(real_dev);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 357bcd34cf1f..af68674690af 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -3342,9 +3342,10 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
break;
case L2CAP_CONF_EFS:
- remote_efs = 1;
- if (olen == sizeof(efs))
+ if (olen == sizeof(efs)) {
+ remote_efs = 1;
memcpy(&efs, (void *) val, olen);
+ }
break;
case L2CAP_CONF_EWS:
@@ -3563,16 +3564,17 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
break;
case L2CAP_CONF_EFS:
- if (olen == sizeof(efs))
+ if (olen == sizeof(efs)) {
memcpy(&efs, (void *)val, olen);
- if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
- efs.stype != L2CAP_SERV_NOTRAFIC &&
- efs.stype != chan->local_stype)
- return -ECONNREFUSED;
+ if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
+ efs.stype != L2CAP_SERV_NOTRAFIC &&
+ efs.stype != chan->local_stype)
+ return -ECONNREFUSED;
- l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
- (unsigned long) &efs, endptr - ptr);
+ l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
+ (unsigned long) &efs, endptr - ptr);
+ }
break;
case L2CAP_CONF_FCS:
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 97fc19f001bf..55dcb2b20b59 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -701,18 +701,20 @@ static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge;
- unsigned int mtu_reserved;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+ unsigned int mtu, mtu_reserved;
mtu_reserved = nf_bridge_mtu_reduction(skb);
+ mtu = skb->dev->mtu;
+
+ if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu)
+ mtu = nf_bridge->frag_max_size;
- if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) {
+ if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) {
nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(net, sk, skb);
}
- nf_bridge = nf_bridge_info_get(skb);
-
/* This is wrong! We should preserve the original fragment
* boundaries by preserving frag_list rather than refragmenting.
*/
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index a1f697ec4fc2..0ce26a0f7913 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1067,19 +1067,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
struct net_bridge *br = netdev_priv(dev);
int err;
+ err = register_netdevice(dev);
+ if (err)
+ return err;
+
if (tb[IFLA_ADDRESS]) {
spin_lock_bh(&br->lock);
br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
spin_unlock_bh(&br->lock);
}
- err = register_netdevice(dev);
- if (err)
- return err;
-
err = br_changelink(dev, tb, data);
if (err)
- unregister_netdevice(dev);
+ br_dev_delete(dev, NULL);
+
return err;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 630704d8d6a2..3b67c1e5756f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1300,6 +1300,7 @@ void netdev_notify_peers(struct net_device *dev)
{
rtnl_lock();
call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
+ call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev);
rtnl_unlock();
}
EXPORT_SYMBOL(netdev_notify_peers);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2e9a1c2818c7..b5c351d2830b 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -261,7 +261,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
spin_lock_irqsave(&net->nsid_lock, flags);
peer = idr_find(&net->netns_ids, id);
if (peer)
- get_net(peer);
+ peer = maybe_get_net(peer);
spin_unlock_irqrestore(&net->nsid_lock, flags);
rcu_read_unlock();
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d33609c2f276..86b619501350 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3676,7 +3676,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
struct sock *sk = skb->sk;
if (!skb_may_tx_timestamp(sk, false))
- return;
+ goto err;
/* Take a reference to prevent skb_orphan() from freeing the socket,
* but only if the socket refcount is not zero.
@@ -3685,7 +3685,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
*skb_hwtstamps(skb) = *hwtstamps;
__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
sock_put(sk);
+ return;
}
+
+err:
+ kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 3963c3872c69..9653798da293 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -294,7 +294,7 @@ static int sock_diag_bind(struct net *net, int group)
case SKNLGRP_INET6_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET6])
request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET);
+ NETLINK_SOCK_DIAG, AF_INET6);
break;
}
return 0;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index a6beb7b6ae55..f5ef2115871f 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -360,14 +360,16 @@ static struct ctl_table net_core_table[] = {
.data = &sysctl_net_busy_poll,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
},
{
.procname = "busy_read",
.data = &sysctl_net_busy_read,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
},
#endif
#ifdef CONFIG_NET_SCHED
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 0212591b0077..63f99e9a821b 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1358,7 +1358,7 @@ skip:
static bool inetdev_valid_mtu(unsigned int mtu)
{
- return mtu >= 68;
+ return mtu >= IPV4_MIN_MTU;
}
static void inetdev_send_gratuitous_arp(struct net_device *dev,
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ee94bd32d6dc..7dc9f0680bf6 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1253,7 +1253,7 @@ fail:
static void ip_fib_net_exit(struct net *net)
{
- unsigned int i;
+ int i;
rtnl_lock();
#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1261,7 +1261,12 @@ static void ip_fib_net_exit(struct net *net)
RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
#endif
- for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
+ /* Destroy the tables in reverse order to guarantee that the
+ * local table, ID 255, is destroyed before the main table, ID
+ * 254. This is necessary as the local table may contain
+ * references to data contained in the main table.
+ */
+ for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
struct hlist_head *head = &net->ipv4.fib_table_hash[i];
struct hlist_node *tmp;
struct fib_table *tb;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 3809d523d012..b60106d34346 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -89,6 +89,7 @@
#include <linux/rtnetlink.h>
#include <linux/times.h>
#include <linux/pkt_sched.h>
+#include <linux/byteorder/generic.h>
#include <net/net_namespace.h>
#include <net/arp.h>
@@ -327,6 +328,23 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
return scount;
}
+/* source address selection per RFC 3376 section 4.2.13 */
+static __be32 igmpv3_get_srcaddr(struct net_device *dev,
+ const struct flowi4 *fl4)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+ if (!in_dev)
+ return htonl(INADDR_ANY);
+
+ for_ifa(in_dev) {
+ if (inet_ifa_match(fl4->saddr, ifa))
+ return fl4->saddr;
+ } endfor_ifa(in_dev);
+
+ return htonl(INADDR_ANY);
+}
+
static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
{
struct sk_buff *skb;
@@ -374,7 +392,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
pip->frag_off = htons(IP_DF);
pip->ttl = 1;
pip->daddr = fl4.daddr;
- pip->saddr = fl4.saddr;
+ pip->saddr = igmpv3_get_srcaddr(dev, &fl4);
pip->protocol = IPPROTO_IGMP;
pip->tot_len = 0; /* filled in later */
ip_select_ident(net, skb, NULL);
@@ -410,16 +428,17 @@ static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
}
static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
- int type, struct igmpv3_grec **ppgr)
+ int type, struct igmpv3_grec **ppgr, unsigned int mtu)
{
struct net_device *dev = pmc->interface->dev;
struct igmpv3_report *pih;
struct igmpv3_grec *pgr;
- if (!skb)
- skb = igmpv3_newpack(dev, dev->mtu);
- if (!skb)
- return NULL;
+ if (!skb) {
+ skb = igmpv3_newpack(dev, mtu);
+ if (!skb)
+ return NULL;
+ }
pgr = (struct igmpv3_grec *)skb_put(skb, sizeof(struct igmpv3_grec));
pgr->grec_type = type;
pgr->grec_auxwords = 0;
@@ -441,12 +460,17 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
struct igmpv3_grec *pgr = NULL;
struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
int scount, stotal, first, isquery, truncate;
+ unsigned int mtu;
if (pmc->multiaddr == IGMP_ALL_HOSTS)
return skb;
if (ipv4_is_local_multicast(pmc->multiaddr) && !sysctl_igmp_llm_reports)
return skb;
+ mtu = READ_ONCE(dev->mtu);
+ if (mtu < IPV4_MIN_MTU)
+ return skb;
+
isquery = type == IGMPV3_MODE_IS_INCLUDE ||
type == IGMPV3_MODE_IS_EXCLUDE;
truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
@@ -467,7 +491,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
if (skb)
igmpv3_sendpack(skb);
- skb = igmpv3_newpack(dev, dev->mtu);
+ skb = igmpv3_newpack(dev, mtu);
}
}
first = 1;
@@ -494,12 +518,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
pgr->grec_nsrcs = htons(scount);
if (skb)
igmpv3_sendpack(skb);
- skb = igmpv3_newpack(dev, dev->mtu);
+ skb = igmpv3_newpack(dev, mtu);
first = 1;
scount = 0;
}
if (first) {
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
first = 0;
}
if (!skb)
@@ -533,7 +557,7 @@ empty_source:
igmpv3_sendpack(skb);
skb = NULL; /* add_grhead will get a new one */
}
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
}
}
if (pgr)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index e2e162432aa3..7057a1b09b5e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -200,6 +200,7 @@ static void ip_expire(unsigned long arg)
qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
net = container_of(qp->q.net, struct net, ipv4.frags);
+ rcu_read_lock();
spin_lock(&qp->q.lock);
if (qp->q.flags & INET_FRAG_COMPLETE)
@@ -209,7 +210,7 @@ static void ip_expire(unsigned long arg)
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
if (!inet_frag_evicting(&qp->q)) {
- struct sk_buff *head = qp->q.fragments;
+ struct sk_buff *clone, *head = qp->q.fragments;
const struct iphdr *iph;
int err;
@@ -218,32 +219,40 @@ static void ip_expire(unsigned long arg)
if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
goto out;
- rcu_read_lock();
head->dev = dev_get_by_index_rcu(net, qp->iif);
if (!head->dev)
- goto out_rcu_unlock;
+ goto out;
+
/* skb has no dst, perform route lookup again */
iph = ip_hdr(head);
err = ip_route_input_noref(head, iph->daddr, iph->saddr,
iph->tos, head->dev);
if (err)
- goto out_rcu_unlock;
+ goto out;
/* Only an end host needs to send an ICMP
* "Fragment Reassembly Timeout" message, per RFC792.
*/
if (frag_expire_skip_icmp(qp->user) &&
(skb_rtable(head)->rt_type != RTN_LOCAL))
- goto out_rcu_unlock;
+ goto out;
+
+ clone = skb_clone(head, GFP_ATOMIC);
/* Send an ICMP "Fragment Reassembly Timeout" message. */
- icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
-out_rcu_unlock:
- rcu_read_unlock();
+ if (clone) {
+ spin_unlock(&qp->q.lock);
+ icmp_send(clone, ICMP_TIME_EXCEEDED,
+ ICMP_EXC_FRAGTIME, 0);
+ consume_skb(clone);
+ goto out_rcu_unlock;
+ }
}
out:
spin_unlock(&qp->q.lock);
+out_rcu_unlock:
+ rcu_read_unlock();
ipq_put(qp);
}
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 3310ac75e3f3..c18245e05d26 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -400,8 +400,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
dev->needed_headroom = t_hlen + hlen;
mtu -= (dev->hard_header_len + t_hlen);
- if (mtu < 68)
- mtu = 68;
+ if (mtu < IPV4_MIN_MTU)
+ mtu = IPV4_MIN_MTU;
return mtu;
}
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 461ca926fd39..6a20195a3a2a 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -158,6 +158,10 @@ static unsigned int ipv4_conntrack_local(void *priv,
if (skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
+
+ if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */
+ return NF_ACCEPT;
+
return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
}
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 5075b7ecd26d..98a56077f604 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -268,11 +268,6 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
/* maniptype == SRC for postrouting. */
enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook);
- /* We never see fragments: conntrack defrags on pre-routing
- * and local-out, and nf_nat_out protects post-routing.
- */
- NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
-
ct = nf_ct_get(skb, &ctinfo);
/* Can't track? It's not due to stress, or conntrack would
* have dropped it. Hence it's the user's responsibilty to
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 2689c9c4f1a0..182eb878633d 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1260,16 +1260,6 @@ static const struct nf_conntrack_expect_policy snmp_exp_policy = {
.timeout = 180,
};
-static struct nf_conntrack_helper snmp_helper __read_mostly = {
- .me = THIS_MODULE,
- .help = help,
- .expect_policy = &snmp_exp_policy,
- .name = "snmp",
- .tuple.src.l3num = AF_INET,
- .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT),
- .tuple.dst.protonum = IPPROTO_UDP,
-};
-
static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
.me = THIS_MODULE,
.help = help,
@@ -1288,17 +1278,10 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
static int __init nf_nat_snmp_basic_init(void)
{
- int ret = 0;
-
BUG_ON(nf_nat_snmp_hook != NULL);
RCU_INIT_POINTER(nf_nat_snmp_hook, help);
- ret = nf_conntrack_helper_register(&snmp_trap_helper);
- if (ret < 0) {
- nf_conntrack_helper_unregister(&snmp_helper);
- return ret;
- }
- return ret;
+ return nf_conntrack_helper_register(&snmp_trap_helper);
}
static void __exit nf_nat_snmp_basic_fini(void)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ca1031411aa7..7541427537d0 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -500,11 +500,16 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int err;
struct ip_options_data opt_copy;
struct raw_frag_vec rfv;
+ int hdrincl;
err = -EMSGSIZE;
if (len > 0xFFFF)
goto out;
+ /* hdrincl should be READ_ONCE(inet->hdrincl)
+ * but READ_ONCE() doesn't work with bit fields
+ */
+ hdrincl = inet->hdrincl;
/*
* Check the flags.
*/
@@ -579,7 +584,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
/* Linux does not mangle headers on raw sockets,
* so that IP options + IP_HDRINCL is non-sense.
*/
- if (inet->hdrincl)
+ if (hdrincl)
goto done;
if (ipc.opt->opt.srr) {
if (!daddr)
@@ -601,9 +606,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE,
- inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ hdrincl ? IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk) |
- (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
+ (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0, sk->sk_uid);
if (!saddr && ipc.oif) {
@@ -612,7 +617,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto done;
}
- if (!inet->hdrincl) {
+ if (!hdrincl) {
rfv.msg = msg;
rfv.hlen = 0;
@@ -637,7 +642,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto do_confirm;
back_from_confirm:
- if (inet->hdrincl)
+ if (hdrincl)
err = raw_send_hdrinc(sk, &fl4, msg, len,
&rt, msg->msg_flags);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5bdc0caa7f4c..e1a5e582ec48 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -627,9 +627,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
struct fnhe_hash_bucket *hash;
struct fib_nh_exception *fnhe;
struct rtable *rt;
+ u32 genid, hval;
unsigned int i;
int depth;
- u32 hval = fnhe_hashfun(daddr);
+
+ genid = fnhe_genid(dev_net(nh->nh_dev));
+ hval = fnhe_hashfun(daddr);
spin_lock_bh(&fnhe_lock);
@@ -652,12 +655,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
}
if (fnhe) {
+ if (fnhe->fnhe_genid != genid)
+ fnhe->fnhe_genid = genid;
if (gw)
fnhe->fnhe_gw = gw;
- if (pmtu) {
+ if (pmtu)
fnhe->fnhe_pmtu = pmtu;
- fnhe->fnhe_expires = max(1UL, expires);
- }
+ fnhe->fnhe_expires = max(1UL, expires);
/* Update all cached dsts too */
rt = rcu_dereference(fnhe->fnhe_rth_input);
if (rt)
@@ -676,7 +680,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_next = hash->chain;
rcu_assign_pointer(hash->chain, fnhe);
}
- fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
+ fnhe->fnhe_genid = genid;
fnhe->fnhe_daddr = daddr;
fnhe->fnhe_gw = gw;
fnhe->fnhe_pmtu = pmtu;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1d7eb209b4ae..9a77e35a7a9e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4943,7 +4943,7 @@ static void tcp_check_space(struct sock *sk)
if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
/* pairs with tcp_poll() */
- smp_mb__after_atomic();
+ smp_mb();
if (sk->sk_socket &&
test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
tcp_new_space(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8ad606f0bdf6..8a5552c46b43 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -826,7 +826,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_time_stamp,
req->ts_recent,
0,
- tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
+ tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
ip_hdr(skb)->tos);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 13951c4087d4..b9fac0522be6 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
{
- return min(tp->snd_ssthresh, tp->snd_cwnd-1);
+ return min(tp->snd_ssthresh, tp->snd_cwnd);
}
static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 1604163c2850..e1fe8d227ef1 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -216,7 +216,6 @@ lookup_protocol:
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
np->mc_loop = 1;
np->pmtudisc = IPV6_PMTUDISC_WANT;
- np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk));
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
/* Init the ipv4 part of the socket since we can have sockets
@@ -910,12 +909,12 @@ static int __init inet6_init(void)
err = register_pernet_subsys(&inet6_net_ops);
if (err)
goto register_pernet_fail;
- err = icmpv6_init();
- if (err)
- goto icmp_fail;
err = ip6_mr_init();
if (err)
goto ipmr_fail;
+ err = icmpv6_init();
+ if (err)
+ goto icmp_fail;
err = ndisc_init();
if (err)
goto ndisc_fail;
@@ -1033,10 +1032,10 @@ igmp_fail:
ndisc_cleanup();
ndisc_fail:
ip6_mr_cleanup();
-ipmr_fail:
- icmpv6_cleanup();
icmp_fail:
unregister_pernet_subsys(&inet6_net_ops);
+ipmr_fail:
+ icmpv6_cleanup();
register_pernet_fail:
sock_unregister(PF_INET6);
rtnl_unregister_all(PF_INET6);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a543b55cdd75..f22b3d9249d1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -149,6 +149,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
+static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+{
+ if (!np->autoflowlabel_set)
+ return ip6_default_np_autolabel(net);
+ else
+ return np->autoflowlabel;
+}
+
/*
* xmit an sk_buff (used by TCP, SCTP and DCCP)
* Note : socket lock is not held for SYNACK packets, but might be modified
@@ -212,7 +220,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
hlimit = ip6_dst_hoplimit(dst);
ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
- np->autoflowlabel, fl6));
+ ip6_autoflowlabel(net, np), fl6));
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
@@ -1679,7 +1687,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
ip6_flow_hdr(hdr, v6_cork->tclass,
ip6_make_flowlabel(net, skb, fl6->flowlabel,
- np->autoflowlabel, fl6));
+ ip6_autoflowlabel(net, np), fl6));
hdr->hop_limit = v6_cork->hop_limit;
hdr->nexthdr = proto;
hdr->saddr = fl6->saddr;
@@ -1781,8 +1789,10 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
cork.base.opt = NULL;
v6_cork.opt = NULL;
err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
- if (err)
+ if (err) {
+ ip6_cork_release(&cork, &v6_cork);
return ERR_PTR(err);
+ }
if (dontfrag < 0)
dontfrag = inet6_sk(sk)->dontfrag;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 600975c5eacf..cbea14e09bc5 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1083,10 +1083,11 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh);
}
- } else if (!(t->parms.flags &
- (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
- /* enable the cache only only if the routing decision does
- * not depend on the current inner header value
+ } else if (t->parms.proto != 0 && !(t->parms.flags &
+ (IP6_TNL_F_USE_ORIG_TCLASS |
+ IP6_TNL_F_USE_ORIG_FWMARK))) {
+ /* enable the cache only if neither the outer protocol nor the
+ * routing decision depends on the current inner header value
*/
use_cache = true;
}
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index f615f982961a..06db53f82f6e 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -189,12 +189,12 @@ static int vti6_tnl_create2(struct net_device *dev)
struct vti6_net *ip6n = net_generic(net, vti6_net_id);
int err;
+ dev->rtnl_link_ops = &vti6_link_ops;
err = register_netdevice(dev);
if (err < 0)
goto out;
strcpy(t->parms.name, dev->name);
- dev->rtnl_link_ops = &vti6_link_ops;
dev_hold(dev);
vti6_tnl_link(ip6n, t);
@@ -474,11 +474,15 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
if (!skb->ignore_df && skb->len > mtu) {
skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu);
- if (skb->protocol == htons(ETH_P_IPV6))
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- else
+ } else {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
+ }
return -EMSGSIZE;
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a4a30d2ca66f..435e26210587 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -872,6 +872,7 @@ pref_skip_coa:
break;
case IPV6_AUTOFLOWLABEL:
np->autoflowlabel = valbool;
+ np->autoflowlabel_set = 1;
retv = 0;
break;
}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index d64ee7e83664..06640685ff43 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1668,16 +1668,16 @@ static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel)
}
static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
- int type, struct mld2_grec **ppgr)
+ int type, struct mld2_grec **ppgr, unsigned int mtu)
{
- struct net_device *dev = pmc->idev->dev;
struct mld2_report *pmr;
struct mld2_grec *pgr;
- if (!skb)
- skb = mld_newpack(pmc->idev, dev->mtu);
- if (!skb)
- return NULL;
+ if (!skb) {
+ skb = mld_newpack(pmc->idev, mtu);
+ if (!skb)
+ return NULL;
+ }
pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec));
pgr->grec_type = type;
pgr->grec_auxwords = 0;
@@ -1700,10 +1700,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
struct mld2_grec *pgr = NULL;
struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
int scount, stotal, first, isquery, truncate;
+ unsigned int mtu;
if (pmc->mca_flags & MAF_NOREPORT)
return skb;
+ mtu = READ_ONCE(dev->mtu);
+ if (mtu < IPV6_MIN_MTU)
+ return skb;
+
isquery = type == MLD2_MODE_IS_INCLUDE ||
type == MLD2_MODE_IS_EXCLUDE;
truncate = type == MLD2_MODE_IS_EXCLUDE ||
@@ -1724,7 +1729,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
if (skb)
mld_sendpack(skb);
- skb = mld_newpack(idev, dev->mtu);
+ skb = mld_newpack(idev, mtu);
}
}
first = 1;
@@ -1751,12 +1756,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
pgr->grec_nsrcs = htons(scount);
if (skb)
mld_sendpack(skb);
- skb = mld_newpack(idev, dev->mtu);
+ skb = mld_newpack(idev, mtu);
first = 1;
scount = 0;
}
if (first) {
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
first = 0;
}
if (!skb)
@@ -1790,7 +1795,7 @@ empty_source:
mld_sendpack(skb);
skb = NULL; /* add_grhead will get a new one */
}
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
}
}
if (pgr)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 184f0fe35dc6..b7ea5eaa4fd1 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1093,6 +1093,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
ipip6_tunnel_link(sitn, t);
t->parms.iph.ttl = p->iph.ttl;
t->parms.iph.tos = p->iph.tos;
+ t->parms.iph.frag_off = p->iph.frag_off;
if (t->parms.link != p->link) {
t->parms.link = p->link;
ipip6_tunnel_bind_dev(t->dev);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4618f52a4abe..7e29a31dd4f0 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -951,7 +951,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
- tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
+ tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
0, 0);
}
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index d48281ca9c72..ec8f6a6485e3 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1856,7 +1856,7 @@ static __net_exit void l2tp_exit_net(struct net *net)
rcu_read_lock_bh();
list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- (void)l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_delete(tunnel);
}
rcu_read_unlock_bh();
}
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 665cc74df5c5..fb3248ff8b48 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -285,7 +285,7 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info
l2tp_tunnel_notify(&l2tp_nl_family, info,
tunnel, L2TP_CMD_TUNNEL_DELETE);
- (void) l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_delete(tunnel);
out:
return ret;
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 4d2aaebd4f97..e546a987a9d3 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -91,7 +91,7 @@ static const struct file_operations reset_ops = {
};
#endif
-static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = {
+static const char *hw_flag_names[] = {
#define FLAG(F) [IEEE80211_HW_##F] = #F
FLAG(HAS_RATE_CONTROL),
FLAG(RX_INCLUDES_FCS),
@@ -125,9 +125,6 @@ static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = {
FLAG(TDLS_WIDER_BW),
FLAG(SUPPORTS_AMSDU_IN_AMPDU),
FLAG(BEACON_TX_STATUS),
-
- /* keep last for the build bug below */
- (void *)0x1
#undef FLAG
};
@@ -147,7 +144,7 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf,
/* fail compilation if somebody adds or removes
* a flag without updating the name array above
*/
- BUILD_BUG_ON(hw_flag_names[NUM_IEEE80211_HW_FLAGS] != (void *)0x1);
+ BUILD_BUG_ON(ARRAY_SIZE(hw_flag_names) != NUM_IEEE80211_HW_FLAGS);
for (i = 0; i < NUM_IEEE80211_HW_FLAGS; i++) {
if (test_bit(i, local->hw.flags))
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 9e1ded80a992..1cbc7bd26de3 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -295,8 +295,6 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata,
/* Mesh PS mode. See IEEE802.11-2012 8.4.2.100.8 */
*pos |= ifmsh->ps_peers_deep_sleep ?
IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL : 0x00;
- *pos++ = 0x00;
-
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index e7c1b052c2a3..2c937c16dc27 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1999,12 +1999,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
seq_puts(seq,
" -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
} else {
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
const struct ip_vs_service *svc = v;
const struct ip_vs_iter *iter = seq->private;
const struct ip_vs_dest *dest;
struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
char *sched_name = sched ? sched->name : "none";
+ if (svc->ipvs != ipvs)
+ return 0;
if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 6d10002d23f8..8d34a488efc0 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -32,6 +32,13 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers");
+struct nfnl_cthelper {
+ struct list_head list;
+ struct nf_conntrack_helper helper;
+};
+
+static LIST_HEAD(nfnl_cthelper_list);
+
static int
nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
@@ -205,18 +212,20 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_helper *helper;
+ struct nfnl_cthelper *nfcth;
int ret;
if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN])
return -EINVAL;
- helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL);
- if (helper == NULL)
+ nfcth = kzalloc(sizeof(*nfcth), GFP_KERNEL);
+ if (nfcth == NULL)
return -ENOMEM;
+ helper = &nfcth->helper;
ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]);
if (ret < 0)
- goto err;
+ goto err1;
strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN);
helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
@@ -247,15 +256,101 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
ret = nf_conntrack_helper_register(helper);
if (ret < 0)
- goto err;
+ goto err2;
+ list_add_tail(&nfcth->list, &nfnl_cthelper_list);
return 0;
-err:
- kfree(helper);
+err2:
+ kfree(helper->expect_policy);
+err1:
+ kfree(nfcth);
return ret;
}
static int
+nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy,
+ struct nf_conntrack_expect_policy *new_policy,
+ const struct nlattr *attr)
+{
+ struct nlattr *tb[NFCTH_POLICY_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr,
+ nfnl_cthelper_expect_pol);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFCTH_POLICY_NAME] ||
+ !tb[NFCTH_POLICY_EXPECT_MAX] ||
+ !tb[NFCTH_POLICY_EXPECT_TIMEOUT])
+ return -EINVAL;
+
+ if (nla_strcmp(tb[NFCTH_POLICY_NAME], policy->name))
+ return -EBUSY;
+
+ new_policy->max_expected =
+ ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX]));
+ new_policy->timeout =
+ ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT]));
+
+ return 0;
+}
+
+static int nfnl_cthelper_update_policy_all(struct nlattr *tb[],
+ struct nf_conntrack_helper *helper)
+{
+ struct nf_conntrack_expect_policy new_policy[helper->expect_class_max + 1];
+ struct nf_conntrack_expect_policy *policy;
+ int i, err;
+
+ /* Check first that all policy attributes are well-formed, so we don't
+ * leave things in inconsistent state on errors.
+ */
+ for (i = 0; i < helper->expect_class_max + 1; i++) {
+
+ if (!tb[NFCTH_POLICY_SET + i])
+ return -EINVAL;
+
+ err = nfnl_cthelper_update_policy_one(&helper->expect_policy[i],
+ &new_policy[i],
+ tb[NFCTH_POLICY_SET + i]);
+ if (err < 0)
+ return err;
+ }
+ /* Now we can safely update them. */
+ for (i = 0; i < helper->expect_class_max + 1; i++) {
+ policy = (struct nf_conntrack_expect_policy *)
+ &helper->expect_policy[i];
+ policy->max_expected = new_policy->max_expected;
+ policy->timeout = new_policy->timeout;
+ }
+
+ return 0;
+}
+
+static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper,
+ const struct nlattr *attr)
+{
+ struct nlattr *tb[NFCTH_POLICY_SET_MAX + 1];
+ unsigned int class_max;
+ int err;
+
+ err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
+ nfnl_cthelper_expect_policy_set);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFCTH_POLICY_SET_NUM])
+ return -EINVAL;
+
+ class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM]));
+ if (helper->expect_class_max + 1 != class_max)
+ return -EBUSY;
+
+ return nfnl_cthelper_update_policy_all(tb, helper);
+}
+
+static int
nfnl_cthelper_update(const struct nlattr * const tb[],
struct nf_conntrack_helper *helper)
{
@@ -265,8 +360,7 @@ nfnl_cthelper_update(const struct nlattr * const tb[],
return -EBUSY;
if (tb[NFCTH_POLICY]) {
- ret = nfnl_cthelper_parse_expect_policy(helper,
- tb[NFCTH_POLICY]);
+ ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]);
if (ret < 0)
return ret;
}
@@ -295,7 +389,8 @@ nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb,
const char *helper_name;
struct nf_conntrack_helper *cur, *helper = NULL;
struct nf_conntrack_tuple tuple;
- int ret = 0, i;
+ struct nfnl_cthelper *nlcth;
+ int ret = 0;
if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
return -EINVAL;
@@ -306,31 +401,22 @@ nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb,
if (ret < 0)
return ret;
- rcu_read_lock();
- for (i = 0; i < nf_ct_helper_hsize && !helper; i++) {
- hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) {
+ list_for_each_entry(nlcth, &nfnl_cthelper_list, list) {
+ cur = &nlcth->helper;
- /* skip non-userspace conntrack helpers. */
- if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
- continue;
+ if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+ continue;
- if (strncmp(cur->name, helper_name,
- NF_CT_HELPER_NAME_LEN) != 0)
- continue;
+ if ((tuple.src.l3num != cur->tuple.src.l3num ||
+ tuple.dst.protonum != cur->tuple.dst.protonum))
+ continue;
- if ((tuple.src.l3num != cur->tuple.src.l3num ||
- tuple.dst.protonum != cur->tuple.dst.protonum))
- continue;
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
- if (nlh->nlmsg_flags & NLM_F_EXCL) {
- ret = -EEXIST;
- goto err;
- }
- helper = cur;
- break;
- }
+ helper = cur;
+ break;
}
- rcu_read_unlock();
if (helper == NULL)
ret = nfnl_cthelper_create(tb, &tuple);
@@ -338,9 +424,6 @@ nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb,
ret = nfnl_cthelper_update(tb, helper);
return ret;
-err:
- rcu_read_unlock();
- return ret;
}
static int
@@ -504,11 +587,12 @@ static int
nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
{
- int ret = -ENOENT, i;
+ int ret = -ENOENT;
struct nf_conntrack_helper *cur;
struct sk_buff *skb2;
char *helper_name = NULL;
struct nf_conntrack_tuple tuple;
+ struct nfnl_cthelper *nlcth;
bool tuple_set = false;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
@@ -529,45 +613,39 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb,
tuple_set = true;
}
- for (i = 0; i < nf_ct_helper_hsize; i++) {
- hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) {
+ list_for_each_entry(nlcth, &nfnl_cthelper_list, list) {
+ cur = &nlcth->helper;
+ if (helper_name &&
+ strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+ continue;
- /* skip non-userspace conntrack helpers. */
- if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
- continue;
+ if (tuple_set &&
+ (tuple.src.l3num != cur->tuple.src.l3num ||
+ tuple.dst.protonum != cur->tuple.dst.protonum))
+ continue;
- if (helper_name && strncmp(cur->name, helper_name,
- NF_CT_HELPER_NAME_LEN) != 0) {
- continue;
- }
- if (tuple_set &&
- (tuple.src.l3num != cur->tuple.src.l3num ||
- tuple.dst.protonum != cur->tuple.dst.protonum))
- continue;
-
- skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (skb2 == NULL) {
- ret = -ENOMEM;
- break;
- }
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (skb2 == NULL) {
+ ret = -ENOMEM;
+ break;
+ }
- ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq,
- NFNL_MSG_TYPE(nlh->nlmsg_type),
- NFNL_MSG_CTHELPER_NEW, cur);
- if (ret <= 0) {
- kfree_skb(skb2);
- break;
- }
+ ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(nlh->nlmsg_type),
+ NFNL_MSG_CTHELPER_NEW, cur);
+ if (ret <= 0) {
+ kfree_skb(skb2);
+ break;
+ }
- ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
- if (ret > 0)
- ret = 0;
+ ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
+ if (ret > 0)
+ ret = 0;
- /* this avoids a loop in nfnetlink. */
- return ret == -EAGAIN ? -ENOBUFS : ret;
- }
+ /* this avoids a loop in nfnetlink. */
+ return ret == -EAGAIN ? -ENOBUFS : ret;
}
return ret;
}
@@ -578,10 +656,10 @@ nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb,
{
char *helper_name = NULL;
struct nf_conntrack_helper *cur;
- struct hlist_node *tmp;
struct nf_conntrack_tuple tuple;
bool tuple_set = false, found = false;
- int i, j = 0, ret;
+ struct nfnl_cthelper *nlcth, *n;
+ int j = 0, ret;
if (tb[NFCTH_NAME])
helper_name = nla_data(tb[NFCTH_NAME]);
@@ -594,28 +672,27 @@ nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb,
tuple_set = true;
}
- for (i = 0; i < nf_ct_helper_hsize; i++) {
- hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i],
- hnode) {
- /* skip non-userspace conntrack helpers. */
- if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
- continue;
+ list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
+ cur = &nlcth->helper;
+ j++;
- j++;
+ if (helper_name &&
+ strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+ continue;
- if (helper_name && strncmp(cur->name, helper_name,
- NF_CT_HELPER_NAME_LEN) != 0) {
- continue;
- }
- if (tuple_set &&
- (tuple.src.l3num != cur->tuple.src.l3num ||
- tuple.dst.protonum != cur->tuple.dst.protonum))
- continue;
+ if (tuple_set &&
+ (tuple.src.l3num != cur->tuple.src.l3num ||
+ tuple.dst.protonum != cur->tuple.dst.protonum))
+ continue;
- found = true;
- nf_conntrack_helper_unregister(cur);
- }
+ found = true;
+ nf_conntrack_helper_unregister(cur);
+ kfree(cur->expect_policy);
+
+ list_del(&nlcth->list);
+ kfree(nlcth);
}
+
/* Make sure we return success if we flush and there is no helpers */
return (found || j == 0) ? 0 : -ENOENT;
}
@@ -664,20 +741,16 @@ err_out:
static void __exit nfnl_cthelper_exit(void)
{
struct nf_conntrack_helper *cur;
- struct hlist_node *tmp;
- int i;
+ struct nfnl_cthelper *nlcth, *n;
nfnetlink_subsys_unregister(&nfnl_cthelper_subsys);
- for (i=0; i<nf_ct_helper_hsize; i++) {
- hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i],
- hnode) {
- /* skip non-userspace conntrack helpers. */
- if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
- continue;
+ list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
+ cur = &nlcth->helper;
- nf_conntrack_helper_unregister(cur);
- }
+ nf_conntrack_helper_unregister(cur);
+ kfree(cur->expect_policy);
+ kfree(nlcth);
}
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 861c6615253b..f6837f9b6d6c 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -390,7 +390,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
GFP_ATOMIC);
if (!skb) {
skb_tx_error(entskb);
- return NULL;
+ goto nlmsg_failure;
}
nlh = nlmsg_put(skb, 0, 0,
@@ -399,7 +399,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
if (!nlh) {
skb_tx_error(entskb);
kfree_skb(skb);
- return NULL;
+ goto nlmsg_failure;
}
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = entry->state.pf;
@@ -542,12 +542,17 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
}
nlh->nlmsg_len = skb->len;
+ if (seclen)
+ security_release_secctx(secdata, seclen);
return skb;
nla_put_failure:
skb_tx_error(entskb);
kfree_skb(skb);
net_err_ratelimited("nf_queue: error creating packet message\n");
+nlmsg_failure:
+ if (seclen)
+ security_release_secctx(secdata, seclen);
return NULL;
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 9ecdd61c6463..48e1608414e6 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -96,6 +96,44 @@ EXPORT_SYMBOL_GPL(nl_table);
static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
+static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS];
+
+static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
+ "nlk_cb_mutex-ROUTE",
+ "nlk_cb_mutex-1",
+ "nlk_cb_mutex-USERSOCK",
+ "nlk_cb_mutex-FIREWALL",
+ "nlk_cb_mutex-SOCK_DIAG",
+ "nlk_cb_mutex-NFLOG",
+ "nlk_cb_mutex-XFRM",
+ "nlk_cb_mutex-SELINUX",
+ "nlk_cb_mutex-ISCSI",
+ "nlk_cb_mutex-AUDIT",
+ "nlk_cb_mutex-FIB_LOOKUP",
+ "nlk_cb_mutex-CONNECTOR",
+ "nlk_cb_mutex-NETFILTER",
+ "nlk_cb_mutex-IP6_FW",
+ "nlk_cb_mutex-DNRTMSG",
+ "nlk_cb_mutex-KOBJECT_UEVENT",
+ "nlk_cb_mutex-GENERIC",
+ "nlk_cb_mutex-17",
+ "nlk_cb_mutex-SCSITRANSPORT",
+ "nlk_cb_mutex-ECRYPTFS",
+ "nlk_cb_mutex-RDMA",
+ "nlk_cb_mutex-CRYPTO",
+ "nlk_cb_mutex-SMC",
+ "nlk_cb_mutex-23",
+ "nlk_cb_mutex-24",
+ "nlk_cb_mutex-25",
+ "nlk_cb_mutex-26",
+ "nlk_cb_mutex-27",
+ "nlk_cb_mutex-28",
+ "nlk_cb_mutex-29",
+ "nlk_cb_mutex-30",
+ "nlk_cb_mutex-31",
+ "nlk_cb_mutex-MAX_LINKS"
+};
+
static int netlink_dump(struct sock *sk);
static void netlink_skb_destructor(struct sk_buff *skb);
@@ -223,6 +261,9 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
struct sock *sk = skb->sk;
int ret = -ENOMEM;
+ if (!net_eq(dev_net(dev), sock_net(sk)))
+ return 0;
+
dev_hold(dev);
if (is_vmalloc_addr(skb->head))
@@ -585,6 +626,9 @@ static int __netlink_create(struct net *net, struct socket *sock,
} else {
nlk->cb_mutex = &nlk->cb_def_mutex;
mutex_init(nlk->cb_mutex);
+ lockdep_set_class_and_name(nlk->cb_mutex,
+ nlk_cb_mutex_keys + protocol,
+ nlk_cb_mutex_key_strings[protocol]);
}
init_waitqueue_head(&nlk->wait);
@@ -2203,6 +2247,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
cb = &nlk->cb;
memset(cb, 0, sizeof(*cb));
+ cb->start = control->start;
cb->dump = control->dump;
cb->done = control->done;
cb->nlh = nlh;
@@ -2216,6 +2261,9 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
mutex_unlock(nlk->cb_mutex);
+ if (cb->start)
+ cb->start(cb);
+
ret = netlink_dump(sk);
sock_put(sk);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index bc0e504f33a6..8e63662c6fb0 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -513,6 +513,20 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
}
EXPORT_SYMBOL(genlmsg_put);
+static int genl_lock_start(struct netlink_callback *cb)
+{
+ /* our ops are always const - netlink API doesn't propagate that */
+ const struct genl_ops *ops = cb->data;
+ int rc = 0;
+
+ if (ops->start) {
+ genl_lock();
+ rc = ops->start(cb);
+ genl_unlock();
+ }
+ return rc;
+}
+
static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
/* our ops are always const - netlink API doesn't propagate that */
@@ -577,6 +591,7 @@ static int genl_family_rcv_msg(struct genl_family *family,
.module = family->module,
/* we have const, but the netlink API doesn't */
.data = (void *)ops,
+ .start = genl_lock_start,
.dump = genl_lock_dumpit,
.done = genl_lock_done,
};
@@ -588,6 +603,7 @@ static int genl_family_rcv_msg(struct genl_family *family,
} else {
struct netlink_dump_control c = {
.module = family->module,
+ .start = ops->start,
.dump = ops->dumpit,
.done = ops->done,
};
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 1584f89c456a..92ca3e106c2b 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1665,7 +1665,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
atomic_long_set(&rollover->num, 0);
atomic_long_set(&rollover->num_huge, 0);
atomic_long_set(&rollover->num_failed, 0);
- po->rollover = rollover;
}
match = NULL;
@@ -1710,6 +1709,8 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
__dev_remove_pack(&po->prot_hook);
po->fanout = match;
+ po->rollover = rollover;
+ rollover = NULL;
atomic_inc(&match->sk_ref);
__fanout_link(sk, po);
err = 0;
@@ -1723,10 +1724,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
}
out:
- if (err && rollover) {
- kfree_rcu(rollover, rcu);
- po->rollover = NULL;
- }
+ kfree(rollover);
mutex_unlock(&fanout_mutex);
return err;
}
@@ -1750,11 +1748,6 @@ static struct packet_fanout *fanout_release(struct sock *sk)
list_del(&f->list);
else
f = NULL;
-
- if (po->rollover) {
- kfree_rcu(po->rollover, rcu);
- po->rollover = NULL;
- }
}
mutex_unlock(&fanout_mutex);
@@ -2914,6 +2907,7 @@ static int packet_release(struct socket *sock)
synchronize_net();
if (f) {
+ kfree(po->rollover);
fanout_release_data(f);
kfree(f);
}
@@ -2982,6 +2976,10 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
if (need_rehook) {
if (po->running) {
rcu_read_unlock();
+ /* prevents packet_notifier() from calling
+ * register_prot_hook()
+ */
+ po->num = 0;
__unregister_prot_hook(sk, true);
rcu_read_lock();
dev_curr = po->prot_hook.dev;
@@ -2990,6 +2988,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
dev->ifindex);
}
+ BUG_ON(po->running);
po->num = proto;
po->prot_hook.type = proto;
@@ -3771,7 +3770,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
void *data = &val;
union tpacket_stats_u st;
struct tpacket_rollover_stats rstats;
- struct packet_rollover *rollover;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
@@ -3850,18 +3848,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
0);
break;
case PACKET_ROLLOVER_STATS:
- rcu_read_lock();
- rollover = rcu_dereference(po->rollover);
- if (rollover) {
- rstats.tp_all = atomic_long_read(&rollover->num);
- rstats.tp_huge = atomic_long_read(&rollover->num_huge);
- rstats.tp_failed = atomic_long_read(&rollover->num_failed);
- data = &rstats;
- lv = sizeof(rstats);
- }
- rcu_read_unlock();
- if (!rollover)
+ if (!po->rollover)
return -EINVAL;
+ rstats.tp_all = atomic_long_read(&po->rollover->num);
+ rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
+ rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
+ data = &rstats;
+ lv = sizeof(rstats);
break;
case PACKET_TX_HAS_OFF:
val = po->tp_tx_has_off;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 9ee46314b7d7..d55bfc34d6b3 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -92,7 +92,6 @@ struct packet_fanout {
struct packet_rollover {
int sock;
- struct rcu_head rcu;
atomic_long_t num;
atomic_long_t num_huge;
atomic_long_t num_failed;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 8d3a851a3476..bdfc395d1be2 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -184,7 +184,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
long i;
int ret;
- if (rs->rs_bound_addr == 0) {
+ if (rs->rs_bound_addr == 0 || !rs->rs_transport) {
ret = -ENOTCONN; /* XXX not a great errno */
goto out;
}
@@ -517,6 +517,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args)
local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
+ if (args->nr_local == 0)
+ return -EINVAL;
+
/* figure out the number of pages in the vector */
for (i = 0; i < args->nr_local; i++) {
if (copy_from_user(&vec, &local_vec[i],
@@ -866,6 +869,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
err:
if (page)
put_page(page);
+ rm->atomic.op_active = 0;
kfree(rm->atomic.op_notifier);
return ret;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index d0dff0cd8186..cce4e6ada7fa 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -199,9 +199,13 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p);
if (p->set_tc_index) {
+ int wlen = skb_network_offset(skb);
+
switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
- if (skb_cow_head(skb, sizeof(struct iphdr)))
+ wlen += sizeof(struct iphdr);
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
goto drop;
skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
@@ -209,7 +213,9 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
break;
case htons(ETH_P_IPV6):
- if (skb_cow_head(skb, sizeof(struct ipv6hdr)))
+ wlen += sizeof(struct ipv6hdr);
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
goto drop;
skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 95d7b15dad21..e371a0d90068 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -166,7 +166,7 @@ static const char *const sctp_timer_tbl[] = {
/* Lookup timer debug name. */
const char *sctp_tname(const sctp_subtype_t id)
{
- if (id.timeout <= SCTP_EVENT_TIMEOUT_MAX)
+ if (id.timeout < ARRAY_SIZE(sctp_timer_tbl))
return sctp_timer_tbl[id.timeout];
return "unknown_timer";
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7f0f689b8d2b..a870d27ca778 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -82,8 +82,8 @@
/* Forward declarations for internal helper functions. */
static int sctp_writeable(struct sock *sk);
static void sctp_wfree(struct sk_buff *skb);
-static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p,
- size_t msg_len);
+static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
+ size_t msg_len, struct sock **orig_sk);
static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
static int sctp_wait_for_accept(struct sock *sk, long timeo);
@@ -1953,9 +1953,16 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
if (!sctp_wspace(asoc)) {
- err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
- if (err)
+ /* sk can be changed by peel off when waiting for buf. */
+ err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk);
+ if (err) {
+ if (err == -ESRCH) {
+ /* asoc is already dead. */
+ new_asoc = NULL;
+ err = -EPIPE;
+ }
goto out_free;
+ }
}
/* If an address is passed with the sendto/sendmsg call, it is used
@@ -4146,7 +4153,7 @@ static int sctp_init_sock(struct sock *sk)
SCTP_DBG_OBJCNT_INC(sock);
local_bh_disable();
- percpu_counter_inc(&sctp_sockets_allocated);
+ sk_sockets_allocated_inc(sk);
sock_prot_inuse_add(net, sk->sk_prot, 1);
/* Nothing can fail after this block, otherwise
@@ -4190,7 +4197,7 @@ static void sctp_destroy_sock(struct sock *sk)
}
sctp_endpoint_free(sp->ep);
local_bh_disable();
- percpu_counter_dec(&sctp_sockets_allocated);
+ sk_sockets_allocated_dec(sk);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
local_bh_enable();
}
@@ -4460,12 +4467,6 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
if (!asoc)
return -EINVAL;
- /* If there is a thread waiting on more sndbuf space for
- * sending on this asoc, it cannot be peeled.
- */
- if (waitqueue_active(&asoc->wait))
- return -EBUSY;
-
/* An association cannot be branched off from an already peeled-off
* socket, nor is this supported for tcp style sockets.
*/
@@ -6975,7 +6976,7 @@ void sctp_sock_rfree(struct sk_buff *skb)
/* Helper function to wait for space in the sndbuf. */
static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
- size_t msg_len)
+ size_t msg_len, struct sock **orig_sk)
{
struct sock *sk = asoc->base.sk;
int err = 0;
@@ -6992,10 +6993,11 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
for (;;) {
prepare_to_wait_exclusive(&asoc->wait, &wait,
TASK_INTERRUPTIBLE);
+ if (asoc->base.dead)
+ goto do_dead;
if (!*timeo_p)
goto do_nonblock;
- if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING ||
- asoc->base.dead)
+ if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING)
goto do_error;
if (signal_pending(current))
goto do_interrupted;
@@ -7008,11 +7010,17 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
release_sock(sk);
current_timeo = schedule_timeout(current_timeo);
lock_sock(sk);
+ if (sk != asoc->base.sk) {
+ release_sock(sk);
+ sk = asoc->base.sk;
+ lock_sock(sk);
+ }
*timeo_p = current_timeo;
}
out:
+ *orig_sk = sk;
finish_wait(&asoc->wait, &wait);
/* Release the association's refcnt. */
@@ -7020,6 +7028,10 @@ out:
return err;
+do_dead:
+ err = -ESRCH;
+ goto out;
+
do_error:
err = -EPIPE;
goto out;
diff --git a/net/socket.c b/net/socket.c
index 18aff3d804ec..7a05647ce4ce 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1711,6 +1711,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
/* We assume all kernel code knows the size of sockaddr_storage */
msg.msg_namelen = 0;
msg.msg_iocb = NULL;
+ msg.msg_flags = 0;
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags);
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 2410d557ae39..89731c9023f0 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -231,6 +231,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
goto out_free_groups;
GROUP_AT(creds->cr_group_info, i) = kgid;
}
+ groups_sort(creds->cr_group_info);
return 0;
out_free_groups:
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 033fec307528..036bbf2b44c1 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -481,6 +481,7 @@ static int rsc_parse(struct cache_detail *cd,
goto out;
GROUP_AT(rsci.cred.cr_group_info, i) = kgid;
}
+ groups_sort(rsci.cred.cr_group_info);
/* mech name */
len = qword_get(&mesg, buf, mlen);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 73ad57a59989..1cb35c753dcd 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -273,10 +273,9 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task)
static void rpc_set_active(struct rpc_task *task)
{
- trace_rpc_task_begin(task->tk_client, task, NULL);
-
rpc_task_set_debuginfo(task);
set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
+ trace_rpc_task_begin(task->tk_client, task, NULL);
}
/*
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 621ca7b4a155..98db1715cb17 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -520,6 +520,7 @@ static int unix_gid_parse(struct cache_detail *cd,
GROUP_AT(ug.gi, i) = kgid;
}
+ groups_sort(ug.gi);
ugp = unix_gid_lookup(cd, uid);
if (ugp) {
struct cache_head *ch;
@@ -827,6 +828,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
GROUP_AT(cred->cr_group_info, i) = kgid;
}
+ groups_sort(cred->cr_group_info);
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
*authp = rpc_autherr_badverf;
return SVC_DENIED;
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 50f5b0ca7b3c..f351863076c2 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -311,6 +311,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con)
newcon->usr_data = s->tipc_conn_new(newcon->conid);
if (!newcon->usr_data) {
sock_release(newsock);
+ conn_put(newcon);
return -ENOMEM;
}
@@ -618,14 +619,12 @@ int tipc_server_start(struct tipc_server *s)
void tipc_server_stop(struct tipc_server *s)
{
struct tipc_conn *con;
- int total = 0;
int id;
spin_lock_bh(&s->idr_lock);
- for (id = 0; total < s->idr_in_use; id++) {
+ for (id = 0; s->idr_in_use; id++) {
con = idr_find(&s->conn_idr, id);
if (con) {
- total++;
spin_unlock_bh(&s->idr_lock);
tipc_close_conn(con);
spin_lock_bh(&s->idr_lock);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 4096f699ba00..54ea796518dc 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1307,7 +1307,7 @@ EXPORT_SYMBOL(xfrm_policy_delete);
int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
- struct net *net = xp_net(pol);
+ struct net *net = sock_net(sk);
struct xfrm_policy *old_pol;
#ifdef CONFIG_XFRM_SUB_POLICY
@@ -1361,6 +1361,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
newp->xfrm_nr = old->xfrm_nr;
newp->index = old->index;
newp->type = old->type;
+ newp->family = old->family;
memcpy(newp->xfrm_vec, old->xfrm_vec,
newp->xfrm_nr*sizeof(struct xfrm_tmpl));
write_lock_bh(&net->xfrm.xfrm_policy_lock);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index c0aa1b18fe8f..7944daeb7378 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1845,6 +1845,13 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
struct xfrm_mgr *km;
struct xfrm_policy *pol = NULL;
+ if (!optval && !optlen) {
+ xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
+ xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
+ __sk_dst_reset(sk);
+ return 0;
+ }
+
if (optlen <= 0 || optlen > PAGE_SIZE)
return -EMSGSIZE;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 68010a01ea36..8b71b09e5ab6 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1660,32 +1660,34 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
static int xfrm_dump_policy_done(struct netlink_callback *cb)
{
- struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1];
+ struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
struct net *net = sock_net(cb->skb->sk);
xfrm_policy_walk_done(walk, net);
return 0;
}
+static int xfrm_dump_policy_start(struct netlink_callback *cb)
+{
+ struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
+
+ BUILD_BUG_ON(sizeof(*walk) > sizeof(cb->args));
+
+ xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY);
+ return 0;
+}
+
static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
- struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1];
+ struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
struct xfrm_dump_info info;
- BUILD_BUG_ON(sizeof(struct xfrm_policy_walk) >
- sizeof(cb->args) - sizeof(cb->args[0]));
-
info.in_skb = cb->skb;
info.out_skb = skb;
info.nlmsg_seq = cb->nlh->nlmsg_seq;
info.nlmsg_flags = NLM_F_MULTI;
- if (!cb->args[0]) {
- cb->args[0] = 1;
- xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY);
- }
-
(void) xfrm_policy_walk(net, walk, dump_one_policy, &info);
return skb->len;
@@ -2437,6 +2439,7 @@ static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
static const struct xfrm_link {
int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
+ int (*start)(struct netlink_callback *);
int (*dump)(struct sk_buff *, struct netlink_callback *);
int (*done)(struct netlink_callback *);
const struct nla_policy *nla_pol;
@@ -2450,6 +2453,7 @@ static const struct xfrm_link {
[XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_add_policy },
[XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy },
[XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy,
+ .start = xfrm_dump_policy_start,
.dump = xfrm_dump_policy,
.done = xfrm_dump_policy_done },
[XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = { .doit = xfrm_alloc_userspi },
@@ -2501,6 +2505,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct netlink_dump_control c = {
+ .start = link->start,
.dump = link->dump,
.done = link->done,
};
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index c3b3c94e2446..4dfe62241909 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -129,6 +129,12 @@ _c_flags += $(if $(patsubst n%,, \
$(CFLAGS_KASAN))
endif
+ifeq ($(CONFIG_KCOV),y)
+_c_flags += $(if $(patsubst n%,, \
+ $(KCOV_INSTRUMENT_$(basetarget).o)$(KCOV_INSTRUMENT)y), \
+ $(CFLAGS_KCOV))
+endif
+
# If building the kernel in a separate objtree expand all occurrences
# of -Idir to -I$(srctree)/dir except for absolute paths (starting with '/').
diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index 88632df4381b..dafaf96e0a34 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -423,13 +423,15 @@ static struct string_list *read_node(FILE *f)
struct string_list node = {
.string = buffer,
.tag = SYM_NORMAL };
- int c;
+ int c, in_string = 0;
while ((c = fgetc(f)) != EOF) {
- if (c == ' ') {
+ if (!in_string && c == ' ') {
if (node.string == buffer)
continue;
break;
+ } else if (c == '"') {
+ in_string = !in_string;
} else if (c == '\n') {
if (node.string == buffer)
return NULL;
diff --git a/scripts/module-common.lds b/scripts/module-common.lds
index 73a2c7da0e55..53234e85192a 100644
--- a/scripts/module-common.lds
+++ b/scripts/module-common.lds
@@ -19,4 +19,6 @@ SECTIONS {
. = ALIGN(8);
.init_array 0 : { *(SORT(.init_array.*)) *(.init_array) }
+
+ __jump_table 0 : ALIGN(8) { KEEP(*(__jump_table)) }
}
diff --git a/scripts/package/Makefile b/scripts/package/Makefile
index 493e226356ca..52917fb8e0c5 100644
--- a/scripts/package/Makefile
+++ b/scripts/package/Makefile
@@ -39,10 +39,9 @@ if test "$(objtree)" != "$(srctree)"; then \
false; \
fi ; \
$(srctree)/scripts/setlocalversion --save-scmversion; \
-ln -sf $(srctree) $(2); \
tar -cz $(RCS_TAR_IGNORE) -f $(2).tar.gz \
- $(addprefix $(2)/,$(TAR_CONTENT) $(3)); \
-rm -f $(2) $(objtree)/.scmversion
+ --transform 's:^:$(2)/:S' $(TAR_CONTENT) $(3); \
+rm -f $(objtree)/.scmversion
# rpm-pkg
# ---------------------------------------------------------------------------
diff --git a/security/Kconfig b/security/Kconfig
index d9c9b1f76059..547ff287104d 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -40,6 +40,16 @@ config SECURITY
If you are unsure how to answer this question, answer N.
+config PAGE_TABLE_ISOLATION
+ bool "Remove the kernel mapping in user mode"
+ default y
+ depends on X86_64 && SMP
+ help
+ This enforces a strict kernel and user space isolation, in order
+ to close hardware side channels on kernel address information.
+
+ If you are unsure how to answer this question, answer Y.
+
config SECURITYFS
bool "Enable the securityfs filesystem"
help
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index c21f09bf8b99..98289ba2a2e6 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -52,6 +52,8 @@ static int __init hash_setup(char *str)
ima_hash_algo = HASH_ALGO_SHA1;
else if (strncmp(str, "md5", 3) == 0)
ima_hash_algo = HASH_ALGO_MD5;
+ else
+ return 1;
goto out;
}
@@ -61,6 +63,8 @@ static int __init hash_setup(char *str)
break;
}
}
+ if (i == HASH_ALGO__LAST)
+ return 1;
out:
hash_setup_done = 1;
return 1;
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 2ce733342b5a..3ae3acf473c8 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -250,11 +250,12 @@ static int construct_key(struct key *key, const void *callout_info,
* The keyring selected is returned with an extra reference upon it which the
* caller must release.
*/
-static void construct_get_dest_keyring(struct key **_dest_keyring)
+static int construct_get_dest_keyring(struct key **_dest_keyring)
{
struct request_key_auth *rka;
const struct cred *cred = current_cred();
struct key *dest_keyring = *_dest_keyring, *authkey;
+ int ret;
kenter("%p", dest_keyring);
@@ -263,6 +264,8 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
/* the caller supplied one */
key_get(dest_keyring);
} else {
+ bool do_perm_check = true;
+
/* use a default keyring; falling through the cases until we
* find one that we actually have */
switch (cred->jit_keyring) {
@@ -277,8 +280,10 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
dest_keyring =
key_get(rka->dest_keyring);
up_read(&authkey->sem);
- if (dest_keyring)
+ if (dest_keyring) {
+ do_perm_check = false;
break;
+ }
}
case KEY_REQKEY_DEFL_THREAD_KEYRING:
@@ -313,11 +318,29 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
default:
BUG();
}
+
+ /*
+ * Require Write permission on the keyring. This is essential
+ * because the default keyring may be the session keyring, and
+ * joining a keyring only requires Search permission.
+ *
+ * However, this check is skipped for the "requestor keyring" so
+ * that /sbin/request-key can itself use request_key() to add
+ * keys to the original requestor's destination keyring.
+ */
+ if (dest_keyring && do_perm_check) {
+ ret = key_permission(make_key_ref(dest_keyring, 1),
+ KEY_NEED_WRITE);
+ if (ret) {
+ key_put(dest_keyring);
+ return ret;
+ }
+ }
}
*_dest_keyring = dest_keyring;
kleave(" [dk %d]", key_serial(dest_keyring));
- return;
+ return 0;
}
/*
@@ -442,12 +465,16 @@ static struct key *construct_key_and_link(struct keyring_search_context *ctx,
if (ctx->index_key.type == &key_type_keyring)
return ERR_PTR(-EPERM);
-
- user = key_user_lookup(current_fsuid());
- if (!user)
- return ERR_PTR(-ENOMEM);
- construct_get_dest_keyring(&dest_keyring);
+ ret = construct_get_dest_keyring(&dest_keyring);
+ if (ret)
+ goto error;
+
+ user = key_user_lookup(current_fsuid());
+ if (!user) {
+ ret = -ENOMEM;
+ goto error_put_dest_keyring;
+ }
ret = construct_alloc_key(ctx, dest_keyring, flags, user, &key);
key_user_put(user);
@@ -462,7 +489,7 @@ static struct key *construct_key_and_link(struct keyring_search_context *ctx,
} else if (ret == -EINPROGRESS) {
ret = 0;
} else {
- goto couldnt_alloc_key;
+ goto error_put_dest_keyring;
}
key_put(dest_keyring);
@@ -472,8 +499,9 @@ static struct key *construct_key_and_link(struct keyring_search_context *ctx,
construction_failed:
key_negate_and_link(key, key_negative_timeout, NULL, NULL);
key_put(key);
-couldnt_alloc_key:
+error_put_dest_keyring:
key_put(dest_keyring);
+error:
kleave(" = %d", ret);
return ERR_PTR(ret);
}
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 7f947f7c3331..0714b4c61a8b 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -80,10 +80,10 @@ static struct nlmsg_perm nlmsg_route_perms[] =
static struct nlmsg_perm nlmsg_tcpdiag_perms[] =
{
- { TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
- { DCCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
- { SOCK_DIAG_BY_FAMILY, NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
- { SOCK_DESTROY_BACKPORT, NETLINK_TCPDIAG_SOCKET__NLMSG_WRITE },
+ { TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
+ { DCCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
+ { SOCK_DIAG_BY_FAMILY, NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
+ { SOCK_DESTROY_BACKPORT,NETLINK_TCPDIAG_SOCKET__NLMSG_WRITE },
};
static struct nlmsg_perm nlmsg_xfrm_perms[] =
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index 33e72c809e50..494b7b533366 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -465,7 +465,6 @@ static int snd_pcm_hw_param_near(struct snd_pcm_substream *pcm,
v = snd_pcm_hw_param_last(pcm, params, var, dir);
else
v = snd_pcm_hw_param_first(pcm, params, var, dir);
- snd_BUG_ON(v < 0);
return v;
}
@@ -1370,8 +1369,11 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha
if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
return tmp;
- mutex_lock(&runtime->oss.params_lock);
while (bytes > 0) {
+ if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
+ tmp = -ERESTARTSYS;
+ break;
+ }
if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
tmp = bytes;
if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes)
@@ -1415,14 +1417,18 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha
xfer += tmp;
if ((substream->f_flags & O_NONBLOCK) != 0 &&
tmp != runtime->oss.period_bytes)
- break;
+ tmp = -EAGAIN;
}
- }
- mutex_unlock(&runtime->oss.params_lock);
- return xfer;
-
err:
- mutex_unlock(&runtime->oss.params_lock);
+ mutex_unlock(&runtime->oss.params_lock);
+ if (tmp < 0)
+ break;
+ if (signal_pending(current)) {
+ tmp = -ERESTARTSYS;
+ break;
+ }
+ tmp = 0;
+ }
return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
}
@@ -1470,8 +1476,11 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use
if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
return tmp;
- mutex_lock(&runtime->oss.params_lock);
while (bytes > 0) {
+ if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
+ tmp = -ERESTARTSYS;
+ break;
+ }
if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
if (runtime->oss.buffer_used == 0) {
tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1);
@@ -1502,12 +1511,16 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use
bytes -= tmp;
xfer += tmp;
}
- }
- mutex_unlock(&runtime->oss.params_lock);
- return xfer;
-
err:
- mutex_unlock(&runtime->oss.params_lock);
+ mutex_unlock(&runtime->oss.params_lock);
+ if (tmp < 0)
+ break;
+ if (signal_pending(current)) {
+ tmp = -ERESTARTSYS;
+ break;
+ }
+ tmp = 0;
+ }
return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
}
diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c
index 727ac44d39f4..a84a1d3d23e5 100644
--- a/sound/core/oss/pcm_plugin.c
+++ b/sound/core/oss/pcm_plugin.c
@@ -591,18 +591,26 @@ snd_pcm_sframes_t snd_pcm_plug_write_transfer(struct snd_pcm_substream *plug, st
snd_pcm_sframes_t frames = size;
plugin = snd_pcm_plug_first(plug);
- while (plugin && frames > 0) {
+ while (plugin) {
+ if (frames <= 0)
+ return frames;
if ((next = plugin->next) != NULL) {
snd_pcm_sframes_t frames1 = frames;
- if (plugin->dst_frames)
+ if (plugin->dst_frames) {
frames1 = plugin->dst_frames(plugin, frames);
+ if (frames1 <= 0)
+ return frames1;
+ }
if ((err = next->client_channels(next, frames1, &dst_channels)) < 0) {
return err;
}
if (err != frames1) {
frames = err;
- if (plugin->src_frames)
+ if (plugin->src_frames) {
frames = plugin->src_frames(plugin, frames1);
+ if (frames <= 0)
+ return frames;
+ }
}
} else
dst_channels = NULL;
diff --git a/sound/core/pcm.c b/sound/core/pcm.c
index 8e980aa678d0..074363b63cc4 100644
--- a/sound/core/pcm.c
+++ b/sound/core/pcm.c
@@ -149,7 +149,9 @@ static int snd_pcm_control_ioctl(struct snd_card *card,
err = -ENXIO;
goto _error;
}
+ mutex_lock(&pcm->open_mutex);
err = snd_pcm_info_user(substream, info);
+ mutex_unlock(&pcm->open_mutex);
_error:
mutex_unlock(&register_mutex);
return err;
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index cd20f91326fe..7b805766306e 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -1664,7 +1664,7 @@ int snd_pcm_hw_param_first(struct snd_pcm_substream *pcm,
return changed;
if (params->rmask) {
int err = snd_pcm_hw_refine(pcm, params);
- if (snd_BUG_ON(err < 0))
+ if (err < 0)
return err;
}
return snd_pcm_hw_param_value(params, var, dir);
@@ -1711,7 +1711,7 @@ int snd_pcm_hw_param_last(struct snd_pcm_substream *pcm,
return changed;
if (params->rmask) {
int err = snd_pcm_hw_refine(pcm, params);
- if (snd_BUG_ON(err < 0))
+ if (err < 0)
return err;
}
return snd_pcm_hw_param_value(params, var, dir);
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index b450a27588c8..16f8124b1150 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -579,15 +579,14 @@ static int snd_rawmidi_info_user(struct snd_rawmidi_substream *substream,
return 0;
}
-int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
+static int __snd_rawmidi_info_select(struct snd_card *card,
+ struct snd_rawmidi_info *info)
{
struct snd_rawmidi *rmidi;
struct snd_rawmidi_str *pstr;
struct snd_rawmidi_substream *substream;
- mutex_lock(&register_mutex);
rmidi = snd_rawmidi_search(card, info->device);
- mutex_unlock(&register_mutex);
if (!rmidi)
return -ENXIO;
if (info->stream < 0 || info->stream > 1)
@@ -603,6 +602,16 @@ int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info
}
return -ENXIO;
}
+
+int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
+{
+ int ret;
+
+ mutex_lock(&register_mutex);
+ ret = __snd_rawmidi_info_select(card, info);
+ mutex_unlock(&register_mutex);
+ return ret;
+}
EXPORT_SYMBOL(snd_rawmidi_info_select);
static int snd_rawmidi_info_select_user(struct snd_card *card,
diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c
index 293104926098..3be67560ead5 100644
--- a/sound/core/seq/seq_timer.c
+++ b/sound/core/seq/seq_timer.c
@@ -355,7 +355,7 @@ static int initialize_timer(struct snd_seq_timer *tmr)
unsigned long freq;
t = tmr->timeri->timer;
- if (snd_BUG_ON(!t))
+ if (!t)
return -EINVAL;
freq = tmr->preferred_resolution;
diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c
index 54f348a4fb78..cbd20cb8ca11 100644
--- a/sound/drivers/aloop.c
+++ b/sound/drivers/aloop.c
@@ -39,6 +39,7 @@
#include <sound/core.h>
#include <sound/control.h>
#include <sound/pcm.h>
+#include <sound/pcm_params.h>
#include <sound/info.h>
#include <sound/initval.h>
@@ -305,19 +306,6 @@ static int loopback_trigger(struct snd_pcm_substream *substream, int cmd)
return 0;
}
-static void params_change_substream(struct loopback_pcm *dpcm,
- struct snd_pcm_runtime *runtime)
-{
- struct snd_pcm_runtime *dst_runtime;
-
- if (dpcm == NULL || dpcm->substream == NULL)
- return;
- dst_runtime = dpcm->substream->runtime;
- if (dst_runtime == NULL)
- return;
- dst_runtime->hw = dpcm->cable->hw;
-}
-
static void params_change(struct snd_pcm_substream *substream)
{
struct snd_pcm_runtime *runtime = substream->runtime;
@@ -329,10 +317,6 @@ static void params_change(struct snd_pcm_substream *substream)
cable->hw.rate_max = runtime->rate;
cable->hw.channels_min = runtime->channels;
cable->hw.channels_max = runtime->channels;
- params_change_substream(cable->streams[SNDRV_PCM_STREAM_PLAYBACK],
- runtime);
- params_change_substream(cable->streams[SNDRV_PCM_STREAM_CAPTURE],
- runtime);
}
static int loopback_prepare(struct snd_pcm_substream *substream)
@@ -620,26 +604,29 @@ static unsigned int get_cable_index(struct snd_pcm_substream *substream)
static int rule_format(struct snd_pcm_hw_params *params,
struct snd_pcm_hw_rule *rule)
{
+ struct loopback_pcm *dpcm = rule->private;
+ struct loopback_cable *cable = dpcm->cable;
+ struct snd_mask m;
- struct snd_pcm_hardware *hw = rule->private;
- struct snd_mask *maskp = hw_param_mask(params, rule->var);
-
- maskp->bits[0] &= (u_int32_t)hw->formats;
- maskp->bits[1] &= (u_int32_t)(hw->formats >> 32);
- memset(maskp->bits + 2, 0, (SNDRV_MASK_MAX-64) / 8); /* clear rest */
- if (! maskp->bits[0] && ! maskp->bits[1])
- return -EINVAL;
- return 0;
+ snd_mask_none(&m);
+ mutex_lock(&dpcm->loopback->cable_lock);
+ m.bits[0] = (u_int32_t)cable->hw.formats;
+ m.bits[1] = (u_int32_t)(cable->hw.formats >> 32);
+ mutex_unlock(&dpcm->loopback->cable_lock);
+ return snd_mask_refine(hw_param_mask(params, rule->var), &m);
}
static int rule_rate(struct snd_pcm_hw_params *params,
struct snd_pcm_hw_rule *rule)
{
- struct snd_pcm_hardware *hw = rule->private;
+ struct loopback_pcm *dpcm = rule->private;
+ struct loopback_cable *cable = dpcm->cable;
struct snd_interval t;
- t.min = hw->rate_min;
- t.max = hw->rate_max;
+ mutex_lock(&dpcm->loopback->cable_lock);
+ t.min = cable->hw.rate_min;
+ t.max = cable->hw.rate_max;
+ mutex_unlock(&dpcm->loopback->cable_lock);
t.openmin = t.openmax = 0;
t.integer = 0;
return snd_interval_refine(hw_param_interval(params, rule->var), &t);
@@ -648,22 +635,44 @@ static int rule_rate(struct snd_pcm_hw_params *params,
static int rule_channels(struct snd_pcm_hw_params *params,
struct snd_pcm_hw_rule *rule)
{
- struct snd_pcm_hardware *hw = rule->private;
+ struct loopback_pcm *dpcm = rule->private;
+ struct loopback_cable *cable = dpcm->cable;
struct snd_interval t;
- t.min = hw->channels_min;
- t.max = hw->channels_max;
+ mutex_lock(&dpcm->loopback->cable_lock);
+ t.min = cable->hw.channels_min;
+ t.max = cable->hw.channels_max;
+ mutex_unlock(&dpcm->loopback->cable_lock);
t.openmin = t.openmax = 0;
t.integer = 0;
return snd_interval_refine(hw_param_interval(params, rule->var), &t);
}
+static void free_cable(struct snd_pcm_substream *substream)
+{
+ struct loopback *loopback = substream->private_data;
+ int dev = get_cable_index(substream);
+ struct loopback_cable *cable;
+
+ cable = loopback->cables[substream->number][dev];
+ if (!cable)
+ return;
+ if (cable->streams[!substream->stream]) {
+ /* other stream is still alive */
+ cable->streams[substream->stream] = NULL;
+ } else {
+ /* free the cable */
+ loopback->cables[substream->number][dev] = NULL;
+ kfree(cable);
+ }
+}
+
static int loopback_open(struct snd_pcm_substream *substream)
{
struct snd_pcm_runtime *runtime = substream->runtime;
struct loopback *loopback = substream->private_data;
struct loopback_pcm *dpcm;
- struct loopback_cable *cable;
+ struct loopback_cable *cable = NULL;
int err = 0;
int dev = get_cable_index(substream);
@@ -682,7 +691,6 @@ static int loopback_open(struct snd_pcm_substream *substream)
if (!cable) {
cable = kzalloc(sizeof(*cable), GFP_KERNEL);
if (!cable) {
- kfree(dpcm);
err = -ENOMEM;
goto unlock;
}
@@ -700,19 +708,19 @@ static int loopback_open(struct snd_pcm_substream *substream)
/* are cached -> they do not reflect the actual state */
err = snd_pcm_hw_rule_add(runtime, 0,
SNDRV_PCM_HW_PARAM_FORMAT,
- rule_format, &runtime->hw,
+ rule_format, dpcm,
SNDRV_PCM_HW_PARAM_FORMAT, -1);
if (err < 0)
goto unlock;
err = snd_pcm_hw_rule_add(runtime, 0,
SNDRV_PCM_HW_PARAM_RATE,
- rule_rate, &runtime->hw,
+ rule_rate, dpcm,
SNDRV_PCM_HW_PARAM_RATE, -1);
if (err < 0)
goto unlock;
err = snd_pcm_hw_rule_add(runtime, 0,
SNDRV_PCM_HW_PARAM_CHANNELS,
- rule_channels, &runtime->hw,
+ rule_channels, dpcm,
SNDRV_PCM_HW_PARAM_CHANNELS, -1);
if (err < 0)
goto unlock;
@@ -724,6 +732,10 @@ static int loopback_open(struct snd_pcm_substream *substream)
else
runtime->hw = cable->hw;
unlock:
+ if (err < 0) {
+ free_cable(substream);
+ kfree(dpcm);
+ }
mutex_unlock(&loopback->cable_lock);
return err;
}
@@ -732,20 +744,10 @@ static int loopback_close(struct snd_pcm_substream *substream)
{
struct loopback *loopback = substream->private_data;
struct loopback_pcm *dpcm = substream->runtime->private_data;
- struct loopback_cable *cable;
- int dev = get_cable_index(substream);
loopback_timer_stop(dpcm);
mutex_lock(&loopback->cable_lock);
- cable = loopback->cables[substream->number][dev];
- if (cable->streams[!substream->stream]) {
- /* other stream is still alive */
- cable->streams[substream->stream] = NULL;
- } else {
- /* free the cable */
- loopback->cables[substream->number][dev] = NULL;
- kfree(cable);
- }
+ free_cable(substream);
mutex_unlock(&loopback->cable_lock);
return 0;
}
diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c
index 8fef1b8d1fd8..bd7bcf428bcf 100644
--- a/sound/hda/hdac_i915.c
+++ b/sound/hda/hdac_i915.c
@@ -183,7 +183,7 @@ static int hdac_component_master_match(struct device *dev, void *data)
*/
int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops *aops)
{
- if (WARN_ON(!hdac_acomp))
+ if (!hdac_acomp)
return -ENODEV;
hdac_acomp->audio_ops = aops;
@@ -240,7 +240,8 @@ out_master_del:
out_err:
kfree(acomp);
bus->audio_component = NULL;
- dev_err(dev, "failed to add i915 component master (%d)\n", ret);
+ hdac_acomp = NULL;
+ dev_info(dev, "failed to add i915 component master (%d)\n", ret);
return ret;
}
@@ -273,6 +274,7 @@ int snd_hdac_i915_exit(struct hdac_bus *bus)
kfree(acomp);
bus->audio_component = NULL;
+ hdac_acomp = NULL;
return 0;
}
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index e2e08fc73b50..20512fe32a97 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2088,9 +2088,11 @@ static int azx_probe_continue(struct azx *chip)
* for other chips, still continue probing as other
* codecs can be on the same link.
*/
- if (CONTROLLER_IN_GPU(pci))
+ if (CONTROLLER_IN_GPU(pci)) {
+ dev_err(chip->card->dev,
+ "HSW/BDW HD-audio HDMI/DP requires binding with gfx driver\n");
goto out_free;
- else
+ } else
goto skip_i915;
}
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index ac5de4365e15..c92b7ba344ef 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -261,6 +261,7 @@ enum {
CXT_FIXUP_HP_530,
CXT_FIXUP_CAP_MIX_AMP_5047,
CXT_FIXUP_MUTE_LED_EAPD,
+ CXT_FIXUP_HP_DOCK,
CXT_FIXUP_HP_SPECTRE,
CXT_FIXUP_HP_GATE_MIC,
};
@@ -778,6 +779,14 @@ static const struct hda_fixup cxt_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = cxt_fixup_mute_led_eapd,
},
+ [CXT_FIXUP_HP_DOCK] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x16, 0x21011020 }, /* line-out */
+ { 0x18, 0x2181103f }, /* line-in */
+ { }
+ }
+ },
[CXT_FIXUP_HP_SPECTRE] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
@@ -839,6 +848,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
SND_PCI_QUIRK(0x1025, 0x0543, "Acer Aspire One 522", CXT_FIXUP_STEREO_DMIC),
SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC),
SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", CXT_FIXUP_ASPIRE_DMIC),
+ SND_PCI_QUIRK(0x103c, 0x8079, "HP EliteBook 840 G3", CXT_FIXUP_HP_DOCK),
SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE),
SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC),
SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
@@ -872,6 +882,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = {
{ .id = CXT_PINCFG_LEMOTE_A1205, .name = "lemote-a1205" },
{ .id = CXT_FIXUP_OLPC_XO, .name = "olpc-xo" },
{ .id = CXT_FIXUP_MUTE_LED_EAPD, .name = "mute-led-eapd" },
+ { .id = CXT_FIXUP_HP_DOCK, .name = "hp-dock" },
{}
};
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index e5730a7d0480..5875a08d555e 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4839,6 +4839,7 @@ enum {
ALC286_FIXUP_HP_GPIO_LED,
ALC280_FIXUP_HP_GPIO2_MIC_HOTKEY,
ALC280_FIXUP_HP_DOCK_PINS,
+ ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED,
ALC280_FIXUP_HP_9480M,
ALC288_FIXUP_DELL_HEADSET_MODE,
ALC288_FIXUP_DELL1_MIC_NO_PRESENCE,
@@ -5377,6 +5378,16 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC280_FIXUP_HP_GPIO4
},
+ [ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x1b, 0x21011020 }, /* line-out */
+ { 0x18, 0x2181103f }, /* line-in */
+ { },
+ },
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HP_GPIO_MIC1_LED
+ },
[ALC280_FIXUP_HP_9480M] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc280_fixup_hp_9480m,
@@ -5629,7 +5640,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x2256, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
SND_PCI_QUIRK(0x103c, 0x2257, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
SND_PCI_QUIRK(0x103c, 0x2259, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
- SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
+ SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED),
SND_PCI_QUIRK(0x103c, 0x2260, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
SND_PCI_QUIRK(0x103c, 0x2263, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
SND_PCI_QUIRK(0x103c, 0x2264, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
@@ -5794,6 +5805,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
{.id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, .name = "headset-mode-no-hp-mic"},
{.id = ALC269_FIXUP_LENOVO_DOCK, .name = "lenovo-dock"},
{.id = ALC269_FIXUP_HP_GPIO_LED, .name = "hp-gpio-led"},
+ {.id = ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, .name = "hp-dock-gpio-mic1-led"},
{.id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "dell-headset-multi"},
{.id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE, .name = "dell-headset-dock"},
{.id = ALC283_FIXUP_CHROME_BOOK, .name = "alc283-dac-wcaps"},
@@ -5942,6 +5954,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
{0x1b, 0x01011020},
{0x21, 0x02211010}),
SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ {0x12, 0x90a60130},
+ {0x14, 0x90170110},
+ {0x1b, 0x01011020},
+ {0x21, 0x0221101f}),
+ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
{0x12, 0x90a60160},
{0x14, 0x90170120},
{0x21, 0x02211030}),
diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index a5a4e9f75c57..a06395507225 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -232,7 +232,7 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec)
struct twl4030_codec_data *pdata = dev_get_platdata(codec->dev);
struct device_node *twl4030_codec_node = NULL;
- twl4030_codec_node = of_find_node_by_name(codec->dev->parent->of_node,
+ twl4030_codec_node = of_get_child_by_name(codec->dev->parent->of_node,
"codec");
if (!pdata && twl4030_codec_node) {
@@ -241,9 +241,11 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec)
GFP_KERNEL);
if (!pdata) {
dev_err(codec->dev, "Can not allocate memory\n");
+ of_node_put(twl4030_codec_node);
return NULL;
}
twl4030_setup_pdata_of(pdata, twl4030_codec_node);
+ of_node_put(twl4030_codec_node);
}
return pdata;
diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
index 95d2392303eb..7ca67613e0d4 100644
--- a/sound/soc/fsl/fsl_ssi.c
+++ b/sound/soc/fsl/fsl_ssi.c
@@ -1408,12 +1408,6 @@ static int fsl_ssi_probe(struct platform_device *pdev)
sizeof(fsl_ssi_ac97_dai));
fsl_ac97_data = ssi_private;
-
- ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev);
- if (ret) {
- dev_err(&pdev->dev, "could not set AC'97 ops\n");
- return ret;
- }
} else {
/* Initialize this copy of the CPU DAI driver structure */
memcpy(&ssi_private->cpu_dai_drv, &fsl_ssi_dai_template,
@@ -1473,6 +1467,14 @@ static int fsl_ssi_probe(struct platform_device *pdev)
return ret;
}
+ if (fsl_ssi_is_ac97(ssi_private)) {
+ ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev);
+ if (ret) {
+ dev_err(&pdev->dev, "could not set AC'97 ops\n");
+ goto error_ac97_ops;
+ }
+ }
+
ret = devm_snd_soc_register_component(&pdev->dev, &fsl_ssi_component,
&ssi_private->cpu_dai_drv, 1);
if (ret) {
@@ -1556,6 +1558,10 @@ error_sound_card:
fsl_ssi_debugfs_remove(&ssi_private->dbg_stats);
error_asoc_register:
+ if (fsl_ssi_is_ac97(ssi_private))
+ snd_soc_set_ac97_ops(NULL);
+
+error_ac97_ops:
if (ssi_private->soc->imx)
fsl_ssi_imx_clean(pdev, ssi_private);
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 1050008d7719..c9ae29068c7c 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -203,6 +203,10 @@ static int snd_usb_copy_string_desc(struct mixer_build *state,
int index, char *buf, int maxlen)
{
int len = usb_string(state->chip->dev, index, buf, maxlen - 1);
+
+ if (len < 0)
+ return 0;
+
buf[len] = 0;
return len;
}
@@ -2097,19 +2101,25 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid,
kctl->private_value = (unsigned long)namelist;
kctl->private_free = usb_mixer_selector_elem_free;
- nameid = uac_selector_unit_iSelector(desc);
+ /* check the static mapping table at first */
len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name));
- if (len)
- ;
- else if (nameid)
- snd_usb_copy_string_desc(state, nameid, kctl->id.name,
- sizeof(kctl->id.name));
- else {
- len = get_term_name(state, &state->oterm,
+ if (!len) {
+ /* no mapping ? */
+ /* if iSelector is given, use it */
+ nameid = uac_selector_unit_iSelector(desc);
+ if (nameid)
+ len = snd_usb_copy_string_desc(state, nameid,
+ kctl->id.name,
+ sizeof(kctl->id.name));
+ /* ... or pick up the terminal name at next */
+ if (!len)
+ len = get_term_name(state, &state->oterm,
kctl->id.name, sizeof(kctl->id.name), 0);
+ /* ... or use the fixed string "USB" as the last resort */
if (!len)
strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
+ /* and add the proper suffix */
if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR)
append_ctl_name(kctl, " Clock Source");
else if ((state->oterm.type & 0xff00) == 0x0100)
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index bc7adb84e679..60a94b3e532e 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -193,11 +193,14 @@ static void kvp_update_mem_state(int pool)
for (;;) {
readp = &record[records_read];
records_read += fread(readp, sizeof(struct kvp_record),
- ENTRIES_PER_BLOCK * num_blocks,
- filep);
+ ENTRIES_PER_BLOCK * num_blocks - records_read,
+ filep);
if (ferror(filep)) {
- syslog(LOG_ERR, "Failed to read file, pool: %d", pool);
+ syslog(LOG_ERR,
+ "Failed to read file, pool: %d; error: %d %s",
+ pool, errno, strerror(errno));
+ kvp_release_lock(pool);
exit(EXIT_FAILURE);
}
@@ -210,6 +213,7 @@ static void kvp_update_mem_state(int pool)
if (record == NULL) {
syslog(LOG_ERR, "malloc failed");
+ kvp_release_lock(pool);
exit(EXIT_FAILURE);
}
continue;
@@ -224,15 +228,11 @@ static void kvp_update_mem_state(int pool)
fclose(filep);
kvp_release_lock(pool);
}
+
static int kvp_file_init(void)
{
int fd;
- FILE *filep;
- size_t records_read;
char *fname;
- struct kvp_record *record;
- struct kvp_record *readp;
- int num_blocks;
int i;
int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK;
@@ -246,61 +246,19 @@ static int kvp_file_init(void)
for (i = 0; i < KVP_POOL_COUNT; i++) {
fname = kvp_file_info[i].fname;
- records_read = 0;
- num_blocks = 1;
sprintf(fname, "%s/.kvp_pool_%d", KVP_CONFIG_LOC, i);
fd = open(fname, O_RDWR | O_CREAT | O_CLOEXEC, 0644 /* rw-r--r-- */);
if (fd == -1)
return 1;
-
- filep = fopen(fname, "re");
- if (!filep) {
- close(fd);
- return 1;
- }
-
- record = malloc(alloc_unit * num_blocks);
- if (record == NULL) {
- fclose(filep);
- close(fd);
- return 1;
- }
- for (;;) {
- readp = &record[records_read];
- records_read += fread(readp, sizeof(struct kvp_record),
- ENTRIES_PER_BLOCK,
- filep);
-
- if (ferror(filep)) {
- syslog(LOG_ERR, "Failed to read file, pool: %d",
- i);
- exit(EXIT_FAILURE);
- }
-
- if (!feof(filep)) {
- /*
- * We have more data to read.
- */
- num_blocks++;
- record = realloc(record, alloc_unit *
- num_blocks);
- if (record == NULL) {
- fclose(filep);
- close(fd);
- return 1;
- }
- continue;
- }
- break;
- }
kvp_file_info[i].fd = fd;
- kvp_file_info[i].num_blocks = num_blocks;
- kvp_file_info[i].records = record;
- kvp_file_info[i].num_records = records_read;
- fclose(filep);
-
+ kvp_file_info[i].num_blocks = 1;
+ kvp_file_info[i].records = malloc(alloc_unit);
+ if (kvp_file_info[i].records == NULL)
+ return 1;
+ kvp_file_info[i].num_records = 0;
+ kvp_update_mem_state(i);
}
return 0;
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 638875a0960a..79547c225c14 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -150,7 +150,7 @@ static int run_dir(const char *d, const char *perf)
snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s",
d, d, perf, vcnt, v);
- return system(cmd);
+ return system(cmd) ? TEST_FAIL : TEST_OK;
}
int test__attr(void)
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 754711be8b25..237830873c71 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -200,7 +200,7 @@ void symbols__fixup_end(struct rb_root *symbols)
/* Last entry */
if (curr->end == curr->start)
- curr->end = roundup(curr->start, 4096);
+ curr->end = roundup(curr->start, 4096) + 4096;
}
void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c
index f7997affd143..f45cee80c58b 100644
--- a/tools/testing/selftests/powerpc/harness.c
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -109,9 +109,11 @@ int test_harness(int (test_function)(void), char *name)
rc = run_test(test_function, name);
- if (rc == MAGIC_SKIP_RETURN_VALUE)
+ if (rc == MAGIC_SKIP_RETURN_VALUE) {
test_skip(name);
- else
+ /* so that skipped test is not marked as failed */
+ rc = 0;
+ } else
test_finish(name, rc);
return rc;
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index eabcff411984..92d7eff2827a 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -4,7 +4,8 @@ include ../lib.mk
.PHONY: all all_32 all_64 warn_32bit_failure clean
-TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt ptrace_syscall
+TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt ptrace_syscall \
+ test_vsyscall
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 923e59eb82c7..412b845412d2 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -351,9 +351,24 @@ static void do_simple_tests(void)
install_invalid(&desc, false);
desc.seg_not_present = 0;
- desc.read_exec_only = 0;
desc.seg_32bit = 1;
+ desc.read_exec_only = 0;
+ desc.limit = 0xfffff;
+
install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB);
+
+ desc.limit_in_pages = 1;
+
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB | AR_G);
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P | AR_DB | AR_G);
+ desc.contents = 1;
+ desc.read_exec_only = 0;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
+
+ desc.limit = 0;
install_invalid(&desc, true);
}
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
new file mode 100644
index 000000000000..6e0bd52ad53d
--- /dev/null
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -0,0 +1,500 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <errno.h>
+#include <err.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <setjmp.h>
+
+#ifdef __x86_64__
+# define VSYS(x) (x)
+#else
+# define VSYS(x) 0
+#endif
+
+#ifndef SYS_getcpu
+# ifdef __x86_64__
+# define SYS_getcpu 309
+# else
+# define SYS_getcpu 318
+# endif
+#endif
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+/* vsyscalls and vDSO */
+bool should_read_vsyscall = false;
+
+typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
+gtod_t vdso_gtod;
+
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+vgettime_t vdso_gettime;
+
+typedef long (*time_func_t)(time_t *t);
+time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
+time_func_t vdso_time;
+
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
+getcpu_t vdso_getcpu;
+
+static void init_vdso(void)
+{
+ void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso)
+ vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso) {
+ printf("[WARN]\tfailed to find vDSO\n");
+ return;
+ }
+
+ vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
+ if (!vdso_gtod)
+ printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
+
+ vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+ if (!vdso_gettime)
+ printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
+
+ vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
+ if (!vdso_time)
+ printf("[WARN]\tfailed to find time in vDSO\n");
+
+ vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+ if (!vdso_getcpu) {
+ /* getcpu() was never wired up in the 32-bit vDSO. */
+ printf("[%s]\tfailed to find getcpu in vDSO\n",
+ sizeof(long) == 8 ? "WARN" : "NOTE");
+ }
+}
+
+static int init_vsys(void)
+{
+#ifdef __x86_64__
+ int nerrs = 0;
+ FILE *maps;
+ char line[128];
+ bool found = false;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps) {
+ printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
+ should_read_vsyscall = true;
+ return 0;
+ }
+
+ while (fgets(line, sizeof(line), maps)) {
+ char r, x;
+ void *start, *end;
+ char name[128];
+ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+ &start, &end, &r, &x, name) != 5)
+ continue;
+
+ if (strcmp(name, "[vsyscall]"))
+ continue;
+
+ printf("\tvsyscall map: %s", line);
+
+ if (start != (void *)0xffffffffff600000 ||
+ end != (void *)0xffffffffff601000) {
+ printf("[FAIL]\taddress range is nonsense\n");
+ nerrs++;
+ }
+
+ printf("\tvsyscall permissions are %c-%c\n", r, x);
+ should_read_vsyscall = (r == 'r');
+ if (x != 'x') {
+ vgtod = NULL;
+ vtime = NULL;
+ vgetcpu = NULL;
+ }
+
+ found = true;
+ break;
+ }
+
+ fclose(maps);
+
+ if (!found) {
+ printf("\tno vsyscall map in /proc/self/maps\n");
+ should_read_vsyscall = false;
+ vgtod = NULL;
+ vtime = NULL;
+ vgetcpu = NULL;
+ }
+
+ return nerrs;
+#else
+ return 0;
+#endif
+}
+
+/* syscalls */
+static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
+{
+ return syscall(SYS_gettimeofday, tv, tz);
+}
+
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+ return syscall(SYS_clock_gettime, id, ts);
+}
+
+static inline long sys_time(time_t *t)
+{
+ return syscall(SYS_time, t);
+}
+
+static inline long sys_getcpu(unsigned * cpu, unsigned * node,
+ void* cache)
+{
+ return syscall(SYS_getcpu, cpu, node, cache);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ siglongjmp(jmpbuf, 1);
+}
+
+static double tv_diff(const struct timeval *a, const struct timeval *b)
+{
+ return (double)(a->tv_sec - b->tv_sec) +
+ (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
+}
+
+static int check_gtod(const struct timeval *tv_sys1,
+ const struct timeval *tv_sys2,
+ const struct timezone *tz_sys,
+ const char *which,
+ const struct timeval *tv_other,
+ const struct timezone *tz_other)
+{
+ int nerrs = 0;
+ double d1, d2;
+
+ if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
+ printf("[FAIL] %s tz mismatch\n", which);
+ nerrs++;
+ }
+
+ d1 = tv_diff(tv_other, tv_sys1);
+ d2 = tv_diff(tv_sys2, tv_other);
+ printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
+
+ if (d1 < 0 || d2 < 0) {
+ printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
+ nerrs++;
+ } else {
+ printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
+ }
+
+ return nerrs;
+}
+
+static int test_gtod(void)
+{
+ struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
+ struct timezone tz_sys, tz_vdso, tz_vsys;
+ long ret_vdso = -1;
+ long ret_vsys = -1;
+ int nerrs = 0;
+
+ printf("[RUN]\ttest gettimeofday()\n");
+
+ if (sys_gtod(&tv_sys1, &tz_sys) != 0)
+ err(1, "syscall gettimeofday");
+ if (vdso_gtod)
+ ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
+ if (vgtod)
+ ret_vsys = vgtod(&tv_vsys, &tz_vsys);
+ if (sys_gtod(&tv_sys2, &tz_sys) != 0)
+ err(1, "syscall gettimeofday");
+
+ if (vdso_gtod) {
+ if (ret_vdso == 0) {
+ nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
+ } else {
+ printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
+ nerrs++;
+ }
+ }
+
+ if (vgtod) {
+ if (ret_vsys == 0) {
+ nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
+ } else {
+ printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys);
+ nerrs++;
+ }
+ }
+
+ return nerrs;
+}
+
+static int test_time(void) {
+ int nerrs = 0;
+
+ printf("[RUN]\ttest time()\n");
+ long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
+ long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
+ t_sys1 = sys_time(&t2_sys1);
+ if (vdso_time)
+ t_vdso = vdso_time(&t2_vdso);
+ if (vtime)
+ t_vsys = vtime(&t2_vsys);
+ t_sys2 = sys_time(&t2_sys2);
+ if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
+ printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
+ nerrs++;
+ return nerrs;
+ }
+
+ if (vdso_time) {
+ if (t_vdso < 0 || t_vdso != t2_vdso) {
+ printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
+ nerrs++;
+ } else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
+ printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
+ nerrs++;
+ } else {
+ printf("[OK]\tvDSO time() is okay\n");
+ }
+ }
+
+ if (vtime) {
+ if (t_vsys < 0 || t_vsys != t2_vsys) {
+ printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
+ nerrs++;
+ } else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
+ printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
+ nerrs++;
+ } else {
+ printf("[OK]\tvsyscall time() is okay\n");
+ }
+ }
+
+ return nerrs;
+}
+
+static int test_getcpu(int cpu)
+{
+ int nerrs = 0;
+ long ret_sys, ret_vdso = -1, ret_vsys = -1;
+
+ printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
+
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+ printf("[SKIP]\tfailed to force CPU %d\n", cpu);
+ return nerrs;
+ }
+
+ unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
+ unsigned node = 0;
+ bool have_node = false;
+ ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
+ if (vdso_getcpu)
+ ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+ if (vgetcpu)
+ ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
+
+ if (ret_sys == 0) {
+ if (cpu_sys != cpu) {
+ printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu);
+ nerrs++;
+ }
+
+ have_node = true;
+ node = node_sys;
+ }
+
+ if (vdso_getcpu) {
+ if (ret_vdso) {
+ printf("[FAIL]\tvDSO getcpu() failed\n");
+ nerrs++;
+ } else {
+ if (!have_node) {
+ have_node = true;
+ node = node_vdso;
+ }
+
+ if (cpu_vdso != cpu) {
+ printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu);
+ nerrs++;
+ } else {
+ printf("[OK]\tvDSO reported correct CPU\n");
+ }
+
+ if (node_vdso != node) {
+ printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node);
+ nerrs++;
+ } else {
+ printf("[OK]\tvDSO reported correct node\n");
+ }
+ }
+ }
+
+ if (vgetcpu) {
+ if (ret_vsys) {
+ printf("[FAIL]\tvsyscall getcpu() failed\n");
+ nerrs++;
+ } else {
+ if (!have_node) {
+ have_node = true;
+ node = node_vsys;
+ }
+
+ if (cpu_vsys != cpu) {
+ printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu);
+ nerrs++;
+ } else {
+ printf("[OK]\tvsyscall reported correct CPU\n");
+ }
+
+ if (node_vsys != node) {
+ printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node);
+ nerrs++;
+ } else {
+ printf("[OK]\tvsyscall reported correct node\n");
+ }
+ }
+ }
+
+ return nerrs;
+}
+
+static int test_vsys_r(void)
+{
+#ifdef __x86_64__
+ printf("[RUN]\tChecking read access to the vsyscall page\n");
+ bool can_read;
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ *(volatile int *)0xffffffffff600000;
+ can_read = true;
+ } else {
+ can_read = false;
+ }
+
+ if (can_read && !should_read_vsyscall) {
+ printf("[FAIL]\tWe have read access, but we shouldn't\n");
+ return 1;
+ } else if (!can_read && should_read_vsyscall) {
+ printf("[FAIL]\tWe don't have read access, but we should\n");
+ return 1;
+ } else {
+ printf("[OK]\tgot expected result\n");
+ }
+#endif
+
+ return 0;
+}
+
+
+#ifdef __x86_64__
+#define X86_EFLAGS_TF (1UL << 8)
+static volatile sig_atomic_t num_vsyscall_traps;
+
+static unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
+ return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+ asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
+}
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
+ unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP];
+
+ if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0)
+ num_vsyscall_traps++;
+}
+
+static int test_native_vsyscall(void)
+{
+ time_t tmp;
+ bool is_native;
+
+ if (!vtime)
+ return 0;
+
+ printf("[RUN]\tchecking for native vsyscall\n");
+ sethandler(SIGTRAP, sigtrap, 0);
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ vtime(&tmp);
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
+ /*
+ * If vsyscalls are emulated, we expect a single trap in the
+ * vsyscall page -- the call instruction will trap with RIP
+ * pointing to the entry point before emulation takes over.
+ * In native mode, we expect two traps, since whatever code
+ * the vsyscall page contains will be more than just a ret
+ * instruction.
+ */
+ is_native = (num_vsyscall_traps > 1);
+
+ printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
+ (is_native ? "native" : "emulated"),
+ (int)num_vsyscall_traps);
+
+ return 0;
+}
+#endif
+
+int main(int argc, char **argv)
+{
+ int nerrs = 0;
+
+ init_vdso();
+ nerrs += init_vsys();
+
+ nerrs += test_gtod();
+ nerrs += test_time();
+ nerrs += test_getcpu(0);
+ nerrs += test_getcpu(1);
+
+ sethandler(SIGSEGV, sigsegv, 0);
+ nerrs += test_vsys_r();
+
+#ifdef __x86_64__
+ nerrs += test_native_vsyscall();
+#endif
+
+ return nerrs ? 1 : 0;
+}
diff --git a/tools/usb/usbip/src/utils.c b/tools/usb/usbip/src/utils.c
index 2b3d6d235015..3d7b42e77299 100644
--- a/tools/usb/usbip/src/utils.c
+++ b/tools/usb/usbip/src/utils.c
@@ -30,6 +30,7 @@ int modify_match_busid(char *busid, int add)
char command[SYSFS_BUS_ID_SIZE + 4];
char match_busid_attr_path[SYSFS_PATH_MAX];
int rc;
+ int cmd_size;
snprintf(match_busid_attr_path, sizeof(match_busid_attr_path),
"%s/%s/%s/%s/%s/%s", SYSFS_MNT_PATH, SYSFS_BUS_NAME,
@@ -37,12 +38,14 @@ int modify_match_busid(char *busid, int add)
attr_name);
if (add)
- snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s", busid);
+ cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s",
+ busid);
else
- snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s", busid);
+ cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s",
+ busid);
rc = write_sysfs_attribute(match_busid_attr_path, command,
- sizeof(command));
+ cmd_size);
if (rc < 0) {
dbg("failed to write match_busid: %s", strerror(errno));
return -1;
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index a7b9022b5c8f..7f38db2a46c8 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -84,9 +84,6 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
struct kvm_vcpu *vcpu;
vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
- vcpu->arch.timer_cpu.armed = false;
-
- WARN_ON(!kvm_timer_should_fire(vcpu));
/*
* If the vcpu is blocked we want to wake it up so that it will see
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cb092bd9965b..d080f06fd8d9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -986,7 +986,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
* changes) is disallowed above, so any other attribute changes getting
* here can be skipped.
*/
- if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
+ if (as_id == 0 && (change == KVM_MR_CREATE || change == KVM_MR_MOVE)) {
r = kvm_iommu_map_pages(kvm, &new);
return r;
}