summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTao Huang <huangtao@rock-chips.com>2018-10-10 19:37:13 +0800
committerTao Huang <huangtao@rock-chips.com>2018-10-10 19:37:13 +0800
commitd376ad8f232e1d32b334f3f94ae94a84091a6b5d (patch)
tree6fbd8ab11256b088163ee96f603689b2f3e3af29
parent90a00e3789943713ef89ff262bb6182fa99e625d (diff)
parent8fd9c723bde83907697121ca0f7beb51bbdb32da (diff)
Merge branch 'linux-linaro-lsk-v4.4-android' of git://git.linaro.org/kernel/linux-linaro-stable.git
* linux-linaro-lsk-v4.4-android: (1212 commits) ANDROID: sdcardfs: Change current->fs under lock ANDROID: sdcardfs: Don't use OVERRIDE_CRED macro ANDROID: restrict store of prefer_idle as boolean BACKPORT: arm/syscalls: Optimize address limit check UPSTREAM: syscalls: Use CHECK_DATA_CORRUPTION for addr_limit_user_check BACKPORT: arm64/syscalls: Check address limit on user-mode return BACKPORT: x86/syscalls: Check address limit on user-mode return BACKPORT: lkdtm: add bad USER_DS test UPSTREAM: bug: switch data corruption check to __must_check BACKPORT: lkdtm: Add tests for struct list corruption UPSTREAM: bug: Provide toggle for BUG on data corruption UPSTREAM: list: Split list_del() debug checking into separate function UPSTREAM: rculist: Consolidate DEBUG_LIST for list_add_rcu() BACKPORT: list: Split list_add() debug checking into separate function FROMLIST: ANDROID: binder: Add BINDER_GET_NODE_INFO_FOR_REF ioctl. BACKPORT: arm64/vdso: Fix nsec handling for CLOCK_MONOTONIC_RAW ANDROID: arm64: mm: fix 4.4.154 merge BACKPORT: zsmalloc: introduce zs_huge_class_size() BACKPORT: zram: drop max_zpage_size and use zs_huge_class_size() ANDROID: tracing: fix race condition reading saved tgids ... Change-Id: I9f23db35eb926b6fa0d7af7dbbb55c9a37d536fc
-rw-r--r--Documentation/ABI/obsolete/sysfs-block-zram119
-rw-r--r--Documentation/ABI/testing/sysfs-block-zram118
-rw-r--r--Documentation/ABI/testing/sysfs-fs-f2fs8
-rw-r--r--Documentation/blockdev/zram.txt200
-rw-r--r--Documentation/filesystems/f2fs.txt20
-rw-r--r--Documentation/filesystems/fscrypt.rst626
-rw-r--r--Makefile2
-rw-r--r--arch/alpha/kernel/osf_sys.c64
-rw-r--r--arch/arc/configs/axs101_defconfig1
-rw-r--r--arch/arc/configs/axs103_defconfig1
-rw-r--r--arch/arc/configs/axs103_smp_defconfig1
-rw-r--r--arch/arm/boot/dts/tegra30-cardhu.dtsi1
-rw-r--r--arch/arm/configs/imx_v6_v7_defconfig2
-rw-r--r--arch/arm/include/asm/arch_gicv3.h1
-rw-r--r--arch/arm/kernel/entry-common.S10
-rw-r--r--arch/arm/kernel/signal.c7
-rw-r--r--arch/arm/mach-exynos/suspend.c1
-rw-r--r--arch/arm/mach-hisi/hotplug.c41
-rw-r--r--arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi2
-rw-r--r--arch/arm64/include/asm/thread_info.h5
-rw-r--r--arch/arm64/include/asm/uaccess.h3
-rw-r--r--arch/arm64/kernel/signal.c4
-rw-r--r--arch/mips/ath79/setup.c1
-rw-r--r--arch/mips/cavium-octeon/octeon-platform.c2
-rw-r--r--arch/mips/include/asm/io.h8
-rw-r--r--arch/mips/include/asm/mach-ath79/ath79.h1
-rw-r--r--arch/mips/jz4740/Platform2
-rw-r--r--arch/mips/kernel/process.c1
-rw-r--r--arch/mips/kernel/vdso.c20
-rw-r--r--arch/mips/loongson64/common/cs5536/cs5536_ohci.c2
-rw-r--r--arch/mips/mm/c-r4k.c6
-rw-r--r--arch/openrisc/kernel/process.c2
-rw-r--r--arch/powerpc/include/asm/fadump.h3
-rw-r--r--arch/powerpc/kernel/fadump.c92
-rw-r--r--arch/powerpc/platforms/powernv/opal.c2
-rw-r--r--arch/powerpc/platforms/pseries/ras.c4
-rw-r--r--arch/powerpc/sysdev/mpic_msgr.c2
-rw-r--r--arch/s390/lib/mem.S9
-rw-r--r--arch/sparc/kernel/sys_sparc_32.c22
-rw-r--r--arch/sparc/kernel/sys_sparc_64.c20
-rw-r--r--arch/x86/configs/x86_64_cuttlefish_defconfig14
-rw-r--r--arch/x86/entry/common.c3
-rw-r--r--arch/x86/include/asm/io.h6
-rw-r--r--arch/x86/include/asm/pgtable-3level.h7
-rw-r--r--arch/x86/include/asm/pgtable.h2
-rw-r--r--arch/x86/include/asm/processor.h4
-rw-r--r--arch/x86/include/asm/thread_info.h4
-rw-r--r--arch/x86/include/asm/uaccess.h7
-rw-r--r--arch/x86/kernel/cpu/bugs.c47
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/mm/pageattr.c2
-rw-r--r--arch/x86/mm/pat.c14
-rw-r--r--arch/x86/xen/pmu.c2
-rw-r--r--block/blk-cgroup.c9
-rw-r--r--block/cfq-iosched.c6
-rw-r--r--block/partitions/aix.c13
-rw-r--r--crypto/Kconfig9
-rw-r--r--crypto/Makefile1
-rw-r--r--crypto/algapi.c15
-rw-r--r--crypto/internal.h3
-rw-r--r--crypto/skcipher.c200
-rw-r--r--crypto/testmgr.c16
-rw-r--r--crypto/testmgr.h74
-rw-r--r--crypto/zstd.c209
-rw-r--r--drivers/android/binder.c55
-rw-r--r--drivers/ata/libahci.c2
-rw-r--r--drivers/block/zram/Kconfig35
-rw-r--r--drivers/block/zram/Makefile4
-rw-r--r--drivers/block/zram/zcomp.c401
-rw-r--r--drivers/block/zram/zcomp.h48
-rw-r--r--drivers/block/zram/zcomp_lz4.c66
-rw-r--r--drivers/block/zram/zcomp_lz4.h17
-rw-r--r--drivers/block/zram/zcomp_lzo.c66
-rw-r--r--drivers/block/zram/zcomp_lzo.h17
-rw-r--r--drivers/block/zram/zram_drv.c1389
-rw-r--r--drivers/block/zram/zram_drv.h62
-rw-r--r--drivers/bluetooth/Kconfig1
-rw-r--r--drivers/clk/imx/clk-imx6ul.c1
-rw-r--r--drivers/crypto/sahara.c4
-rw-r--r--drivers/crypto/vmx/aes_cbc.c30
-rw-r--r--drivers/dma/pl330.c5
-rw-r--r--drivers/gpio/gpio-ml-ioh.c3
-rw-r--r--drivers/gpio/gpio-tegra.c2
-rw-r--r--drivers/gpio/gpiolib.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c2
-rw-r--r--drivers/gpu/drm/ast/ast_ttm.c6
-rw-r--r--drivers/gpu/drm/cirrus/cirrus_ttm.c7
-rw-r--r--drivers/gpu/drm/i915/i915_gem_userptr.c3
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_ttm.c7
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_connector.c20
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_ttm.c8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c13
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c2
-rw-r--r--drivers/gpu/drm/radeon/radeon_object.c5
-rw-r--r--drivers/hid/hid-core.c3
-rw-r--r--drivers/hid/hid-ids.h2
-rw-r--r--drivers/hid/hid-sony.c6
-rw-r--r--drivers/hwtracing/coresight/coresight-tpiu.c7
-rw-r--r--drivers/hwtracing/coresight/coresight.c7
-rw-r--r--drivers/i2c/busses/i2c-i801.c7
-rw-r--r--drivers/i2c/busses/i2c-xiic.c4
-rw-r--r--drivers/iio/frequency/ad9523.c4
-rw-r--r--drivers/infiniband/core/cma.c25
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c6
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c2
-rw-r--r--drivers/input/touchscreen/atmel_mxt_ts.c7
-rw-r--r--drivers/iommu/arm-smmu-v3.c1
-rw-r--r--drivers/iommu/dmar.c6
-rw-r--r--drivers/iommu/intel-iommu.c18
-rw-r--r--drivers/iommu/ipmmu-vmsa.c9
-rw-r--r--drivers/irqchip/irq-bcm7038-l1.c4
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c34
-rw-r--r--drivers/irqchip/irq-gic.c2
-rw-r--r--drivers/macintosh/via-pmu.c9
-rw-r--r--drivers/md/bcache/writeback.c4
-rw-r--r--drivers/md/dm-cache-metadata.c3
-rw-r--r--drivers/md/dm-kcopyd.c2
-rw-r--r--drivers/md/dm-table.c10
-rw-r--r--drivers/md/raid5.c6
-rw-r--r--drivers/media/v4l2-core/videobuf2-core.c5
-rw-r--r--drivers/mfd/sm501.c1
-rw-r--r--drivers/mfd/ti_am335x_tscadc.c3
-rw-r--r--drivers/misc/hmc6352.c2
-rw-r--r--drivers/misc/lkdtm.c82
-rw-r--r--drivers/misc/mei/bus-fixup.c2
-rw-r--r--drivers/misc/mei/pci-me.c5
-rw-r--r--drivers/misc/mic/scif/scif_api.c20
-rw-r--r--drivers/misc/ti-st/st_kim.c4
-rw-r--r--drivers/misc/vmw_balloon.c68
-rw-r--r--drivers/mtd/maps/solutionengine.c6
-rw-r--r--drivers/mtd/mtdchar.c10
-rw-r--r--drivers/mtd/ubi/wl.c8
-rw-r--r--drivers/net/appletalk/ipddp.c8
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.h3
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmmii.c10
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_main.c2
-rw-r--r--drivers/net/ethernet/hp/hp100.c2
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c1
-rw-r--r--drivers/net/ethernet/qlogic/qlge/qlge_main.c23
-rw-r--r--drivers/net/ethernet/ti/cpsw.c49
-rw-r--r--drivers/net/ethernet/ti/cpsw.h1
-rw-r--r--drivers/net/ethernet/ti/davinci_emac.c1
-rw-r--r--drivers/net/usb/lan78xx.c4
-rw-r--r--drivers/net/wireless/ath/ath10k/mac.c7
-rw-r--r--drivers/net/wireless/ath/ath10k/wmi-tlv.c5
-rw-r--r--drivers/net/wireless/ath/ath10k/wmi-tlv.h5
-rw-r--r--drivers/net/xen-netfront.c38
-rw-r--r--drivers/parport/parport_sunbpp.c8
-rw-r--r--drivers/pci/host/pci-mvebu.c2
-rw-r--r--drivers/pinctrl/qcom/pinctrl-spmi-gpio.c32
-rw-r--r--drivers/platform/x86/alienware-wmi.c1
-rw-r--r--drivers/platform/x86/asus-nb-wmi.c1
-rw-r--r--drivers/platform/x86/toshiba_acpi.c3
-rw-r--r--drivers/pwm/pwm-tiehrpwm.c2
-rw-r--r--drivers/rtc/rtc-bq4802.c4
-rw-r--r--drivers/s390/block/dasd_eckd.c7
-rw-r--r--drivers/s390/net/qeth_core_main.c3
-rw-r--r--drivers/s390/net/qeth_core_sys.c1
-rw-r--r--drivers/scsi/3w-9xxx.c6
-rw-r--r--drivers/scsi/3w-sas.c3
-rw-r--r--drivers/scsi/3w-xxxx.c2
-rw-r--r--drivers/scsi/aic94xx/aic94xx_init.c4
-rw-r--r--drivers/spi/spi-davinci.c2
-rw-r--r--drivers/staging/android/ion/ion.c60
-rw-r--r--drivers/staging/comedi/drivers/ni_mio_common.c3
-rw-r--r--drivers/staging/rts5208/rtsx_scsi.c2
-rw-r--r--drivers/staging/rts5208/xd.c2
-rw-r--r--drivers/target/iscsi/iscsi_target_auth.c30
-rw-r--r--drivers/target/target_core_transport.c5
-rw-r--r--drivers/tty/rocket.c2
-rw-r--r--drivers/tty/vt/vt_ioctl.c4
-rw-r--r--drivers/uio/uio.c3
-rw-r--r--drivers/usb/class/cdc-wdm.c2
-rw-r--r--drivers/usb/core/hcd-pci.c2
-rw-r--r--drivers/usb/core/message.c11
-rw-r--r--drivers/usb/core/quirks.c7
-rw-r--r--drivers/usb/gadget/udc/net2280.c16
-rw-r--r--drivers/usb/host/u132-hcd.c2
-rw-r--r--drivers/usb/host/xhci.c3
-rw-r--r--drivers/usb/misc/uss720.c4
-rw-r--r--drivers/usb/misc/yurex.c5
-rw-r--r--drivers/usb/serial/io_ti.h2
-rw-r--r--drivers/usb/serial/ti_usb_3410_5052.h2
-rw-r--r--drivers/usb/storage/scsiglue.c9
-rw-r--r--drivers/usb/storage/unusual_devs.h7
-rw-r--r--drivers/video/fbdev/core/fbmem.c38
-rw-r--r--drivers/video/fbdev/core/modedb.c41
-rw-r--r--drivers/video/fbdev/goldfishfb.c1
-rw-r--r--drivers/video/fbdev/omap/omapfb_main.c2
-rw-r--r--drivers/video/fbdev/via/viafbdev.c3
-rw-r--r--fs/9p/xattr.c6
-rw-r--r--fs/autofs4/autofs_i.h4
-rw-r--r--fs/autofs4/inode.c1
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/btrfs/dev-replace.c6
-rw-r--r--fs/btrfs/disk-io.c10
-rw-r--r--fs/btrfs/extent-tree.c2
-rw-r--r--fs/btrfs/extent_io.c19
-rw-r--r--fs/btrfs/relocation.c23
-rw-r--r--fs/ceph/addr.c14
-rw-r--r--fs/cifs/cifs_debug.c8
-rw-r--r--fs/cifs/readdir.c11
-rw-r--r--fs/cifs/smb2misc.c7
-rw-r--r--fs/cifs/smb2pdu.c27
-rw-r--r--fs/crypto/crypto.c47
-rw-r--r--fs/crypto/fname.c32
-rw-r--r--fs/crypto/fscrypt_private.h19
-rw-r--r--fs/crypto/hooks.c5
-rw-r--r--fs/crypto/keyinfo.c288
-rw-r--r--fs/dcache.c3
-rw-r--r--fs/ext4/dir.c20
-rw-r--r--fs/ext4/inline.c4
-rw-r--r--fs/ext4/inode.c14
-rw-r--r--fs/ext4/mmp.c1
-rw-r--r--fs/ext4/resize.c23
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/f2fs/checkpoint.c308
-rw-r--r--fs/f2fs/data.c358
-rw-r--r--fs/f2fs/debug.c9
-rw-r--r--fs/f2fs/dir.c79
-rw-r--r--fs/f2fs/extent_cache.c22
-rw-r--r--fs/f2fs/f2fs.h588
-rw-r--r--fs/f2fs/file.c455
-rw-r--r--fs/f2fs/gc.c321
-rw-r--r--fs/f2fs/gc.h2
-rw-r--r--fs/f2fs/inline.c109
-rw-r--r--fs/f2fs/inode.c241
-rw-r--r--fs/f2fs/namei.c71
-rw-r--r--fs/f2fs/node.c685
-rw-r--r--fs/f2fs/node.h9
-rw-r--r--fs/f2fs/recovery.c88
-rw-r--r--fs/f2fs/segment.c746
-rw-r--r--fs/f2fs/segment.h44
-rw-r--r--fs/f2fs/shrinker.c4
-rw-r--r--fs/f2fs/super.c360
-rw-r--r--fs/f2fs/sysfs.c99
-rw-r--r--fs/f2fs/xattr.c43
-rw-r--r--fs/fat/cache.c19
-rw-r--r--fs/fat/fat.h5
-rw-r--r--fs/fat/fatent.c6
-rw-r--r--fs/gfs2/aops.c20
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/hfs/brec.c7
-rw-r--r--fs/hfsplus/dir.c4
-rw-r--r--fs/hfsplus/super.c4
-rw-r--r--fs/nfs/blocklayout/dev.c2
-rw-r--r--fs/nfs/callback_xdr.c11
-rw-r--r--fs/nilfs2/btree.c4
-rw-r--r--fs/nilfs2/page.c7
-rw-r--r--fs/nilfs2/segment.c12
-rw-r--r--fs/ocfs2/buffer_head_io.c1
-rw-r--r--fs/overlayfs/copy_up.c26
-rw-r--r--fs/overlayfs/dir.c67
-rw-r--r--fs/overlayfs/overlayfs.h3
-rw-r--r--fs/overlayfs/readdir.c93
-rw-r--r--fs/overlayfs/super.c20
-rw-r--r--fs/pstore/ram_core.c17
-rw-r--r--fs/quota/quota.c2
-rw-r--r--fs/reiserfs/reiserfs.h2
-rw-r--r--fs/sdcardfs/file.c24
-rw-r--r--fs/sdcardfs/inode.c213
-rw-r--r--fs/sdcardfs/lookup.c9
-rw-r--r--fs/sdcardfs/main.c7
-rw-r--r--fs/sdcardfs/sdcardfs.h25
-rw-r--r--fs/ubifs/journal.c18
-rw-r--r--fs/ubifs/lprops.c8
-rw-r--r--fs/xattr.c2
-rw-r--r--include/crypto/internal/skcipher.h87
-rw-r--r--include/crypto/skcipher.h147
-rw-r--r--include/linux/bug.h19
-rw-r--r--include/linux/crypto.h1
-rw-r--r--include/linux/f2fs_fs.h5
-rw-r--r--include/linux/fscrypt_notsupp.h10
-rw-r--r--include/linux/fscrypt_supp.h16
-rw-r--r--include/linux/fscrypto.h411
-rw-r--r--include/linux/intel-iommu.h8
-rw-r--r--include/linux/io.h22
-rw-r--r--include/linux/list.h37
-rw-r--r--include/linux/mm_types.h2
-rw-r--r--include/linux/overflow.h278
-rw-r--r--include/linux/pagemap.h12
-rw-r--r--include/linux/pagevec.h14
-rw-r--r--include/linux/rculist.h8
-rw-r--r--include/linux/sched.h2
-rw-r--r--include/linux/swap.h3
-rw-r--r--include/linux/syscalls.h20
-rw-r--r--include/linux/vm_event_item.h1
-rw-r--r--include/linux/vmacache.h5
-rw-r--r--include/linux/zsmalloc.h6
-rw-r--r--include/net/nfc/hci.h2
-rw-r--r--include/uapi/linux/android/binder.h10
-rw-r--r--include/uapi/linux/ethtool.h4
-rw-r--r--include/video/udlfb.h2
-rw-r--r--kernel/audit_watch.c12
-rw-r--r--kernel/fork.c19
-rw-r--r--kernel/irq/chip.c8
-rw-r--r--kernel/kthread.c8
-rw-r--r--kernel/locking/osq_lock.c13
-rw-r--r--kernel/locking/rwsem-xadd.c27
-rw-r--r--kernel/power/Kconfig1
-rw-r--r--kernel/sched/tune.c2
-rw-r--r--kernel/sys.c95
-rw-r--r--kernel/trace/blktrace.c4
-rw-r--r--kernel/trace/ring_buffer.c2
-rw-r--r--kernel/trace/trace.c105
-rw-r--r--kernel/trace/trace_uprobe.c2
-rw-r--r--kernel/user_namespace.c39
-rw-r--r--kernel/utsname_sysctl.c41
-rw-r--r--lib/Kconfig.debug10
-rw-r--r--lib/debugobjects.c7
-rw-r--r--lib/list_debug.c110
-rw-r--r--mm/debug.c4
-rw-r--r--mm/fadvise.c8
-rw-r--r--mm/filemap.c47
-rw-r--r--mm/huge_memory.c2
-rw-r--r--mm/memory.c9
-rw-r--r--mm/page-writeback.c20
-rw-r--r--mm/shmem.c2
-rw-r--r--mm/swap.c20
-rw-r--r--mm/swapfile.c20
-rw-r--r--mm/vmacache.c38
-rw-r--r--mm/zsmalloc.c67
-rw-r--r--net/9p/client.c2
-rw-r--r--net/9p/trans_fd.c7
-rw-r--r--net/9p/trans_rdma.c3
-rw-r--r--net/9p/trans_virtio.c16
-rw-r--r--net/bluetooth/hidp/core.c2
-rw-r--r--net/core/neighbour.c13
-rw-r--r--net/dcb/dcbnl.c11
-rw-r--r--net/ieee802154/6lowpan/tx.c21
-rw-r--r--net/ipv4/af_inet.c1
-rw-r--r--net/ipv4/tcp_minisocks.c3
-rw-r--r--net/ipv6/ip6_offload.c1
-rw-r--r--net/ipv6/ip6_output.c6
-rw-r--r--net/ipv6/ip6_vti.c2
-rw-r--r--net/irda/af_irda.c13
-rw-r--r--net/mac80211/cfg.c2
-rw-r--r--net/mac80211/key.c24
-rw-r--r--net/mac802154/tx.c15
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c15
-rw-r--r--net/netfilter/x_tables.c4
-rw-r--r--net/sched/sch_hhf.c3
-rw-r--r--net/sched/sch_htb.c5
-rw-r--r--net/sched/sch_multiq.c9
-rw-r--r--net/sched/sch_netem.c4
-rw-r--r--net/sched/sch_tbf.c5
-rw-r--r--net/socket.c18
-rw-r--r--net/xfrm/xfrm_policy.c5
-rw-r--r--scripts/Kbuild.include3
-rwxr-xr-xscripts/depmod.sh4
-rw-r--r--scripts/mod/modpost.c8
-rwxr-xr-xscripts/tags.sh222
-rw-r--r--security/selinux/avc.c14
-rw-r--r--sound/core/pcm_lib.c14
-rw-r--r--sound/firewire/bebob/bebob_maudio.c24
-rw-r--r--sound/isa/msnd/msnd_pinnacle.c4
-rw-r--r--sound/pci/emu10k1/emufx.c2
-rw-r--r--sound/pci/hda/hda_codec.c3
-rw-r--r--sound/soc/codecs/cs4265.c4
-rw-r--r--sound/soc/codecs/wm8994.c1
-rw-r--r--sound/usb/quirks-table.h3
-rw-r--r--tools/hv/hv_kvp_daemon.c2
-rw-r--r--tools/perf/arch/powerpc/util/skip-callchain-idx.c10
-rw-r--r--tools/perf/perf.h2
-rw-r--r--tools/perf/util/auxtrace.c3
-rw-r--r--tools/testing/selftests/powerpc/harness.c18
-rw-r--r--tools/testing/selftests/timers/raw_skew.c5
-rw-r--r--verity_dev_keys.x50924
370 files changed, 9159 insertions, 4481 deletions
diff --git a/Documentation/ABI/obsolete/sysfs-block-zram b/Documentation/ABI/obsolete/sysfs-block-zram
deleted file mode 100644
index 720ea92cfb2e..000000000000
--- a/Documentation/ABI/obsolete/sysfs-block-zram
+++ /dev/null
@@ -1,119 +0,0 @@
-What: /sys/block/zram<id>/num_reads
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The num_reads file is read-only and specifies the number of
- reads (failed or successful) done on this device.
- Now accessible via zram<id>/stat node.
-
-What: /sys/block/zram<id>/num_writes
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The num_writes file is read-only and specifies the number of
- writes (failed or successful) done on this device.
- Now accessible via zram<id>/stat node.
-
-What: /sys/block/zram<id>/invalid_io
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The invalid_io file is read-only and specifies the number of
- non-page-size-aligned I/O requests issued to this device.
- Now accessible via zram<id>/io_stat node.
-
-What: /sys/block/zram<id>/failed_reads
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The failed_reads file is read-only and specifies the number of
- failed reads happened on this device.
- Now accessible via zram<id>/io_stat node.
-
-What: /sys/block/zram<id>/failed_writes
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The failed_writes file is read-only and specifies the number of
- failed writes happened on this device.
- Now accessible via zram<id>/io_stat node.
-
-What: /sys/block/zram<id>/notify_free
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The notify_free file is read-only. Depending on device usage
- scenario it may account a) the number of pages freed because
- of swap slot free notifications or b) the number of pages freed
- because of REQ_DISCARD requests sent by bio. The former ones
- are sent to a swap block device when a swap slot is freed, which
- implies that this disk is being used as a swap disk. The latter
- ones are sent by filesystem mounted with discard option,
- whenever some data blocks are getting discarded.
- Now accessible via zram<id>/io_stat node.
-
-What: /sys/block/zram<id>/zero_pages
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The zero_pages file is read-only and specifies number of zero
- filled pages written to this disk. No memory is allocated for
- such pages.
- Now accessible via zram<id>/mm_stat node.
-
-What: /sys/block/zram<id>/orig_data_size
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The orig_data_size file is read-only and specifies uncompressed
- size of data stored in this disk. This excludes zero-filled
- pages (zero_pages) since no memory is allocated for them.
- Unit: bytes
- Now accessible via zram<id>/mm_stat node.
-
-What: /sys/block/zram<id>/compr_data_size
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The compr_data_size file is read-only and specifies compressed
- size of data stored in this disk. So, compression ratio can be
- calculated using orig_data_size and this statistic.
- Unit: bytes
- Now accessible via zram<id>/mm_stat node.
-
-What: /sys/block/zram<id>/mem_used_total
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The mem_used_total file is read-only and specifies the amount
- of memory, including allocator fragmentation and metadata
- overhead, allocated for this disk. So, allocator space
- efficiency can be calculated using compr_data_size and this
- statistic.
- Unit: bytes
- Now accessible via zram<id>/mm_stat node.
-
-What: /sys/block/zram<id>/mem_used_max
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The mem_used_max file is read/write and specifies the amount
- of maximum memory zram have consumed to store compressed data.
- For resetting the value, you should write "0". Otherwise,
- you could see -EINVAL.
- Unit: bytes
- Downgraded to write-only node: so it's possible to set new
- value only; its current value is stored in zram<id>/mm_stat
- node.
-
-What: /sys/block/zram<id>/mem_limit
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The mem_limit file is read/write and specifies the maximum
- amount of memory ZRAM can use to store the compressed data.
- The limit could be changed in run time and "0" means disable
- the limit. No limit is the initial state. Unit: bytes
- Downgraded to write-only node: so it's possible to set new
- value only; its current value is stored in zram<id>/mm_stat
- node.
diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram
index 2e69e83bf510..c1513c756af1 100644
--- a/Documentation/ABI/testing/sysfs-block-zram
+++ b/Documentation/ABI/testing/sysfs-block-zram
@@ -22,41 +22,6 @@ Description:
device. The reset operation frees all the memory associated
with this device.
-What: /sys/block/zram<id>/num_reads
-Date: August 2010
-Contact: Nitin Gupta <ngupta@vflare.org>
-Description:
- The num_reads file is read-only and specifies the number of
- reads (failed or successful) done on this device.
-
-What: /sys/block/zram<id>/num_writes
-Date: August 2010
-Contact: Nitin Gupta <ngupta@vflare.org>
-Description:
- The num_writes file is read-only and specifies the number of
- writes (failed or successful) done on this device.
-
-What: /sys/block/zram<id>/invalid_io
-Date: August 2010
-Contact: Nitin Gupta <ngupta@vflare.org>
-Description:
- The invalid_io file is read-only and specifies the number of
- non-page-size-aligned I/O requests issued to this device.
-
-What: /sys/block/zram<id>/failed_reads
-Date: February 2014
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The failed_reads file is read-only and specifies the number of
- failed reads happened on this device.
-
-What: /sys/block/zram<id>/failed_writes
-Date: February 2014
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The failed_writes file is read-only and specifies the number of
- failed writes happened on this device.
-
What: /sys/block/zram<id>/max_comp_streams
Date: February 2014
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
@@ -73,74 +38,24 @@ Description:
available and selected compression algorithms, change
compression algorithm selection.
-What: /sys/block/zram<id>/notify_free
-Date: August 2010
-Contact: Nitin Gupta <ngupta@vflare.org>
-Description:
- The notify_free file is read-only. Depending on device usage
- scenario it may account a) the number of pages freed because
- of swap slot free notifications or b) the number of pages freed
- because of REQ_DISCARD requests sent by bio. The former ones
- are sent to a swap block device when a swap slot is freed, which
- implies that this disk is being used as a swap disk. The latter
- ones are sent by filesystem mounted with discard option,
- whenever some data blocks are getting discarded.
-
-What: /sys/block/zram<id>/zero_pages
-Date: August 2010
-Contact: Nitin Gupta <ngupta@vflare.org>
-Description:
- The zero_pages file is read-only and specifies number of zero
- filled pages written to this disk. No memory is allocated for
- such pages.
-
-What: /sys/block/zram<id>/orig_data_size
-Date: August 2010
-Contact: Nitin Gupta <ngupta@vflare.org>
-Description:
- The orig_data_size file is read-only and specifies uncompressed
- size of data stored in this disk. This excludes zero-filled
- pages (zero_pages) since no memory is allocated for them.
- Unit: bytes
-
-What: /sys/block/zram<id>/compr_data_size
-Date: August 2010
-Contact: Nitin Gupta <ngupta@vflare.org>
-Description:
- The compr_data_size file is read-only and specifies compressed
- size of data stored in this disk. So, compression ratio can be
- calculated using orig_data_size and this statistic.
- Unit: bytes
-
-What: /sys/block/zram<id>/mem_used_total
-Date: August 2010
-Contact: Nitin Gupta <ngupta@vflare.org>
-Description:
- The mem_used_total file is read-only and specifies the amount
- of memory, including allocator fragmentation and metadata
- overhead, allocated for this disk. So, allocator space
- efficiency can be calculated using compr_data_size and this
- statistic.
- Unit: bytes
-
What: /sys/block/zram<id>/mem_used_max
Date: August 2014
Contact: Minchan Kim <minchan@kernel.org>
Description:
- The mem_used_max file is read/write and specifies the amount
- of maximum memory zram have consumed to store compressed data.
- For resetting the value, you should write "0". Otherwise,
- you could see -EINVAL.
+ The mem_used_max file is write-only and is used to reset
+ the counter of the maximum memory zram has consumed to store
+ compressed data. To reset the value, write "0"; writing
+ anything else may return -EINVAL.
Unit: bytes
What: /sys/block/zram<id>/mem_limit
Date: August 2014
Contact: Minchan Kim <minchan@kernel.org>
Description:
- The mem_limit file is read/write and specifies the maximum
- amount of memory ZRAM can use to store the compressed data. The
- limit could be changed in run time and "0" means disable the
- limit. No limit is the initial state. Unit: bytes
+ The mem_limit file is write-only and specifies the maximum
+ amount of memory ZRAM can use to store the compressed data.
+ The limit could be changed in run time and "0" means disable
+ the limit. No limit is the initial state. Unit: bytes
What: /sys/block/zram<id>/compact
Date: August 2015
@@ -166,3 +81,20 @@ Description:
The mm_stat file is read-only and represents device's mm
statistics (orig_data_size, compr_data_size, etc.) in a format
similar to block layer statistics file format.
+
+What: /sys/block/zram<id>/debug_stat
+Date: July 2016
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The debug_stat file is read-only and exposes various
+ debugging information about the device that is useful to
+ kernel developers. Its format is intentionally undocumented
+ and may change at any time without notice.
+
+What: /sys/block/zram<id>/backing_dev
+Date: June 2017
+Contact: Minchan Kim <minchan@kernel.org>
+Description:
+ The backing_dev file is read-write and sets up the backing
+ device to which zram writes incompressible pages.
+ To use it, CONFIG_ZRAM_WRITEBACK must be enabled.
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index f82da9bbb1fd..3bbb9fe9548c 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -51,6 +51,14 @@ Description:
Controls the dirty page count condition for the in-place-update
policies.
+What: /sys/fs/f2fs/<disk>/min_seq_blocks
+Date: August 2018
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+ Controls the dirty page count condition for batched sequential
+ writes in ->writepages.
+
+
What: /sys/fs/f2fs/<disk>/min_hot_blocks
Date: March 2017
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
index 5bda5031c83d..875b2b56b87f 100644
--- a/Documentation/blockdev/zram.txt
+++ b/Documentation/blockdev/zram.txt
@@ -59,34 +59,23 @@ num_devices parameter is optional and tells zram how many devices should be
pre-created. Default: 1.
2) Set max number of compression streams
- Compression backend may use up to max_comp_streams compression streams,
- thus allowing up to max_comp_streams concurrent compression operations.
- By default, compression backend uses single compression stream.
-
- Examples:
- #show max compression streams number
+Regardless of the value passed to this attribute, ZRAM will always
+allocate multiple compression streams - one per online CPU - thus
+allowing several concurrent compression operations. The number of
+allocated compression streams goes down when some of the CPUs
+become offline. There is no single-compression-stream mode anymore,
+unless you are running a UP system or have only 1 CPU online.
+
+To find out how many streams are currently available:
cat /sys/block/zram0/max_comp_streams
- #set max compression streams number to 3
- echo 3 > /sys/block/zram0/max_comp_streams
-
-Note:
-In order to enable compression backend's multi stream support max_comp_streams
-must be initially set to desired concurrency level before ZRAM device
-initialisation. Once the device initialised as a single stream compression
-backend (max_comp_streams equals to 1), you will see error if you try to change
-the value of max_comp_streams because single stream compression backend
-implemented as a special case by lock overhead issue and does not support
-dynamic max_comp_streams. Only multi stream backend supports dynamic
-max_comp_streams adjustment.
-
3) Select compression algorithm
- Using comp_algorithm device attribute one can see available and
- currently selected (shown in square brackets) compression algorithms,
- change selected compression algorithm (once the device is initialised
- there is no way to change compression algorithm).
+Using comp_algorithm device attribute one can see available and
+currently selected (shown in square brackets) compression algorithms,
+change selected compression algorithm (once the device is initialised
+there is no way to change compression algorithm).
- Examples:
+Examples:
#show supported compression algorithms
cat /sys/block/zram0/comp_algorithm
lzo [lz4]
@@ -94,17 +83,27 @@ max_comp_streams adjustment.
#select lzo compression algorithm
echo lzo > /sys/block/zram0/comp_algorithm
+For the time being, the `comp_algorithm' content does not necessarily
+show every compression algorithm supported by the kernel. We keep this
+list primarily to simplify device configuration and one can configure
+a new device with a compression algorithm that is not listed in
+`comp_algorithm'. The thing is that, internally, ZRAM uses Crypto API
+and, if some of the algorithms were built as modules, it's impossible
+to list all of them using, for instance, /proc/crypto or any other
+method. This, however, has an advantage of permitting the usage of
+custom crypto compression modules (implementing S/W or H/W compression).
+
4) Set Disksize
- Set disk size by writing the value to sysfs node 'disksize'.
- The value can be either in bytes or you can use mem suffixes.
- Examples:
- # Initialize /dev/zram0 with 50MB disksize
- echo $((50*1024*1024)) > /sys/block/zram0/disksize
+Set disk size by writing the value to sysfs node 'disksize'.
+The value can be either in bytes or you can use mem suffixes.
+Examples:
+ # Initialize /dev/zram0 with 50MB disksize
+ echo $((50*1024*1024)) > /sys/block/zram0/disksize
- # Using mem suffixes
- echo 256K > /sys/block/zram0/disksize
- echo 512M > /sys/block/zram0/disksize
- echo 1G > /sys/block/zram0/disksize
+ # Using mem suffixes
+ echo 256K > /sys/block/zram0/disksize
+ echo 512M > /sys/block/zram0/disksize
+ echo 1G > /sys/block/zram0/disksize
Note:
There is little point creating a zram of greater than twice the size of memory
@@ -112,20 +111,20 @@ since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the
size of the disk when not in use so a huge zram is wasteful.
5) Set memory limit: Optional
- Set memory limit by writing the value to sysfs node 'mem_limit'.
- The value can be either in bytes or you can use mem suffixes.
- In addition, you could change the value in runtime.
- Examples:
- # limit /dev/zram0 with 50MB memory
- echo $((50*1024*1024)) > /sys/block/zram0/mem_limit
+Set memory limit by writing the value to sysfs node 'mem_limit'.
+The value can be either in bytes or you can use mem suffixes.
+In addition, you could change the value in runtime.
+Examples:
+ # limit /dev/zram0 with 50MB memory
+ echo $((50*1024*1024)) > /sys/block/zram0/mem_limit
- # Using mem suffixes
- echo 256K > /sys/block/zram0/mem_limit
- echo 512M > /sys/block/zram0/mem_limit
- echo 1G > /sys/block/zram0/mem_limit
+ # Using mem suffixes
+ echo 256K > /sys/block/zram0/mem_limit
+ echo 512M > /sys/block/zram0/mem_limit
+ echo 1G > /sys/block/zram0/mem_limit
- # To disable memory limit
- echo 0 > /sys/block/zram0/mem_limit
+ # To disable memory limit
+ echo 0 > /sys/block/zram0/mem_limit
6) Activate:
mkswap /dev/zram0
@@ -162,41 +161,15 @@ Name access description
disksize RW show and set the device's disk size
initstate RO shows the initialization state of the device
reset WO trigger device reset
-num_reads RO the number of reads
-failed_reads RO the number of failed reads
-num_write RO the number of writes
-failed_writes RO the number of failed writes
-invalid_io RO the number of non-page-size-aligned I/O requests
+mem_used_max WO reset the `mem_used_max' counter (see later)
+mem_limit WO specifies the maximum amount of memory ZRAM can use
+ to store the compressed data
max_comp_streams RW the number of possible concurrent compress operations
comp_algorithm RW show and change the compression algorithm
-notify_free RO the number of notifications to free pages (either
- slot free notifications or REQ_DISCARD requests)
-zero_pages RO the number of zero filled pages written to this disk
-orig_data_size RO uncompressed size of data stored in this disk
-compr_data_size RO compressed size of data stored in this disk
-mem_used_total RO the amount of memory allocated for this disk
-mem_used_max RW the maximum amount of memory zram have consumed to
- store the data (to reset this counter to the actual
- current value, write 1 to this attribute)
-mem_limit RW the maximum amount of memory ZRAM can use to store
- the compressed data
-pages_compacted RO the number of pages freed during compaction
- (available only via zram<id>/mm_stat node)
compact WO trigger memory compaction
+debug_stat RO this file is used for zram debugging purposes
+backing_dev RW set up backend storage for zram to write out
-WARNING
-=======
-per-stat sysfs attributes are considered to be deprecated.
-The basic strategy is:
--- the existing RW nodes will be downgraded to WO nodes (in linux 4.11)
--- deprecated RO sysfs nodes will eventually be removed (in linux 4.11)
-
-The list of deprecated attributes can be found here:
-Documentation/ABI/obsolete/sysfs-block-zram
-
-Basically, every attribute that has its own read accessible sysfs node
-(e.g. num_reads) *AND* is accessible via one of the stat files (zram<id>/stat
-or zram<id>/io_stat or zram<id>/mm_stat) is considered to be deprecated.
User space is advised to use the following files to read the device statistics.
@@ -211,22 +184,41 @@ The stat file represents device's I/O statistics not accounted by block
layer and, thus, not available in zram<id>/stat file. It consists of a
single line of text and contains the following stats separated by
whitespace:
- failed_reads
- failed_writes
- invalid_io
- notify_free
+ failed_reads the number of failed reads
+ failed_writes the number of failed writes
+ invalid_io the number of non-page-size-aligned I/O requests
+ notify_free Depending on device usage scenario it may account
+ a) the number of pages freed because of swap slot free
+ notifications or b) the number of pages freed because of
+ REQ_DISCARD requests sent by bio. The former ones are
+ sent to a swap block device when a swap slot is freed,
+ which implies that this disk is being used as a swap disk.
+ The latter ones are sent by filesystem mounted with
+ discard option, whenever some data blocks are getting
+ discarded.
File /sys/block/zram<id>/mm_stat
The stat file represents device's mm statistics. It consists of a single
line of text and contains the following stats separated by whitespace:
- orig_data_size
- compr_data_size
- mem_used_total
- mem_limit
- mem_used_max
- zero_pages
- num_migrated
+ orig_data_size uncompressed size of data stored in this disk.
+ This excludes same-element-filled pages (same_pages) since
+ no memory is allocated for them.
+ Unit: bytes
+ compr_data_size compressed size of data stored in this disk
+ mem_used_total the amount of memory allocated for this disk. This
+ includes allocator fragmentation and metadata overhead,
+ allocated for this disk. So, allocator space efficiency
+ can be calculated using compr_data_size and this statistic.
+ Unit: bytes
+ mem_limit the maximum amount of memory ZRAM can use to store
+ the compressed data
+ mem_used_max the maximum amount of memory zram has consumed to
+ store the data
+ same_pages the number of same element filled pages written to this disk.
+ No memory is allocated for such pages.
+ pages_compacted the number of pages freed during compaction
+ huge_pages the number of incompressible pages
9) Deactivate:
swapoff /dev/zram0
@@ -241,5 +233,39 @@ line of text and contains the following stats separated by whitespace:
resets the disksize to zero. You must set the disksize again
before reusing the device.
+* Optional Feature
+
+= writeback
+
+With incompressible pages, there is no memory saving with zram.
+Instead, with CONFIG_ZRAM_WRITEBACK, zram can write incompressible pages
+to backing storage rather than keeping them in memory.
+The user should set up the backing device via /sys/block/zramX/backing_dev
+before setting the disksize.
+
+= memory tracking
+
+With CONFIG_ZRAM_MEMORY_TRACKING, the user can get information about
+each zram block. It could be useful to catch cold or incompressible
+pages of the process with pagemap.
+If you enable the feature, you can see the block state via
+"/sys/kernel/debug/zram/zram0/block_state". The output is as follows:
+
+ 300 75.033841 .wh
+ 301 63.806904 s..
+ 302 63.806919 ..h
+
+First column is zram's block index.
+Second column is access time since the system was booted.
+Third column is state of the block.
+(s: same page
+w: written page to backing store
+h: huge page)
+
+The first line of the above example says the 300th block was accessed at
+75.033841 sec and the block's state is huge, so it was written back to the
+backing storage. It's a debugging feature, so no one should rely on it to
+work properly.
+
Nitin Gupta
ngupta@vflare.org
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index ecccb51c7279..0c8bdd38cefd 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -155,6 +155,26 @@ noinline_data Disable the inline data feature, inline data feature is
enabled by default.
data_flush Enable data flushing before checkpoint in order to
persist data of regular and symlink.
+fault_injection=%d Enable fault injection in all supported types with
+ specified injection rate.
+fault_type=%d Support configuring fault injection type, should be
+ enabled with fault_injection option, fault type value
+ is shown below, it supports single or combined type.
+ Type_Name Type_Value
+ FAULT_KMALLOC 0x000000001
+ FAULT_KVMALLOC 0x000000002
+ FAULT_PAGE_ALLOC 0x000000004
+ FAULT_PAGE_GET 0x000000008
+ FAULT_ALLOC_BIO 0x000000010
+ FAULT_ALLOC_NID 0x000000020
+ FAULT_ORPHAN 0x000000040
+ FAULT_BLOCK 0x000000080
+ FAULT_DIR_DEPTH 0x000000100
+ FAULT_EVICT_INODE 0x000000200
+ FAULT_TRUNCATE 0x000000400
+ FAULT_IO 0x000000800
+ FAULT_CHECKPOINT 0x000001000
+ FAULT_DISCARD 0x000002000
mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random
writes towards main area.
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
new file mode 100644
index 000000000000..48b424de85bb
--- /dev/null
+++ b/Documentation/filesystems/fscrypt.rst
@@ -0,0 +1,626 @@
+=====================================
+Filesystem-level encryption (fscrypt)
+=====================================
+
+Introduction
+============
+
+fscrypt is a library which filesystems can hook into to support
+transparent encryption of files and directories.
+
+Note: "fscrypt" in this document refers to the kernel-level portion,
+implemented in ``fs/crypto/``, as opposed to the userspace tool
+`fscrypt <https://github.com/google/fscrypt>`_. This document only
+covers the kernel-level portion. For command-line examples of how to
+use encryption, see the documentation for the userspace tool `fscrypt
+<https://github.com/google/fscrypt>`_. Also, it is recommended to use
+the fscrypt userspace tool, or other existing userspace tools such as
+`fscryptctl <https://github.com/google/fscryptctl>`_ or `Android's key
+management system
+<https://source.android.com/security/encryption/file-based>`_, over
+using the kernel's API directly. Using existing tools reduces the
+chance of introducing your own security bugs. (Nevertheless, for
+completeness this documentation covers the kernel's API anyway.)
+
+Unlike dm-crypt, fscrypt operates at the filesystem level rather than
+at the block device level. This allows it to encrypt different files
+with different keys and to have unencrypted files on the same
+filesystem. This is useful for multi-user systems where each user's
+data-at-rest needs to be cryptographically isolated from the others.
+However, except for filenames, fscrypt does not encrypt filesystem
+metadata.
+
+Unlike eCryptfs, which is a stacked filesystem, fscrypt is integrated
+directly into supported filesystems --- currently ext4, F2FS, and
+UBIFS. This allows encrypted files to be read and written without
+caching both the decrypted and encrypted pages in the pagecache,
+thereby nearly halving the memory used and bringing it in line with
+unencrypted files. Similarly, half as many dentries and inodes are
+needed. eCryptfs also limits encrypted filenames to 143 bytes,
+causing application compatibility issues; fscrypt allows the full 255
+bytes (NAME_MAX). Finally, unlike eCryptfs, the fscrypt API can be
+used by unprivileged users, with no need to mount anything.
+
+fscrypt does not support encrypting files in-place. Instead, it
+supports marking an empty directory as encrypted. Then, after
+userspace provides the key, all regular files, directories, and
+symbolic links created in that directory tree are transparently
+encrypted.
+
+Threat model
+============
+
+Offline attacks
+---------------
+
+Provided that userspace chooses a strong encryption key, fscrypt
+protects the confidentiality of file contents and filenames in the
+event of a single point-in-time permanent offline compromise of the
+block device content. fscrypt does not protect the confidentiality of
+non-filename metadata, e.g. file sizes, file permissions, file
+timestamps, and extended attributes. Also, the existence and location
+of holes (unallocated blocks which logically contain all zeroes) in
+files is not protected.
+
+fscrypt is not guaranteed to protect confidentiality or authenticity
+if an attacker is able to manipulate the filesystem offline prior to
+an authorized user later accessing the filesystem.
+
+Online attacks
+--------------
+
+fscrypt (and storage encryption in general) can only provide limited
+protection, if any at all, against online attacks. In detail:
+
+fscrypt is only resistant to side-channel attacks, such as timing or
+electromagnetic attacks, to the extent that the underlying Linux
+Cryptographic API algorithms are. If a vulnerable algorithm is used,
+such as a table-based implementation of AES, it may be possible for an
+attacker to mount a side channel attack against the online system.
+Side channel attacks may also be mounted against applications
+consuming decrypted data.
+
+After an encryption key has been provided, fscrypt is not designed to
+hide the plaintext file contents or filenames from other users on the
+same system, regardless of the visibility of the keyring key.
+Instead, existing access control mechanisms such as file mode bits,
+POSIX ACLs, LSMs, or mount namespaces should be used for this purpose.
+Also note that as long as the encryption keys are *anywhere* in
+memory, an online attacker can necessarily compromise them by mounting
+a physical attack or by exploiting any kernel security vulnerability
+which provides an arbitrary memory read primitive.
+
+While it is ostensibly possible to "evict" keys from the system,
+recently accessed encrypted files will remain accessible at least
+until the filesystem is unmounted or the VFS caches are dropped, e.g.
+using ``echo 2 > /proc/sys/vm/drop_caches``. Even after that, if the
+RAM is compromised before being powered off, it will likely still be
+possible to recover portions of the plaintext file contents, if not
+some of the encryption keys as well. (Since Linux v4.12, all
+in-kernel keys related to fscrypt are sanitized before being freed.
+However, userspace would need to do its part as well.)
+
+Currently, fscrypt does not prevent a user from maliciously providing
+an incorrect key for another user's existing encrypted files. A
+protection against this is planned.
+
+Key hierarchy
+=============
+
+Master Keys
+-----------
+
+Each encrypted directory tree is protected by a *master key*. Master
+keys can be up to 64 bytes long, and must be at least as long as the
+greater of the key length needed by the contents and filenames
+encryption modes being used. For example, if AES-256-XTS is used for
+contents encryption, the master key must be 64 bytes (512 bits). Note
+that the XTS mode is defined to require a key twice as long as that
+required by the underlying block cipher.
+
+To "unlock" an encrypted directory tree, userspace must provide the
+appropriate master key. There can be any number of master keys, each
+of which protects any number of directory trees on any number of
+filesystems.
+
+Userspace should generate master keys either using a cryptographically
+secure random number generator, or by using a KDF (Key Derivation
+Function). Note that whenever a KDF is used to "stretch" a
+lower-entropy secret such as a passphrase, it is critical that a KDF
+designed for this purpose be used, such as scrypt, PBKDF2, or Argon2.
+
+Per-file keys
+-------------
+
+Master keys are not used to encrypt file contents or names directly.
+Instead, a unique key is derived for each encrypted file, including
+each regular file, directory, and symbolic link. This has several
+advantages:
+
+- In cryptosystems, the same key material should never be used for
+ different purposes. Using the master key as both an XTS key for
+ contents encryption and as a CTS-CBC key for filenames encryption
+ would violate this rule.
+- Per-file keys simplify the choice of IVs (Initialization Vectors)
+ for contents encryption. Without per-file keys, to ensure IV
+ uniqueness both the inode and logical block number would need to be
+ encoded in the IVs. This would make it impossible to renumber
+ inodes, which e.g. ``resize2fs`` can do when resizing an ext4
+ filesystem. With per-file keys, it is sufficient to encode just the
+ logical block number in the IVs.
+- Per-file keys strengthen the encryption of filenames, where IVs are
+ reused out of necessity. With a unique key per directory, IV reuse
+ is limited to within a single directory.
+- Per-file keys allow individual files to be securely erased simply by
+ securely erasing their keys. (Not yet implemented.)
+
+A KDF (Key Derivation Function) is used to derive per-file keys from
+the master key. This is done instead of wrapping a randomly-generated
+key for each file because it reduces the size of the encryption xattr,
+which for some filesystems makes the xattr more likely to fit in-line
+in the filesystem's inode table. With a KDF, only a 16-byte nonce is
+required --- long enough to make key reuse extremely unlikely. A
+wrapped key, on the other hand, would need to be up to 64 bytes ---
+the length of an AES-256-XTS key. Furthermore, currently there is no
+requirement to support unlocking a file with multiple alternative
+master keys or to support rotating master keys. Instead, the master
+keys may be wrapped in userspace, e.g. as done by the `fscrypt
+<https://github.com/google/fscrypt>`_ tool.
+
+The current KDF encrypts the master key using the 16-byte nonce as an
+AES-128-ECB key. The output is used as the derived key. If the
+output is longer than needed, then it is truncated to the needed
+length. Truncation is the norm for directories and symlinks, since
+those use the CTS-CBC encryption mode which requires a key half as
+long as that required by the XTS encryption mode.
+
+Note: this KDF meets the primary security requirement, which is to
+produce unique derived keys that preserve the entropy of the master
+key, assuming that the master key is already a good pseudorandom key.
+However, it is nonstandard and has some problems such as being
+reversible, so it is generally considered to be a mistake! It may be
+replaced with HKDF or another more standard KDF in the future.
+
+Encryption modes and usage
+==========================
+
+fscrypt allows one encryption mode to be specified for file contents
+and one encryption mode to be specified for filenames. Different
+directory trees are permitted to use different encryption modes.
+Currently, the following pairs of encryption modes are supported:
+
+- AES-256-XTS for contents and AES-256-CTS-CBC for filenames
+- AES-128-CBC for contents and AES-128-CTS-CBC for filenames
+- Speck128/256-XTS for contents and Speck128/256-CTS-CBC for filenames
+
+It is strongly recommended to use AES-256-XTS for contents encryption.
+AES-128-CBC was added only for low-powered embedded devices with
+crypto accelerators such as CAAM or CESA that do not support XTS.
+
+Similarly, Speck128/256 support was only added for older or low-end
+CPUs which cannot do AES fast enough -- especially ARM CPUs which have
+NEON instructions but not the Cryptography Extensions -- and for which
+it would not otherwise be feasible to use encryption at all. It is
+not recommended to use Speck on CPUs that have AES instructions.
+Speck support is only available if it has been enabled in the crypto
+API via CONFIG_CRYPTO_SPECK. Also, on ARM platforms, to get
+acceptable performance CONFIG_CRYPTO_SPECK_NEON must be enabled.
+
+New encryption modes can be added relatively easily, without changes
+to individual filesystems. However, authenticated encryption (AE)
+modes are not currently supported because of the difficulty of dealing
+with ciphertext expansion.
+
+For file contents, each filesystem block is encrypted independently.
+Currently, only the case where the filesystem block size is equal to
+the system's page size (usually 4096 bytes) is supported. With the
+XTS mode of operation (recommended), the logical block number within
+the file is used as the IV. With the CBC mode of operation (not
+recommended), ESSIV is used; specifically, the IV for CBC is the
+logical block number encrypted with AES-256, where the AES-256 key is
+the SHA-256 hash of the inode's data encryption key.
+
+For filenames, the full filename is encrypted at once. Because of the
+requirements to retain support for efficient directory lookups and
+filenames of up to 255 bytes, a constant initialization vector (IV) is
+used. However, each encrypted directory uses a unique key, which
+limits IV reuse to within a single directory. Note that IV reuse in
+the context of CTS-CBC encryption means that when the original
+filenames share a common prefix at least as long as the cipher block
+size (16 bytes for AES), the corresponding encrypted filenames will
+also share a common prefix. This is undesirable; it may be fixed in
+the future by switching to an encryption mode that is a strong
+pseudorandom permutation on arbitrary-length messages, e.g. the HEH
+(Hash-Encrypt-Hash) mode.
+
+Since filenames are encrypted with the CTS-CBC mode of operation, the
+plaintext and ciphertext filenames need not be multiples of the AES
+block size, i.e. 16 bytes. However, the minimum size that can be
+encrypted is 16 bytes, so shorter filenames are NUL-padded to 16 bytes
+before being encrypted. In addition, to reduce leakage of filename
+lengths via their ciphertexts, all filenames are NUL-padded to the
+next 4, 8, 16, or 32-byte boundary (configurable). 32 is recommended
+since this provides the best confidentiality, at the cost of making
+directory entries consume slightly more space. Note that since NUL
+(``\0``) is not otherwise a valid character in filenames, the padding
+will never produce duplicate plaintexts.
+
+Symbolic link targets are considered a type of filename and are
+encrypted in the same way as filenames in directory entries. Each
+symlink also uses a unique key; hence, the hardcoded IV is not a
+problem for symlinks.
+
+User API
+========
+
+Setting an encryption policy
+----------------------------
+
+The FS_IOC_SET_ENCRYPTION_POLICY ioctl sets an encryption policy on an
+empty directory or verifies that a directory or regular file already
+has the specified encryption policy. It takes in a pointer to a
+:c:type:`struct fscrypt_policy`, defined as follows::
+
+ #define FS_KEY_DESCRIPTOR_SIZE 8
+
+ struct fscrypt_policy {
+ __u8 version;
+ __u8 contents_encryption_mode;
+ __u8 filenames_encryption_mode;
+ __u8 flags;
+ __u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+ };
+
+This structure must be initialized as follows:
+
+- ``version`` must be 0.
+
+- ``contents_encryption_mode`` and ``filenames_encryption_mode`` must
+ be set to constants from ``<linux/fs.h>`` which identify the
+ encryption modes to use. If unsure, use
+ FS_ENCRYPTION_MODE_AES_256_XTS (1) for ``contents_encryption_mode``
+ and FS_ENCRYPTION_MODE_AES_256_CTS (4) for
+ ``filenames_encryption_mode``.
+
+- ``flags`` must be set to a value from ``<linux/fs.h>`` which
+ identifies the amount of NUL-padding to use when encrypting
+ filenames. If unsure, use FS_POLICY_FLAGS_PAD_32 (0x3).
+
+- ``master_key_descriptor`` specifies how to find the master key in
+ the keyring; see `Adding keys`_. It is up to userspace to choose a
+ unique ``master_key_descriptor`` for each master key. The e4crypt
+ and fscrypt tools use the first 8 bytes of
+ ``SHA-512(SHA-512(master_key))``, but this particular scheme is not
+ required. Also, the master key need not be in the keyring yet when
+ FS_IOC_SET_ENCRYPTION_POLICY is executed. However, it must be added
+ before any files can be created in the encrypted directory.
+
+If the file is not yet encrypted, then FS_IOC_SET_ENCRYPTION_POLICY
+verifies that the file is an empty directory. If so, the specified
+encryption policy is assigned to the directory, turning it into an
+encrypted directory. After that, and after providing the
+corresponding master key as described in `Adding keys`_, all regular
+files, directories (recursively), and symlinks created in the
+directory will be encrypted, inheriting the same encryption policy.
+The filenames in the directory's entries will be encrypted as well.
+
+Alternatively, if the file is already encrypted, then
+FS_IOC_SET_ENCRYPTION_POLICY validates that the specified encryption
+policy exactly matches the actual one. If they match, then the ioctl
+returns 0. Otherwise, it fails with EEXIST. This works on both
+regular files and directories, including nonempty directories.
+
+Note that the ext4 filesystem does not allow the root directory to be
+encrypted, even if it is empty. Users who want to encrypt an entire
+filesystem with one key should consider using dm-crypt instead.
+
+FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors:
+
+- ``EACCES``: the file is not owned by the process's uid, nor does the
+ process have the CAP_FOWNER capability in a namespace with the file
+ owner's uid mapped
+- ``EEXIST``: the file is already encrypted with an encryption policy
+ different from the one specified
+- ``EINVAL``: an invalid encryption policy was specified (invalid
+ version, mode(s), or flags)
+- ``ENOTDIR``: the file is unencrypted and is a regular file, not a
+ directory
+- ``ENOTEMPTY``: the file is unencrypted and is a nonempty directory
+- ``ENOTTY``: this type of filesystem does not implement encryption
+- ``EOPNOTSUPP``: the kernel was not configured with encryption
+ support for this filesystem, or the filesystem superblock has not
+ had encryption enabled on it. (For example, to use encryption on an
+ ext4 filesystem, CONFIG_EXT4_ENCRYPTION must be enabled in the
+ kernel config, and the superblock must have had the "encrypt"
+ feature flag enabled using ``tune2fs -O encrypt`` or ``mkfs.ext4 -O
+ encrypt``.)
+- ``EPERM``: this directory may not be encrypted, e.g. because it is
+ the root directory of an ext4 filesystem
+- ``EROFS``: the filesystem is readonly
+
+Getting an encryption policy
+----------------------------
+
+The FS_IOC_GET_ENCRYPTION_POLICY ioctl retrieves the :c:type:`struct
+fscrypt_policy`, if any, for a directory or regular file. See above
+for the struct definition. No additional permissions are required
+beyond the ability to open the file.
+
+FS_IOC_GET_ENCRYPTION_POLICY can fail with the following errors:
+
+- ``EINVAL``: the file is encrypted, but it uses an unrecognized
+ encryption context format
+- ``ENODATA``: the file is not encrypted
+- ``ENOTTY``: this type of filesystem does not implement encryption
+- ``EOPNOTSUPP``: the kernel was not configured with encryption
+ support for this filesystem
+
+Note: if you only need to know whether a file is encrypted or not, on
+most filesystems it is also possible to use the FS_IOC_GETFLAGS ioctl
+and check for FS_ENCRYPT_FL, or to use the statx() system call and
+check for STATX_ATTR_ENCRYPTED in stx_attributes.
+
+Getting the per-filesystem salt
+-------------------------------
+
+Some filesystems, such as ext4 and F2FS, also support the deprecated
+ioctl FS_IOC_GET_ENCRYPTION_PWSALT. This ioctl retrieves a randomly
+generated 16-byte value stored in the filesystem superblock. This
+value is intended to be used as a salt when deriving an encryption key
+from a passphrase or other low-entropy user credential.
+
+FS_IOC_GET_ENCRYPTION_PWSALT is deprecated. Instead, prefer to
+generate and manage any needed salt(s) in userspace.
+
+Adding keys
+-----------
+
+To provide a master key, userspace must add it to an appropriate
+keyring using the add_key() system call (see:
+``Documentation/security/keys/core.rst``). The key type must be
+"logon"; keys of this type are kept in kernel memory and cannot be
+read back by userspace. The key description must be "fscrypt:"
+followed by the 16-character lower case hex representation of the
+``master_key_descriptor`` that was set in the encryption policy. The
+key payload must conform to the following structure::
+
+ #define FS_MAX_KEY_SIZE 64
+
+ struct fscrypt_key {
+ u32 mode;
+ u8 raw[FS_MAX_KEY_SIZE];
+ u32 size;
+ };
+
+``mode`` is ignored; just set it to 0. The actual key is provided in
+``raw`` with ``size`` indicating its size in bytes. That is, the
+bytes ``raw[0..size-1]`` (inclusive) are the actual key.
+
+The key description prefix "fscrypt:" may alternatively be replaced
+with a filesystem-specific prefix such as "ext4:". However, the
+filesystem-specific prefixes are deprecated and should not be used in
+new programs.
+
+There are several different types of keyrings in which encryption keys
+may be placed, such as a session keyring, a user session keyring, or a
+user keyring. Each key must be placed in a keyring that is "attached"
+to all processes that might need to access files encrypted with it, in
+the sense that request_key() will find the key. Generally, if only
+processes belonging to a specific user need to access a given
+encrypted directory and no session keyring has been installed, then
+that directory's key should be placed in that user's user session
+keyring or user keyring. Otherwise, a session keyring should be
+installed if needed, and the key should be linked into that session
+keyring, or in a keyring linked into that session keyring.
+
+Note: introducing the complex visibility semantics of keyrings here
+was arguably a mistake --- especially given that by design, after any
+process successfully opens an encrypted file (thereby setting up the
+per-file key), possessing the keyring key is not actually required for
+any process to read/write the file until its in-memory inode is
+evicted. In the future there probably should be a way to provide keys
+directly to the filesystem instead, which would make the intended
+semantics clearer.
+
+Access semantics
+================
+
+With the key
+------------
+
+With the encryption key, encrypted regular files, directories, and
+symlinks behave very similarly to their unencrypted counterparts ---
+after all, the encryption is intended to be transparent. However,
+astute users may notice some differences in behavior:
+
+- Unencrypted files, or files encrypted with a different encryption
+ policy (i.e. different key, modes, or flags), cannot be renamed or
+ linked into an encrypted directory; see `Encryption policy
+ enforcement`_. Attempts to do so will fail with EPERM. However,
+ encrypted files can be renamed within an encrypted directory, or
+ into an unencrypted directory.
+
+- Direct I/O is not supported on encrypted files. Attempts to use
+ direct I/O on such files will fall back to buffered I/O.
+
+- The fallocate operations FALLOC_FL_COLLAPSE_RANGE,
+ FALLOC_FL_INSERT_RANGE, and FALLOC_FL_ZERO_RANGE are not supported
+ on encrypted files and will fail with EOPNOTSUPP.
+
+- Online defragmentation of encrypted files is not supported. The
+ EXT4_IOC_MOVE_EXT and F2FS_IOC_MOVE_RANGE ioctls will fail with
+ EOPNOTSUPP.
+
+- The ext4 filesystem does not support data journaling with encrypted
+ regular files. It will fall back to ordered data mode instead.
+
+- DAX (Direct Access) is not supported on encrypted files.
+
+- The st_size of an encrypted symlink will not necessarily give the
+ length of the symlink target as required by POSIX. It will actually
+ give the length of the ciphertext, which will be slightly longer
+ than the plaintext due to NUL-padding and an extra 2-byte overhead.
+
+- The maximum length of an encrypted symlink is 2 bytes shorter than
+ the maximum length of an unencrypted symlink. For example, on an
+ EXT4 filesystem with a 4K block size, unencrypted symlinks can be up
+ to 4095 bytes long, while encrypted symlinks can only be up to 4093
+ bytes long (both lengths excluding the terminating null).
+
+Note that mmap *is* supported. This is possible because the pagecache
+for an encrypted file contains the plaintext, not the ciphertext.
+
+Without the key
+---------------
+
+Some filesystem operations may be performed on encrypted regular
+files, directories, and symlinks even before their encryption key has
+been provided:
+
+- File metadata may be read, e.g. using stat().
+
+- Directories may be listed, in which case the filenames will be
+ listed in an encoded form derived from their ciphertext. The
+ current encoding algorithm is described in `Filename hashing and
+ encoding`_. The algorithm is subject to change, but it is
+ guaranteed that the presented filenames will be no longer than
+ NAME_MAX bytes, will not contain the ``/`` or ``\0`` characters, and
+ will uniquely identify directory entries.
+
+ The ``.`` and ``..`` directory entries are special. They are always
+ present and are not encrypted or encoded.
+
+- Files may be deleted. That is, nondirectory files may be deleted
+ with unlink() as usual, and empty directories may be deleted with
+ rmdir() as usual. Therefore, ``rm`` and ``rm -r`` will work as
+ expected.
+
+- Symlink targets may be read and followed, but they will be presented
+ in encrypted form, similar to filenames in directories. Hence, they
+ are unlikely to point to anywhere useful.
+
+Without the key, regular files cannot be opened or truncated.
+Attempts to do so will fail with ENOKEY. This implies that any
+regular file operations that require a file descriptor, such as
+read(), write(), mmap(), fallocate(), and ioctl(), are also forbidden.
+
+Also without the key, files of any type (including directories) cannot
+be created or linked into an encrypted directory, nor can a name in an
+encrypted directory be the source or target of a rename, nor can an
+O_TMPFILE temporary file be created in an encrypted directory. All
+such operations will fail with ENOKEY.
+
+It is not currently possible to backup and restore encrypted files
+without the encryption key. This would require special APIs which
+have not yet been implemented.
+
+Encryption policy enforcement
+=============================
+
+After an encryption policy has been set on a directory, all regular
+files, directories, and symbolic links created in that directory
+(recursively) will inherit that encryption policy. Special files ---
+that is, named pipes, device nodes, and UNIX domain sockets --- will
+not be encrypted.
+
+Except for those special files, it is forbidden to have unencrypted
+files, or files encrypted with a different encryption policy, in an
+encrypted directory tree. Attempts to link or rename such a file into
+an encrypted directory will fail with EPERM. This is also enforced
+during ->lookup() to provide limited protection against offline
+attacks that try to disable or downgrade encryption in known locations
+where applications may later write sensitive data. It is recommended
+that systems implementing a form of "verified boot" take advantage of
+this by validating all top-level encryption policies prior to access.
+
+Implementation details
+======================
+
+Encryption context
+------------------
+
+An encryption policy is represented on disk by a :c:type:`struct
+fscrypt_context`. It is up to individual filesystems to decide where
+to store it, but normally it would be stored in a hidden extended
+attribute. It should *not* be exposed by the xattr-related system
+calls such as getxattr() and setxattr() because of the special
+semantics of the encryption xattr. (In particular, there would be
+much confusion if an encryption policy were to be added to or removed
+from anything other than an empty directory.) The struct is defined
+as follows::
+
+ #define FS_KEY_DESCRIPTOR_SIZE 8
+ #define FS_KEY_DERIVATION_NONCE_SIZE 16
+
+ struct fscrypt_context {
+ u8 format;
+ u8 contents_encryption_mode;
+ u8 filenames_encryption_mode;
+ u8 flags;
+ u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+ u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
+ };
+
+Note that :c:type:`struct fscrypt_context` contains the same
+information as :c:type:`struct fscrypt_policy` (see `Setting an
+encryption policy`_), except that :c:type:`struct fscrypt_context`
+also contains a nonce. The nonce is randomly generated by the kernel
+and is used to derive the inode's encryption key as described in
+`Per-file keys`_.
+
+Data path changes
+-----------------
+
+For the read path (->readpage()) of regular files, filesystems can
+read the ciphertext into the page cache and decrypt it in-place. The
+page lock must be held until decryption has finished, to prevent the
+page from becoming visible to userspace prematurely.
+
+For the write path (->writepage()) of regular files, filesystems
+cannot encrypt data in-place in the page cache, since the cached
+plaintext must be preserved. Instead, filesystems must encrypt into a
+temporary buffer or "bounce page", then write out the temporary
+buffer. Some filesystems, such as UBIFS, already use temporary
+buffers regardless of encryption. Other filesystems, such as ext4 and
+F2FS, have to allocate bounce pages specially for encryption.
+
+Filename hashing and encoding
+-----------------------------
+
+Modern filesystems accelerate directory lookups by using indexed
+directories. An indexed directory is organized as a tree keyed by
+filename hashes. When a ->lookup() is requested, the filesystem
+normally hashes the filename being looked up so that it can quickly
+find the corresponding directory entry, if any.
+
+With encryption, lookups must be supported and efficient both with and
+without the encryption key. Clearly, it would not work to hash the
+plaintext filenames, since the plaintext filenames are unavailable
+without the key. (Hashing the plaintext filenames would also make it
+impossible for the filesystem's fsck tool to optimize encrypted
+directories.) Instead, filesystems hash the ciphertext filenames,
+i.e. the bytes actually stored on disk in the directory entries. When
+asked to do a ->lookup() with the key, the filesystem just encrypts
+the user-supplied name to get the ciphertext.
+
+Lookups without the key are more complicated. The raw ciphertext may
+contain the ``\0`` and ``/`` characters, which are illegal in
+filenames. Therefore, readdir() must base64-encode the ciphertext for
+presentation. For most filenames, this works fine; on ->lookup(), the
+filesystem just base64-decodes the user-supplied name to get back to
+the raw ciphertext.
+
+However, for very long filenames, base64 encoding would cause the
+filename length to exceed NAME_MAX. To prevent this, readdir()
+actually presents long filenames in an abbreviated form which encodes
+a strong "hash" of the ciphertext filename, along with the optional
+filesystem-specific hash(es) needed for directory lookups. This
+allows the filesystem to still, with a high degree of confidence, map
+the filename given in ->lookup() back to a particular directory entry
+that was previously listed by readdir(). See :c:type:`struct
+fscrypt_digested_name` in the source for more details.
+
+Note that the precise way that filenames are presented to userspace
+without the key is subject to change in the future. It is only meant
+as a way to temporarily present valid filenames so that commands like
+``rm -r`` work as expected on encrypted directories.
diff --git a/Makefile b/Makefile
index c033e03a5a33..774f3158681f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 4
-SUBLEVEL = 154
+SUBLEVEL = 159
EXTRAVERSION =
NAME = Blurry Fish Butt
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 63f06a2b1f7f..bbc7cb9faa01 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -526,24 +526,19 @@ SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, const char __user *, path,
SYSCALL_DEFINE1(osf_utsname, char __user *, name)
{
int error;
+ char tmp[5 * 32];
down_read(&uts_sem);
- error = -EFAULT;
- if (copy_to_user(name + 0, utsname()->sysname, 32))
- goto out;
- if (copy_to_user(name + 32, utsname()->nodename, 32))
- goto out;
- if (copy_to_user(name + 64, utsname()->release, 32))
- goto out;
- if (copy_to_user(name + 96, utsname()->version, 32))
- goto out;
- if (copy_to_user(name + 128, utsname()->machine, 32))
- goto out;
+ memcpy(tmp + 0 * 32, utsname()->sysname, 32);
+ memcpy(tmp + 1 * 32, utsname()->nodename, 32);
+ memcpy(tmp + 2 * 32, utsname()->release, 32);
+ memcpy(tmp + 3 * 32, utsname()->version, 32);
+ memcpy(tmp + 4 * 32, utsname()->machine, 32);
+ up_read(&uts_sem);
- error = 0;
- out:
- up_read(&uts_sem);
- return error;
+ if (copy_to_user(name, tmp, sizeof(tmp)))
+ return -EFAULT;
+ return 0;
}
SYSCALL_DEFINE0(getpagesize)
@@ -561,24 +556,22 @@ SYSCALL_DEFINE0(getdtablesize)
*/
SYSCALL_DEFINE2(osf_getdomainname, char __user *, name, int, namelen)
{
- unsigned len;
- int i;
+ int len, err = 0;
+ char *kname;
+ char tmp[32];
- if (!access_ok(VERIFY_WRITE, name, namelen))
- return -EFAULT;
-
- len = namelen;
- if (len > 32)
- len = 32;
+ if (namelen < 0 || namelen > 32)
+ namelen = 32;
down_read(&uts_sem);
- for (i = 0; i < len; ++i) {
- __put_user(utsname()->domainname[i], name + i);
- if (utsname()->domainname[i] == '\0')
- break;
- }
+ kname = utsname()->domainname;
+ len = strnlen(kname, namelen);
+ len = min(len + 1, namelen);
+ memcpy(tmp, kname, len);
up_read(&uts_sem);
+ if (copy_to_user(name, tmp, len))
+ return -EFAULT;
return 0;
}
@@ -741,13 +734,14 @@ SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count)
};
unsigned long offset;
const char *res;
- long len, err = -EINVAL;
+ long len;
+ char tmp[__NEW_UTS_LEN + 1];
offset = command-1;
if (offset >= ARRAY_SIZE(sysinfo_table)) {
/* Digital UNIX has a few unpublished interfaces here */
printk("sysinfo(%d)", command);
- goto out;
+ return -EINVAL;
}
down_read(&uts_sem);
@@ -755,13 +749,11 @@ SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count)
len = strlen(res)+1;
if ((unsigned long)len > (unsigned long)count)
len = count;
- if (copy_to_user(buf, res, len))
- err = -EFAULT;
- else
- err = 0;
+ memcpy(tmp, res, len);
up_read(&uts_sem);
- out:
- return err;
+ if (copy_to_user(buf, tmp, len))
+ return -EFAULT;
+ return 0;
}
SYSCALL_DEFINE5(osf_getsysinfo, unsigned long, op, void __user *, buffer,
diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
index f1ac9818b751..dbee1934dfc6 100644
--- a/arch/arc/configs/axs101_defconfig
+++ b/arch/arc/configs/axs101_defconfig
@@ -1,6 +1,5 @@
CONFIG_CROSS_COMPILE="arc-linux-"
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
-# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
# CONFIG_CROSS_MEMORY_ATTACH is not set
diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig
index 323486d6ee83..561eac854cc3 100644
--- a/arch/arc/configs/axs103_defconfig
+++ b/arch/arc/configs/axs103_defconfig
@@ -1,6 +1,5 @@
CONFIG_CROSS_COMPILE="arc-linux-"
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
-# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
# CONFIG_CROSS_MEMORY_ATTACH is not set
diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig
index 66191cd0447e..aa4f261b6508 100644
--- a/arch/arc/configs/axs103_smp_defconfig
+++ b/arch/arc/configs/axs103_smp_defconfig
@@ -1,6 +1,5 @@
CONFIG_CROSS_COMPILE="arc-linux-"
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
-# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
# CONFIG_CROSS_MEMORY_ATTACH is not set
diff --git a/arch/arm/boot/dts/tegra30-cardhu.dtsi b/arch/arm/boot/dts/tegra30-cardhu.dtsi
index bb1ca158273c..1922e7a93e40 100644
--- a/arch/arm/boot/dts/tegra30-cardhu.dtsi
+++ b/arch/arm/boot/dts/tegra30-cardhu.dtsi
@@ -201,6 +201,7 @@
#address-cells = <1>;
#size-cells = <0>;
reg = <0x70>;
+ reset-gpio = <&gpio TEGRA_GPIO(BB, 0) GPIO_ACTIVE_LOW>;
};
};
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index b3490c1c49d1..4187f69f6630 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -261,7 +261,6 @@ CONFIG_USB_STORAGE=y
CONFIG_USB_CHIPIDEA=y
CONFIG_USB_CHIPIDEA_UDC=y
CONFIG_USB_CHIPIDEA_HOST=y
-CONFIG_USB_CHIPIDEA_ULPI=y
CONFIG_USB_SERIAL=m
CONFIG_USB_SERIAL_GENERIC=y
CONFIG_USB_SERIAL_FTDI_SIO=m
@@ -288,7 +287,6 @@ CONFIG_USB_G_NCM=m
CONFIG_USB_GADGETFS=m
CONFIG_USB_MASS_STORAGE=m
CONFIG_USB_G_SERIAL=m
-CONFIG_USB_ULPI_BUS=y
CONFIG_MMC=y
CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y
diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index 7da5503c0591..e08d15184056 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -117,6 +117,7 @@ static inline u32 gic_read_iar(void)
u32 irqstat;
asm volatile("mrc " __stringify(ICC_IAR1) : "=r" (irqstat));
+ dsb(sy);
return irqstat;
}
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 30a7228eaceb..9440b320a8a3 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -12,6 +12,7 @@
#include <asm/unistd.h>
#include <asm/ftrace.h>
#include <asm/unwind.h>
+#include <asm/memory.h>
#ifdef CONFIG_NEED_RET_TO_USER
#include <mach/entry-macro.S>
@@ -35,6 +36,9 @@ ret_fast_syscall:
UNWIND(.fnstart )
UNWIND(.cantunwind )
disable_irq_notrace @ disable interrupts
+ ldr r2, [tsk, #TI_ADDR_LIMIT]
+ cmp r2, #TASK_SIZE
+ blne addr_limit_check_failed
ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
bne fast_work_pending
@@ -61,6 +65,9 @@ ret_fast_syscall:
UNWIND(.cantunwind )
str r0, [sp, #S_R0 + S_OFF]! @ save returned r0
disable_irq_notrace @ disable interrupts
+ ldr r2, [tsk, #TI_ADDR_LIMIT]
+ cmp r2, #TASK_SIZE
+ blne addr_limit_check_failed
ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
beq no_work_pending
@@ -93,6 +100,9 @@ ENTRY(ret_to_user)
ret_slow_syscall:
disable_irq_notrace @ disable interrupts
ENTRY(ret_to_user_from_irq)
+ ldr r2, [tsk, #TI_ADDR_LIMIT]
+ cmp r2, #TASK_SIZE
+ blne addr_limit_check_failed
ldr r1, [tsk, #TI_FLAGS]
tst r1, #_TIF_WORK_MASK
bne slow_work_pending
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 7b8f2141427b..304e68408f9c 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -14,6 +14,7 @@
#include <linux/uaccess.h>
#include <linux/tracehook.h>
#include <linux/uprobes.h>
+#include <linux/syscalls.h>
#include <asm/elf.h>
#include <asm/cacheflush.h>
@@ -631,3 +632,9 @@ struct page *get_signal_page(void)
return page;
}
+
+/* Defer to generic check */
+asmlinkage void addr_limit_check_failed(void)
+{
+ addr_limit_user_check();
+}
diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c
index c169cc3049aa..e8adb428dddb 100644
--- a/arch/arm/mach-exynos/suspend.c
+++ b/arch/arm/mach-exynos/suspend.c
@@ -260,6 +260,7 @@ static int __init exynos_pmu_irq_init(struct device_node *node,
NULL);
if (!domain) {
iounmap(pmu_base_addr);
+ pmu_base_addr = NULL;
return -ENOMEM;
}
diff --git a/arch/arm/mach-hisi/hotplug.c b/arch/arm/mach-hisi/hotplug.c
index a129aae72602..909bb2493781 100644
--- a/arch/arm/mach-hisi/hotplug.c
+++ b/arch/arm/mach-hisi/hotplug.c
@@ -148,13 +148,20 @@ static int hi3xxx_hotplug_init(void)
struct device_node *node;
node = of_find_compatible_node(NULL, NULL, "hisilicon,sysctrl");
- if (node) {
- ctrl_base = of_iomap(node, 0);
- id = HI3620_CTRL;
- return 0;
+ if (!node) {
+ id = ERROR_CTRL;
+ return -ENOENT;
}
- id = ERROR_CTRL;
- return -ENOENT;
+
+ ctrl_base = of_iomap(node, 0);
+ of_node_put(node);
+ if (!ctrl_base) {
+ id = ERROR_CTRL;
+ return -ENOMEM;
+ }
+
+ id = HI3620_CTRL;
+ return 0;
}
void hi3xxx_set_cpu(int cpu, bool enable)
@@ -173,11 +180,15 @@ static bool hix5hd2_hotplug_init(void)
struct device_node *np;
np = of_find_compatible_node(NULL, NULL, "hisilicon,cpuctrl");
- if (np) {
- ctrl_base = of_iomap(np, 0);
- return true;
- }
- return false;
+ if (!np)
+ return false;
+
+ ctrl_base = of_iomap(np, 0);
+ of_node_put(np);
+ if (!ctrl_base)
+ return false;
+
+ return true;
}
void hix5hd2_set_cpu(int cpu, bool enable)
@@ -219,10 +230,10 @@ void hip01_set_cpu(int cpu, bool enable)
if (!ctrl_base) {
np = of_find_compatible_node(NULL, NULL, "hisilicon,hip01-sysctrl");
- if (np)
- ctrl_base = of_iomap(np, 0);
- else
- BUG();
+ BUG_ON(!np);
+ ctrl_base = of_iomap(np, 0);
+ of_node_put(np);
+ BUG_ON(!ctrl_base);
}
if (enable) {
diff --git a/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi b/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi
index 6b8abbe68746..3011c88bd2f3 100644
--- a/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi
+++ b/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi
@@ -105,7 +105,7 @@
led@6 {
label = "apq8016-sbc:blue:bt";
gpios = <&pm8916_mpps 3 GPIO_ACTIVE_HIGH>;
- linux,default-trigger = "bt";
+ linux,default-trigger = "bluetooth-power";
default-state = "off";
};
};
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 67dd228c3f17..8c22d1618260 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -120,6 +120,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_NEED_RESCHED 1
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
+#define TIF_FSCHECK 4 /* Check FS is USER_DS on return */
#define TIF_NOHZ 7
#define TIF_SYSCALL_TRACE 8
#define TIF_SYSCALL_AUDIT 9
@@ -140,10 +141,12 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
+#define _TIF_FSCHECK (1 << TIF_FSCHECK)
#define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
- _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
+ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
+ _TIF_FSCHECK)
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index d39d8bde42d7..d0919bcb1953 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -73,6 +73,9 @@ static inline void set_fs(mm_segment_t fs)
{
current_thread_info()->addr_limit = fs;
+ /* On user-mode return, check fs is correct */
+ set_thread_flag(TIF_FSCHECK);
+
/*
* Enable/disable UAO so that copy_to_user() etc can access
* kernel memory with the unprivileged instructions.
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index a8eafdbc7cb8..0bed9a899850 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -25,6 +25,7 @@
#include <linux/uaccess.h>
#include <linux/tracehook.h>
#include <linux/ratelimit.h>
+#include <linux/syscalls.h>
#include <asm/debug-monitors.h>
#include <asm/elf.h>
@@ -402,6 +403,9 @@ static void do_signal(struct pt_regs *regs)
asmlinkage void do_notify_resume(struct pt_regs *regs,
unsigned int thread_flags)
{
+ /* Check valid user FS if needed */
+ addr_limit_user_check();
+
if (thread_flags & _TIF_SIGPENDING)
do_signal(regs);
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c
index 8755d618e116..961c393c0f55 100644
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -44,6 +44,7 @@ static char ath79_sys_type[ATH79_SYS_TYPE_LEN];
static void ath79_restart(char *command)
{
+ local_irq_disable();
ath79_device_reset_set(AR71XX_RESET_FULL_CHIP);
for (;;)
if (cpu_wait)
diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
index d113c8ded6e2..6df3a4ea77fc 100644
--- a/arch/mips/cavium-octeon/octeon-platform.c
+++ b/arch/mips/cavium-octeon/octeon-platform.c
@@ -349,6 +349,7 @@ static int __init octeon_ehci_device_init(void)
return 0;
pd = of_find_device_by_node(ehci_node);
+ of_node_put(ehci_node);
if (!pd)
return 0;
@@ -411,6 +412,7 @@ static int __init octeon_ohci_device_init(void)
return 0;
pd = of_find_device_by_node(ohci_node);
+ of_node_put(ohci_node);
if (!pd)
return 0;
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index 75fa296836fc..ab1df19b0957 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -141,14 +141,14 @@ static inline void * phys_to_virt(unsigned long address)
/*
* ISA I/O bus memory addresses are 1:1 with the physical address.
*/
-static inline unsigned long isa_virt_to_bus(volatile void * address)
+static inline unsigned long isa_virt_to_bus(volatile void *address)
{
- return (unsigned long)address - PAGE_OFFSET;
+ return virt_to_phys(address);
}
-static inline void * isa_bus_to_virt(unsigned long address)
+static inline void *isa_bus_to_virt(unsigned long address)
{
- return (void *)(address + PAGE_OFFSET);
+ return phys_to_virt(address);
}
#define isa_page_to_bus page_to_phys
diff --git a/arch/mips/include/asm/mach-ath79/ath79.h b/arch/mips/include/asm/mach-ath79/ath79.h
index 4eee221b0cf0..d2be8e4f7a35 100644
--- a/arch/mips/include/asm/mach-ath79/ath79.h
+++ b/arch/mips/include/asm/mach-ath79/ath79.h
@@ -133,6 +133,7 @@ static inline u32 ath79_pll_rr(unsigned reg)
static inline void ath79_reset_wr(unsigned reg, u32 val)
{
__raw_writel(val, ath79_reset_base + reg);
+ (void) __raw_readl(ath79_reset_base + reg); /* flush */
}
static inline u32 ath79_reset_rr(unsigned reg)
diff --git a/arch/mips/jz4740/Platform b/arch/mips/jz4740/Platform
index 28448d358c10..a2a5a85ea1f9 100644
--- a/arch/mips/jz4740/Platform
+++ b/arch/mips/jz4740/Platform
@@ -1,4 +1,4 @@
platform-$(CONFIG_MACH_INGENIC) += jz4740/
cflags-$(CONFIG_MACH_INGENIC) += -I$(srctree)/arch/mips/include/asm/mach-jz4740
load-$(CONFIG_MACH_INGENIC) += 0xffffffff80010000
-zload-$(CONFIG_MACH_INGENIC) += 0xffffffff80600000
+zload-$(CONFIG_MACH_INGENIC) += 0xffffffff81000000
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 9684a0d22d97..dd92540fbc75 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -117,7 +117,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
struct thread_info *ti = task_thread_info(p);
struct pt_regs *childregs, *regs = current_pt_regs();
unsigned long childksp;
- p->set_child_tid = p->clear_child_tid = NULL;
childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32;
diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 3fcc833b316d..cf2882fd0c17 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -14,12 +14,14 @@
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/irqchip/mips-gic.h>
+#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/timekeeper_internal.h>
#include <asm/abi.h>
+#include <asm/page.h>
#include <asm/vdso.h>
/* Kernel-provided data used by the VDSO. */
@@ -128,12 +130,30 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
vvar_size = gic_size + PAGE_SIZE;
size = vvar_size + image->size;
+ /*
+ * Find a region that's large enough for us to perform the
+ * colour-matching alignment below.
+ */
+ if (cpu_has_dc_aliases)
+ size += shm_align_mask + 1;
+
base = get_unmapped_area(NULL, 0, size, 0, 0);
if (IS_ERR_VALUE(base)) {
ret = base;
goto out;
}
+ /*
+ * If we suffer from dcache aliasing, ensure that the VDSO data page
+ * mapping is coloured the same as the kernel's mapping of that memory.
+ * This ensures that when the kernel updates the VDSO data userland
+ * will observe it without requiring cache invalidations.
+ */
+ if (cpu_has_dc_aliases) {
+ base = __ALIGN_MASK(base, shm_align_mask);
+ base += ((unsigned long)&vdso_data - gic_size) & shm_align_mask;
+ }
+
data_addr = base + gic_size;
vdso_addr = data_addr + PAGE_SIZE;
diff --git a/arch/mips/loongson64/common/cs5536/cs5536_ohci.c b/arch/mips/loongson64/common/cs5536/cs5536_ohci.c
index f7c905e50dc4..92dc6bafc127 100644
--- a/arch/mips/loongson64/common/cs5536/cs5536_ohci.c
+++ b/arch/mips/loongson64/common/cs5536/cs5536_ohci.c
@@ -138,7 +138,7 @@ u32 pci_ohci_read_reg(int reg)
break;
case PCI_OHCI_INT_REG:
_rdmsr(DIVIL_MSR_REG(PIC_YSEL_LOW), &hi, &lo);
- if ((lo & 0x00000f00) == CS5536_USB_INTR)
+ if (((lo >> PIC_YSEL_LOW_USB_SHIFT) & 0xf) == CS5536_USB_INTR)
conf_data = 1;
break;
default:
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index d66a61efb143..52cb3e09d172 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -712,7 +712,8 @@ static void r4k_flush_icache_range(unsigned long start, unsigned long end)
static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
{
/* Catch bad driver code */
- BUG_ON(size == 0);
+ if (WARN_ON(size == 0))
+ return;
preempt_disable();
if (cpu_has_inclusive_pcaches) {
@@ -745,7 +746,8 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
{
/* Catch bad driver code */
- BUG_ON(size == 0);
+ if (WARN_ON(size == 0))
+ return;
preempt_disable();
if (cpu_has_inclusive_pcaches) {
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 7095dfe7666b..962372143fda 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -152,8 +152,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
top_of_kernel_stack = sp;
- p->set_child_tid = p->clear_child_tid = NULL;
-
/* Locate userspace context on stack... */
sp -= STACK_FRAME_OVERHEAD; /* redzone */
sp -= sizeof(struct pt_regs);
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 493e72f64b35..5768ec3c1781 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -194,9 +194,6 @@ struct fadump_crash_info_header {
struct cpumask cpu_online_mask;
};
-/* Crash memory ranges */
-#define INIT_CRASHMEM_RANGES (INIT_MEMBLOCK_REGIONS + 2)
-
struct fad_crash_memory_ranges {
unsigned long long base;
unsigned long long size;
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 791d4c3329c3..c3c835290131 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -35,6 +35,7 @@
#include <linux/crash_dump.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
+#include <linux/slab.h>
#include <asm/page.h>
#include <asm/prom.h>
@@ -48,8 +49,10 @@ static struct fadump_mem_struct fdm;
static const struct fadump_mem_struct *fdm_active;
static DEFINE_MUTEX(fadump_mutex);
-struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES];
+struct fad_crash_memory_ranges *crash_memory_ranges;
+int crash_memory_ranges_size;
int crash_mem_ranges;
+int max_crash_mem_ranges;
/* Scan the Firmware Assisted dump configuration details. */
int __init early_init_dt_scan_fw_dump(unsigned long node,
@@ -726,38 +729,88 @@ static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
return 0;
}
-static inline void fadump_add_crash_memory(unsigned long long base,
- unsigned long long end)
+static void free_crash_memory_ranges(void)
+{
+ kfree(crash_memory_ranges);
+ crash_memory_ranges = NULL;
+ crash_memory_ranges_size = 0;
+ max_crash_mem_ranges = 0;
+}
+
+/*
+ * Allocate or reallocate crash memory ranges array in incremental units
+ * of PAGE_SIZE.
+ */
+static int allocate_crash_memory_ranges(void)
+{
+ struct fad_crash_memory_ranges *new_array;
+ u64 new_size;
+
+ new_size = crash_memory_ranges_size + PAGE_SIZE;
+ pr_debug("Allocating %llu bytes of memory for crash memory ranges\n",
+ new_size);
+
+ new_array = krealloc(crash_memory_ranges, new_size, GFP_KERNEL);
+ if (new_array == NULL) {
+ pr_err("Insufficient memory for setting up crash memory ranges\n");
+ free_crash_memory_ranges();
+ return -ENOMEM;
+ }
+
+ crash_memory_ranges = new_array;
+ crash_memory_ranges_size = new_size;
+ max_crash_mem_ranges = (new_size /
+ sizeof(struct fad_crash_memory_ranges));
+ return 0;
+}
+
+static inline int fadump_add_crash_memory(unsigned long long base,
+ unsigned long long end)
{
if (base == end)
- return;
+ return 0;
+
+ if (crash_mem_ranges == max_crash_mem_ranges) {
+ int ret;
+
+ ret = allocate_crash_memory_ranges();
+ if (ret)
+ return ret;
+ }
pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
crash_mem_ranges, base, end - 1, (end - base));
crash_memory_ranges[crash_mem_ranges].base = base;
crash_memory_ranges[crash_mem_ranges].size = end - base;
crash_mem_ranges++;
+ return 0;
}
-static void fadump_exclude_reserved_area(unsigned long long start,
+static int fadump_exclude_reserved_area(unsigned long long start,
unsigned long long end)
{
unsigned long long ra_start, ra_end;
+ int ret = 0;
ra_start = fw_dump.reserve_dump_area_start;
ra_end = ra_start + fw_dump.reserve_dump_area_size;
if ((ra_start < end) && (ra_end > start)) {
if ((start < ra_start) && (end > ra_end)) {
- fadump_add_crash_memory(start, ra_start);
- fadump_add_crash_memory(ra_end, end);
+ ret = fadump_add_crash_memory(start, ra_start);
+ if (ret)
+ return ret;
+
+ ret = fadump_add_crash_memory(ra_end, end);
} else if (start < ra_start) {
- fadump_add_crash_memory(start, ra_start);
+ ret = fadump_add_crash_memory(start, ra_start);
} else if (ra_end < end) {
- fadump_add_crash_memory(ra_end, end);
+ ret = fadump_add_crash_memory(ra_end, end);
}
} else
- fadump_add_crash_memory(start, end);
+ ret = fadump_add_crash_memory(start, end);
+
+ return ret;
}
static int fadump_init_elfcore_header(char *bufp)
@@ -793,10 +846,11 @@ static int fadump_init_elfcore_header(char *bufp)
* Traverse through memblock structure and setup crash memory ranges. These
* ranges will be used create PT_LOAD program headers in elfcore header.
*/
-static void fadump_setup_crash_memory_ranges(void)
+static int fadump_setup_crash_memory_ranges(void)
{
struct memblock_region *reg;
unsigned long long start, end;
+ int ret;
pr_debug("Setup crash memory ranges.\n");
crash_mem_ranges = 0;
@@ -807,7 +861,9 @@ static void fadump_setup_crash_memory_ranges(void)
* specified during fadump registration. We need to create a separate
* program header for this chunk with the correct offset.
*/
- fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
+ ret = fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
+ if (ret)
+ return ret;
for_each_memblock(memory, reg) {
start = (unsigned long long)reg->base;
@@ -816,8 +872,12 @@ static void fadump_setup_crash_memory_ranges(void)
start = fw_dump.boot_memory_size;
/* add this range excluding the reserved dump area. */
- fadump_exclude_reserved_area(start, end);
+ ret = fadump_exclude_reserved_area(start, end);
+ if (ret)
+ return ret;
}
+
+ return 0;
}
/*
@@ -941,6 +1001,7 @@ static void register_fadump(void)
{
unsigned long addr;
void *vaddr;
+ int ret;
/*
* If no memory is reserved then we can not register for firmware-
@@ -949,7 +1010,9 @@ static void register_fadump(void)
if (!fw_dump.reserve_dump_area_size)
return;
- fadump_setup_crash_memory_ranges();
+ ret = fadump_setup_crash_memory_ranges();
+ if (ret)
+ return ret;
addr = be64_to_cpu(fdm.rmr_region.destination_address) + be64_to_cpu(fdm.rmr_region.source_len);
/* Initialize fadump crash info header. */
@@ -1028,6 +1091,7 @@ void fadump_cleanup(void)
} else if (fw_dump.dump_registered) {
/* Un-register Firmware-assisted dump if it was registered. */
fadump_unregister_dump(&fdm);
+ free_crash_memory_ranges();
}
}
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index e48826aa314c..b40606051efe 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -371,7 +371,7 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
/* Closed or other error drop */
if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
rc != OPAL_BUSY_EVENT) {
- written = total_len;
+ written += total_len;
break;
}
if (rc == OPAL_SUCCESS) {
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 3b6647e574b6..9795e52bab3d 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -300,7 +300,7 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
}
savep = __va(regs->gpr[3]);
- regs->gpr[3] = savep[0]; /* restore original r3 */
+ regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
/* If it isn't an extended log we can use the per cpu 64bit buffer */
h = (struct rtas_error_log *)&savep[1];
@@ -311,7 +311,7 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
int len, error_log_length;
error_log_length = 8 + rtas_error_extended_log_length(h);
- len = max_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
+ len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
memset(global_mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
memcpy(global_mce_data_buf, h, len);
errhdr = (struct rtas_error_log *)global_mce_data_buf;
diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c
index 3f165d972a0e..994fe73c2ed0 100644
--- a/arch/powerpc/sysdev/mpic_msgr.c
+++ b/arch/powerpc/sysdev/mpic_msgr.c
@@ -196,7 +196,7 @@ static int mpic_msgr_probe(struct platform_device *dev)
/* IO map the message register block. */
of_address_to_resource(np, 0, &rsrc);
- msgr_block_addr = ioremap(rsrc.start, rsrc.end - rsrc.start);
+ msgr_block_addr = ioremap(rsrc.start, resource_size(&rsrc));
if (!msgr_block_addr) {
dev_err(&dev->dev, "Failed to iomap MPIC message registers");
return -EFAULT;
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index 16c5998b9792..4254c477e8e0 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -26,7 +26,7 @@
*/
ENTRY(memset)
ltgr %r4,%r4
- bzr %r14
+ jz .Lmemset_exit
ltgr %r3,%r3
jnz .Lmemset_fill
aghi %r4,-1
@@ -41,12 +41,13 @@ ENTRY(memset)
.Lmemset_clear_rest:
larl %r3,.Lmemset_xc
ex %r4,0(%r3)
+.Lmemset_exit:
BR_EX %r14
.Lmemset_fill:
stc %r3,0(%r2)
cghi %r4,1
lgr %r1,%r2
- ber %r14
+ je .Lmemset_fill_exit
aghi %r4,-2
srlg %r3,%r4,8
ltgr %r3,%r3
@@ -58,6 +59,7 @@ ENTRY(memset)
.Lmemset_fill_rest:
larl %r3,.Lmemset_mvc
ex %r4,0(%r3)
+.Lmemset_fill_exit:
BR_EX %r14
.Lmemset_xc:
xc 0(1,%r1),0(%r1)
@@ -71,7 +73,7 @@ ENTRY(memset)
*/
ENTRY(memcpy)
ltgr %r4,%r4
- bzr %r14
+ jz .Lmemcpy_exit
aghi %r4,-1
srlg %r5,%r4,8
ltgr %r5,%r5
@@ -80,6 +82,7 @@ ENTRY(memcpy)
.Lmemcpy_rest:
larl %r5,.Lmemcpy_mvc
ex %r4,0(%r5)
+.Lmemcpy_exit:
BR_EX %r14
.Lmemcpy_loop:
mvc 0(256,%r1),0(%r3)
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index 646988d4c1a3..740f43b9b541 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -201,23 +201,27 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig,
asmlinkage long sys_getdomainname(char __user *name, int len)
{
- int nlen, err;
-
+ int nlen, err;
+ char tmp[__NEW_UTS_LEN + 1];
+
if (len < 0)
return -EINVAL;
- down_read(&uts_sem);
-
+ down_read(&uts_sem);
+
nlen = strlen(utsname()->domainname) + 1;
err = -EINVAL;
if (nlen > len)
- goto out;
+ goto out_unlock;
+ memcpy(tmp, utsname()->domainname, nlen);
- err = -EFAULT;
- if (!copy_to_user(name, utsname()->domainname, nlen))
- err = 0;
+ up_read(&uts_sem);
-out:
+ if (copy_to_user(name, tmp, nlen))
+ return -EFAULT;
+ return 0;
+
+out_unlock:
up_read(&uts_sem);
return err;
}
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 98a5cf313d39..7301fa2091bc 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -524,23 +524,27 @@ extern void check_pending(int signum);
SYSCALL_DEFINE2(getdomainname, char __user *, name, int, len)
{
- int nlen, err;
+ int nlen, err;
+ char tmp[__NEW_UTS_LEN + 1];
if (len < 0)
return -EINVAL;
- down_read(&uts_sem);
-
+ down_read(&uts_sem);
+
nlen = strlen(utsname()->domainname) + 1;
err = -EINVAL;
if (nlen > len)
- goto out;
+ goto out_unlock;
+ memcpy(tmp, utsname()->domainname, nlen);
+
+ up_read(&uts_sem);
- err = -EFAULT;
- if (!copy_to_user(name, utsname()->domainname, nlen))
- err = 0;
+ if (copy_to_user(name, tmp, nlen))
+ return -EFAULT;
+ return 0;
-out:
+out_unlock:
up_read(&uts_sem);
return err;
}
diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig
index 71026930c04c..38dd54633384 100644
--- a/arch/x86/configs/x86_64_cuttlefish_defconfig
+++ b/arch/x86/configs/x86_64_cuttlefish_defconfig
@@ -48,6 +48,7 @@ CONFIG_X86_CPUID=y
CONFIG_KSM=y
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_ZSMALLOC=y
# CONFIG_MTRR is not set
CONFIG_HZ_100=y
CONFIG_KEXEC=y
@@ -199,6 +200,7 @@ CONFIG_DEBUG_DEVRES=y
CONFIG_OF=y
CONFIG_OF_UNITTEST=y
# CONFIG_PNP_DEBUG_MESSAGES is not set
+CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=8192
@@ -214,13 +216,17 @@ CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_SPI_ATTRS=y
CONFIG_SCSI_VIRTIO=y
CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
CONFIG_BLK_DEV_DM=y
CONFIG_DM_CRYPT=y
CONFIG_DM_MIRROR=y
CONFIG_DM_ZERO=y
CONFIG_DM_UEVENT=y
CONFIG_DM_VERITY=y
+CONFIG_DM_VERITY_HASH_PREFETCH_MIN_SIZE=1
CONFIG_DM_VERITY_FEC=y
+CONFIG_DM_ANDROID_VERITY=y
CONFIG_NETDEVICES=y
CONFIG_NETCONSOLE=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -445,5 +451,11 @@ CONFIG_HARDENED_USERCOPY=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
-CONFIG_CRYPTO_ECHAINIV=y
CONFIG_CRYPTO_SHA512=y
+CONFIG_CRYPTO_LZ4=y
+CONFIG_CRYPTO_ZSTD=y
+CONFIG_ASYMMETRIC_KEY_TYPE=y
+CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y
+CONFIG_X509_CERTIFICATE_PARSER=y
+CONFIG_SYSTEM_TRUSTED_KEYRING=y
+CONFIG_SYSTEM_TRUSTED_KEYS="verity_dev_keys.x509"
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 071582a3b5c0..a9e501303e15 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -22,6 +22,7 @@
#include <linux/user-return-notifier.h>
#include <linux/nospec.h>
#include <linux/uprobes.h>
+#include <linux/syscalls.h>
#include <asm/desc.h>
#include <asm/traps.h>
@@ -273,6 +274,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
struct thread_info *ti = pt_regs_to_thread_info(regs);
u32 cached_flags;
+ addr_limit_user_check();
+
if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
local_irq_disable();
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 9016b4b70375..6c5020163db0 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -351,4 +351,10 @@ extern void arch_phys_wc_del(int handle);
#define arch_phys_wc_add arch_phys_wc_add
#endif
+#ifdef CONFIG_X86_PAT
+extern int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size);
+extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size);
+#define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc
+#endif
+
#endif /* _ASM_X86_IO_H */
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 5c686382d84b..095dbc25122a 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_PGTABLE_3LEVEL_H
#define _ASM_X86_PGTABLE_3LEVEL_H
+#include <asm/atomic64_32.h>
+
/*
* Intel Physical Address Extension (PAE) Mode - three-level page
* tables on PPro+ CPUs.
@@ -142,10 +144,7 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
{
pte_t res;
- /* xchg acts as a barrier before the setting of the high bits */
- res.pte_low = xchg(&ptep->pte_low, 0);
- res.pte_high = ptep->pte_high;
- ptep->pte_high = 0;
+ res.pte = (pteval_t)atomic64_xchg((atomic64_t *)ptep, 0);
return res;
}
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 68a55273ce0f..a67d7f210b7c 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -385,7 +385,7 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
{
- phys_addr_t pfn = page_nr << PAGE_SHIFT;
+ phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
pfn ^= protnone_mask(pgprot_val(pgprot));
pfn &= PHYSICAL_PUD_PAGE_MASK;
return __pud(pfn | massage_pgprot(pgprot));
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 337c52192278..440a948c4feb 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -104,6 +104,8 @@ struct cpuinfo_x86 {
__u8 x86_phys_bits;
/* CPUID returned core id bits: */
__u8 x86_coreid_bits;
+
+ __u8 x86_cache_bits;
/* Max extended CPUID function supported: */
__u32 extended_cpuid_level;
/* Maximum supported CPUID level, -1=no CPUID: */
@@ -174,7 +176,7 @@ extern void cpu_detect(struct cpuinfo_x86 *c);
static inline unsigned long long l1tf_pfn_limit(void)
{
- return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT);
+ return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT);
}
extern void early_cpu_init(void);
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 128a7105cbe2..561be63b61ab 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -111,6 +111,7 @@ struct thread_info {
#define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */
#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */
#define TIF_X32 30 /* 32-bit native x86-64 binary */
+#define TIF_FSCHECK 31 /* Check FS is USER_DS on return */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
@@ -135,6 +136,7 @@ struct thread_info {
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_ADDR32 (1 << TIF_ADDR32)
#define _TIF_X32 (1 << TIF_X32)
+#define _TIF_FSCHECK (1 << TIF_FSCHECK)
/* work to do in syscall_trace_enter() */
#define _TIF_WORK_SYSCALL_ENTRY \
@@ -145,7 +147,7 @@ struct thread_info {
/* work to do on any return to user space */
#define _TIF_ALLWORK_MASK \
((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \
- _TIF_NOHZ)
+ _TIF_NOHZ | _TIF_FSCHECK)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 6a07c05956a6..8857f6f4daa9 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -30,7 +30,12 @@
#define get_ds() (KERNEL_DS)
#define get_fs() (current_thread_info()->addr_limit)
-#define set_fs(x) (current_thread_info()->addr_limit = (x))
+static inline void set_fs(mm_segment_t fs)
+{
+ current_thread_info()->addr_limit = fs;
+ /* On user-mode return, check fs is correct */
+ set_thread_flag(TIF_FSCHECK);
+}
#define segment_eq(a, b) ((a).seg == (b).seg)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index b9e6b60df148..621bc6561189 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -634,6 +634,46 @@ void x86_spec_ctrl_setup_ap(void)
#undef pr_fmt
#define pr_fmt(fmt) "L1TF: " fmt
+
+/*
+ * These CPUs all support 44bits physical address space internally in the
+ * cache but CPUID can report a smaller number of physical address bits.
+ *
+ * The L1TF mitigation uses the top most address bit for the inversion of
+ * non present PTEs. When the installed memory reaches into the top most
+ * address bit due to memory holes, which has been observed on machines
+ * which report 36bits physical address bits and have 32G RAM installed,
+ * then the mitigation range check in l1tf_select_mitigation() triggers.
+ * This is a false positive because the mitigation is still possible due to
+ * the fact that the cache uses 44bit internally. Use the cache bits
+ * instead of the reported physical bits and adjust them on the affected
+ * machines to 44bit if the reported bits are less than 44.
+ */
+static void override_cache_bits(struct cpuinfo_x86 *c)
+{
+ if (c->x86 != 6)
+ return;
+
+ switch (c->x86_model) {
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_SANDYBRIDGE:
+ case INTEL_FAM6_IVYBRIDGE:
+ case INTEL_FAM6_HASWELL_CORE:
+ case INTEL_FAM6_HASWELL_ULT:
+ case INTEL_FAM6_HASWELL_GT3E:
+ case INTEL_FAM6_BROADWELL_CORE:
+ case INTEL_FAM6_BROADWELL_GT3E:
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
+ if (c->x86_cache_bits < 44)
+ c->x86_cache_bits = 44;
+ break;
+ }
+}
+
static void __init l1tf_select_mitigation(void)
{
u64 half_pa;
@@ -641,16 +681,13 @@ static void __init l1tf_select_mitigation(void)
if (!boot_cpu_has_bug(X86_BUG_L1TF))
return;
+ override_cache_bits(&boot_cpu_data);
+
#if CONFIG_PGTABLE_LEVELS == 2
pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
return;
#endif
- /*
- * This is extremely unlikely to happen because almost all
- * systems have far more MAX_PA/2 than RAM can be fit into
- * DIMM slots.
- */
half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4d3fa79c0f09..b12c0287d6cf 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -798,6 +798,8 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_phys_bits = 36;
#endif
+ c->x86_cache_bits = c->x86_phys_bits;
+
if (c->extended_cpuid_level >= 0x8000000a)
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e0a34b0d381e..c4dffae5d939 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -273,8 +273,6 @@ static noinline int vmalloc_fault(unsigned long address)
if (!(address >= VMALLOC_START && address < VMALLOC_END))
return -1;
- WARN_ON_ONCE(in_nmi());
-
/*
* Synchronize this task's top level page-table
* with the 'reference' page table.
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7bd28d45e327..17ea653bf281 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1079,7 +1079,7 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
* Map everything starting from the Gb boundary, possibly with 1G pages
*/
while (end - start >= PUD_SIZE) {
- set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn,
+ set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn >> PAGE_SHIFT,
canon_pgprot(pud_pgprot))));
start += PUD_SIZE;
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 3146b1da6d72..5ff0cb74de55 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -726,6 +726,20 @@ void io_free_memtype(resource_size_t start, resource_size_t end)
free_memtype(start, end);
}
+int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size)
+{
+ enum page_cache_mode type = _PAGE_CACHE_MODE_WC;
+
+ return io_reserve_memtype(start, start + size, &type);
+}
+EXPORT_SYMBOL(arch_io_reserve_memtype_wc);
+
+void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size)
+{
+ io_free_memtype(start, start + size);
+}
+EXPORT_SYMBOL(arch_io_free_memtype_wc);
+
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
{
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index 724a08740a04..9c7358110d32 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -477,7 +477,7 @@ static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
int err, ret = IRQ_NONE;
- struct pt_regs regs;
+ struct pt_regs regs = {0};
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
uint8_t xenpmu_flags = get_xenpmu_flags();
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 46ba2402c8f9..987361113ecd 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -185,7 +185,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
}
wb_congested = wb_congested_get_create(&q->backing_dev_info,
- blkcg->css.id, GFP_NOWAIT);
+ blkcg->css.id,
+ GFP_NOWAIT | __GFP_NOWARN);
if (!wb_congested) {
ret = -ENOMEM;
goto err_put_css;
@@ -193,7 +194,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
/* allocate */
if (!new_blkg) {
- new_blkg = blkg_alloc(blkcg, q, GFP_NOWAIT);
+ new_blkg = blkg_alloc(blkcg, q, GFP_NOWAIT | __GFP_NOWARN);
if (unlikely(!new_blkg)) {
ret = -ENOMEM;
goto err_put_congested;
@@ -1022,7 +1023,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
}
spin_lock_init(&blkcg->lock);
- INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT);
+ INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT | __GFP_NOWARN);
INIT_HLIST_HEAD(&blkcg->blkg_list);
#ifdef CONFIG_CGROUP_WRITEBACK
INIT_LIST_HEAD(&blkcg->cgwb_list);
@@ -1238,7 +1239,7 @@ pd_prealloc:
if (blkg->pd[pol->plid])
continue;
- pd = pol->pd_alloc_fn(GFP_NOWAIT, q->node);
+ pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q->node);
if (!pd)
swap(pd, pd_prealloc);
if (!pd) {
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index e04a7b8492cf..4e1f49434bbe 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2905,7 +2905,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
* for devices that support queuing, otherwise we still have a problem
* with sync vs async workloads.
*/
- if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag)
+ if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag &&
+ !cfqd->cfq_group_idle)
return;
WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
@@ -3810,7 +3811,8 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
goto out;
}
- cfqq = kmem_cache_alloc_node(cfq_pool, GFP_NOWAIT | __GFP_ZERO,
+ cfqq = kmem_cache_alloc_node(cfq_pool,
+ GFP_NOWAIT | __GFP_ZERO | __GFP_NOWARN,
cfqd->queue->node);
if (!cfqq) {
cfqq = &cfqd->oom_cfqq;
diff --git a/block/partitions/aix.c b/block/partitions/aix.c
index f3ed7b2d89bf..8e7d358e0226 100644
--- a/block/partitions/aix.c
+++ b/block/partitions/aix.c
@@ -177,7 +177,7 @@ int aix_partition(struct parsed_partitions *state)
u32 vgda_sector = 0;
u32 vgda_len = 0;
int numlvs = 0;
- struct pvd *pvd;
+ struct pvd *pvd = NULL;
struct lv_info {
unsigned short pps_per_lv;
unsigned short pps_found;
@@ -231,10 +231,11 @@ int aix_partition(struct parsed_partitions *state)
if (lvip[i].pps_per_lv)
foundlvs += 1;
}
+ /* pvd loops depend on n[].name and lvip[].pps_per_lv */
+ pvd = alloc_pvd(state, vgda_sector + 17);
}
put_dev_sector(sect);
}
- pvd = alloc_pvd(state, vgda_sector + 17);
if (pvd) {
int numpps = be16_to_cpu(pvd->pp_count);
int psn_part1 = be32_to_cpu(pvd->psn_part1);
@@ -281,10 +282,14 @@ int aix_partition(struct parsed_partitions *state)
next_lp_ix += 1;
}
for (i = 0; i < state->limit; i += 1)
- if (lvip[i].pps_found && !lvip[i].lv_is_contiguous)
+ if (lvip[i].pps_found && !lvip[i].lv_is_contiguous) {
+ char tmp[sizeof(n[i].name) + 1]; // null char
+
+ snprintf(tmp, sizeof(tmp), "%s", n[i].name);
pr_warn("partition %s (%u pp's found) is "
"not contiguous\n",
- n[i].name, lvip[i].pps_found);
+ tmp, lvip[i].pps_found);
+ }
kfree(pvd);
}
kfree(n);
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 7a9f4d3d089b..ba8ca7cedb7e 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1577,6 +1577,15 @@ config CRYPTO_LZ4HC
help
This is the LZ4 high compression mode algorithm.
+config CRYPTO_ZSTD
+ tristate "Zstd compression algorithm"
+ select CRYPTO_ALGAPI
+ select CRYPTO_ACOMP2
+ select ZSTD_COMPRESS
+ select ZSTD_DECOMPRESS
+ help
+ This is the zstd algorithm.
+
comment "Random Number Generation"
config CRYPTO_ANSI_CPRNG
diff --git a/crypto/Makefile b/crypto/Makefile
index 7ae15c47f684..74f36e7d163f 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -126,6 +126,7 @@ obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o
obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o
obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o
+obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o
#
# generic algorithms and the async_tx api
diff --git a/crypto/algapi.c b/crypto/algapi.c
index eb58b73ca925..ac70fd5cd404 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -1001,6 +1001,21 @@ unsigned int crypto_alg_extsize(struct crypto_alg *alg)
}
EXPORT_SYMBOL_GPL(crypto_alg_extsize);
+int crypto_type_has_alg(const char *name, const struct crypto_type *frontend,
+ u32 type, u32 mask)
+{
+ int ret = 0;
+ struct crypto_alg *alg = crypto_find_alg(name, frontend, type, mask);
+
+ if (!IS_ERR(alg)) {
+ crypto_mod_put(alg);
+ ret = 1;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_type_has_alg);
+
static int __init crypto_algapi_init(void)
{
crypto_init_proc();
diff --git a/crypto/internal.h b/crypto/internal.h
index 00e42a3ed814..7eefcdb00227 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -104,6 +104,9 @@ int crypto_probing_notify(unsigned long val, void *v);
unsigned int crypto_alg_extsize(struct crypto_alg *alg);
+int crypto_type_has_alg(const char *name, const struct crypto_type *frontend,
+ u32 type, u32 mask);
+
static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg)
{
atomic_inc(&alg->cra_refcnt);
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index d199c0b1751c..d248008e7f7b 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -16,7 +16,11 @@
#include <crypto/internal/skcipher.h>
#include <linux/bug.h>
+#include <linux/cryptouser.h>
#include <linux/module.h>
+#include <linux/rtnetlink.h>
+#include <linux/seq_file.h>
+#include <net/netlink.h>
#include "internal.h"
@@ -25,10 +29,11 @@ static unsigned int crypto_skcipher_extsize(struct crypto_alg *alg)
if (alg->cra_type == &crypto_blkcipher_type)
return sizeof(struct crypto_blkcipher *);
- BUG_ON(alg->cra_type != &crypto_ablkcipher_type &&
- alg->cra_type != &crypto_givcipher_type);
+ if (alg->cra_type == &crypto_ablkcipher_type ||
+ alg->cra_type == &crypto_givcipher_type)
+ return sizeof(struct crypto_ablkcipher *);
- return sizeof(struct crypto_ablkcipher *);
+ return crypto_alg_extsize(alg);
}
static int skcipher_setkey_blkcipher(struct crypto_skcipher *tfm,
@@ -118,7 +123,7 @@ static int crypto_init_skcipher_ops_blkcipher(struct crypto_tfm *tfm)
skcipher->decrypt = skcipher_decrypt_blkcipher;
skcipher->ivsize = crypto_blkcipher_ivsize(blkcipher);
- skcipher->has_setkey = calg->cra_blkcipher.max_keysize;
+ skcipher->keysize = calg->cra_blkcipher.max_keysize;
return 0;
}
@@ -211,31 +216,123 @@ static int crypto_init_skcipher_ops_ablkcipher(struct crypto_tfm *tfm)
skcipher->ivsize = crypto_ablkcipher_ivsize(ablkcipher);
skcipher->reqsize = crypto_ablkcipher_reqsize(ablkcipher) +
sizeof(struct ablkcipher_request);
- skcipher->has_setkey = calg->cra_ablkcipher.max_keysize;
+ skcipher->keysize = calg->cra_ablkcipher.max_keysize;
return 0;
}
+static void crypto_skcipher_exit_tfm(struct crypto_tfm *tfm)
+{
+ struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm);
+ struct skcipher_alg *alg = crypto_skcipher_alg(skcipher);
+
+ alg->exit(skcipher);
+}
+
static int crypto_skcipher_init_tfm(struct crypto_tfm *tfm)
{
+ struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm);
+ struct skcipher_alg *alg = crypto_skcipher_alg(skcipher);
+
if (tfm->__crt_alg->cra_type == &crypto_blkcipher_type)
return crypto_init_skcipher_ops_blkcipher(tfm);
- BUG_ON(tfm->__crt_alg->cra_type != &crypto_ablkcipher_type &&
- tfm->__crt_alg->cra_type != &crypto_givcipher_type);
+ if (tfm->__crt_alg->cra_type == &crypto_ablkcipher_type ||
+ tfm->__crt_alg->cra_type == &crypto_givcipher_type)
+ return crypto_init_skcipher_ops_ablkcipher(tfm);
+
+ skcipher->setkey = alg->setkey;
+ skcipher->encrypt = alg->encrypt;
+ skcipher->decrypt = alg->decrypt;
+ skcipher->ivsize = alg->ivsize;
+ skcipher->keysize = alg->max_keysize;
+
+ if (alg->exit)
+ skcipher->base.exit = crypto_skcipher_exit_tfm;
- return crypto_init_skcipher_ops_ablkcipher(tfm);
+ if (alg->init)
+ return alg->init(skcipher);
+
+ return 0;
+}
+
+static void crypto_skcipher_free_instance(struct crypto_instance *inst)
+{
+ struct skcipher_instance *skcipher =
+ container_of(inst, struct skcipher_instance, s.base);
+
+ skcipher->free(skcipher);
+}
+
+static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg)
+ __attribute__ ((unused));
+static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg)
+{
+ struct skcipher_alg *skcipher = container_of(alg, struct skcipher_alg,
+ base);
+
+ seq_printf(m, "type : skcipher\n");
+ seq_printf(m, "async : %s\n",
+ alg->cra_flags & CRYPTO_ALG_ASYNC ? "yes" : "no");
+ seq_printf(m, "blocksize : %u\n", alg->cra_blocksize);
+ seq_printf(m, "min keysize : %u\n", skcipher->min_keysize);
+ seq_printf(m, "max keysize : %u\n", skcipher->max_keysize);
+ seq_printf(m, "ivsize : %u\n", skcipher->ivsize);
+ seq_printf(m, "chunksize : %u\n", skcipher->chunksize);
}
+#ifdef CONFIG_NET
+static int crypto_skcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+ struct crypto_report_blkcipher rblkcipher;
+ struct skcipher_alg *skcipher = container_of(alg, struct skcipher_alg,
+ base);
+
+ strncpy(rblkcipher.type, "skcipher", sizeof(rblkcipher.type));
+ strncpy(rblkcipher.geniv, "<none>", sizeof(rblkcipher.geniv));
+
+ rblkcipher.blocksize = alg->cra_blocksize;
+ rblkcipher.min_keysize = skcipher->min_keysize;
+ rblkcipher.max_keysize = skcipher->max_keysize;
+ rblkcipher.ivsize = skcipher->ivsize;
+
+ if (nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER,
+ sizeof(struct crypto_report_blkcipher), &rblkcipher))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+#else
+static int crypto_skcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+ return -ENOSYS;
+}
+#endif
+
static const struct crypto_type crypto_skcipher_type2 = {
.extsize = crypto_skcipher_extsize,
.init_tfm = crypto_skcipher_init_tfm,
+ .free = crypto_skcipher_free_instance,
+#ifdef CONFIG_PROC_FS
+ .show = crypto_skcipher_show,
+#endif
+ .report = crypto_skcipher_report,
.maskclear = ~CRYPTO_ALG_TYPE_MASK,
.maskset = CRYPTO_ALG_TYPE_BLKCIPHER_MASK,
- .type = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .type = CRYPTO_ALG_TYPE_SKCIPHER,
.tfmsize = offsetof(struct crypto_skcipher, base),
};
+int crypto_grab_skcipher2(struct crypto_skcipher_spawn *spawn,
+ const char *name, u32 type, u32 mask)
+{
+ spawn->base.frontend = &crypto_skcipher_type2;
+ return crypto_grab_spawn(&spawn->base, name, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_grab_skcipher2);
+
struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
u32 type, u32 mask)
{
@@ -243,5 +340,90 @@ struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
}
EXPORT_SYMBOL_GPL(crypto_alloc_skcipher);
+int crypto_has_skcipher2(const char *alg_name, u32 type, u32 mask)
+{
+ return crypto_type_has_alg(alg_name, &crypto_skcipher_type2,
+ type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_has_skcipher2);
+
+static int skcipher_prepare_alg(struct skcipher_alg *alg)
+{
+ struct crypto_alg *base = &alg->base;
+
+ if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8)
+ return -EINVAL;
+
+ if (!alg->chunksize)
+ alg->chunksize = base->cra_blocksize;
+
+ base->cra_type = &crypto_skcipher_type2;
+ base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
+ base->cra_flags |= CRYPTO_ALG_TYPE_SKCIPHER;
+
+ return 0;
+}
+
+int crypto_register_skcipher(struct skcipher_alg *alg)
+{
+ struct crypto_alg *base = &alg->base;
+ int err;
+
+ err = skcipher_prepare_alg(alg);
+ if (err)
+ return err;
+
+ return crypto_register_alg(base);
+}
+EXPORT_SYMBOL_GPL(crypto_register_skcipher);
+
+void crypto_unregister_skcipher(struct skcipher_alg *alg)
+{
+ crypto_unregister_alg(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_skcipher);
+
+int crypto_register_skciphers(struct skcipher_alg *algs, int count)
+{
+ int i, ret;
+
+ for (i = 0; i < count; i++) {
+ ret = crypto_register_skcipher(&algs[i]);
+ if (ret)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ for (--i; i >= 0; --i)
+ crypto_unregister_skcipher(&algs[i]);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_register_skciphers);
+
+void crypto_unregister_skciphers(struct skcipher_alg *algs, int count)
+{
+ int i;
+
+ for (i = count - 1; i >= 0; --i)
+ crypto_unregister_skcipher(&algs[i]);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_skciphers);
+
+int skcipher_register_instance(struct crypto_template *tmpl,
+ struct skcipher_instance *inst)
+{
+ int err;
+
+ err = skcipher_prepare_alg(&inst->alg);
+ if (err)
+ return err;
+
+ return crypto_register_instance(tmpl, skcipher_crypto_instance(inst));
+}
+EXPORT_SYMBOL_GPL(skcipher_register_instance);
+
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Symmetric key cipher type");
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index a4aef61e40d8..2329b5f16b8c 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -3949,6 +3949,22 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}
}
+ }, {
+ .alg = "zstd",
+ .test = alg_test_comp,
+ .fips_allowed = 1,
+ .suite = {
+ .comp = {
+ .comp = {
+ .vecs = zstd_comp_tv_template,
+ .count = ZSTD_COMP_TEST_VECTORS
+ },
+ .decomp = {
+ .vecs = zstd_decomp_tv_template,
+ .count = ZSTD_DECOMP_TEST_VECTORS
+ }
+ }
+ }
}
};
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 0bb950ea22ed..58072e1a4def 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -35331,4 +35331,78 @@ static struct comp_testvec lz4hc_decomp_tv_template[] = {
},
};
+#define ZSTD_COMP_TEST_VECTORS 2
+#define ZSTD_DECOMP_TEST_VECTORS 2
+
+static struct comp_testvec zstd_comp_tv_template[] = {
+ {
+ .inlen = 68,
+ .outlen = 39,
+ .input = "The algorithm is zstd. "
+ "The algorithm is zstd. "
+ "The algorithm is zstd.",
+ .output = "\x28\xb5\x2f\xfd\x00\x50\xf5\x00\x00\xb8\x54\x68\x65"
+ "\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d\x20\x69\x73"
+ "\x20\x7a\x73\x74\x64\x2e\x20\x01\x00\x55\x73\x36\x01"
+ ,
+ },
+ {
+ .inlen = 244,
+ .outlen = 151,
+ .input = "zstd, short for Zstandard, is a fast lossless "
+ "compression algorithm, targeting real-time "
+ "compression scenarios at zlib-level and better "
+ "compression ratios. The zstd compression library "
+ "provides in-memory compression and decompression "
+ "functions.",
+ .output = "\x28\xb5\x2f\xfd\x00\x50\x75\x04\x00\x42\x4b\x1e\x17"
+ "\x90\x81\x31\x00\xf2\x2f\xe4\x36\xc9\xef\x92\x88\x32"
+ "\xc9\xf2\x24\x94\xd8\x68\x9a\x0f\x00\x0c\xc4\x31\x6f"
+ "\x0d\x0c\x38\xac\x5c\x48\x03\xcd\x63\x67\xc0\xf3\xad"
+ "\x4e\x90\xaa\x78\xa0\xa4\xc5\x99\xda\x2f\xb6\x24\x60"
+ "\xe2\x79\x4b\xaa\xb6\x6b\x85\x0b\xc9\xc6\x04\x66\x86"
+ "\xe2\xcc\xe2\x25\x3f\x4f\x09\xcd\xb8\x9d\xdb\xc1\x90"
+ "\xa9\x11\xbc\x35\x44\x69\x2d\x9c\x64\x4f\x13\x31\x64"
+ "\xcc\xfb\x4d\x95\x93\x86\x7f\x33\x7f\x1a\xef\xe9\x30"
+ "\xf9\x67\xa1\x94\x0a\x69\x0f\x60\xcd\xc3\xab\x99\xdc"
+ "\x42\xed\x97\x05\x00\x33\xc3\x15\x95\x3a\x06\xa0\x0e"
+ "\x20\xa9\x0e\x82\xb9\x43\x45\x01",
+ },
+};
+
+static struct comp_testvec zstd_decomp_tv_template[] = {
+ {
+ .inlen = 43,
+ .outlen = 68,
+ .input = "\x28\xb5\x2f\xfd\x04\x50\xf5\x00\x00\xb8\x54\x68\x65"
+ "\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d\x20\x69\x73"
+ "\x20\x7a\x73\x74\x64\x2e\x20\x01\x00\x55\x73\x36\x01"
+ "\x6b\xf4\x13\x35",
+ .output = "The algorithm is zstd. "
+ "The algorithm is zstd. "
+ "The algorithm is zstd.",
+ },
+ {
+ .inlen = 155,
+ .outlen = 244,
+ .input = "\x28\xb5\x2f\xfd\x04\x50\x75\x04\x00\x42\x4b\x1e\x17"
+ "\x90\x81\x31\x00\xf2\x2f\xe4\x36\xc9\xef\x92\x88\x32"
+ "\xc9\xf2\x24\x94\xd8\x68\x9a\x0f\x00\x0c\xc4\x31\x6f"
+ "\x0d\x0c\x38\xac\x5c\x48\x03\xcd\x63\x67\xc0\xf3\xad"
+ "\x4e\x90\xaa\x78\xa0\xa4\xc5\x99\xda\x2f\xb6\x24\x60"
+ "\xe2\x79\x4b\xaa\xb6\x6b\x85\x0b\xc9\xc6\x04\x66\x86"
+ "\xe2\xcc\xe2\x25\x3f\x4f\x09\xcd\xb8\x9d\xdb\xc1\x90"
+ "\xa9\x11\xbc\x35\x44\x69\x2d\x9c\x64\x4f\x13\x31\x64"
+ "\xcc\xfb\x4d\x95\x93\x86\x7f\x33\x7f\x1a\xef\xe9\x30"
+ "\xf9\x67\xa1\x94\x0a\x69\x0f\x60\xcd\xc3\xab\x99\xdc"
+ "\x42\xed\x97\x05\x00\x33\xc3\x15\x95\x3a\x06\xa0\x0e"
+ "\x20\xa9\x0e\x82\xb9\x43\x45\x01\xaa\x6d\xda\x0d",
+ .output = "zstd, short for Zstandard, is a fast lossless "
+ "compression algorithm, targeting real-time "
+ "compression scenarios at zlib-level and better "
+ "compression ratios. The zstd compression library "
+ "provides in-memory compression and decompression "
+ "functions.",
+ },
+};
#endif /* _CRYPTO_TESTMGR_H */
diff --git a/crypto/zstd.c b/crypto/zstd.c
new file mode 100644
index 000000000000..9bfd28f8cc77
--- /dev/null
+++ b/crypto/zstd.c
@@ -0,0 +1,209 @@
+/*
+ * Cryptographic API.
+ *
+ * Copyright (c) 2017-present, Facebook, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/vmalloc.h>
+#include <linux/zstd.h>
+
+
+#define ZSTD_DEF_LEVEL 3
+
+/*
+ * Per-transform state: one compression and one decompression context,
+ * each paired with the workspace buffer it was initialised in.
+ */
+struct zstd_ctx {
+ ZSTD_CCtx *cctx; /* returned by ZSTD_initCCtx() for cwksp */
+ ZSTD_DCtx *dctx; /* returned by ZSTD_initDCtx() for dwksp */
+ void *cwksp; /* vzalloc()ed compression workspace */
+ void *dwksp; /* vzalloc()ed decompression workspace */
+};
+
+/* Build the zstd parameter set for the default level, ZSTD_DEF_LEVEL. */
+static ZSTD_parameters zstd_params(void)
+{
+	const ZSTD_parameters params = ZSTD_getParams(ZSTD_DEF_LEVEL, 0, 0);
+
+	return params;
+}
+
+/*
+ * Allocate the compression workspace and initialise the compression
+ * context inside it.
+ *
+ * Returns 0 on success, -ENOMEM if the workspace cannot be allocated, or
+ * -EINVAL if ZSTD_initCCtx() does not return a context.  On failure
+ * ctx->cwksp is NULL, so no pointer to freed memory is left in @ctx.
+ */
+static int zstd_comp_init(struct zstd_ctx *ctx)
+{
+	const ZSTD_parameters params = zstd_params();
+	const size_t wksp_size = ZSTD_CCtxWorkspaceBound(params.cParams);
+
+	ctx->cwksp = vzalloc(wksp_size);
+	if (!ctx->cwksp)
+		return -ENOMEM;
+
+	ctx->cctx = ZSTD_initCCtx(ctx->cwksp, wksp_size);
+	if (!ctx->cctx) {
+		/* Mirror zstd_comp_exit(): never keep a stale workspace pointer. */
+		vfree(ctx->cwksp);
+		ctx->cwksp = NULL;
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate the decompression workspace and initialise the decompression
+ * context inside it.
+ *
+ * Returns 0 on success, -ENOMEM if the workspace cannot be allocated, or
+ * -EINVAL if ZSTD_initDCtx() does not return a context.  On failure
+ * ctx->dwksp is NULL, so no pointer to freed memory is left in @ctx.
+ */
+static int zstd_decomp_init(struct zstd_ctx *ctx)
+{
+	const size_t wksp_size = ZSTD_DCtxWorkspaceBound();
+
+	ctx->dwksp = vzalloc(wksp_size);
+	if (!ctx->dwksp)
+		return -ENOMEM;
+
+	ctx->dctx = ZSTD_initDCtx(ctx->dwksp, wksp_size);
+	if (!ctx->dctx) {
+		/* Mirror zstd_decomp_exit(): never keep a stale workspace pointer. */
+		vfree(ctx->dwksp);
+		ctx->dwksp = NULL;
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Release the compression workspace and clear both compression pointers. */
+static void zstd_comp_exit(struct zstd_ctx *ctx)
+{
+	void *wksp = ctx->cwksp;
+
+	ctx->cctx = NULL;
+	ctx->cwksp = NULL;
+	vfree(wksp);
+}
+
+/* Release the decompression workspace and clear both decompression pointers. */
+static void zstd_decomp_exit(struct zstd_ctx *ctx)
+{
+	void *wksp = ctx->dwksp;
+
+	ctx->dctx = NULL;
+	ctx->dwksp = NULL;
+	vfree(wksp);
+}
+
+/*
+ * Set up both halves of a zstd context.  Returns 0 on success or the
+ * error from whichever init step failed; the compression side is torn
+ * down again if the decompression side cannot be set up.
+ */
+static int __zstd_init(void *ctx)
+{
+	int err = zstd_comp_init(ctx);
+
+	if (err)
+		return err;
+
+	err = zstd_decomp_init(ctx);
+	if (!err)
+		return 0;
+
+	/* Decompression setup failed: undo the compression side. */
+	zstd_comp_exit(ctx);
+	return err;
+}
+
+/* .cra_init hook: initialise the zstd context embedded in @tfm. */
+static int zstd_init(struct crypto_tfm *tfm)
+{
+	return __zstd_init(crypto_tfm_ctx(tfm));
+}
+
+/* Tear down both the compression and the decompression half of @ctx. */
+static void __zstd_exit(void *ctx)
+{
+	struct zstd_ctx *zctx = ctx;
+
+	zstd_comp_exit(zctx);
+	zstd_decomp_exit(zctx);
+}
+
+/* .cra_exit hook: release the zstd context embedded in @tfm. */
+static void zstd_exit(struct crypto_tfm *tfm)
+{
+	__zstd_exit(crypto_tfm_ctx(tfm));
+}
+
+/*
+ * Compress @slen bytes from @src into @dst using the context in @ctx.
+ * *@dlen is passed to zstd as the size of @dst and, on success, is
+ * overwritten with the value ZSTD_compressCCtx() returned.
+ *
+ * Returns 0 on success or -EINVAL when zstd reports an error.
+ */
+static int __zstd_compress(const u8 *src, unsigned int slen,
+			   u8 *dst, unsigned int *dlen, void *ctx)
+{
+	struct zstd_ctx *zctx = ctx;
+	size_t written;
+
+	written = ZSTD_compressCCtx(zctx->cctx, dst, *dlen, src, slen,
+				    zstd_params());
+	if (!ZSTD_isError(written)) {
+		*dlen = written;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* .coa_compress hook: compress using the zstd context embedded in @tfm. */
+static int zstd_compress(struct crypto_tfm *tfm, const u8 *src,
+			 unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+	return __zstd_compress(src, slen, dst, dlen, crypto_tfm_ctx(tfm));
+}
+
+/*
+ * Decompress @slen bytes from @src into @dst using the context in @ctx.
+ * *@dlen is passed to zstd as the size of @dst and, on success, is
+ * overwritten with the value ZSTD_decompressDCtx() returned.
+ *
+ * Returns 0 on success or -EINVAL when zstd reports an error.
+ */
+static int __zstd_decompress(const u8 *src, unsigned int slen,
+			     u8 *dst, unsigned int *dlen, void *ctx)
+{
+	struct zstd_ctx *zctx = ctx;
+	size_t produced;
+
+	produced = ZSTD_decompressDCtx(zctx->dctx, dst, *dlen, src, slen);
+	if (!ZSTD_isError(produced)) {
+		*dlen = produced;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* .coa_decompress hook: decompress using the zstd context embedded in @tfm. */
+static int zstd_decompress(struct crypto_tfm *tfm, const u8 *src,
+			   unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+	return __zstd_decompress(src, slen, dst, dlen, crypto_tfm_ctx(tfm));
+}
+
+/*
+ * Algorithm descriptor registered by zstd_mod_init(): a compression-type
+ * algorithm named "zstd" whose per-transform context is a struct zstd_ctx.
+ */
+static struct crypto_alg alg = {
+ .cra_name = "zstd",
+ .cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
+ .cra_ctxsize = sizeof(struct zstd_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_init = zstd_init,
+ .cra_exit = zstd_exit,
+ .cra_u = { .compress = {
+ .coa_compress = zstd_compress,
+ .coa_decompress = zstd_decompress } }
+};
+
+/* Module entry point: register the "zstd" algorithm with the crypto API. */
+static int __init zstd_mod_init(void)
+{
+	return crypto_register_alg(&alg);
+}
+
+/* Module exit point: unregister the algorithm added by zstd_mod_init(). */
+static void __exit zstd_mod_fini(void)
+{
+ crypto_unregister_alg(&alg);
+}
+
+module_init(zstd_mod_init);
+module_exit(zstd_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Zstd Compression Algorithm");
+MODULE_ALIAS_CRYPTO("zstd");
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d1dfbe34fa7c..d739baf6cad2 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -4655,6 +4655,42 @@ out:
return ret;
}
+/*
+ * Report the reference counts of the node that @info->handle refers to.
+ *
+ * On entry every field of @info except the handle must be zero.  The
+ * caller must be the process that owns the context manager node.  On
+ * success @info->strong_count and @info->weak_count are filled in.
+ *
+ * Returns 0 on success, -EINVAL if a field other than the handle is set
+ * or no node is found for the handle, -EPERM if @proc is not the context
+ * manager.
+ */
+static int binder_ioctl_get_node_info_for_ref(struct binder_proc *proc,
+		struct binder_node_info_for_ref *info)
+{
+	struct binder_context *context = proc->context;
+	struct binder_node *node;
+	bool is_context_mgr;
+
+	if (info->strong_count || info->weak_count || info->reserved1 ||
+	    info->reserved2 || info->reserved3) {
+		binder_user_error("%d BINDER_GET_NODE_INFO_FOR_REF: only handle may be non-zero.",
+				  proc->pid);
+		return -EINVAL;
+	}
+
+	/* This ioctl may only be used by the context manager */
+	mutex_lock(&context->context_mgr_node_lock);
+	is_context_mgr = context->binder_context_mgr_node &&
+			 context->binder_context_mgr_node->proc == proc;
+	mutex_unlock(&context->context_mgr_node_lock);
+	if (!is_context_mgr)
+		return -EPERM;
+
+	node = binder_get_node_from_ref(proc, info->handle, true, NULL);
+	if (!node)
+		return -EINVAL;
+
+	info->weak_count = node->local_weak_refs;
+	info->strong_count = node->local_strong_refs +
+			     node->internal_strong_refs;
+
+	/* Drop the node obtained from binder_get_node_from_ref(). */
+	binder_put_node(node);
+
+	return 0;
+}
+
static int binder_ioctl_get_node_debug_info(struct binder_proc *proc,
struct binder_node_debug_info *info) {
struct rb_node *n;
@@ -4748,6 +4784,25 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
break;
}
+ case BINDER_GET_NODE_INFO_FOR_REF: {
+ struct binder_node_info_for_ref info;
+
+ if (copy_from_user(&info, ubuf, sizeof(info))) {
+ ret = -EFAULT;
+ goto err;
+ }
+
+ ret = binder_ioctl_get_node_info_for_ref(proc, &info);
+ if (ret < 0)
+ goto err;
+
+ if (copy_to_user(ubuf, &info, sizeof(info))) {
+ ret = -EFAULT;
+ goto err;
+ }
+
+ break;
+ }
case BINDER_GET_NODE_DEBUG_INFO: {
struct binder_node_debug_info info;
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 9628fa131757..8116cb2fef2d 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -2113,6 +2113,8 @@ static void ahci_set_aggressive_devslp(struct ata_port *ap, bool sleep)
deto = 20;
}
+ /* Clear the DITO, MDAT and DETO bit fields before setting them */
+ devslp &= ~GENMASK_ULL(24, 2);
devslp |= ((dito << PORT_DEVSLP_DITO_OFFSET) |
(mdat << PORT_DEVSLP_MDAT_OFFSET) |
(deto << PORT_DEVSLP_DETO_OFFSET) |
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index 386ba3d1a6ee..cb53957d58f9 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -1,8 +1,7 @@
config ZRAM
tristate "Compressed RAM block device support"
- depends on BLOCK && SYSFS && ZSMALLOC
- select LZO_COMPRESS
- select LZO_DECOMPRESS
+ depends on BLOCK && SYSFS && ZSMALLOC && CRYPTO
+ select CRYPTO_LZO
default n
help
Creates virtual block devices called /dev/zramX (X = 0, 1, ...).
@@ -13,14 +12,26 @@ config ZRAM
It has several use cases, for example: /tmp storage, use as swap
disks and maybe many more.
- See zram.txt for more information.
+ See Documentation/blockdev/zram.txt for more information.
-config ZRAM_LZ4_COMPRESS
- bool "Enable LZ4 algorithm support"
- depends on ZRAM
- select LZ4_COMPRESS
- select LZ4_DECOMPRESS
- default n
+config ZRAM_WRITEBACK
+ bool "Write back incompressible page to backing device"
+ depends on ZRAM
+ default n
+ help
+ Keeping an incompressible page in memory saves no memory, so
+ write it out to the backing device instead.
+ To use this feature, the admin must set up a backing device via
+ /sys/block/zramX/backing_dev.
+
+ See Documentation/blockdev/zram.txt for more information.
+
+config ZRAM_MEMORY_TRACKING
+ bool "Track zRam block status"
+ depends on ZRAM && DEBUG_FS
help
- This option enables LZ4 compression algorithm support. Compression
- algorithm can be changed using `comp_algorithm' device attribute. \ No newline at end of file
+ With this feature, the admin can track the state of allocated
+ blocks of zRAM. The admin can see the information via
+ /sys/kernel/debug/zram/zramX/block_state.
+
+ See Documentation/blockdev/zram.txt for more information.
diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile
index be0763ff57a2..9e2b79e9a990 100644
--- a/drivers/block/zram/Makefile
+++ b/drivers/block/zram/Makefile
@@ -1,5 +1,3 @@
-zram-y := zcomp_lzo.o zcomp.o zram_drv.o
-
-zram-$(CONFIG_ZRAM_LZ4_COMPRESS) += zcomp_lz4.o
+zram-y := zcomp.o zram_drv.o
obj-$(CONFIG_ZRAM) += zram.o
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index c53617752b93..c084a7f9763d 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -13,315 +13,233 @@
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/sched.h>
+#include <linux/cpu.h>
+#include <linux/crypto.h>
#include "zcomp.h"
-#include "zcomp_lzo.h"
-#ifdef CONFIG_ZRAM_LZ4_COMPRESS
-#include "zcomp_lz4.h"
-#endif
-
-/*
- * single zcomp_strm backend
- */
-struct zcomp_strm_single {
- struct mutex strm_lock;
- struct zcomp_strm *zstrm;
-};
-
-/*
- * multi zcomp_strm backend
- */
-struct zcomp_strm_multi {
- /* protect strm list */
- spinlock_t strm_lock;
- /* max possible number of zstrm streams */
- int max_strm;
- /* number of available zstrm streams */
- int avail_strm;
- /* list of available strms */
- struct list_head idle_strm;
- wait_queue_head_t strm_wait;
-};
-static struct zcomp_backend *backends[] = {
- &zcomp_lzo,
-#ifdef CONFIG_ZRAM_LZ4_COMPRESS
- &zcomp_lz4,
+static const char * const backends[] = {
+ "lzo",
+#if IS_ENABLED(CONFIG_CRYPTO_LZ4)
+ "lz4",
+#endif
+#if IS_ENABLED(CONFIG_CRYPTO_DEFLATE)
+ "deflate",
+#endif
+#if IS_ENABLED(CONFIG_CRYPTO_LZ4HC)
+ "lz4hc",
+#endif
+#if IS_ENABLED(CONFIG_CRYPTO_842)
+ "842",
+#endif
+#if IS_ENABLED(CONFIG_CRYPTO_ZSTD)
+ "zstd",
#endif
NULL
};
-static struct zcomp_backend *find_backend(const char *compress)
+static void zcomp_strm_free(struct zcomp_strm *zstrm)
{
- int i = 0;
- while (backends[i]) {
- if (sysfs_streq(compress, backends[i]->name))
- break;
- i++;
- }
- return backends[i];
-}
-
-static void zcomp_strm_free(struct zcomp *comp, struct zcomp_strm *zstrm)
-{
- if (zstrm->private)
- comp->backend->destroy(zstrm->private);
+ if (!IS_ERR_OR_NULL(zstrm->tfm))
+ crypto_free_comp(zstrm->tfm);
free_pages((unsigned long)zstrm->buffer, 1);
kfree(zstrm);
}
/*
- * allocate new zcomp_strm structure with ->private initialized by
+ * allocate new zcomp_strm structure with ->tfm initialized by
* backend, return NULL on error
*/
static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp)
{
- struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_NOIO);
+ struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_KERNEL);
if (!zstrm)
return NULL;
- zstrm->private = comp->backend->create();
+ zstrm->tfm = crypto_alloc_comp(comp->name, 0, 0);
/*
* allocate 2 pages. 1 for compressed data, plus 1 extra for the
* case when compressed size is larger than the original one
*/
- zstrm->buffer = (void *)__get_free_pages(GFP_NOIO | __GFP_ZERO, 1);
- if (!zstrm->private || !zstrm->buffer) {
- zcomp_strm_free(comp, zstrm);
+ zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
+ if (IS_ERR_OR_NULL(zstrm->tfm) || !zstrm->buffer) {
+ zcomp_strm_free(zstrm);
zstrm = NULL;
}
return zstrm;
}
-/*
- * get idle zcomp_strm or wait until other process release
- * (zcomp_strm_release()) one for us
- */
-static struct zcomp_strm *zcomp_strm_multi_find(struct zcomp *comp)
+bool zcomp_available_algorithm(const char *comp)
{
- struct zcomp_strm_multi *zs = comp->stream;
- struct zcomp_strm *zstrm;
-
- while (1) {
- spin_lock(&zs->strm_lock);
- if (!list_empty(&zs->idle_strm)) {
- zstrm = list_entry(zs->idle_strm.next,
- struct zcomp_strm, list);
- list_del(&zstrm->list);
- spin_unlock(&zs->strm_lock);
- return zstrm;
- }
- /* zstrm streams limit reached, wait for idle stream */
- if (zs->avail_strm >= zs->max_strm) {
- spin_unlock(&zs->strm_lock);
- wait_event(zs->strm_wait, !list_empty(&zs->idle_strm));
- continue;
- }
- /* allocate new zstrm stream */
- zs->avail_strm++;
- spin_unlock(&zs->strm_lock);
-
- zstrm = zcomp_strm_alloc(comp);
- if (!zstrm) {
- spin_lock(&zs->strm_lock);
- zs->avail_strm--;
- spin_unlock(&zs->strm_lock);
- wait_event(zs->strm_wait, !list_empty(&zs->idle_strm));
- continue;
- }
- break;
- }
- return zstrm;
-}
+ int i = 0;
-/* add stream back to idle list and wake up waiter or free the stream */
-static void zcomp_strm_multi_release(struct zcomp *comp, struct zcomp_strm *zstrm)
-{
- struct zcomp_strm_multi *zs = comp->stream;
-
- spin_lock(&zs->strm_lock);
- if (zs->avail_strm <= zs->max_strm) {
- list_add(&zstrm->list, &zs->idle_strm);
- spin_unlock(&zs->strm_lock);
- wake_up(&zs->strm_wait);
- return;
+ while (backends[i]) {
+ if (sysfs_streq(comp, backends[i]))
+ return true;
+ i++;
}
- zs->avail_strm--;
- spin_unlock(&zs->strm_lock);
- zcomp_strm_free(comp, zstrm);
-}
-
-/* change max_strm limit */
-static bool zcomp_strm_multi_set_max_streams(struct zcomp *comp, int num_strm)
-{
- struct zcomp_strm_multi *zs = comp->stream;
- struct zcomp_strm *zstrm;
-
- spin_lock(&zs->strm_lock);
- zs->max_strm = num_strm;
/*
- * if user has lowered the limit and there are idle streams,
- * immediately free as much streams (and memory) as we can.
+ * Crypto does not ignore a trailing new line symbol,
+ * so make sure you don't supply a string containing
+ * one.
+ * This also means that we permit zcomp initialisation
+ * with any compressing algorithm known to crypto api.
*/
- while (zs->avail_strm > num_strm && !list_empty(&zs->idle_strm)) {
- zstrm = list_entry(zs->idle_strm.next,
- struct zcomp_strm, list);
- list_del(&zstrm->list);
- zcomp_strm_free(comp, zstrm);
- zs->avail_strm--;
- }
- spin_unlock(&zs->strm_lock);
- return true;
+ return crypto_has_comp(comp, 0, 0) == 1;
}
-static void zcomp_strm_multi_destroy(struct zcomp *comp)
+/* show available compressors */
+ssize_t zcomp_available_show(const char *comp, char *buf)
{
- struct zcomp_strm_multi *zs = comp->stream;
- struct zcomp_strm *zstrm;
+ bool known_algorithm = false;
+ ssize_t sz = 0;
+ int i = 0;
- while (!list_empty(&zs->idle_strm)) {
- zstrm = list_entry(zs->idle_strm.next,
- struct zcomp_strm, list);
- list_del(&zstrm->list);
- zcomp_strm_free(comp, zstrm);
+ for (; backends[i]; i++) {
+ if (!strcmp(comp, backends[i])) {
+ known_algorithm = true;
+ sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2,
+ "[%s] ", backends[i]);
+ } else {
+ sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2,
+ "%s ", backends[i]);
+ }
}
- kfree(zs);
-}
-static int zcomp_strm_multi_create(struct zcomp *comp, int max_strm)
-{
- struct zcomp_strm *zstrm;
- struct zcomp_strm_multi *zs;
-
- comp->destroy = zcomp_strm_multi_destroy;
- comp->strm_find = zcomp_strm_multi_find;
- comp->strm_release = zcomp_strm_multi_release;
- comp->set_max_streams = zcomp_strm_multi_set_max_streams;
- zs = kmalloc(sizeof(struct zcomp_strm_multi), GFP_KERNEL);
- if (!zs)
- return -ENOMEM;
-
- comp->stream = zs;
- spin_lock_init(&zs->strm_lock);
- INIT_LIST_HEAD(&zs->idle_strm);
- init_waitqueue_head(&zs->strm_wait);
- zs->max_strm = max_strm;
- zs->avail_strm = 1;
+ /*
+ * Out-of-tree module known to crypto api or a missing
+ * entry in `backends'.
+ */
+ if (!known_algorithm && crypto_has_comp(comp, 0, 0) == 1)
+ sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2,
+ "[%s] ", comp);
- zstrm = zcomp_strm_alloc(comp);
- if (!zstrm) {
- kfree(zs);
- return -ENOMEM;
- }
- list_add(&zstrm->list, &zs->idle_strm);
- return 0;
+ sz += scnprintf(buf + sz, PAGE_SIZE - sz, "\n");
+ return sz;
}
-static struct zcomp_strm *zcomp_strm_single_find(struct zcomp *comp)
+struct zcomp_strm *zcomp_stream_get(struct zcomp *comp)
{
- struct zcomp_strm_single *zs = comp->stream;
- mutex_lock(&zs->strm_lock);
- return zs->zstrm;
+ return *get_cpu_ptr(comp->stream);
}
-static void zcomp_strm_single_release(struct zcomp *comp,
- struct zcomp_strm *zstrm)
+void zcomp_stream_put(struct zcomp *comp)
{
- struct zcomp_strm_single *zs = comp->stream;
- mutex_unlock(&zs->strm_lock);
+ put_cpu_ptr(comp->stream);
}
-static bool zcomp_strm_single_set_max_streams(struct zcomp *comp, int num_strm)
+int zcomp_compress(struct zcomp_strm *zstrm,
+ const void *src, unsigned int *dst_len)
{
- /* zcomp_strm_single support only max_comp_streams == 1 */
- return false;
-}
+ /*
+ * Our dst memory (zstrm->buffer) is always `2 * PAGE_SIZE' sized
+ * because sometimes we can end up having a bigger compressed data
+ * due to various reasons: for example compression algorithms tend
+ * to add some padding to the compressed buffer. Speaking of padding,
+ * comp algorithm `842' pads the compressed length to multiple of 8
+ * and returns -ENOSPC when the dst memory is not big enough, which
+ * is not something that ZRAM wants to see. We can handle the
+ * `compressed_size > PAGE_SIZE' case easily in ZRAM, but when we
+ * receive -ERRNO from the compressing backend we can't help it
+ * anymore. To make `842' happy we need to tell the exact size of
+ * the dst buffer, zram_drv will take care of the fact that
+ * compressed buffer is too big.
+ */
+ *dst_len = PAGE_SIZE * 2;
-static void zcomp_strm_single_destroy(struct zcomp *comp)
-{
- struct zcomp_strm_single *zs = comp->stream;
- zcomp_strm_free(comp, zs->zstrm);
- kfree(zs);
+ return crypto_comp_compress(zstrm->tfm,
+ src, PAGE_SIZE,
+ zstrm->buffer, dst_len);
}
-static int zcomp_strm_single_create(struct zcomp *comp)
+int zcomp_decompress(struct zcomp_strm *zstrm,
+ const void *src, unsigned int src_len, void *dst)
{
- struct zcomp_strm_single *zs;
-
- comp->destroy = zcomp_strm_single_destroy;
- comp->strm_find = zcomp_strm_single_find;
- comp->strm_release = zcomp_strm_single_release;
- comp->set_max_streams = zcomp_strm_single_set_max_streams;
- zs = kmalloc(sizeof(struct zcomp_strm_single), GFP_KERNEL);
- if (!zs)
- return -ENOMEM;
+ unsigned int dst_len = PAGE_SIZE;
- comp->stream = zs;
- mutex_init(&zs->strm_lock);
- zs->zstrm = zcomp_strm_alloc(comp);
- if (!zs->zstrm) {
- kfree(zs);
- return -ENOMEM;
- }
- return 0;
+ return crypto_comp_decompress(zstrm->tfm,
+ src, src_len,
+ dst, &dst_len);
}
-/* show available compressors */
-ssize_t zcomp_available_show(const char *comp, char *buf)
+static int __zcomp_cpu_notifier(struct zcomp *comp,
+ unsigned long action, unsigned long cpu)
{
- ssize_t sz = 0;
- int i = 0;
+ struct zcomp_strm *zstrm;
- while (backends[i]) {
- if (!strcmp(comp, backends[i]->name))
- sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2,
- "[%s] ", backends[i]->name);
- else
- sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2,
- "%s ", backends[i]->name);
- i++;
+ switch (action) {
+ case CPU_UP_PREPARE:
+ if (WARN_ON(*per_cpu_ptr(comp->stream, cpu)))
+ break;
+ zstrm = zcomp_strm_alloc(comp);
+ if (IS_ERR_OR_NULL(zstrm)) {
+ pr_err("Can't allocate a compression stream\n");
+ return NOTIFY_BAD;
+ }
+ *per_cpu_ptr(comp->stream, cpu) = zstrm;
+ break;
+ case CPU_DEAD:
+ case CPU_UP_CANCELED:
+ zstrm = *per_cpu_ptr(comp->stream, cpu);
+ if (!IS_ERR_OR_NULL(zstrm))
+ zcomp_strm_free(zstrm);
+ *per_cpu_ptr(comp->stream, cpu) = NULL;
+ break;
+ default:
+ break;
}
- sz += scnprintf(buf + sz, PAGE_SIZE - sz, "\n");
- return sz;
+ return NOTIFY_OK;
}
-bool zcomp_available_algorithm(const char *comp)
+static int zcomp_cpu_notifier(struct notifier_block *nb,
+ unsigned long action, void *pcpu)
{
- return find_backend(comp) != NULL;
-}
+ unsigned long cpu = (unsigned long)pcpu;
+ struct zcomp *comp = container_of(nb, typeof(*comp), notifier);
-bool zcomp_set_max_streams(struct zcomp *comp, int num_strm)
-{
- return comp->set_max_streams(comp, num_strm);
+ return __zcomp_cpu_notifier(comp, action, cpu);
}
-struct zcomp_strm *zcomp_strm_find(struct zcomp *comp)
+/*
+ * Allocate the per-CPU stream pointers, create a compression stream for
+ * every online CPU and register the CPU hotplug notifier.
+ *
+ * Returns 0 on success or -ENOMEM.  On failure every stream created here
+ * and the per-CPU pointer array itself are released again, so the caller
+ * only has to free @comp.
+ */
+static int zcomp_init(struct zcomp *comp)
{
- return comp->strm_find(comp);
-}
+ unsigned long cpu;
+ int ret;
-void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm)
-{
- comp->strm_release(comp, zstrm);
-}
+ comp->notifier.notifier_call = zcomp_cpu_notifier;
-int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm,
- const unsigned char *src, size_t *dst_len)
-{
- return comp->backend->compress(src, zstrm->buffer, dst_len,
- zstrm->private);
-}
+ comp->stream = alloc_percpu(struct zcomp_strm *);
+ if (!comp->stream)
+ return -ENOMEM;
-int zcomp_decompress(struct zcomp *comp, const unsigned char *src,
- size_t src_len, unsigned char *dst)
-{
- return comp->backend->decompress(src, src_len, dst);
+ cpu_notifier_register_begin();
+ for_each_online_cpu(cpu) {
+ ret = __zcomp_cpu_notifier(comp, CPU_UP_PREPARE, cpu);
+ if (ret == NOTIFY_BAD)
+ goto cleanup;
+ }
+ __register_cpu_notifier(&comp->notifier);
+ cpu_notifier_register_done();
+ return 0;
+
+cleanup:
+ for_each_online_cpu(cpu)
+ __zcomp_cpu_notifier(comp, CPU_UP_CANCELED, cpu);
+ cpu_notifier_register_done();
+ /* The streams are gone; release the per-CPU pointer array as well. */
+ free_percpu(comp->stream);
+ return -ENOMEM;
}
void zcomp_destroy(struct zcomp *comp)
{
- comp->destroy(comp);
+ unsigned long cpu;
+
+ cpu_notifier_register_begin();
+ for_each_online_cpu(cpu)
+ __zcomp_cpu_notifier(comp, CPU_UP_CANCELED, cpu);
+ __unregister_cpu_notifier(&comp->notifier);
+ cpu_notifier_register_done();
+
+ free_percpu(comp->stream);
kfree(comp);
}
@@ -331,27 +249,22 @@ void zcomp_destroy(struct zcomp *comp)
* backend pointer or ERR_PTR if things went bad. ERR_PTR(-EINVAL)
* if requested algorithm is not supported, ERR_PTR(-ENOMEM) in
* case of allocation error, or any other error potentially
- * returned by functions zcomp_strm_{multi,single}_create.
+ * returned by zcomp_init().
*/
-struct zcomp *zcomp_create(const char *compress, int max_strm)
+struct zcomp *zcomp_create(const char *compress)
{
struct zcomp *comp;
- struct zcomp_backend *backend;
int error;
- backend = find_backend(compress);
- if (!backend)
+ if (!zcomp_available_algorithm(compress))
return ERR_PTR(-EINVAL);
comp = kzalloc(sizeof(struct zcomp), GFP_KERNEL);
if (!comp)
return ERR_PTR(-ENOMEM);
- comp->backend = backend;
- if (max_strm > 1)
- error = zcomp_strm_multi_create(comp, max_strm);
- else
- error = zcomp_strm_single_create(comp);
+ comp->name = compress;
+ error = zcomp_init(comp);
if (error) {
kfree(comp);
return ERR_PTR(error);
diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
index 46e2b9f8f1f0..478cac2ed465 100644
--- a/drivers/block/zram/zcomp.h
+++ b/drivers/block/zram/zcomp.h
@@ -10,60 +10,34 @@
#ifndef _ZCOMP_H_
#define _ZCOMP_H_
-#include <linux/mutex.h>
-
struct zcomp_strm {
/* compression/decompression buffer */
void *buffer;
- /*
- * The private data of the compression stream, only compression
- * stream backend can touch this (e.g. compression algorithm
- * working memory)
- */
- void *private;
- /* used in multi stream backend, protected by backend strm_lock */
- struct list_head list;
-};
-
-/* static compression backend */
-struct zcomp_backend {
- int (*compress)(const unsigned char *src, unsigned char *dst,
- size_t *dst_len, void *private);
-
- int (*decompress)(const unsigned char *src, size_t src_len,
- unsigned char *dst);
-
- void *(*create)(void);
- void (*destroy)(void *private);
-
- const char *name;
+ struct crypto_comp *tfm;
};
/* dynamic per-device compression frontend */
struct zcomp {
- void *stream;
- struct zcomp_backend *backend;
+ struct zcomp_strm * __percpu *stream;
+ struct notifier_block notifier;
- struct zcomp_strm *(*strm_find)(struct zcomp *comp);
- void (*strm_release)(struct zcomp *comp, struct zcomp_strm *zstrm);
- bool (*set_max_streams)(struct zcomp *comp, int num_strm);
- void (*destroy)(struct zcomp *comp);
+ const char *name;
};
ssize_t zcomp_available_show(const char *comp, char *buf);
bool zcomp_available_algorithm(const char *comp);
-struct zcomp *zcomp_create(const char *comp, int max_strm);
+struct zcomp *zcomp_create(const char *comp);
void zcomp_destroy(struct zcomp *comp);
-struct zcomp_strm *zcomp_strm_find(struct zcomp *comp);
-void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm);
+struct zcomp_strm *zcomp_stream_get(struct zcomp *comp);
+void zcomp_stream_put(struct zcomp *comp);
-int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm,
- const unsigned char *src, size_t *dst_len);
+int zcomp_compress(struct zcomp_strm *zstrm,
+ const void *src, unsigned int *dst_len);
-int zcomp_decompress(struct zcomp *comp, const unsigned char *src,
- size_t src_len, unsigned char *dst);
+int zcomp_decompress(struct zcomp_strm *zstrm,
+ const void *src, unsigned int src_len, void *dst);
bool zcomp_set_max_streams(struct zcomp *comp, int num_strm);
#endif /* _ZCOMP_H_ */
diff --git a/drivers/block/zram/zcomp_lz4.c b/drivers/block/zram/zcomp_lz4.c
deleted file mode 100644
index dd6083124276..000000000000
--- a/drivers/block/zram/zcomp_lz4.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (C) 2014 Sergey Senozhatsky.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/lz4.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-
-#include "zcomp_lz4.h"
-
-static void *zcomp_lz4_create(void)
-{
- void *ret;
-
- /*
- * This function can be called in swapout/fs write path
- * so we can't use GFP_FS|IO. And it assumes we already
- * have at least one stream in zram initialization so we
- * don't do best effort to allocate more stream in here.
- * A default stream will work well without further multiple
- * streams. That's why we use NORETRY | NOWARN.
- */
- ret = kzalloc(LZ4_MEM_COMPRESS, GFP_NOIO | __GFP_NORETRY |
- __GFP_NOWARN);
- if (!ret)
- ret = __vmalloc(LZ4_MEM_COMPRESS,
- GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN |
- __GFP_ZERO | __GFP_HIGHMEM,
- PAGE_KERNEL);
- return ret;
-}
-
-static void zcomp_lz4_destroy(void *private)
-{
- kvfree(private);
-}
-
-static int zcomp_lz4_compress(const unsigned char *src, unsigned char *dst,
- size_t *dst_len, void *private)
-{
- /* return : Success if return 0 */
- return lz4_compress(src, PAGE_SIZE, dst, dst_len, private);
-}
-
-static int zcomp_lz4_decompress(const unsigned char *src, size_t src_len,
- unsigned char *dst)
-{
- size_t dst_len = PAGE_SIZE;
- /* return : Success if return 0 */
- return lz4_decompress_unknownoutputsize(src, src_len, dst, &dst_len);
-}
-
-struct zcomp_backend zcomp_lz4 = {
- .compress = zcomp_lz4_compress,
- .decompress = zcomp_lz4_decompress,
- .create = zcomp_lz4_create,
- .destroy = zcomp_lz4_destroy,
- .name = "lz4",
-};
diff --git a/drivers/block/zram/zcomp_lz4.h b/drivers/block/zram/zcomp_lz4.h
deleted file mode 100644
index 60613fb29dd8..000000000000
--- a/drivers/block/zram/zcomp_lz4.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (C) 2014 Sergey Senozhatsky.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _ZCOMP_LZ4_H_
-#define _ZCOMP_LZ4_H_
-
-#include "zcomp.h"
-
-extern struct zcomp_backend zcomp_lz4;
-
-#endif /* _ZCOMP_LZ4_H_ */
diff --git a/drivers/block/zram/zcomp_lzo.c b/drivers/block/zram/zcomp_lzo.c
deleted file mode 100644
index edc549920fa0..000000000000
--- a/drivers/block/zram/zcomp_lzo.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (C) 2014 Sergey Senozhatsky.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/lzo.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-
-#include "zcomp_lzo.h"
-
-static void *lzo_create(void)
-{
- void *ret;
-
- /*
- * This function can be called in swapout/fs write path
- * so we can't use GFP_FS|IO. And it assumes we already
- * have at least one stream in zram initialization so we
- * don't do best effort to allocate more stream in here.
- * A default stream will work well without further multiple
- * streams. That's why we use NORETRY | NOWARN.
- */
- ret = kzalloc(LZO1X_MEM_COMPRESS, GFP_NOIO | __GFP_NORETRY |
- __GFP_NOWARN);
- if (!ret)
- ret = __vmalloc(LZO1X_MEM_COMPRESS,
- GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN |
- __GFP_ZERO | __GFP_HIGHMEM,
- PAGE_KERNEL);
- return ret;
-}
-
-static void lzo_destroy(void *private)
-{
- kvfree(private);
-}
-
-static int lzo_compress(const unsigned char *src, unsigned char *dst,
- size_t *dst_len, void *private)
-{
- int ret = lzo1x_1_compress(src, PAGE_SIZE, dst, dst_len, private);
- return ret == LZO_E_OK ? 0 : ret;
-}
-
-static int lzo_decompress(const unsigned char *src, size_t src_len,
- unsigned char *dst)
-{
- size_t dst_len = PAGE_SIZE;
- int ret = lzo1x_decompress_safe(src, src_len, dst, &dst_len);
- return ret == LZO_E_OK ? 0 : ret;
-}
-
-struct zcomp_backend zcomp_lzo = {
- .compress = lzo_compress,
- .decompress = lzo_decompress,
- .create = lzo_create,
- .destroy = lzo_destroy,
- .name = "lzo",
-};
diff --git a/drivers/block/zram/zcomp_lzo.h b/drivers/block/zram/zcomp_lzo.h
deleted file mode 100644
index 128c5807fa14..000000000000
--- a/drivers/block/zram/zcomp_lzo.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (C) 2014 Sergey Senozhatsky.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _ZCOMP_LZO_H_
-#define _ZCOMP_LZO_H_
-
-#include "zcomp.h"
-
-extern struct zcomp_backend zcomp_lzo;
-
-#endif /* _ZCOMP_LZO_H_ */
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 502406c9e6e1..7ccc2e3e4ca3 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -25,11 +25,13 @@
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
+#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
+#include <linux/debugfs.h>
#include "zram_drv.h"
@@ -42,74 +44,105 @@ static const char *default_compressor = "lzo";
/* Module params (documentation at end) */
static unsigned int num_devices = 1;
+/*
+ * Pages that compress to a size equal to or greater than this are stored
+ * uncompressed in memory.
+ */
+static size_t huge_class_size;
-static inline void deprecated_attr_warn(const char *name)
+static void zram_free_page(struct zram *zram, size_t index);
+
+static void zram_slot_lock(struct zram *zram, u32 index)
{
- pr_warn_once("%d (%s) Attribute %s (and others) will be removed. %s\n",
- task_pid_nr(current),
- current->comm,
- name,
- "See zram documentation.");
+ bit_spin_lock(ZRAM_LOCK, &zram->table[index].value);
}
-#define ZRAM_ATTR_RO(name) \
-static ssize_t name##_show(struct device *d, \
- struct device_attribute *attr, char *b) \
-{ \
- struct zram *zram = dev_to_zram(d); \
- \
- deprecated_attr_warn(__stringify(name)); \
- return scnprintf(b, PAGE_SIZE, "%llu\n", \
- (u64)atomic64_read(&zram->stats.name)); \
-} \
-static DEVICE_ATTR_RO(name);
+static void zram_slot_unlock(struct zram *zram, u32 index)
+{
+ bit_spin_unlock(ZRAM_LOCK, &zram->table[index].value);
+}
static inline bool init_done(struct zram *zram)
{
return zram->disksize;
}
+static inline bool zram_allocated(struct zram *zram, u32 index)
+{
+
+ return (zram->table[index].value >> (ZRAM_FLAG_SHIFT + 1)) ||
+ zram->table[index].handle;
+}
+
static inline struct zram *dev_to_zram(struct device *dev)
{
return (struct zram *)dev_to_disk(dev)->private_data;
}
+static unsigned long zram_get_handle(struct zram *zram, u32 index)
+{
+ return zram->table[index].handle;
+}
+
+static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
+{
+ zram->table[index].handle = handle;
+}
+
/* flag operations require table entry bit_spin_lock() being held */
-static int zram_test_flag(struct zram_meta *meta, u32 index,
+static bool zram_test_flag(struct zram *zram, u32 index,
enum zram_pageflags flag)
{
- return meta->table[index].value & BIT(flag);
+ return zram->table[index].value & BIT(flag);
}
-static void zram_set_flag(struct zram_meta *meta, u32 index,
+static void zram_set_flag(struct zram *zram, u32 index,
enum zram_pageflags flag)
{
- meta->table[index].value |= BIT(flag);
+ zram->table[index].value |= BIT(flag);
}
-static void zram_clear_flag(struct zram_meta *meta, u32 index,
+static void zram_clear_flag(struct zram *zram, u32 index,
enum zram_pageflags flag)
{
- meta->table[index].value &= ~BIT(flag);
+ zram->table[index].value &= ~BIT(flag);
}
-static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
+static inline void zram_set_element(struct zram *zram, u32 index,
+ unsigned long element)
{
- return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
+ zram->table[index].element = element;
}
-static void zram_set_obj_size(struct zram_meta *meta,
+static unsigned long zram_get_element(struct zram *zram, u32 index)
+{
+ return zram->table[index].element;
+}
+
+static size_t zram_get_obj_size(struct zram *zram, u32 index)
+{
+ return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
+}
+
+static void zram_set_obj_size(struct zram *zram,
u32 index, size_t size)
{
- unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
+ unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT;
- meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
+ zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}
+#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
return bvec->bv_len != PAGE_SIZE;
}
+#else
+static inline bool is_partial_io(struct bio_vec *bvec)
+{
+ return false;
+}
+#endif
/*
* Check if request is within bounds and aligned on zram logical blocks.
@@ -137,8 +170,7 @@ static inline bool valid_io_request(struct zram *zram,
static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
- if (*offset + bvec->bv_len >= PAGE_SIZE)
- (*index)++;
+ *index += (*offset + bvec->bv_len) / PAGE_SIZE;
*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}
@@ -157,34 +189,39 @@ static inline void update_used_max(struct zram *zram,
} while (old_max != cur_max);
}
-static bool page_zero_filled(void *ptr)
+static inline void zram_fill_page(char *ptr, unsigned long len,
+ unsigned long value)
+{
+ int i;
+ unsigned long *page = (unsigned long *)ptr;
+
+ WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
+
+ if (likely(value == 0)) {
+ memset(ptr, 0, len);
+ } else {
+ for (i = 0; i < len / sizeof(*page); i++)
+ page[i] = value;
+ }
+}
+
+static bool page_same_filled(void *ptr, unsigned long *element)
{
unsigned int pos;
unsigned long *page;
+ unsigned long val;
page = (unsigned long *)ptr;
+ val = page[0];
- for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
- if (page[pos])
+ for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
+ if (val != page[pos])
return false;
}
- return true;
-}
+ *element = val;
-static void handle_zero_page(struct bio_vec *bvec)
-{
- struct page *page = bvec->bv_page;
- void *user_mem;
-
- user_mem = kmap_atomic(page);
- if (is_partial_io(bvec))
- memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
- else
- clear_page(user_mem);
- kunmap_atomic(user_mem);
-
- flush_dcache_page(page);
+ return true;
}
static ssize_t initstate_show(struct device *dev,
@@ -208,142 +245,535 @@ static ssize_t disksize_show(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}
-static ssize_t orig_data_size_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t mem_limit_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
{
+ u64 limit;
+ char *tmp;
struct zram *zram = dev_to_zram(dev);
- deprecated_attr_warn("orig_data_size");
- return scnprintf(buf, PAGE_SIZE, "%llu\n",
- (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
+ limit = memparse(buf, &tmp);
+ if (buf == tmp) /* no chars parsed, invalid input */
+ return -EINVAL;
+
+ down_write(&zram->init_lock);
+ zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
+ up_write(&zram->init_lock);
+
+ return len;
}
-static ssize_t mem_used_total_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t mem_used_max_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
{
- u64 val = 0;
+ int err;
+ unsigned long val;
struct zram *zram = dev_to_zram(dev);
- deprecated_attr_warn("mem_used_total");
+ err = kstrtoul(buf, 10, &val);
+ if (err || val != 0)
+ return -EINVAL;
+
down_read(&zram->init_lock);
if (init_done(zram)) {
- struct zram_meta *meta = zram->meta;
- val = zs_get_total_pages(meta->mem_pool);
+ atomic_long_set(&zram->stats.max_used_pages,
+ zs_get_total_pages(zram->mem_pool));
}
up_read(&zram->init_lock);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
+ return len;
+}
+
+#ifdef CONFIG_ZRAM_WRITEBACK
+static bool zram_wb_enabled(struct zram *zram)
+{
+ return zram->backing_dev;
}
-static ssize_t mem_limit_show(struct device *dev,
+static void reset_bdev(struct zram *zram)
+{
+ struct block_device *bdev;
+
+ if (!zram_wb_enabled(zram))
+ return;
+
+ bdev = zram->bdev;
+ if (zram->old_block_size)
+ set_blocksize(bdev, zram->old_block_size);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+ /* hopefully filp_close() flushes all outstanding IO */
+ filp_close(zram->backing_dev, NULL);
+ zram->backing_dev = NULL;
+ zram->old_block_size = 0;
+ zram->bdev = NULL;
+
+ kvfree(zram->bitmap);
+ zram->bitmap = NULL;
+}
+
+static ssize_t backing_dev_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- u64 val;
struct zram *zram = dev_to_zram(dev);
+ struct file *file = zram->backing_dev;
+ char *p;
+ ssize_t ret;
- deprecated_attr_warn("mem_limit");
down_read(&zram->init_lock);
- val = zram->limit_pages;
- up_read(&zram->init_lock);
+ if (!zram_wb_enabled(zram)) {
+ memcpy(buf, "none\n", 5);
+ up_read(&zram->init_lock);
+ return 5;
+ }
+
+ p = file_path(file, buf, PAGE_SIZE - 1);
+ if (IS_ERR(p)) {
+ ret = PTR_ERR(p);
+ goto out;
+ }
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
+ ret = strlen(p);
+ memmove(buf, p, ret);
+ buf[ret++] = '\n';
+out:
+ up_read(&zram->init_lock);
+ return ret;
}
-static ssize_t mem_limit_store(struct device *dev,
+static ssize_t backing_dev_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
- u64 limit;
- char *tmp;
+ char *file_name;
+ size_t sz;
+ struct file *backing_dev = NULL;
+ struct inode *inode;
+ struct address_space *mapping;
+ unsigned int bitmap_sz, old_block_size = 0;
+ unsigned long nr_pages, *bitmap = NULL;
+ struct block_device *bdev = NULL;
+ int err;
struct zram *zram = dev_to_zram(dev);
+ gfp_t kmalloc_flags;
- limit = memparse(buf, &tmp);
- if (buf == tmp) /* no chars parsed, invalid input */
- return -EINVAL;
+ file_name = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!file_name)
+ return -ENOMEM;
down_write(&zram->init_lock);
- zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
+ if (init_done(zram)) {
+ pr_info("Can't setup backing device for initialized device\n");
+ err = -EBUSY;
+ goto out;
+ }
+
+ strlcpy(file_name, buf, PATH_MAX);
+ /* ignore trailing newline */
+ sz = strlen(file_name);
+ if (sz > 0 && file_name[sz - 1] == '\n')
+ file_name[sz - 1] = 0x00;
+
+ backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
+ if (IS_ERR(backing_dev)) {
+ err = PTR_ERR(backing_dev);
+ backing_dev = NULL;
+ goto out;
+ }
+
+ mapping = backing_dev->f_mapping;
+ inode = mapping->host;
+
+ /* Only block devices are supported at the moment */
+ if (!S_ISBLK(inode->i_mode)) {
+ err = -ENOTBLK;
+ goto out;
+ }
+
+ bdev = bdgrab(I_BDEV(inode));
+ err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
+ if (err < 0)
+ goto out;
+
+ nr_pages = i_size_read(inode) >> PAGE_SHIFT;
+ bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
+ kmalloc_flags = GFP_KERNEL | __GFP_ZERO;
+ if (bitmap_sz > PAGE_SIZE)
+ kmalloc_flags |= __GFP_NOWARN | __GFP_NORETRY;
+
+ bitmap = kmalloc_node(bitmap_sz, kmalloc_flags, NUMA_NO_NODE);
+ if (!bitmap && bitmap_sz > PAGE_SIZE)
+ bitmap = vzalloc(bitmap_sz);
+
+ if (!bitmap) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ old_block_size = block_size(bdev);
+ err = set_blocksize(bdev, PAGE_SIZE);
+ if (err)
+ goto out;
+
+ reset_bdev(zram);
+ spin_lock_init(&zram->bitmap_lock);
+
+ zram->old_block_size = old_block_size;
+ zram->bdev = bdev;
+ zram->backing_dev = backing_dev;
+ zram->bitmap = bitmap;
+ zram->nr_pages = nr_pages;
up_write(&zram->init_lock);
+ pr_info("setup backing device %s\n", file_name);
+ kfree(file_name);
+
return len;
+out:
+ if (bitmap)
+ kvfree(bitmap);
+
+ if (bdev)
+ blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+
+ if (backing_dev)
+ filp_close(backing_dev, NULL);
+
+ up_write(&zram->init_lock);
+
+ kfree(file_name);
+
+ return err;
}
-static ssize_t mem_used_max_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static unsigned long get_entry_bdev(struct zram *zram)
{
- u64 val = 0;
- struct zram *zram = dev_to_zram(dev);
+ unsigned long entry;
- deprecated_attr_warn("mem_used_max");
- down_read(&zram->init_lock);
- if (init_done(zram))
- val = atomic_long_read(&zram->stats.max_used_pages);
- up_read(&zram->init_lock);
+ spin_lock(&zram->bitmap_lock);
+ /* skip bit 0 so an entry is never confused with zram.handle = 0 */
+ entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
+ if (entry == zram->nr_pages) {
+ spin_unlock(&zram->bitmap_lock);
+ return 0;
+ }
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
+ set_bit(entry, zram->bitmap);
+ spin_unlock(&zram->bitmap_lock);
+
+ return entry;
}
-static ssize_t mem_used_max_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+static void put_entry_bdev(struct zram *zram, unsigned long entry)
{
- int err;
- unsigned long val;
- struct zram *zram = dev_to_zram(dev);
+ int was_set;
- err = kstrtoul(buf, 10, &val);
- if (err || val != 0)
- return -EINVAL;
+ spin_lock(&zram->bitmap_lock);
+ was_set = test_and_clear_bit(entry, zram->bitmap);
+ spin_unlock(&zram->bitmap_lock);
+ WARN_ON_ONCE(!was_set);
+}
- down_read(&zram->init_lock);
- if (init_done(zram)) {
- struct zram_meta *meta = zram->meta;
- atomic_long_set(&zram->stats.max_used_pages,
- zs_get_total_pages(meta->mem_pool));
+static void zram_page_end_io(struct bio *bio)
+{
+ struct page *page = bio->bi_io_vec[0].bv_page;
+
+ page_endio(page, bio_data_dir(bio), bio->bi_error);
+ bio_put(bio);
+}
+
+/*
+ * Returns 1 if the submission is successful.
+ */
+static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *parent)
+{
+ struct bio *bio;
+
+ bio = bio_alloc(GFP_ATOMIC, 1);
+ if (!bio)
+ return -ENOMEM;
+
+ bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
+ bio->bi_bdev = zram->bdev;
+ if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
+ bio_put(bio);
+ return -EIO;
}
- up_read(&zram->init_lock);
- return len;
+ if (!parent) {
+ bio->bi_rw = 0;
+ bio->bi_end_io = zram_page_end_io;
+ } else {
+ bio->bi_rw = parent->bi_rw;
+ bio_chain(bio, parent);
+ }
+
+ submit_bio(READ, bio);
+ return 1;
}
-static ssize_t max_comp_streams_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+struct zram_work {
+ struct work_struct work;
+ struct zram *zram;
+ unsigned long entry;
+ struct bio *bio;
+};
+
+#if PAGE_SIZE != 4096
+static void zram_sync_read(struct work_struct *work)
{
- int val;
- struct zram *zram = dev_to_zram(dev);
+ struct bio_vec bvec;
+ struct zram_work *zw = container_of(work, struct zram_work, work);
+ struct zram *zram = zw->zram;
+ unsigned long entry = zw->entry;
+ struct bio *bio = zw->bio;
- down_read(&zram->init_lock);
- val = zram->max_comp_streams;
- up_read(&zram->init_lock);
+ read_from_bdev_async(zram, &bvec, entry, bio);
+}
+
+/*
+ * The block layer wants only one ->make_request_fn to be active at a time,
+ * so if we use chained IO with the parent IO in the same context,
+ * it's a deadlock. To avoid it, this uses a worker thread context.
+ */
+static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *bio)
+{
+ struct zram_work work;
+
+ work.zram = zram;
+ work.entry = entry;
+ work.bio = bio;
- return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+ INIT_WORK_ONSTACK(&work.work, zram_sync_read);
+ queue_work(system_unbound_wq, &work.work);
+ flush_work(&work.work);
+ destroy_work_on_stack(&work.work);
+
+ return 1;
+}
+#else
+static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *bio)
+{
+ WARN_ON(1);
+ return -EIO;
}
+#endif
-static ssize_t max_comp_streams_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *parent, bool sync)
{
- int num;
- struct zram *zram = dev_to_zram(dev);
- int ret;
+ if (sync)
+ return read_from_bdev_sync(zram, bvec, entry, parent);
+ else
+ return read_from_bdev_async(zram, bvec, entry, parent);
+}
- ret = kstrtoint(buf, 0, &num);
- if (ret < 0)
- return ret;
- if (num < 1)
+static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *parent,
+ unsigned long *pentry)
+{
+ struct bio *bio;
+ unsigned long entry;
+
+ bio = bio_alloc(GFP_ATOMIC, 1);
+ if (!bio)
+ return -ENOMEM;
+
+ entry = get_entry_bdev(zram);
+ if (!entry) {
+ bio_put(bio);
+ return -ENOSPC;
+ }
+
+ bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
+ bio->bi_bdev = zram->bdev;
+ if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
+ bvec->bv_offset)) {
+ bio_put(bio);
+ put_entry_bdev(zram, entry);
+ return -EIO;
+ }
+
+ if (!parent) {
+ bio->bi_rw = REQ_WRITE | REQ_SYNC;
+ bio->bi_end_io = zram_page_end_io;
+ } else {
+ bio->bi_rw = parent->bi_rw;
+ bio_chain(bio, parent);
+ }
+
+ submit_bio(WRITE, bio);
+ *pentry = entry;
+
+ return 0;
+}
+
+static void zram_wb_clear(struct zram *zram, u32 index)
+{
+ unsigned long entry;
+
+ zram_clear_flag(zram, index, ZRAM_WB);
+ entry = zram_get_element(zram, index);
+ zram_set_element(zram, index, 0);
+ put_entry_bdev(zram, entry);
+}
+
+#else
+static bool zram_wb_enabled(struct zram *zram) { return false; }
+static inline void reset_bdev(struct zram *zram) {};
+static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *parent,
+ unsigned long *pentry)
+
+{
+ return -EIO;
+}
+
+static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *parent, bool sync)
+{
+ return -EIO;
+}
+static void zram_wb_clear(struct zram *zram, u32 index) {}
+#endif
+
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+
+static struct dentry *zram_debugfs_root;
+
+static void zram_debugfs_create(void)
+{
+ zram_debugfs_root = debugfs_create_dir("zram", NULL);
+}
+
+static void zram_debugfs_destroy(void)
+{
+ debugfs_remove_recursive(zram_debugfs_root);
+}
+
+static void zram_accessed(struct zram *zram, u32 index)
+{
+ zram->table[index].ac_time = ktime_get_boottime();
+}
+
+static void zram_reset_access(struct zram *zram, u32 index)
+{
+ zram->table[index].ac_time.tv64 = 0;
+}
+
+static ssize_t read_block_state(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ char *kbuf;
+ ssize_t index, written = 0;
+ struct zram *zram = file->private_data;
+ unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
+ struct timespec64 ts;
+ gfp_t kmalloc_flags;
+
+ kmalloc_flags = GFP_KERNEL;
+ if (count > PAGE_SIZE)
+ kmalloc_flags |= __GFP_NOWARN | __GFP_NORETRY;
+
+ kbuf = kmalloc_node(count, kmalloc_flags, NUMA_NO_NODE);
+ if (!kbuf && count > PAGE_SIZE)
+ kbuf = vmalloc(count);
+
+ if (!kbuf)
+ return -ENOMEM;
+
+ down_read(&zram->init_lock);
+ if (!init_done(zram)) {
+ up_read(&zram->init_lock);
+ kvfree(kbuf);
return -EINVAL;
+ }
- down_write(&zram->init_lock);
- if (init_done(zram)) {
- if (!zcomp_set_max_streams(zram->comp, num)) {
- pr_info("Cannot change max compression streams\n");
- ret = -EINVAL;
- goto out;
+ for (index = *ppos; index < nr_pages; index++) {
+ int copied;
+
+ zram_slot_lock(zram, index);
+ if (!zram_allocated(zram, index))
+ goto next;
+
+ ts = ktime_to_timespec64(zram->table[index].ac_time);
+ copied = snprintf(kbuf + written, count,
+ "%12zd %12lld.%06lu %c%c%c\n",
+ index, (s64)ts.tv_sec,
+ ts.tv_nsec / NSEC_PER_USEC,
+ zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
+ zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
+ zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.');
+
+ if (count < copied) {
+ zram_slot_unlock(zram, index);
+ break;
}
+ written += copied;
+ count -= copied;
+next:
+ zram_slot_unlock(zram, index);
+ *ppos += 1;
}
- zram->max_comp_streams = num;
- ret = len;
-out:
- up_write(&zram->init_lock);
- return ret;
+ up_read(&zram->init_lock);
+ if (copy_to_user(buf, kbuf, written))
+ written = -EFAULT;
+ kvfree(kbuf);
+
+ return written;
+}
+
+static const struct file_operations proc_zram_block_state_op = {
+ .open = simple_open,
+ .read = read_block_state,
+ .llseek = default_llseek,
+};
+
+static void zram_debugfs_register(struct zram *zram)
+{
+ if (!zram_debugfs_root)
+ return;
+
+ zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
+ zram_debugfs_root);
+ debugfs_create_file("block_state", 0400, zram->debugfs_dir,
+ zram, &proc_zram_block_state_op);
+}
+
+static void zram_debugfs_unregister(struct zram *zram)
+{
+ debugfs_remove_recursive(zram->debugfs_dir);
+}
+#else
+static void zram_debugfs_create(void) {};
+static void zram_debugfs_destroy(void) {};
+static void zram_accessed(struct zram *zram, u32 index) {};
+static void zram_reset_access(struct zram *zram, u32 index) {};
+static void zram_debugfs_register(struct zram *zram) {};
+static void zram_debugfs_unregister(struct zram *zram) {};
+#endif
+
+/*
+ * We switched to per-cpu streams and this attr is not needed anymore.
+ * However, we will keep it around for some time, because:
+ * a) we may revert per-cpu streams in the future
+ * b) it's visible to user space and we need to follow our 2 years
+ * retirement rule; but we already have a number of 'soon to be
+ * altered' attrs, so max_comp_streams need to wait for the next
+ * layoff cycle.
+ */
+static ssize_t max_comp_streams_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
+}
+
+static ssize_t max_comp_streams_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ return len;
}
static ssize_t comp_algorithm_show(struct device *dev,
@@ -363,9 +793,16 @@ static ssize_t comp_algorithm_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
struct zram *zram = dev_to_zram(dev);
+ char compressor[ARRAY_SIZE(zram->compressor)];
size_t sz;
- if (!zcomp_available_algorithm(buf))
+ strlcpy(compressor, buf, sizeof(compressor));
+ /* ignore trailing newline */
+ sz = strlen(compressor);
+ if (sz > 0 && compressor[sz - 1] == '\n')
+ compressor[sz - 1] = 0x00;
+
+ if (!zcomp_available_algorithm(compressor))
return -EINVAL;
down_write(&zram->init_lock);
@@ -374,13 +811,8 @@ static ssize_t comp_algorithm_store(struct device *dev,
pr_info("Can't change algorithm for initialized device\n");
return -EBUSY;
}
- strlcpy(zram->compressor, buf, sizeof(zram->compressor));
-
- /* ignore trailing newline */
- sz = strlen(zram->compressor);
- if (sz > 0 && zram->compressor[sz - 1] == '\n')
- zram->compressor[sz - 1] = 0x00;
+ strcpy(zram->compressor, compressor);
up_write(&zram->init_lock);
return len;
}
@@ -389,7 +821,6 @@ static ssize_t compact_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
struct zram *zram = dev_to_zram(dev);
- struct zram_meta *meta;
down_read(&zram->init_lock);
if (!init_done(zram)) {
@@ -397,8 +828,7 @@ static ssize_t compact_store(struct device *dev,
return -EINVAL;
}
- meta = zram->meta;
- zs_compact(meta->mem_pool);
+ zs_compact(zram->mem_pool);
up_read(&zram->init_lock);
return len;
@@ -435,97 +865,80 @@ static ssize_t mm_stat_show(struct device *dev,
down_read(&zram->init_lock);
if (init_done(zram)) {
- mem_used = zs_get_total_pages(zram->meta->mem_pool);
- zs_pool_stats(zram->meta->mem_pool, &pool_stats);
+ mem_used = zs_get_total_pages(zram->mem_pool);
+ zs_pool_stats(zram->mem_pool, &pool_stats);
}
orig_size = atomic64_read(&zram->stats.pages_stored);
max_used = atomic_long_read(&zram->stats.max_used_pages);
ret = scnprintf(buf, PAGE_SIZE,
- "%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n",
+ "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n",
orig_size << PAGE_SHIFT,
(u64)atomic64_read(&zram->stats.compr_data_size),
mem_used << PAGE_SHIFT,
zram->limit_pages << PAGE_SHIFT,
max_used << PAGE_SHIFT,
- (u64)atomic64_read(&zram->stats.zero_pages),
- pool_stats.pages_compacted);
+ (u64)atomic64_read(&zram->stats.same_pages),
+ pool_stats.pages_compacted,
+ (u64)atomic64_read(&zram->stats.huge_pages));
up_read(&zram->init_lock);
return ret;
}
-static DEVICE_ATTR_RO(io_stat);
-static DEVICE_ATTR_RO(mm_stat);
-ZRAM_ATTR_RO(num_reads);
-ZRAM_ATTR_RO(num_writes);
-ZRAM_ATTR_RO(failed_reads);
-ZRAM_ATTR_RO(failed_writes);
-ZRAM_ATTR_RO(invalid_io);
-ZRAM_ATTR_RO(notify_free);
-ZRAM_ATTR_RO(zero_pages);
-ZRAM_ATTR_RO(compr_data_size);
-
-static inline bool zram_meta_get(struct zram *zram)
-{
- if (atomic_inc_not_zero(&zram->refcount))
- return true;
- return false;
-}
-
-static inline void zram_meta_put(struct zram *zram)
+static ssize_t debug_stat_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- atomic_dec(&zram->refcount);
+ int version = 1;
+ struct zram *zram = dev_to_zram(dev);
+ ssize_t ret;
+
+ down_read(&zram->init_lock);
+ ret = scnprintf(buf, PAGE_SIZE,
+ "version: %d\n%8llu\n",
+ version,
+ (u64)atomic64_read(&zram->stats.writestall));
+ up_read(&zram->init_lock);
+
+ return ret;
}
-static void zram_meta_free(struct zram_meta *meta, u64 disksize)
+static DEVICE_ATTR_RO(io_stat);
+static DEVICE_ATTR_RO(mm_stat);
+static DEVICE_ATTR_RO(debug_stat);
+
+static void zram_meta_free(struct zram *zram, u64 disksize)
{
size_t num_pages = disksize >> PAGE_SHIFT;
size_t index;
/* Free all pages that are still in this zram device */
- for (index = 0; index < num_pages; index++) {
- unsigned long handle = meta->table[index].handle;
-
- if (!handle)
- continue;
-
- zs_free(meta->mem_pool, handle);
- }
+ for (index = 0; index < num_pages; index++)
+ zram_free_page(zram, index);
- zs_destroy_pool(meta->mem_pool);
- vfree(meta->table);
- kfree(meta);
+ zs_destroy_pool(zram->mem_pool);
+ vfree(zram->table);
}
-static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize)
+static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
size_t num_pages;
- struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
-
- if (!meta)
- return NULL;
num_pages = disksize >> PAGE_SHIFT;
- meta->table = vzalloc(num_pages * sizeof(*meta->table));
- if (!meta->table) {
- pr_err("Error allocating zram address table\n");
- goto out_error;
- }
+ zram->table = vzalloc(num_pages * sizeof(*zram->table));
+ if (!zram->table)
+ return false;
- meta->mem_pool = zs_create_pool(pool_name, GFP_NOIO | __GFP_HIGHMEM);
- if (!meta->mem_pool) {
- pr_err("Error creating memory pool\n");
- goto out_error;
+ zram->mem_pool = zs_create_pool(zram->disk->disk_name);
+ if (!zram->mem_pool) {
+ vfree(zram->table);
+ return false;
}
- return meta;
-
-out_error:
- vfree(meta->table);
- kfree(meta);
- return NULL;
+ if (!huge_class_size)
+ huge_class_size = zs_huge_class_size(zram->mem_pool);
+ return true;
}
/*
@@ -535,238 +948,312 @@ out_error:
*/
static void zram_free_page(struct zram *zram, size_t index)
{
- struct zram_meta *meta = zram->meta;
- unsigned long handle = meta->table[index].handle;
+ unsigned long handle;
- if (unlikely(!handle)) {
- /*
- * No memory is allocated for zero filled pages.
- * Simply clear zero page flag.
- */
- if (zram_test_flag(meta, index, ZRAM_ZERO)) {
- zram_clear_flag(meta, index, ZRAM_ZERO);
- atomic64_dec(&zram->stats.zero_pages);
- }
+ zram_reset_access(zram, index);
+
+ if (zram_test_flag(zram, index, ZRAM_HUGE)) {
+ zram_clear_flag(zram, index, ZRAM_HUGE);
+ atomic64_dec(&zram->stats.huge_pages);
+ }
+
+ if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) {
+ zram_wb_clear(zram, index);
+ atomic64_dec(&zram->stats.pages_stored);
return;
}
- zs_free(meta->mem_pool, handle);
+ /*
+ * No memory is allocated for same element filled pages.
+ * Simply clear same page flag.
+ */
+ if (zram_test_flag(zram, index, ZRAM_SAME)) {
+ zram_clear_flag(zram, index, ZRAM_SAME);
+ zram_set_element(zram, index, 0);
+ atomic64_dec(&zram->stats.same_pages);
+ atomic64_dec(&zram->stats.pages_stored);
+ return;
+ }
+
+ handle = zram_get_handle(zram, index);
+ if (!handle)
+ return;
+
+ zs_free(zram->mem_pool, handle);
- atomic64_sub(zram_get_obj_size(meta, index),
+ atomic64_sub(zram_get_obj_size(zram, index),
&zram->stats.compr_data_size);
atomic64_dec(&zram->stats.pages_stored);
- meta->table[index].handle = 0;
- zram_set_obj_size(meta, index, 0);
+ zram_set_handle(zram, index, 0);
+ zram_set_obj_size(zram, index, 0);
}
-static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
+static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
+ struct bio *bio, bool partial_io)
{
- int ret = 0;
- unsigned char *cmem;
- struct zram_meta *meta = zram->meta;
+ int ret;
unsigned long handle;
- size_t size;
-
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
- handle = meta->table[index].handle;
- size = zram_get_obj_size(meta, index);
+ unsigned int size;
+ void *src, *dst;
+
+ if (zram_wb_enabled(zram)) {
+ zram_slot_lock(zram, index);
+ if (zram_test_flag(zram, index, ZRAM_WB)) {
+ struct bio_vec bvec;
+
+ zram_slot_unlock(zram, index);
+
+ bvec.bv_page = page;
+ bvec.bv_len = PAGE_SIZE;
+ bvec.bv_offset = 0;
+ return read_from_bdev(zram, &bvec,
+ zram_get_element(zram, index),
+ bio, partial_io);
+ }
+ zram_slot_unlock(zram, index);
+ }
- if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
- memset(mem, 0, PAGE_SIZE);
+ zram_slot_lock(zram, index);
+ handle = zram_get_handle(zram, index);
+ if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
+ unsigned long value;
+ void *mem;
+
+ value = handle ? zram_get_element(zram, index) : 0;
+ mem = kmap_atomic(page);
+ zram_fill_page(mem, PAGE_SIZE, value);
+ kunmap_atomic(mem);
+ zram_slot_unlock(zram, index);
return 0;
}
- cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
- if (size == PAGE_SIZE)
- memcpy(mem, cmem, PAGE_SIZE);
- else
- ret = zcomp_decompress(zram->comp, cmem, size, mem);
- zs_unmap_object(meta->mem_pool, handle);
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ size = zram_get_obj_size(zram, index);
+
+ src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
+ if (size == PAGE_SIZE) {
+ dst = kmap_atomic(page);
+ memcpy(dst, src, PAGE_SIZE);
+ kunmap_atomic(dst);
+ ret = 0;
+ } else {
+ struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
+
+ dst = kmap_atomic(page);
+ ret = zcomp_decompress(zstrm, src, size, dst);
+ kunmap_atomic(dst);
+ zcomp_stream_put(zram->comp);
+ }
+ zs_unmap_object(zram->mem_pool, handle);
+ zram_slot_unlock(zram, index);
/* Should NEVER happen. Return bio error if it does. */
- if (unlikely(ret)) {
+ if (unlikely(ret))
pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
- return ret;
- }
- return 0;
+ return ret;
}
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
- u32 index, int offset)
+ u32 index, int offset, struct bio *bio)
{
int ret;
struct page *page;
- unsigned char *user_mem, *uncmem = NULL;
- struct zram_meta *meta = zram->meta;
- page = bvec->bv_page;
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
- if (unlikely(!meta->table[index].handle) ||
- zram_test_flag(meta, index, ZRAM_ZERO)) {
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
- handle_zero_page(bvec);
- return 0;
+ page = bvec->bv_page;
+ if (is_partial_io(bvec)) {
+ /* Use a temporary buffer to decompress the page */
+ page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
+ if (!page)
+ return -ENOMEM;
}
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
- if (is_partial_io(bvec))
- /* Use a temporary buffer to decompress the page */
- uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
+ ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
+ if (unlikely(ret))
+ goto out;
- user_mem = kmap_atomic(page);
- if (!is_partial_io(bvec))
- uncmem = user_mem;
+ if (is_partial_io(bvec)) {
+ void *dst = kmap_atomic(bvec->bv_page);
+ void *src = kmap_atomic(page);
- if (!uncmem) {
- pr_err("Unable to allocate temp memory\n");
- ret = -ENOMEM;
- goto out_cleanup;
+ memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
+ kunmap_atomic(src);
+ kunmap_atomic(dst);
}
-
- ret = zram_decompress_page(zram, uncmem, index);
- /* Should NEVER happen. Return bio error if it does. */
- if (unlikely(ret))
- goto out_cleanup;
-
+out:
if (is_partial_io(bvec))
- memcpy(user_mem + bvec->bv_offset, uncmem + offset,
- bvec->bv_len);
+ __free_page(page);
- flush_dcache_page(page);
- ret = 0;
-out_cleanup:
- kunmap_atomic(user_mem);
- if (is_partial_io(bvec))
- kfree(uncmem);
return ret;
}
-static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
- int offset)
+static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *bio)
{
int ret = 0;
- size_t clen;
- unsigned long handle;
- struct page *page;
- unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
- struct zram_meta *meta = zram->meta;
- struct zcomp_strm *zstrm = NULL;
unsigned long alloced_pages;
+ unsigned long handle = 0;
+ unsigned int comp_len = 0;
+ void *src, *dst, *mem;
+ struct zcomp_strm *zstrm;
+ struct page *page = bvec->bv_page;
+ unsigned long element = 0;
+ enum zram_pageflags flags = 0;
+ bool allow_wb = true;
- page = bvec->bv_page;
- if (is_partial_io(bvec)) {
- /*
- * This is a partial IO. We need to read the full page
- * before to write the changes.
- */
- uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
- if (!uncmem) {
- ret = -ENOMEM;
- goto out;
- }
- ret = zram_decompress_page(zram, uncmem, index);
- if (ret)
- goto out;
- }
-
- zstrm = zcomp_strm_find(zram->comp);
- user_mem = kmap_atomic(page);
-
- if (is_partial_io(bvec)) {
- memcpy(uncmem + offset, user_mem + bvec->bv_offset,
- bvec->bv_len);
- kunmap_atomic(user_mem);
- user_mem = NULL;
- } else {
- uncmem = user_mem;
- }
-
- if (page_zero_filled(uncmem)) {
- if (user_mem)
- kunmap_atomic(user_mem);
+ mem = kmap_atomic(page);
+ if (page_same_filled(mem, &element)) {
+ kunmap_atomic(mem);
/* Free memory associated with this sector now. */
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
- zram_free_page(zram, index);
- zram_set_flag(meta, index, ZRAM_ZERO);
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
-
- atomic64_inc(&zram->stats.zero_pages);
- ret = 0;
+ flags = ZRAM_SAME;
+ atomic64_inc(&zram->stats.same_pages);
goto out;
}
+ kunmap_atomic(mem);
- ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen);
- if (!is_partial_io(bvec)) {
- kunmap_atomic(user_mem);
- user_mem = NULL;
- uncmem = NULL;
- }
+compress_again:
+ zstrm = zcomp_stream_get(zram->comp);
+ src = kmap_atomic(page);
+ ret = zcomp_compress(zstrm, src, &comp_len);
+ kunmap_atomic(src);
if (unlikely(ret)) {
+ zcomp_stream_put(zram->comp);
pr_err("Compression failed! err=%d\n", ret);
- goto out;
+ zs_free(zram->mem_pool, handle);
+ return ret;
}
- src = zstrm->buffer;
- if (unlikely(clen > max_zpage_size)) {
- clen = PAGE_SIZE;
- if (is_partial_io(bvec))
- src = uncmem;
+
+ if (unlikely(comp_len >= huge_class_size)) {
+ if (zram_wb_enabled(zram) && allow_wb) {
+ zcomp_stream_put(zram->comp);
+ ret = write_to_bdev(zram, bvec, index, bio, &element);
+ if (!ret) {
+ flags = ZRAM_WB;
+ ret = 1;
+ goto out;
+ }
+ allow_wb = false;
+ goto compress_again;
+ }
}
- handle = zs_malloc(meta->mem_pool, clen);
+ /*
+ * handle allocation has 2 paths:
+ * a) fast path is executed with preemption disabled (for
+ * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
+ * since we can't sleep;
+ * b) slow path enables preemption and attempts to allocate
+ * the page with __GFP_DIRECT_RECLAIM bit set. we have to
+ * put per-cpu compression stream and, thus, to re-do
+ * the compression once handle is allocated.
+ *
+ * if we have a 'non-null' handle here then we are coming
+ * from the slow path and handle has already been allocated.
+ */
+ if (!handle)
+ handle = zs_malloc(zram->mem_pool, comp_len,
+ __GFP_KSWAPD_RECLAIM |
+ __GFP_NOWARN |
+ __GFP_HIGHMEM |
+ __GFP_MOVABLE);
if (!handle) {
- pr_err("Error allocating memory for compressed page: %u, size=%zu\n",
- index, clen);
- ret = -ENOMEM;
- goto out;
+ zcomp_stream_put(zram->comp);
+ atomic64_inc(&zram->stats.writestall);
+ handle = zs_malloc(zram->mem_pool, comp_len,
+ GFP_NOIO | __GFP_HIGHMEM |
+ __GFP_MOVABLE);
+ if (handle)
+ goto compress_again;
+ return -ENOMEM;
}
- alloced_pages = zs_get_total_pages(meta->mem_pool);
+ alloced_pages = zs_get_total_pages(zram->mem_pool);
update_used_max(zram, alloced_pages);
if (zram->limit_pages && alloced_pages > zram->limit_pages) {
- zs_free(meta->mem_pool, handle);
- ret = -ENOMEM;
- goto out;
+ zcomp_stream_put(zram->comp);
+ zs_free(zram->mem_pool, handle);
+ return -ENOMEM;
}
- cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
+ dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
- if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
+ src = zstrm->buffer;
+ if (comp_len == PAGE_SIZE)
src = kmap_atomic(page);
- memcpy(cmem, src, PAGE_SIZE);
+ memcpy(dst, src, comp_len);
+ if (comp_len == PAGE_SIZE)
kunmap_atomic(src);
- } else {
- memcpy(cmem, src, clen);
- }
-
- zcomp_strm_release(zram->comp, zstrm);
- zstrm = NULL;
- zs_unmap_object(meta->mem_pool, handle);
+ zcomp_stream_put(zram->comp);
+ zs_unmap_object(zram->mem_pool, handle);
+ atomic64_add(comp_len, &zram->stats.compr_data_size);
+out:
/*
* Free memory associated with this sector
* before overwriting unused sectors.
*/
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_slot_lock(zram, index);
zram_free_page(zram, index);
- meta->table[index].handle = handle;
- zram_set_obj_size(meta, index, clen);
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ if (comp_len == PAGE_SIZE) {
+ zram_set_flag(zram, index, ZRAM_HUGE);
+ atomic64_inc(&zram->stats.huge_pages);
+ }
+
+ if (flags) {
+ zram_set_flag(zram, index, flags);
+ zram_set_element(zram, index, element);
+ } else {
+ zram_set_handle(zram, index, handle);
+ zram_set_obj_size(zram, index, comp_len);
+ }
+ zram_slot_unlock(zram, index);
/* Update stats */
- atomic64_add(clen, &zram->stats.compr_data_size);
atomic64_inc(&zram->stats.pages_stored);
+ return ret;
+}
+
+static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
+ u32 index, int offset, struct bio *bio)
+{
+ int ret;
+ struct page *page = NULL;
+ void *src;
+ struct bio_vec vec;
+
+ vec = *bvec;
+ if (is_partial_io(bvec)) {
+ void *dst;
+ /*
+ * This is a partial IO. We need to read the full page
+ * before to write the changes.
+ */
+ page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
+ if (!page)
+ return -ENOMEM;
+
+ ret = __zram_bvec_read(zram, page, index, bio, true);
+ if (ret)
+ goto out;
+
+ src = kmap_atomic(bvec->bv_page);
+ dst = kmap_atomic(page);
+ memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
+ kunmap_atomic(dst);
+ kunmap_atomic(src);
+
+ vec.bv_page = page;
+ vec.bv_len = PAGE_SIZE;
+ vec.bv_offset = 0;
+ }
+
+ ret = __zram_bvec_write(zram, &vec, index, bio);
out:
- if (zstrm)
- zcomp_strm_release(zram->comp, zstrm);
if (is_partial_io(bvec))
- kfree(uncmem);
+ __free_page(page);
return ret;
}
@@ -779,7 +1266,6 @@ static void zram_bio_discard(struct zram *zram, u32 index,
int offset, struct bio *bio)
{
size_t n = bio->bi_iter.bi_size;
- struct zram_meta *meta = zram->meta;
/*
* zram manages data in physical block size units. Because logical block
@@ -800,17 +1286,22 @@ static void zram_bio_discard(struct zram *zram, u32 index,
}
while (n >= PAGE_SIZE) {
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_slot_lock(zram, index);
zram_free_page(zram, index);
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_slot_unlock(zram, index);
atomic64_inc(&zram->stats.notify_free);
index++;
n -= PAGE_SIZE;
}
}
+/*
+ * Returns errno if it has some problem. Otherwise return 0 or 1.
+ * Returns 0 if IO request was done synchronously
+ * Returns 1 if IO request was successfully submitted.
+ */
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
- int offset, int rw)
+ int offset, int rw, struct bio *bio)
{
unsigned long start_time = jiffies;
int ret;
@@ -820,15 +1311,20 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
if (rw == READ) {
atomic64_inc(&zram->stats.num_reads);
- ret = zram_bvec_read(zram, bvec, index, offset);
+ ret = zram_bvec_read(zram, bvec, index, offset, bio);
+ flush_dcache_page(bvec->bv_page);
} else {
atomic64_inc(&zram->stats.num_writes);
- ret = zram_bvec_write(zram, bvec, index, offset);
+ ret = zram_bvec_write(zram, bvec, index, offset, bio);
}
generic_end_io_acct(rw, &zram->disk->part0, start_time);
- if (unlikely(ret)) {
+ zram_slot_lock(zram, index);
+ zram_accessed(zram, index);
+ zram_slot_unlock(zram, index);
+
+ if (unlikely(ret < 0)) {
if (rw == READ)
atomic64_inc(&zram->stats.failed_reads);
else
@@ -857,31 +1353,20 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
rw = bio_data_dir(bio);
bio_for_each_segment(bvec, bio, iter) {
- int max_transfer_size = PAGE_SIZE - offset;
-
- if (bvec.bv_len > max_transfer_size) {
- /*
- * zram_bvec_rw() can only make operation on a single
- * zram page. Split the bio vector.
- */
- struct bio_vec bv;
+ struct bio_vec bv = bvec;
+ unsigned int unwritten = bvec.bv_len;
- bv.bv_page = bvec.bv_page;
- bv.bv_len = max_transfer_size;
- bv.bv_offset = bvec.bv_offset;
-
- if (zram_bvec_rw(zram, &bv, index, offset, rw) < 0)
+ do {
+ bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
+ unwritten);
+ if (zram_bvec_rw(zram, &bv, index, offset, rw, bio) < 0)
goto out;
- bv.bv_len = bvec.bv_len - max_transfer_size;
- bv.bv_offset += max_transfer_size;
- if (zram_bvec_rw(zram, &bv, index + 1, 0, rw) < 0)
- goto out;
- } else
- if (zram_bvec_rw(zram, &bvec, index, offset, rw) < 0)
- goto out;
+ bv.bv_offset += bv.bv_len;
+ unwritten -= bv.bv_len;
- update_position(&index, &offset, &bvec);
+ update_position(&index, &offset, &bv);
+ } while (unwritten);
}
bio_endio(bio);
@@ -898,22 +1383,15 @@ static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
{
struct zram *zram = queue->queuedata;
- if (unlikely(!zram_meta_get(zram)))
- goto error;
-
- blk_queue_split(queue, &bio, queue->bio_split);
-
if (!valid_io_request(zram, bio->bi_iter.bi_sector,
bio->bi_iter.bi_size)) {
atomic64_inc(&zram->stats.invalid_io);
- goto put_zram;
+ goto error;
}
__zram_make_request(zram, bio);
- zram_meta_put(zram);
return BLK_QC_T_NONE;
-put_zram:
- zram_meta_put(zram);
+
error:
bio_io_error(bio);
return BLK_QC_T_NONE;
@@ -923,45 +1401,39 @@ static void zram_slot_free_notify(struct block_device *bdev,
unsigned long index)
{
struct zram *zram;
- struct zram_meta *meta;
zram = bdev->bd_disk->private_data;
- meta = zram->meta;
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_slot_lock(zram, index);
zram_free_page(zram, index);
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_slot_unlock(zram, index);
atomic64_inc(&zram->stats.notify_free);
}
static int zram_rw_page(struct block_device *bdev, sector_t sector,
struct page *page, int rw)
{
- int offset, err = -EIO;
+ int offset, ret;
u32 index;
struct zram *zram;
struct bio_vec bv;
zram = bdev->bd_disk->private_data;
- if (unlikely(!zram_meta_get(zram)))
- goto out;
if (!valid_io_request(zram, sector, PAGE_SIZE)) {
atomic64_inc(&zram->stats.invalid_io);
- err = -EINVAL;
- goto put_zram;
+ ret = -EINVAL;
+ goto out;
}
index = sector >> SECTORS_PER_PAGE_SHIFT;
- offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT;
+ offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
bv.bv_page = page;
bv.bv_len = PAGE_SIZE;
bv.bv_offset = 0;
- err = zram_bvec_rw(zram, &bv, index, offset, rw);
-put_zram:
- zram_meta_put(zram);
+ ret = zram_bvec_rw(zram, &bv, index, offset, rw, NULL);
out:
/*
* If I/O fails, just return error(ie, non-zero) without
@@ -971,14 +1443,24 @@ out:
* bio->bi_end_io does things to handle the error
* (e.g., SetPageError, set_page_dirty and extra works).
*/
- if (err == 0)
+ if (unlikely(ret < 0))
+ return ret;
+
+ switch (ret) {
+ case 0:
page_endio(page, rw, 0);
- return err;
+ break;
+ case 1:
+ ret = 0;
+ break;
+ default:
+ WARN_ON(1);
+ }
+ return ret;
}
static void zram_reset_device(struct zram *zram)
{
- struct zram_meta *meta;
struct zcomp *comp;
u64 disksize;
@@ -991,33 +1473,19 @@ static void zram_reset_device(struct zram *zram)
return;
}
- meta = zram->meta;
comp = zram->comp;
disksize = zram->disksize;
- /*
- * Refcount will go down to 0 eventually and r/w handler
- * cannot handle further I/O so it will bail out by
- * check zram_meta_get.
- */
- zram_meta_put(zram);
- /*
- * We want to free zram_meta in process context to avoid
- * deadlock between reclaim path and any other locks.
- */
- wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);
-
- /* Reset stats */
- memset(&zram->stats, 0, sizeof(zram->stats));
zram->disksize = 0;
- zram->max_comp_streams = 1;
set_capacity(zram->disk, 0);
part_stat_set_all(&zram->disk->part0, 0);
up_write(&zram->init_lock);
/* I/O operation under all of CPU are done so let's free */
- zram_meta_free(meta, disksize);
+ zram_meta_free(zram, disksize);
+ memset(&zram->stats, 0, sizeof(zram->stats));
zcomp_destroy(comp);
+ reset_bdev(zram);
}
static ssize_t disksize_store(struct device *dev,
@@ -1025,7 +1493,6 @@ static ssize_t disksize_store(struct device *dev,
{
u64 disksize;
struct zcomp *comp;
- struct zram_meta *meta;
struct zram *zram = dev_to_zram(dev);
int err;
@@ -1033,12 +1500,20 @@ static ssize_t disksize_store(struct device *dev,
if (!disksize)
return -EINVAL;
+ down_write(&zram->init_lock);
+ if (init_done(zram)) {
+ pr_info("Cannot change disksize for initialized device\n");
+ err = -EBUSY;
+ goto out_unlock;
+ }
+
disksize = PAGE_ALIGN(disksize);
- meta = zram_meta_alloc(zram->disk->disk_name, disksize);
- if (!meta)
- return -ENOMEM;
+ if (!zram_meta_alloc(zram, disksize)) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
- comp = zcomp_create(zram->compressor, zram->max_comp_streams);
+ comp = zcomp_create(zram->compressor);
if (IS_ERR(comp)) {
pr_err("Cannot initialise %s compressing backend\n",
zram->compressor);
@@ -1046,35 +1521,19 @@ static ssize_t disksize_store(struct device *dev,
goto out_free_meta;
}
- down_write(&zram->init_lock);
- if (init_done(zram)) {
- pr_info("Cannot change disksize for initialized device\n");
- err = -EBUSY;
- goto out_destroy_comp;
- }
-
- init_waitqueue_head(&zram->io_done);
- atomic_set(&zram->refcount, 1);
- zram->meta = meta;
zram->comp = comp;
zram->disksize = disksize;
set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
- up_write(&zram->init_lock);
- /*
- * Revalidate disk out of the init_lock to avoid lockdep splat.
- * It's okay because disk's capacity is protected by init_lock
- * so that revalidate_disk always sees up-to-date capacity.
- */
revalidate_disk(zram->disk);
+ up_write(&zram->init_lock);
return len;
-out_destroy_comp:
- up_write(&zram->init_lock);
- zcomp_destroy(comp);
out_free_meta:
- zram_meta_free(meta, disksize);
+ zram_meta_free(zram, disksize);
+out_unlock:
+ up_write(&zram->init_lock);
return err;
}
@@ -1149,38 +1608,33 @@ static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
-static DEVICE_ATTR_RO(orig_data_size);
-static DEVICE_ATTR_RO(mem_used_total);
-static DEVICE_ATTR_RW(mem_limit);
-static DEVICE_ATTR_RW(mem_used_max);
+static DEVICE_ATTR_WO(mem_limit);
+static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
+#ifdef CONFIG_ZRAM_WRITEBACK
+static DEVICE_ATTR_RW(backing_dev);
+#endif
static struct attribute *zram_disk_attrs[] = {
&dev_attr_disksize.attr,
&dev_attr_initstate.attr,
&dev_attr_reset.attr,
- &dev_attr_num_reads.attr,
- &dev_attr_num_writes.attr,
- &dev_attr_failed_reads.attr,
- &dev_attr_failed_writes.attr,
&dev_attr_compact.attr,
- &dev_attr_invalid_io.attr,
- &dev_attr_notify_free.attr,
- &dev_attr_zero_pages.attr,
- &dev_attr_orig_data_size.attr,
- &dev_attr_compr_data_size.attr,
- &dev_attr_mem_used_total.attr,
&dev_attr_mem_limit.attr,
&dev_attr_mem_used_max.attr,
&dev_attr_max_comp_streams.attr,
&dev_attr_comp_algorithm.attr,
+#ifdef CONFIG_ZRAM_WRITEBACK
+ &dev_attr_backing_dev.attr,
+#endif
&dev_attr_io_stat.attr,
&dev_attr_mm_stat.attr,
+ &dev_attr_debug_stat.attr,
NULL,
};
-static struct attribute_group zram_disk_attr_group = {
+static const struct attribute_group zram_disk_attr_group = {
.attrs = zram_disk_attrs,
};
@@ -1237,6 +1691,7 @@ static int zram_add(void)
/* zram devices sort of resembles non-rotational disks */
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
+
/*
* To ensure that we always get PAGE_SIZE aligned
* and n*PAGE_SIZED sized I/O requests.
@@ -1247,8 +1702,6 @@ static int zram_add(void)
blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
- zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE;
- zram->disk->queue->limits.chunk_sectors = 0;
blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
/*
* zram_bio_discard() will clear all logical blocks if logical block
@@ -1264,6 +1717,8 @@ static int zram_add(void)
zram->disk->queue->limits.discard_zeroes_data = 0;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
+ zram->disk->queue->backing_dev_info.capabilities |=
+ BDI_CAP_STABLE_WRITES;
add_disk(zram->disk);
ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
@@ -1274,9 +1729,8 @@ static int zram_add(void)
goto out_free_disk;
}
strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
- zram->meta = NULL;
- zram->max_comp_streams = 1;
+ zram_debugfs_register(zram);
pr_info("Added device: %s\n", zram->disk->disk_name);
return device_id;
@@ -1310,6 +1764,7 @@ static int zram_remove(struct zram *zram)
zram->claim = true;
mutex_unlock(&bdev->bd_mutex);
+ zram_debugfs_unregister(zram);
/*
* Remove sysfs first, so no one will perform a disksize
* store while we destroy the devices. This also helps during
@@ -1327,8 +1782,8 @@ static int zram_remove(struct zram *zram)
pr_info("Removed device: %s\n", zram->disk->disk_name);
- blk_cleanup_queue(zram->disk->queue);
del_gendisk(zram->disk);
+ blk_cleanup_queue(zram->disk->queue);
put_disk(zram->disk);
kfree(zram);
return 0;
@@ -1408,6 +1863,7 @@ static void destroy_devices(void)
{
class_unregister(&zram_control_class);
idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
+ zram_debugfs_destroy();
idr_destroy(&zram_index_idr);
unregister_blkdev(zram_major, "zram");
}
@@ -1422,6 +1878,7 @@ static int __init zram_init(void)
return ret;
}
+ zram_debugfs_create();
zram_major = register_blkdev(0, "zram");
if (zram_major <= 0) {
pr_err("Unable to get major number\n");
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 8e92339686d7..3a1cac486e96 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -15,27 +15,12 @@
#ifndef _ZRAM_DRV_H_
#define _ZRAM_DRV_H_
-#include <linux/spinlock.h>
+#include <linux/rwsem.h>
#include <linux/zsmalloc.h>
+#include <linux/crypto.h>
#include "zcomp.h"
-/*-- Configurable parameters */
-
-/*
- * Pages that compress to size greater than this are stored
- * uncompressed in memory.
- */
-static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
-
-/*
- * NOTE: max_zpage_size must be less than or equal to:
- * ZS_MAX_ALLOC_SIZE. Otherwise, zs_malloc() would
- * always return failure.
- */
-
-/*-- End of configurable params */
-
#define SECTOR_SHIFT 9
#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT)
@@ -59,9 +44,11 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
/* Flags for zram pages (table[page_no].value) */
enum zram_pageflags {
- /* Page consists entirely of zeros */
- ZRAM_ZERO = ZRAM_FLAG_SHIFT,
- ZRAM_ACCESS, /* page is now accessed */
+ /* zram slot is locked */
+ ZRAM_LOCK = ZRAM_FLAG_SHIFT,
+ ZRAM_SAME, /* Page consists the same element */
+ ZRAM_WB, /* page is stored on backing_device */
+ ZRAM_HUGE, /* Incompressible page */
__NR_ZRAM_PAGEFLAGS,
};
@@ -70,8 +57,14 @@ enum zram_pageflags {
/* Allocated for each disk page */
struct zram_table_entry {
- unsigned long handle;
+ union {
+ unsigned long handle;
+ unsigned long element;
+ };
unsigned long value;
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+ ktime_t ac_time;
+#endif
};
struct zram_stats {
@@ -82,18 +75,16 @@ struct zram_stats {
atomic64_t failed_writes; /* can happen when memory is too low */
atomic64_t invalid_io; /* non-page-aligned I/O requests */
atomic64_t notify_free; /* no. of swap slot free notifications */
- atomic64_t zero_pages; /* no. of zero filled pages */
+ atomic64_t same_pages; /* no. of same element filled pages */
+ atomic64_t huge_pages; /* no. of huge pages */
atomic64_t pages_stored; /* no. of pages currently stored */
atomic_long_t max_used_pages; /* no. of maximum pages stored */
+ atomic64_t writestall; /* no. of write slow paths */
};
-struct zram_meta {
+struct zram {
struct zram_table_entry *table;
struct zs_pool *mem_pool;
-};
-
-struct zram {
- struct zram_meta *meta;
struct zcomp *comp;
struct gendisk *disk;
/* Prevent concurrent execution of device init */
@@ -102,21 +93,28 @@ struct zram {
* the number of pages zram can consume for storing compressed data
*/
unsigned long limit_pages;
- int max_comp_streams;
struct zram_stats stats;
- atomic_t refcount; /* refcount for zram_meta */
- /* wait all IO under all of cpu are done */
- wait_queue_head_t io_done;
/*
* This is the limit on amount of *uncompressed* worth of data
* we can store in a disk.
*/
u64 disksize; /* bytes */
- char compressor[10];
+ char compressor[CRYPTO_MAX_ALG_NAME];
/*
* zram is claimed so open request will be failed
*/
bool claim; /* Protected by bdev->bd_mutex */
+#ifdef CONFIG_ZRAM_WRITEBACK
+ struct file *backing_dev;
+ struct block_device *bdev;
+ unsigned int old_block_size;
+ unsigned long *bitmap;
+ unsigned long nr_pages;
+ spinlock_t bitmap_lock;
+#endif
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+ struct dentry *debugfs_dir;
+#endif
};
#endif
diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index 162a9886dc02..d25ffd9d5438 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -131,6 +131,7 @@ config BT_HCIUART_LL
config BT_HCIUART_3WIRE
bool "Three-wire UART (H5) protocol support"
depends on BT_HCIUART
+ depends on BT_HCIUART_SERDEV
help
The HCI Three-wire UART Transport Layer makes it possible to
user the Bluetooth HCI over a serial port interface. The HCI
diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c
index 01718d05e952..9e8f0e255de2 100644
--- a/drivers/clk/imx/clk-imx6ul.c
+++ b/drivers/clk/imx/clk-imx6ul.c
@@ -120,6 +120,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
np = of_find_compatible_node(NULL, NULL, "fsl,imx6ul-anatop");
base = of_iomap(np, 0);
+ of_node_put(np);
WARN_ON(!base);
clks[IMX6UL_PLL1_BYPASS_SRC] = imx_clk_mux("pll1_bypass_src", base + 0x00, 14, 1, pll_bypass_src_sels, ARRAY_SIZE(pll_bypass_src_sels));
diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
index f68c24a98277..dedfc96acc66 100644
--- a/drivers/crypto/sahara.c
+++ b/drivers/crypto/sahara.c
@@ -1363,7 +1363,7 @@ err_sha_v4_algs:
err_sha_v3_algs:
for (j = 0; j < k; j++)
- crypto_unregister_ahash(&sha_v4_algs[j]);
+ crypto_unregister_ahash(&sha_v3_algs[j]);
err_aes_algs:
for (j = 0; j < i; j++)
@@ -1379,7 +1379,7 @@ static void sahara_unregister_algs(struct sahara_dev *dev)
for (i = 0; i < ARRAY_SIZE(aes_algs); i++)
crypto_unregister_alg(&aes_algs[i]);
- for (i = 0; i < ARRAY_SIZE(sha_v4_algs); i++)
+ for (i = 0; i < ARRAY_SIZE(sha_v3_algs); i++)
crypto_unregister_ahash(&sha_v3_algs[i]);
if (dev->version > SAHARA_VERSION_3)
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
index 9506e8693c81..d8ef1147b344 100644
--- a/drivers/crypto/vmx/aes_cbc.c
+++ b/drivers/crypto/vmx/aes_cbc.c
@@ -111,24 +111,23 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc,
ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src,
nbytes);
} else {
- preempt_disable();
- pagefault_disable();
- enable_kernel_altivec();
- enable_kernel_vsx();
-
blkcipher_walk_init(&walk, dst, src, nbytes);
ret = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
+ enable_kernel_altivec();
aes_p8_cbc_encrypt(walk.src.virt.addr,
walk.dst.virt.addr,
nbytes & AES_BLOCK_MASK,
&ctx->enc_key, walk.iv, 1);
+ pagefault_enable();
+ preempt_enable();
+
nbytes &= AES_BLOCK_SIZE - 1;
ret = blkcipher_walk_done(desc, &walk, nbytes);
}
-
- pagefault_enable();
- preempt_enable();
}
return ret;
@@ -152,24 +151,23 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc,
ret = crypto_blkcipher_decrypt(&fallback_desc, dst, src,
nbytes);
} else {
- preempt_disable();
- pagefault_disable();
- enable_kernel_altivec();
- enable_kernel_vsx();
-
blkcipher_walk_init(&walk, dst, src, nbytes);
ret = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
+ enable_kernel_altivec();
aes_p8_cbc_encrypt(walk.src.virt.addr,
walk.dst.virt.addr,
nbytes & AES_BLOCK_MASK,
&ctx->dec_key, walk.iv, 0);
+ pagefault_enable();
+ preempt_enable();
+
nbytes &= AES_BLOCK_SIZE - 1;
ret = blkcipher_walk_done(desc, &walk, nbytes);
}
-
- pagefault_enable();
- preempt_enable();
}
return ret;
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index b14f5c225401..55723340c180 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2386,13 +2386,14 @@ static int pl330_terminate_all(struct dma_chan *chan)
pm_runtime_get_sync(pl330->ddma.dev);
spin_lock_irqsave(&pch->lock, flags);
+
spin_lock(&pl330->lock);
_stop(pch->thread);
- spin_unlock(&pl330->lock);
-
pch->thread->req[0].desc = NULL;
pch->thread->req[1].desc = NULL;
pch->thread->req_running = -1;
+ spin_unlock(&pl330->lock);
+
power_down = pch->active;
pch->active = false;
diff --git a/drivers/gpio/gpio-ml-ioh.c b/drivers/gpio/gpio-ml-ioh.c
index 5536108aa9db..fe21734bbe5c 100644
--- a/drivers/gpio/gpio-ml-ioh.c
+++ b/drivers/gpio/gpio-ml-ioh.c
@@ -495,9 +495,10 @@ err_irq_alloc_descs:
chip = chip_save;
err_gpiochip_add:
+ chip = chip_save;
while (--i >= 0) {
- chip--;
gpiochip_remove(&chip->gpio);
+ chip++;
}
kfree(chip_save);
diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index 896bf29776b0..fb2c1df4f588 100644
--- a/drivers/gpio/gpio-tegra.c
+++ b/drivers/gpio/gpio-tegra.c
@@ -591,4 +591,4 @@ static int __init tegra_gpio_init(void)
{
return platform_driver_register(&tegra_gpio_driver);
}
-postcore_initcall(tegra_gpio_init);
+subsys_initcall(tegra_gpio_init);
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index 98ab08c0aa2d..07541c5670e6 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -30,7 +30,7 @@ struct acpi_gpio_info {
};
/* gpio suffixes used for ACPI and device tree lookup */
-static const char * const gpio_suffixes[] = { "gpios", "gpio" };
+static __maybe_unused const char * const gpio_suffixes[] = { "gpios", "gpio" };
#ifdef CONFIG_ACPI
void acpi_gpiochip_add(struct gpio_chip *chip);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 73628c7599e7..3aca9a9011fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -492,6 +492,10 @@ void amdgpu_bo_force_delete(struct amdgpu_device *adev)
int amdgpu_bo_init(struct amdgpu_device *adev)
{
+ /* reserve PAT memory space to WC for VRAM */
+ arch_io_reserve_memtype_wc(adev->mc.aper_base,
+ adev->mc.aper_size);
+
/* Add an MTRR for the VRAM */
adev->mc.vram_mtrr = arch_phys_wc_add(adev->mc.aper_base,
adev->mc.aper_size);
@@ -507,6 +511,7 @@ void amdgpu_bo_fini(struct amdgpu_device *adev)
{
amdgpu_ttm_fini(adev);
arch_phys_wc_del(adev->mc.vram_mtrr);
+ arch_io_free_memtype_wc(adev->mc.aper_base, adev->mc.aper_size);
}
int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index eb1da83c9902..8cdd505784ed 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -125,6 +125,8 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread)
return ERR_PTR(-EINVAL);
process = find_process(thread);
+ if (!process)
+ return ERR_PTR(-EINVAL);
return process;
}
diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c
index 08f82eae6939..ac12f74e6b32 100644
--- a/drivers/gpu/drm/ast/ast_ttm.c
+++ b/drivers/gpu/drm/ast/ast_ttm.c
@@ -275,6 +275,8 @@ int ast_mm_init(struct ast_private *ast)
return ret;
}
+ arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0),
+ pci_resource_len(dev->pdev, 0));
ast->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0),
pci_resource_len(dev->pdev, 0));
@@ -283,11 +285,15 @@ int ast_mm_init(struct ast_private *ast)
void ast_mm_fini(struct ast_private *ast)
{
+ struct drm_device *dev = ast->dev;
+
ttm_bo_device_release(&ast->ttm.bdev);
ast_ttm_global_release(ast);
arch_phys_wc_del(ast->fb_mtrr);
+ arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0),
+ pci_resource_len(dev->pdev, 0));
}
void ast_ttm_placement(struct ast_bo *bo, int domain)
diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c
index dfffd528517a..393967025043 100644
--- a/drivers/gpu/drm/cirrus/cirrus_ttm.c
+++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c
@@ -275,6 +275,9 @@ int cirrus_mm_init(struct cirrus_device *cirrus)
return ret;
}
+ arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0),
+ pci_resource_len(dev->pdev, 0));
+
cirrus->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0),
pci_resource_len(dev->pdev, 0));
@@ -284,6 +287,8 @@ int cirrus_mm_init(struct cirrus_device *cirrus)
void cirrus_mm_fini(struct cirrus_device *cirrus)
{
+ struct drm_device *dev = cirrus->dev;
+
if (!cirrus->mm_inited)
return;
@@ -293,6 +298,8 @@ void cirrus_mm_fini(struct cirrus_device *cirrus)
arch_phys_wc_del(cirrus->fb_mtrr);
cirrus->fb_mtrr = 0;
+ arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0),
+ pci_resource_len(dev->pdev, 0));
}
void cirrus_ttm_placement(struct cirrus_bo *bo, int domain)
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 19fb0bddc1cd..359fe2b8bb8a 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -842,6 +842,9 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
I915_USERPTR_UNSYNCHRONIZED))
return -EINVAL;
+ if (!args->user_size)
+ return -EINVAL;
+
if (offset_in_page(args->user_ptr | args->user_size))
return -EINVAL;
diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c
index 05108b505fbf..d9df8d32fc35 100644
--- a/drivers/gpu/drm/mgag200/mgag200_ttm.c
+++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c
@@ -274,6 +274,9 @@ int mgag200_mm_init(struct mga_device *mdev)
return ret;
}
+ arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0),
+ pci_resource_len(dev->pdev, 0));
+
mdev->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0),
pci_resource_len(dev->pdev, 0));
@@ -282,10 +285,14 @@ int mgag200_mm_init(struct mga_device *mdev)
void mgag200_mm_fini(struct mga_device *mdev)
{
+ struct drm_device *dev = mdev->dev;
+
ttm_bo_device_release(&mdev->ttm.bdev);
mgag200_ttm_global_release(mdev);
+ arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0),
+ pci_resource_len(dev->pdev, 0));
arch_phys_wc_del(mdev->fb_mtrr);
mdev->fb_mtrr = 0;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index ababdaabe870..1855b475cc0b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -253,12 +253,16 @@ nouveau_connector_detect(struct drm_connector *connector, bool force)
nv_connector->edid = NULL;
}
- /* Outputs are only polled while runtime active, so acquiring a
- * runtime PM ref here is unnecessary (and would deadlock upon
- * runtime suspend because it waits for polling to finish).
+ /* Outputs are only polled while runtime active, so resuming the
+ * device here is unnecessary (and would deadlock upon runtime suspend
+ * because it waits for polling to finish). We do however, want to
+ * prevent the autosuspend timer from elapsing during this operation
+ * if possible.
*/
- if (!drm_kms_helper_is_poll_worker()) {
- ret = pm_runtime_get_sync(connector->dev->dev);
+ if (drm_kms_helper_is_poll_worker()) {
+ pm_runtime_get_noresume(dev->dev);
+ } else {
+ ret = pm_runtime_get_sync(dev->dev);
if (ret < 0 && ret != -EACCES)
return conn_status;
}
@@ -329,10 +333,8 @@ detect_analog:
out:
- if (!drm_kms_helper_is_poll_worker()) {
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
- }
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
return conn_status;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index d2e7d209f651..9835327a3214 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -397,6 +397,9 @@ nouveau_ttm_init(struct nouveau_drm *drm)
/* VRAM init */
drm->gem.vram_available = drm->device.info.ram_user;
+ arch_io_reserve_memtype_wc(device->func->resource_addr(device, 1),
+ device->func->resource_size(device, 1));
+
ret = ttm_bo_init_mm(&drm->ttm.bdev, TTM_PL_VRAM,
drm->gem.vram_available >> PAGE_SHIFT);
if (ret) {
@@ -429,6 +432,8 @@ nouveau_ttm_init(struct nouveau_drm *drm)
void
nouveau_ttm_fini(struct nouveau_drm *drm)
{
+ struct nvkm_device *device = nvxx_device(&drm->device);
+
ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_VRAM);
ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_TT);
@@ -438,4 +443,7 @@ nouveau_ttm_fini(struct nouveau_drm *drm)
arch_phys_wc_del(drm->ttm.mtrr);
drm->ttm.mtrr = 0;
+ arch_io_free_memtype_wc(device->func->resource_addr(device, 1),
+ device->func->resource_size(device, 1));
+
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
index e7e581d6a8ff..1bfc4807ce5b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
@@ -23,6 +23,10 @@
#ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER
#include "priv.h"
+#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
+#include <asm/dma-iommu.h>
+#endif
+
static int
nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev)
{
@@ -85,6 +89,15 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
unsigned long pgsize_bitmap;
int ret;
+#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
+ if (dev->archdata.mapping) {
+ struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
+
+ arm_iommu_detach_device(dev);
+ arm_iommu_release_mapping(mapping);
+ }
+#endif
+
if (!tdev->func->iommu_bit)
return;
diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c b/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c
index a188a3959f1a..6ad827b93ae1 100644
--- a/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c
+++ b/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c
@@ -823,7 +823,7 @@ static void s6e8aa0_read_mtp_id(struct s6e8aa0 *ctx)
int ret, i;
ret = s6e8aa0_dcs_read(ctx, 0xd1, id, ARRAY_SIZE(id));
- if (ret < ARRAY_SIZE(id) || id[0] == 0x00) {
+ if (ret < 0 || ret < ARRAY_SIZE(id) || id[0] == 0x00) {
dev_err(ctx->dev, "read id failed\n");
ctx->error = -EIO;
return;
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 83aee9e814ba..18ec38d0d3f5 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -447,6 +447,10 @@ void radeon_bo_force_delete(struct radeon_device *rdev)
int radeon_bo_init(struct radeon_device *rdev)
{
+ /* reserve PAT memory space to WC for VRAM */
+ arch_io_reserve_memtype_wc(rdev->mc.aper_base,
+ rdev->mc.aper_size);
+
/* Add an MTRR for the VRAM */
if (!rdev->fastfb_working) {
rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base,
@@ -464,6 +468,7 @@ void radeon_bo_fini(struct radeon_device *rdev)
{
radeon_ttm_fini(rdev);
arch_phys_wc_del(rdev->mc.vram_mtrr);
+ arch_io_free_memtype_wc(rdev->mc.aper_base, rdev->mc.aper_size);
}
/* Returns how many bytes TTM can move per IB.
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 98e7e235892c..7fa396b5cce0 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -2017,6 +2017,9 @@ static const struct hid_device_id hid_have_special_driver[] = {
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER_2) },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER_2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER_DONGLE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGX_MOUSE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGP_MOUSE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_STEELSERIES, USB_DEVICE_ID_STEELSERIES_SRWS1) },
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 329bbac02a15..c51892267330 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -889,6 +889,8 @@
#define USB_DEVICE_ID_SONY_PS3_BDREMOTE 0x0306
#define USB_DEVICE_ID_SONY_PS3_CONTROLLER 0x0268
#define USB_DEVICE_ID_SONY_PS4_CONTROLLER 0x05c4
+#define USB_DEVICE_ID_SONY_PS4_CONTROLLER_2 0x09cc
+#define USB_DEVICE_ID_SONY_PS4_CONTROLLER_DONGLE 0x0ba0
#define USB_DEVICE_ID_SONY_MOTION_CONTROLLER 0x03d5
#define USB_DEVICE_ID_SONY_NAVIGATION_CONTROLLER 0x042f
#define USB_DEVICE_ID_SONY_BUZZ_CONTROLLER 0x0002
diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index 21febbb0d84e..6f3d47185bf0 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -2460,6 +2460,12 @@ static const struct hid_device_id sony_devices[] = {
.driver_data = DUALSHOCK4_CONTROLLER_USB },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER),
.driver_data = DUALSHOCK4_CONTROLLER_BT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER_2),
+ .driver_data = DUALSHOCK4_CONTROLLER_USB },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER_2),
+ .driver_data = DUALSHOCK4_CONTROLLER_BT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER_DONGLE),
+ .driver_data = DUALSHOCK4_CONTROLLER_USB },
{ }
};
MODULE_DEVICE_TABLE(hid, sony_devices);
diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c
index b71793ba2483..ee4225d73994 100644
--- a/drivers/hwtracing/coresight/coresight-tpiu.c
+++ b/drivers/hwtracing/coresight/coresight-tpiu.c
@@ -47,8 +47,9 @@
/** register definition **/
/* FFSR - 0x300 */
-#define FFSR_FT_STOPPED BIT(1)
+#define FFSR_FT_STOPPED_BIT 1
/* FFCR - 0x304 */
+#define FFCR_FON_MAN_BIT 6
#define FFCR_FON_MAN BIT(6)
#define FFCR_STOP_FI BIT(12)
@@ -93,9 +94,9 @@ static void tpiu_disable_hw(struct tpiu_drvdata *drvdata)
/* Generate manual flush */
writel_relaxed(FFCR_STOP_FI | FFCR_FON_MAN, drvdata->base + TPIU_FFCR);
/* Wait for flush to complete */
- coresight_timeout(drvdata->base, TPIU_FFCR, FFCR_FON_MAN, 0);
+ coresight_timeout(drvdata->base, TPIU_FFCR, FFCR_FON_MAN_BIT, 0);
/* Wait for formatter to stop */
- coresight_timeout(drvdata->base, TPIU_FFSR, FFSR_FT_STOPPED, 1);
+ coresight_timeout(drvdata->base, TPIU_FFSR, FFSR_FT_STOPPED_BIT, 1);
CS_LOCK(drvdata->base);
}
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index 508532b3fcac..9c5edc887805 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -107,7 +107,7 @@ static int coresight_find_link_inport(struct coresight_device *csdev,
dev_err(&csdev->dev, "couldn't find inport, parent: %s, child: %s\n",
dev_name(&parent->dev), dev_name(&csdev->dev));
- return 0;
+ return -ENODEV;
}
static int coresight_find_link_outport(struct coresight_device *csdev,
@@ -125,7 +125,7 @@ static int coresight_find_link_outport(struct coresight_device *csdev,
dev_err(&csdev->dev, "couldn't find outport, parent: %s, child: %s\n",
dev_name(&csdev->dev), dev_name(&child->dev));
- return 0;
+ return -ENODEV;
}
static int coresight_enable_sink(struct coresight_device *csdev, u32 mode)
@@ -178,6 +178,9 @@ static int coresight_enable_link(struct coresight_device *csdev,
else
refport = 0;
+ if (refport < 0)
+ return refport;
+
if (atomic_inc_return(&csdev->refcnt[refport]) == 1) {
if (link_ops(csdev)->enable) {
ret = link_ops(csdev)->enable(csdev, inport, outport);
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 85f39cc3e276..47581c32b1e1 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -128,6 +128,7 @@
#define SBREG_BAR 0x10
#define SBREG_SMBCTRL 0xc6000c
+#define SBREG_SMBCTRL_DNV 0xcf000c
/* Host status bits for SMBPCISTS */
#define SMBPCISTS_INTS 0x08
@@ -1251,7 +1252,11 @@ static void i801_add_tco(struct i801_priv *priv)
spin_unlock(&p2sb_spinlock);
res = &tco_res[ICH_RES_MEM_OFF];
- res->start = (resource_size_t)base64_addr + SBREG_SMBCTRL;
+ if (pci_dev->device == PCI_DEVICE_ID_INTEL_DNV_SMBUS)
+ res->start = (resource_size_t)base64_addr + SBREG_SMBCTRL_DNV;
+ else
+ res->start = (resource_size_t)base64_addr + SBREG_SMBCTRL;
+
res->end = res->start + 3;
res->flags = IORESOURCE_MEM;
diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c
index 0b20449e48cf..da9acec1a029 100644
--- a/drivers/i2c/busses/i2c-xiic.c
+++ b/drivers/i2c/busses/i2c-xiic.c
@@ -533,6 +533,7 @@ static void xiic_start_recv(struct xiic_i2c *i2c)
{
u8 rx_watermark;
struct i2c_msg *msg = i2c->rx_msg = i2c->tx_msg;
+ unsigned long flags;
/* Clear and enable Rx full interrupt. */
xiic_irq_clr_en(i2c, XIIC_INTR_RX_FULL_MASK | XIIC_INTR_TX_ERROR_MASK);
@@ -548,6 +549,7 @@ static void xiic_start_recv(struct xiic_i2c *i2c)
rx_watermark = IIC_RX_FIFO_DEPTH;
xiic_setreg8(i2c, XIIC_RFD_REG_OFFSET, rx_watermark - 1);
+ local_irq_save(flags);
if (!(msg->flags & I2C_M_NOSTART))
/* write the address */
xiic_setreg16(i2c, XIIC_DTR_REG_OFFSET,
@@ -558,6 +560,8 @@ static void xiic_start_recv(struct xiic_i2c *i2c)
xiic_setreg16(i2c, XIIC_DTR_REG_OFFSET,
msg->len | ((i2c->nmsgs == 1) ? XIIC_TX_DYN_STOP_MASK : 0));
+ local_irq_restore(flags);
+
if (i2c->nmsgs == 1)
/* very last, enable bus not busy as well */
xiic_irq_clr_en(i2c, XIIC_INTR_BNB_MASK);
diff --git a/drivers/iio/frequency/ad9523.c b/drivers/iio/frequency/ad9523.c
index 44a30f286de1..57b1812a5a18 100644
--- a/drivers/iio/frequency/ad9523.c
+++ b/drivers/iio/frequency/ad9523.c
@@ -507,7 +507,7 @@ static ssize_t ad9523_store(struct device *dev,
return ret;
if (!state)
- return 0;
+ return len;
mutex_lock(&indio_dev->mlock);
switch ((u32)this_attr->address) {
@@ -641,7 +641,7 @@ static int ad9523_read_raw(struct iio_dev *indio_dev,
code = (AD9523_CLK_DIST_DIV_PHASE_REV(ret) * 3141592) /
AD9523_CLK_DIST_DIV_REV(ret);
*val = code / 1000000;
- *val2 = (code % 1000000) * 10;
+ *val2 = code % 1000000;
return IIO_VAL_INT_PLUS_MICRO;
default:
return -EINVAL;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index d57a78ec7425..1454290078de 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -544,6 +544,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
dgid = (union ib_gid *) &addr->sib_addr;
pkey = ntohs(addr->sib_pkey);
+ mutex_lock(&lock);
list_for_each_entry(cur_dev, &dev_list, list) {
for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
if (!rdma_cap_af_ib(cur_dev->device, p))
@@ -567,18 +568,19 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
cma_dev = cur_dev;
sgid = gid;
id_priv->id.port_num = p;
+ goto found;
}
}
}
}
-
- if (!cma_dev)
- return -ENODEV;
+ mutex_unlock(&lock);
+ return -ENODEV;
found:
cma_attach_to_dev(id_priv, cma_dev);
- addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
- memcpy(&addr->sib_addr, &sgid, sizeof sgid);
+ mutex_unlock(&lock);
+ addr = (struct sockaddr_ib *)cma_src_addr(id_priv);
+ memcpy(&addr->sib_addr, &sgid, sizeof(sgid));
cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
return 0;
}
@@ -1280,9 +1282,16 @@ static bool cma_match_net_dev(const struct rdma_cm_id *id,
(addr->src_addr.ss_family == AF_IB ||
cma_protocol_roce_dev_port(id->device, port_num));
- return !addr->dev_addr.bound_dev_if ||
- (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
- addr->dev_addr.bound_dev_if == net_dev->ifindex);
+ /*
+ * Net namespaces must match, and if the listener is listening
+ * on a specific netdevice then that netdevice must match as well.
+ */
+ if (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
+ (!!addr->dev_addr.bound_dev_if ==
+ (addr->dev_addr.bound_dev_if == net_dev->ifindex)))
+ return true;
+ else
+ return false;
}
static struct rdma_id_private *cma_find_listener(
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 53aa7515f542..04206c600098 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1183,6 +1183,12 @@ static void flush_qp(struct c4iw_qp *qhp)
t4_set_wq_in_error(&qhp->wq);
if (qhp->ibqp.uobject) {
+
+ /* for user qps, qhp->wq.flushed is protected by qhp->mutex */
+ if (qhp->wq.flushed)
+ return;
+
+ qhp->wq.flushed = 1;
t4_set_cq_in_error(&rchp->cq);
spin_lock_irqsave(&rchp->comp_handler_lock, flag);
(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index f74b11542603..a338e60836ee 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -992,12 +992,14 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
skb_queue_head_init(&skqueue);
+ netif_tx_lock_bh(p->dev);
spin_lock_irq(&priv->lock);
set_bit(IPOIB_FLAG_OPER_UP, &p->flags);
if (p->neigh)
while ((skb = __skb_dequeue(&p->neigh->queue)))
__skb_queue_tail(&skqueue, skb);
spin_unlock_irq(&priv->lock);
+ netif_tx_unlock_bh(p->dev);
while ((skb = __skb_dequeue(&skqueue))) {
skb->dev = p->dev;
diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 88dfe3008cf4..be2f2521c1c5 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -1593,10 +1593,11 @@ static int mxt_get_object_table(struct mxt_data *data)
break;
case MXT_TOUCH_MULTI_T9:
data->multitouch = MXT_TOUCH_MULTI_T9;
+ /* Only handle messages from first T9 instance */
data->T9_reportid_min = min_id;
- data->T9_reportid_max = max_id;
- data->num_touchids = object->num_report_ids
- * mxt_obj_instances(object);
+ data->T9_reportid_max = min_id +
+ object->num_report_ids - 1;
+ data->num_touchids = object->num_report_ids;
break;
case MXT_SPT_MESSAGECOUNT_T44:
data->T44_address = object->start_address;
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 347aaaa5a7ea..fc6eb752ab35 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1219,6 +1219,7 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
/* Sync our overflow flag, as we believe we're up to speed */
q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
+ writel(q->cons, q->cons_reg);
return IRQ_HANDLED;
}
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index e913a930ac80..5a63e32a4a6b 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1315,8 +1315,8 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
qi_submit_sync(&desc, iommu);
}
-void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
- u64 addr, unsigned mask)
+void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+ u16 qdep, u64 addr, unsigned mask)
{
struct qi_desc desc;
@@ -1331,7 +1331,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
qdep = 0;
desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
- QI_DIOTLB_TYPE;
+ QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
qi_submit_sync(&desc, iommu);
}
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 4efec2db4ee2..49b266433f4c 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -419,6 +419,7 @@ struct device_domain_info {
struct list_head global; /* link to global list */
u8 bus; /* PCI bus number */
u8 devfn; /* PCI devfn number */
+ u16 pfsid; /* SRIOV physical function source ID */
u8 pasid_supported:3;
u8 pasid_enabled:1;
u8 pri_supported:1;
@@ -1479,6 +1480,20 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info)
return;
pdev = to_pci_dev(info->dev);
+ /* For IOMMU that supports device IOTLB throttling (DIT), we assign
+ * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
+ * queue depth at PF level. If DIT is not set, PFSID will be treated as
+ * reserved, which should be set to 0.
+ */
+ if (!ecap_dit(info->iommu->ecap))
+ info->pfsid = 0;
+ else {
+ struct pci_dev *pf_pdev;
+
+ /* pdev will be returned if device is not a vf */
+ pf_pdev = pci_physfn(pdev);
+ info->pfsid = PCI_DEVID(pf_pdev->bus->number, pf_pdev->devfn);
+ }
#ifdef CONFIG_INTEL_IOMMU_SVM
/* The PCIe spec, in its wisdom, declares that the behaviour of
@@ -1537,7 +1552,8 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
sid = info->bus << 8 | info->devfn;
qdep = info->ats_qdep;
- qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
+ qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
+ qdep, addr, mask);
}
spin_unlock_irqrestore(&device_domain_lock, flags);
}
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index dfb868e2d129..624e7ff76166 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -44,7 +44,7 @@ struct ipmmu_vmsa_domain {
struct io_pgtable_ops *iop;
unsigned int context_id;
- spinlock_t lock; /* Protects mappings */
+ struct mutex mutex; /* Protects mappings */
};
struct ipmmu_vmsa_archdata {
@@ -464,7 +464,7 @@ static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
if (!domain)
return NULL;
- spin_lock_init(&domain->lock);
+ mutex_init(&domain->mutex);
return &domain->io_domain;
}
@@ -488,7 +488,6 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu;
struct ipmmu_vmsa_device *mmu = archdata->mmu;
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
- unsigned long flags;
unsigned int i;
int ret = 0;
@@ -497,7 +496,7 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
return -ENXIO;
}
- spin_lock_irqsave(&domain->lock, flags);
+ mutex_lock(&domain->mutex);
if (!domain->mmu) {
/* The domain hasn't been used yet, initialize it. */
@@ -513,7 +512,7 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
ret = -EINVAL;
}
- spin_unlock_irqrestore(&domain->lock, flags);
+ mutex_unlock(&domain->mutex);
if (ret < 0)
return ret;
diff --git a/drivers/irqchip/irq-bcm7038-l1.c b/drivers/irqchip/irq-bcm7038-l1.c
index d7af88534971..6fb34bf0f352 100644
--- a/drivers/irqchip/irq-bcm7038-l1.c
+++ b/drivers/irqchip/irq-bcm7038-l1.c
@@ -216,6 +216,7 @@ static int bcm7038_l1_set_affinity(struct irq_data *d,
return 0;
}
+#ifdef CONFIG_SMP
static void bcm7038_l1_cpu_offline(struct irq_data *d)
{
struct cpumask *mask = irq_data_get_affinity_mask(d);
@@ -240,6 +241,7 @@ static void bcm7038_l1_cpu_offline(struct irq_data *d)
}
irq_set_affinity_locked(d, &new_affinity, false);
}
+#endif
static int __init bcm7038_l1_init_one(struct device_node *dn,
unsigned int idx,
@@ -292,7 +294,9 @@ static struct irq_chip bcm7038_l1_irq_chip = {
.irq_mask = bcm7038_l1_mask,
.irq_unmask = bcm7038_l1_unmask,
.irq_set_affinity = bcm7038_l1_set_affinity,
+#ifdef CONFIG_SMP
.irq_cpu_offline = bcm7038_l1_cpu_offline,
+#endif
};
static int bcm7038_l1_map(struct irq_domain *d, unsigned int virq,
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 82e00e3ad0e0..c3d7a1461043 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -67,7 +67,10 @@ struct its_node {
unsigned long phys_base;
struct its_cmd_block *cmd_base;
struct its_cmd_block *cmd_write;
- void *tables[GITS_BASER_NR_REGS];
+ struct {
+ void *base;
+ u32 order;
+ } tables[GITS_BASER_NR_REGS];
struct its_collection *collections;
struct list_head its_device_list;
u64 flags;
@@ -77,6 +80,9 @@ struct its_node {
#define ITS_ITT_ALIGN SZ_256
+/* Convert page order to size in bytes */
+#define PAGE_ORDER_TO_SIZE(o) (PAGE_SIZE << (o))
+
struct event_lpi_map {
unsigned long *lpi_map;
u16 *col_map;
@@ -816,9 +822,10 @@ static void its_free_tables(struct its_node *its)
int i;
for (i = 0; i < GITS_BASER_NR_REGS; i++) {
- if (its->tables[i]) {
- free_page((unsigned long)its->tables[i]);
- its->tables[i] = NULL;
+ if (its->tables[i].base) {
+ free_pages((unsigned long)its->tables[i].base,
+ its->tables[i].order);
+ its->tables[i].base = NULL;
}
}
}
@@ -851,7 +858,6 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
u64 type = GITS_BASER_TYPE(val);
u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
int order = get_order(psz);
- int alloc_size;
int alloc_pages;
u64 tmp;
void *base;
@@ -883,8 +889,8 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
}
}
- alloc_size = (1 << order) * PAGE_SIZE;
- alloc_pages = (alloc_size / psz);
+retry_alloc_baser:
+ alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz);
if (alloc_pages > GITS_BASER_PAGES_MAX) {
alloc_pages = GITS_BASER_PAGES_MAX;
order = get_order(GITS_BASER_PAGES_MAX * psz);
@@ -898,7 +904,8 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
goto out_free;
}
- its->tables[i] = base;
+ its->tables[i].base = base;
+ its->tables[i].order = order;
retry_baser:
val = (virt_to_phys(base) |
@@ -936,7 +943,7 @@ retry_baser:
shr = tmp & GITS_BASER_SHAREABILITY_MASK;
if (!shr) {
cache = GITS_BASER_nC;
- __flush_dcache_area(base, alloc_size);
+ __flush_dcache_area(base, PAGE_ORDER_TO_SIZE(order));
}
goto retry_baser;
}
@@ -947,13 +954,16 @@ retry_baser:
* size and retry. If we reach 4K, then
* something is horribly wrong...
*/
+ free_pages((unsigned long)base, order);
+ its->tables[i].base = NULL;
+
switch (psz) {
case SZ_16K:
psz = SZ_4K;
- goto retry_baser;
+ goto retry_alloc_baser;
case SZ_64K:
psz = SZ_16K;
- goto retry_baser;
+ goto retry_alloc_baser;
}
}
@@ -966,7 +976,7 @@ retry_baser:
}
pr_info("ITS: allocated %d %s @%lx (psz %dK, shr %d)\n",
- (int)(alloc_size / entry_size),
+ (int)(PAGE_ORDER_TO_SIZE(order) / entry_size),
its_base_type_string[type],
(unsigned long)virt_to_phys(base),
psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index d422377f5743..f349196ed7a5 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -345,7 +345,7 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK);
irqnr = irqstat & GICC_IAR_INT_ID_MASK;
- if (likely(irqnr > 15 && irqnr < 1021)) {
+ if (likely(irqnr > 15 && irqnr < 1020)) {
if (static_key_true(&supports_deactivate))
writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
handle_domain_irq(gic->domain, irqnr, regs);
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index f9512bfa6c3c..0a41132ffba7 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -530,8 +530,9 @@ init_pmu(void)
int timeout;
struct adb_request req;
- out_8(&via[B], via[B] | TREQ); /* negate TREQ */
- out_8(&via[DIRB], (via[DIRB] | TREQ) & ~TACK); /* TACK in, TREQ out */
+ /* Negate TREQ. Set TACK to input and TREQ to output. */
+ out_8(&via[B], in_8(&via[B]) | TREQ);
+ out_8(&via[DIRB], (in_8(&via[DIRB]) | TREQ) & ~TACK);
pmu_request(&req, NULL, 2, PMU_SET_INTR_MASK, pmu_intr_mask);
timeout = 100000;
@@ -1453,8 +1454,8 @@ pmu_sr_intr(void)
struct adb_request *req;
int bite = 0;
- if (via[B] & TREQ) {
- printk(KERN_ERR "PMU: spurious SR intr (%x)\n", via[B]);
+ if (in_8(&via[B]) & TREQ) {
+ printk(KERN_ERR "PMU: spurious SR intr (%x)\n", in_8(&via[B]));
out_8(&via[IFR], SR_INT);
return NULL;
}
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index f2c0000de613..95a6ae053714 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -462,8 +462,10 @@ static int bch_writeback_thread(void *arg)
* data on cache. BCACHE_DEV_DETACHING flag is set in
* bch_cached_dev_detach().
*/
- if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
+ if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) {
+ up_write(&dc->writeback_lock);
break;
+ }
}
up_write(&dc->writeback_lock);
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index d3c55d7754af..905badc6cb17 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -337,7 +337,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
disk_super->version = cpu_to_le32(MAX_CACHE_VERSION);
memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
- disk_super->policy_hint_size = 0;
+ disk_super->policy_hint_size = cpu_to_le32(0);
__copy_sm_root(cmd, disk_super);
@@ -652,6 +652,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]);
disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]);
+ disk_super->policy_hint_size = cpu_to_le32(cmd->policy_hint_size);
disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits);
disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses);
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 1452ed9aacb4..54c308e6704f 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -454,6 +454,8 @@ static int run_complete_job(struct kcopyd_job *job)
if (atomic_dec_and_test(&kc->nr_jobs))
wake_up(&kc->destroyq);
+ cond_resched();
+
return 0;
}
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 1c67966af2f2..265964e63582 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -508,14 +508,14 @@ static int adjoin(struct dm_table *table, struct dm_target *ti)
* On the other hand, dm-switch needs to process bulk data using messages and
* excessive use of GFP_NOIO could cause trouble.
*/
-static char **realloc_argv(unsigned *array_size, char **old_argv)
+static char **realloc_argv(unsigned *size, char **old_argv)
{
char **argv;
unsigned new_size;
gfp_t gfp;
- if (*array_size) {
- new_size = *array_size * 2;
+ if (*size) {
+ new_size = *size * 2;
gfp = GFP_KERNEL;
} else {
new_size = 8;
@@ -523,8 +523,8 @@ static char **realloc_argv(unsigned *array_size, char **old_argv)
}
argv = kmalloc(new_size * sizeof(*argv), gfp);
if (argv) {
- memcpy(argv, old_argv, *array_size * sizeof(*argv));
- *array_size = new_size;
+ memcpy(argv, old_argv, *size * sizeof(*argv));
+ *size = new_size;
}
kfree(old_argv);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d59b861764a1..0841d8f10a58 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4190,6 +4190,12 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
s->failed++;
if (rdev && !test_bit(Faulty, &rdev->flags))
do_recovery = 1;
+ else if (!rdev) {
+ rdev = rcu_dereference(
+ conf->disks[i].replacement);
+ if (rdev && !test_bit(Faulty, &rdev->flags))
+ do_recovery = 1;
+ }
}
}
if (test_bit(STRIPE_SYNCING, &sh->state)) {
diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index 76f67f9ad30c..f21157ecadae 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -1360,6 +1360,11 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb)
struct vb2_buffer *vb;
int ret;
+ if (q->error) {
+ dprintk(1, "fatal error occurred on queue\n");
+ return -EIO;
+ }
+
vb = q->bufs[index];
switch (vb->state) {
diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index c646784c5a7d..fbec711c4195 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -714,6 +714,7 @@ sm501_create_subdev(struct sm501_devdata *sm, char *name,
smdev->pdev.name = name;
smdev->pdev.id = sm->pdev_id;
smdev->pdev.dev.parent = sm->dev;
+ smdev->pdev.dev.coherent_dma_mask = 0xffffffff;
if (res_count) {
smdev->pdev.resource = (struct resource *)(smdev+1);
diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
index e4e4b22eebc9..4a0f076c91ba 100644
--- a/drivers/mfd/ti_am335x_tscadc.c
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -224,14 +224,13 @@ static int ti_tscadc_probe(struct platform_device *pdev)
* The TSC_ADC_SS controller design assumes the OCP clock is
* at least 6x faster than the ADC clock.
*/
- clk = clk_get(&pdev->dev, "adc_tsc_fck");
+ clk = devm_clk_get(&pdev->dev, "adc_tsc_fck");
if (IS_ERR(clk)) {
dev_err(&pdev->dev, "failed to get TSC fck\n");
err = PTR_ERR(clk);
goto err_disable_clk;
}
clock_rate = clk_get_rate(clk);
- clk_put(clk);
tscadc->clk_div = clock_rate / ADC_CLK;
/* TSCADC_CLKDIV needs to be configured to the value minus 1 */
diff --git a/drivers/misc/hmc6352.c b/drivers/misc/hmc6352.c
index 90520d76633f..9cde4c5bfba4 100644
--- a/drivers/misc/hmc6352.c
+++ b/drivers/misc/hmc6352.c
@@ -27,6 +27,7 @@
#include <linux/err.h>
#include <linux/delay.h>
#include <linux/sysfs.h>
+#include <linux/nospec.h>
static DEFINE_MUTEX(compass_mutex);
@@ -50,6 +51,7 @@ static int compass_store(struct device *dev, const char *buf, size_t count,
return ret;
if (val >= strlen(map))
return -EINVAL;
+ val = array_index_nospec(val, strlen(map));
mutex_lock(&compass_mutex);
ret = compass_command(c, map[val]);
mutex_unlock(&compass_mutex);
diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c
index 2a6eaf1122b4..8e06e1020ad9 100644
--- a/drivers/misc/lkdtm.c
+++ b/drivers/misc/lkdtm.c
@@ -47,11 +47,18 @@
#include <linux/vmalloc.h>
#include <linux/mman.h>
#include <asm/cacheflush.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
#ifdef CONFIG_IDE
#include <linux/ide.h>
#endif
+struct lkdtm_list {
+ struct list_head node;
+};
+
/*
* Make sure our attempts to over run the kernel stack doesn't trigger
* a compiler warning when CONFIG_FRAME_WARN is set. Then make sure we
@@ -88,6 +95,9 @@ enum ctype {
CT_EXCEPTION,
CT_LOOP,
CT_OVERFLOW,
+ CT_CORRUPT_LIST_ADD,
+ CT_CORRUPT_LIST_DEL,
+ CT_CORRUPT_USER_DS,
CT_CORRUPT_STACK,
CT_UNALIGNED_LOAD_STORE_WRITE,
CT_OVERWRITE_ALLOCATION,
@@ -126,6 +136,9 @@ static char* cp_type[] = {
"EXCEPTION",
"LOOP",
"OVERFLOW",
+ "CORRUPT_LIST_ADD",
+ "CORRUPT_LIST_DEL",
+ "CORRUPT_USER_DS",
"CORRUPT_STACK",
"UNALIGNED_LOAD_STORE_WRITE",
"OVERWRITE_ALLOCATION",
@@ -548,6 +561,75 @@ static void lkdtm_do_action(enum ctype which)
do_overwritten();
break;
}
+ case CT_CORRUPT_LIST_ADD: {
+ /*
+ * Initially, an empty list via LIST_HEAD:
+ * test_head.next = &test_head
+ * test_head.prev = &test_head
+ */
+ LIST_HEAD(test_head);
+ struct lkdtm_list good, bad;
+ void *target[2] = { };
+ void *redirection = &target;
+
+ pr_info("attempting good list addition\n");
+
+ /*
+ * Adding to the list performs these actions:
+ * test_head.next->prev = &good.node
+ * good.node.next = test_head.next
+ * good.node.prev = &test_head
+ * test_head.next = &good.node
+ */
+ list_add(&good.node, &test_head);
+
+ pr_info("attempting corrupted list addition\n");
+ /*
+ * In simulating this "write what where" primitive, the "what" is
+ * the address of &bad.node, and the "where" is the address held
+ * by "redirection".
+ */
+ test_head.next = redirection;
+ list_add(&bad.node, &test_head);
+
+ if (target[0] == NULL && target[1] == NULL)
+ pr_err("Overwrite did not happen, but no BUG?!\n");
+ else
+ pr_err("list_add() corruption not detected!\n");
+ break;
+ }
+ case CT_CORRUPT_LIST_DEL: {
+ LIST_HEAD(test_head);
+ struct lkdtm_list item;
+ void *target[2] = { };
+ void *redirection = &target;
+
+ list_add(&item.node, &test_head);
+
+ pr_info("attempting good list removal\n");
+ list_del(&item.node);
+
+ pr_info("attempting corrupted list removal\n");
+ list_add(&item.node, &test_head);
+
+ /* As with the list_add() test above, this corrupts "next". */
+ item.node.next = redirection;
+ list_del(&item.node);
+
+ if (target[0] == NULL && target[1] == NULL)
+ pr_err("Overwrite did not happen, but no BUG?!\n");
+ else
+ pr_err("list_del() corruption not detected!\n");
+ break;
+ }
+ case CT_CORRUPT_USER_DS: {
+ pr_info("setting bad task size limit\n");
+ set_fs(KERNEL_DS);
+
+ /* Make sure we do not keep running with a KERNEL_DS! */
+ force_sig(SIGKILL, current);
+ break;
+ }
case CT_NONE:
default:
break;
diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
index bdc7fcd80eca..9dcdc6f41ceb 100644
--- a/drivers/misc/mei/bus-fixup.c
+++ b/drivers/misc/mei/bus-fixup.c
@@ -151,7 +151,7 @@ static int mei_nfc_if_version(struct mei_cl *cl,
ret = 0;
bytes_recv = __mei_cl_recv(cl, (u8 *)reply, if_version_length);
- if (bytes_recv < if_version_length) {
+ if (bytes_recv < 0 || bytes_recv < if_version_length) {
dev_err(bus->dev, "Could not read IF version\n");
ret = -EIO;
goto err;
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
index adab5bbb642a..d5b84d68f988 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -230,8 +230,11 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (!pci_dev_run_wake(pdev))
mei_me_set_pm_domain(dev);
- if (mei_pg_is_enabled(dev))
+ if (mei_pg_is_enabled(dev)) {
pm_runtime_put_noidle(&pdev->dev);
+ if (hw->d0i3_supported)
+ pm_runtime_allow(&pdev->dev);
+ }
dev_dbg(&pdev->dev, "initialization successful.\n");
diff --git a/drivers/misc/mic/scif/scif_api.c b/drivers/misc/mic/scif/scif_api.c
index ddc9e4b08b5c..56efa9d18a9a 100644
--- a/drivers/misc/mic/scif/scif_api.c
+++ b/drivers/misc/mic/scif/scif_api.c
@@ -370,11 +370,10 @@ int scif_bind(scif_epd_t epd, u16 pn)
goto scif_bind_exit;
}
} else {
- pn = scif_get_new_port();
- if (!pn) {
- ret = -ENOSPC;
+ ret = scif_get_new_port();
+ if (ret < 0)
goto scif_bind_exit;
- }
+ pn = ret;
}
ep->state = SCIFEP_BOUND;
@@ -648,13 +647,12 @@ int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
err = -EISCONN;
break;
case SCIFEP_UNBOUND:
- ep->port.port = scif_get_new_port();
- if (!ep->port.port) {
- err = -ENOSPC;
- } else {
- ep->port.node = scif_info.nodeid;
- ep->conn_async_state = ASYNC_CONN_IDLE;
- }
+ err = scif_get_new_port();
+ if (err < 0)
+ break;
+ ep->port.port = err;
+ ep->port.node = scif_info.nodeid;
+ ep->conn_async_state = ASYNC_CONN_IDLE;
/* Fall through */
case SCIFEP_BOUND:
/*
diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c
index 71b64550b591..a1bca836e506 100644
--- a/drivers/misc/ti-st/st_kim.c
+++ b/drivers/misc/ti-st/st_kim.c
@@ -757,14 +757,14 @@ static int kim_probe(struct platform_device *pdev)
err = gpio_request(kim_gdata->nshutdown, "kim");
if (unlikely(err)) {
pr_err(" gpio %d request failed ", kim_gdata->nshutdown);
- return err;
+ goto err_sysfs_group;
}
/* Configure nShutdown GPIO as output=0 */
err = gpio_direction_output(kim_gdata->nshutdown, 0);
if (unlikely(err)) {
pr_err(" unable to configure gpio %d", kim_gdata->nshutdown);
- return err;
+ goto err_sysfs_group;
}
/* get reference of pdev for request_firmware
*/
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index 5e047bfc0cc4..5e9122cd3898 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -45,6 +45,7 @@
#include <linux/seq_file.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
+#include <linux/io.h>
#include <asm/hypervisor.h>
MODULE_AUTHOR("VMware, Inc.");
@@ -341,7 +342,13 @@ static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
success = false;
}
- if (b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS)
+ /*
+ * 2MB pages are only supported with batching. If batching is for some
+ * reason disabled, do not use 2MB pages, since otherwise the legacy
+ * mechanism is used with 2MB pages, causing a failure.
+ */
+ if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
+ (b->capabilities & VMW_BALLOON_BATCHED_CMDS))
b->supported_page_sizes = 2;
else
b->supported_page_sizes = 1;
@@ -450,7 +457,7 @@ static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
pfn32 = (u32)pfn;
if (pfn32 != pfn)
- return -1;
+ return -EINVAL;
STATS_INC(b->stats.lock[false]);
@@ -460,7 +467,7 @@ static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
STATS_INC(b->stats.lock_fail[false]);
- return 1;
+ return -EIO;
}
static int vmballoon_send_batched_lock(struct vmballoon *b,
@@ -597,11 +604,12 @@ static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
target);
- if (locked > 0) {
+ if (locked) {
STATS_INC(b->stats.refused_alloc[false]);
- if (hv_status == VMW_BALLOON_ERROR_RESET ||
- hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) {
+ if (locked == -EIO &&
+ (hv_status == VMW_BALLOON_ERROR_RESET ||
+ hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED)) {
vmballoon_free_page(page, false);
return -EIO;
}
@@ -617,7 +625,7 @@ static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
} else {
vmballoon_free_page(page, false);
}
- return -EIO;
+ return locked;
}
/* track allocated page */
@@ -1029,29 +1037,30 @@ static void vmballoon_vmci_cleanup(struct vmballoon *b)
*/
static int vmballoon_vmci_init(struct vmballoon *b)
{
- int error = 0;
+ unsigned long error, dummy;
- if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) != 0) {
- error = vmci_doorbell_create(&b->vmci_doorbell,
- VMCI_FLAG_DELAYED_CB,
- VMCI_PRIVILEGE_FLAG_RESTRICTED,
- vmballoon_doorbell, b);
-
- if (error == VMCI_SUCCESS) {
- VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET,
- b->vmci_doorbell.context,
- b->vmci_doorbell.resource, error);
- STATS_INC(b->stats.doorbell_set);
- }
- }
+ if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0)
+ return 0;
- if (error != 0) {
- vmballoon_vmci_cleanup(b);
+ error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB,
+ VMCI_PRIVILEGE_FLAG_RESTRICTED,
+ vmballoon_doorbell, b);
- return -EIO;
- }
+ if (error != VMCI_SUCCESS)
+ goto fail;
+
+ error = VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, b->vmci_doorbell.context,
+ b->vmci_doorbell.resource, dummy);
+
+ STATS_INC(b->stats.doorbell_set);
+
+ if (error != VMW_BALLOON_SUCCESS)
+ goto fail;
return 0;
+fail:
+ vmballoon_vmci_cleanup(b);
+ return -EIO;
}
/*
@@ -1289,7 +1298,14 @@ static int __init vmballoon_init(void)
return 0;
}
-module_init(vmballoon_init);
+
+/*
+ * Using late_initcall() instead of module_init() allows the balloon to use the
+ * VMCI doorbell even when the balloon is built into the kernel. Otherwise the
+ * VMCI is probed only after the balloon is initialized. If the balloon is used
+ * as a module, late_initcall() is equivalent to module_init().
+ */
+late_initcall(vmballoon_init);
static void __exit vmballoon_exit(void)
{
diff --git a/drivers/mtd/maps/solutionengine.c b/drivers/mtd/maps/solutionengine.c
index bb580bc16445..c07f21b20463 100644
--- a/drivers/mtd/maps/solutionengine.c
+++ b/drivers/mtd/maps/solutionengine.c
@@ -59,9 +59,9 @@ static int __init init_soleng_maps(void)
return -ENXIO;
}
}
- printk(KERN_NOTICE "Solution Engine: Flash at 0x%08lx, EPROM at 0x%08lx\n",
- soleng_flash_map.phys & 0x1fffffff,
- soleng_eprom_map.phys & 0x1fffffff);
+ printk(KERN_NOTICE "Solution Engine: Flash at 0x%pap, EPROM at 0x%pap\n",
+ &soleng_flash_map.phys,
+ &soleng_eprom_map.phys);
flash_mtd->owner = THIS_MODULE;
eprom_mtd = do_map_probe("map_rom", &soleng_eprom_map);
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 6d19835b80a9..0d244dac1ccb 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -160,8 +160,12 @@ static ssize_t mtdchar_read(struct file *file, char __user *buf, size_t count,
pr_debug("MTD_read\n");
- if (*ppos + count > mtd->size)
- count = mtd->size - *ppos;
+ if (*ppos + count > mtd->size) {
+ if (*ppos < mtd->size)
+ count = mtd->size - *ppos;
+ else
+ count = 0;
+ }
if (!count)
return 0;
@@ -246,7 +250,7 @@ static ssize_t mtdchar_write(struct file *file, const char __user *buf, size_t c
pr_debug("MTD_write\n");
- if (*ppos == mtd->size)
+ if (*ppos >= mtd->size)
return -ENOSPC;
if (*ppos + count > mtd->size)
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index b3c1b8106a68..f4b3ce2b2bc3 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -1597,8 +1597,10 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
cond_resched();
e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
- if (!e)
+ if (!e) {
+ err = -ENOMEM;
goto out_free;
+ }
e->pnum = aeb->pnum;
e->ec = aeb->ec;
@@ -1617,8 +1619,10 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
cond_resched();
e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
- if (!e)
+ if (!e) {
+ err = -ENOMEM;
goto out_free;
+ }
e->pnum = aeb->pnum;
e->ec = aeb->ec;
diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c
index 2e4649655181..4e98e5aff7c5 100644
--- a/drivers/net/appletalk/ipddp.c
+++ b/drivers/net/appletalk/ipddp.c
@@ -284,8 +284,12 @@ static int ipddp_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCFINDIPDDPRT:
spin_lock_bh(&ipddp_route_lock);
rp = __ipddp_find_route(&rcp);
- if (rp)
- memcpy(&rcp2, rp, sizeof(rcp2));
+ if (rp) {
+ memset(&rcp2, 0, sizeof(rcp2));
+ rcp2.ip = rp->ip;
+ rcp2.at = rp->at;
+ rcp2.flags = rp->flags;
+ }
spin_unlock_bh(&ipddp_route_lock);
if (rp) {
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index cef53f2d9854..ce20bc939b38 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -185,6 +185,9 @@ struct bcmgenet_mib_counters {
#define UMAC_MAC1 0x010
#define UMAC_MAX_FRAME_LEN 0x014
+#define UMAC_MODE 0x44
+#define MODE_LINK_STATUS (1 << 5)
+
#define UMAC_EEE_CTRL 0x064
#define EN_LPI_RX_PAUSE (1 << 0)
#define EN_LPI_TX_PFC (1 << 1)
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index e96d1f95bb47..4c73feca4842 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -167,8 +167,14 @@ void bcmgenet_mii_setup(struct net_device *dev)
static int bcmgenet_fixed_phy_link_update(struct net_device *dev,
struct fixed_phy_status *status)
{
- if (dev && dev->phydev && status)
- status->link = dev->phydev->link;
+ struct bcmgenet_priv *priv;
+ u32 reg;
+
+ if (dev && dev->phydev && status) {
+ priv = netdev_priv(dev);
+ reg = bcmgenet_umac_readl(priv, UMAC_MODE);
+ status->link = !!(reg & MODE_LINK_STATUS);
+ }
return 0;
}
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index b20bce2c7da1..0433fdebda25 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -2683,7 +2683,6 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
*/
enic->port_mtu = enic->config.mtu;
- (void)enic_change_mtu(netdev, enic->port_mtu);
err = enic_set_mac_addr(netdev, enic->mac_addr);
if (err) {
@@ -2732,6 +2731,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->features |= NETIF_F_HIGHDMA;
netdev->priv_flags |= IFF_UNICAST_FLT;
+ netdev->mtu = enic->port_mtu;
err = register_netdev(netdev);
if (err) {
diff --git a/drivers/net/ethernet/hp/hp100.c b/drivers/net/ethernet/hp/hp100.c
index 3daf2d4a7ca0..884aa809baac 100644
--- a/drivers/net/ethernet/hp/hp100.c
+++ b/drivers/net/ethernet/hp/hp100.c
@@ -2636,7 +2636,7 @@ static int hp100_login_to_vg_hub(struct net_device *dev, u_short force_relogin)
/* Wait for link to drop */
time = jiffies + (HZ / 10);
do {
- if (~(hp100_inb(VG_LAN_CFG_1) & HP100_LINK_UP_ST))
+ if (!(hp100_inb(VG_LAN_CFG_1) & HP100_LINK_UP_ST))
break;
if (!in_interrupt())
schedule_timeout_interruptible(1);
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index ea693bbf56d8..1c300259d70a 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -2569,7 +2569,6 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
}
mvneta_start_dev(pp);
- mvneta_port_up(pp);
netdev_update_features(dev);
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index b28e73ea2c25..f39ad0e66637 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -2388,26 +2388,20 @@ static int qlge_update_hw_vlan_features(struct net_device *ndev,
return status;
}
-static netdev_features_t qlge_fix_features(struct net_device *ndev,
- netdev_features_t features)
-{
- int err;
-
- /* Update the behavior of vlan accel in the adapter */
- err = qlge_update_hw_vlan_features(ndev, features);
- if (err)
- return err;
-
- return features;
-}
-
static int qlge_set_features(struct net_device *ndev,
netdev_features_t features)
{
netdev_features_t changed = ndev->features ^ features;
+ int err;
+
+ if (changed & NETIF_F_HW_VLAN_CTAG_RX) {
+ /* Update the behavior of vlan accel in the adapter */
+ err = qlge_update_hw_vlan_features(ndev, features);
+ if (err)
+ return err;
- if (changed & NETIF_F_HW_VLAN_CTAG_RX)
qlge_vlan_mode(ndev, features);
+ }
return 0;
}
@@ -4720,7 +4714,6 @@ static const struct net_device_ops qlge_netdev_ops = {
.ndo_set_mac_address = qlge_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
.ndo_tx_timeout = qlge_tx_timeout,
- .ndo_fix_features = qlge_fix_features,
.ndo_set_features = qlge_set_features,
.ndo_vlan_rx_add_vid = qlge_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = qlge_vlan_rx_kill_vid,
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index c69b0bdd891d..c1217a87d535 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -371,7 +371,6 @@ struct cpsw_priv {
spinlock_t lock;
struct platform_device *pdev;
struct net_device *ndev;
- struct device_node *phy_node;
struct napi_struct napi_rx;
struct napi_struct napi_tx;
struct device *dev;
@@ -1165,25 +1164,34 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
- if (priv->phy_node)
- slave->phy = of_phy_connect(priv->ndev, priv->phy_node,
+ if (slave->data->phy_node) {
+ slave->phy = of_phy_connect(priv->ndev, slave->data->phy_node,
&cpsw_adjust_link, 0, slave->data->phy_if);
- else
+ if (!slave->phy) {
+ dev_err(priv->dev, "phy \"%s\" not found on slave %d\n",
+ slave->data->phy_node->full_name,
+ slave->slave_num);
+ return;
+ }
+ } else {
slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
&cpsw_adjust_link, slave->data->phy_if);
- if (IS_ERR(slave->phy)) {
- dev_err(priv->dev, "phy %s not found on slave %d\n",
- slave->data->phy_id, slave->slave_num);
- slave->phy = NULL;
- } else {
- dev_info(priv->dev, "phy found : id is : 0x%x\n",
- slave->phy->phy_id);
- phy_start(slave->phy);
-
- /* Configure GMII_SEL register */
- cpsw_phy_sel(&priv->pdev->dev, slave->phy->interface,
- slave->slave_num);
+ if (IS_ERR(slave->phy)) {
+ dev_err(priv->dev,
+ "phy \"%s\" not found on slave %d, err %ld\n",
+ slave->data->phy_id, slave->slave_num,
+ PTR_ERR(slave->phy));
+ slave->phy = NULL;
+ return;
+ }
}
+
+ dev_info(priv->dev, "phy found : id is : 0x%x\n", slave->phy->phy_id);
+
+ phy_start(slave->phy);
+
+ /* Configure GMII_SEL register */
+ cpsw_phy_sel(&priv->pdev->dev, slave->phy->interface, slave->slave_num);
}
static inline void cpsw_add_default_vlan(struct cpsw_priv *priv)
@@ -1957,12 +1965,11 @@ static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv,
slave->port_vlan = data->dual_emac_res_vlan;
}
-static int cpsw_probe_dt(struct cpsw_priv *priv,
+static int cpsw_probe_dt(struct cpsw_platform_data *data,
struct platform_device *pdev)
{
struct device_node *node = pdev->dev.of_node;
struct device_node *slave_node;
- struct cpsw_platform_data *data = &priv->data;
int i = 0, ret;
u32 prop;
@@ -2050,7 +2057,8 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
if (strcmp(slave_node->name, "slave"))
continue;
- priv->phy_node = of_parse_phandle(slave_node, "phy-handle", 0);
+ slave_data->phy_node = of_parse_phandle(slave_node,
+ "phy-handle", 0);
parp = of_get_property(slave_node, "phy_id", &lenp);
if (of_phy_is_fixed_link(slave_node)) {
struct device_node *phy_node;
@@ -2087,6 +2095,7 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
}
snprintf(slave_data->phy_id, sizeof(slave_data->phy_id),
PHY_ID_FMT, mdio->name, phyid);
+ put_device(&mdio->dev);
} else {
dev_err(&pdev->dev, "No slave[%d] phy_id or fixed-link property\n", i);
goto no_phy_slave;
@@ -2291,7 +2300,7 @@ static int cpsw_probe(struct platform_device *pdev)
/* Select default pin state */
pinctrl_pm_select_default_state(&pdev->dev);
- if (cpsw_probe_dt(priv, pdev)) {
+ if (cpsw_probe_dt(&priv->data, pdev)) {
dev_err(&pdev->dev, "cpsw: platform data missing\n");
ret = -ENODEV;
goto clean_runtime_disable_ret;
diff --git a/drivers/net/ethernet/ti/cpsw.h b/drivers/net/ethernet/ti/cpsw.h
index 442a7038e660..e50afd1b2eda 100644
--- a/drivers/net/ethernet/ti/cpsw.h
+++ b/drivers/net/ethernet/ti/cpsw.h
@@ -18,6 +18,7 @@
#include <linux/phy.h>
struct cpsw_slave_data {
+ struct device_node *phy_node;
char phy_id[MII_BUS_ID_SIZE];
int phy_if;
u8 mac_addr[ETH_ALEN];
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 6be315303d61..8ecb24186b7f 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -2108,6 +2108,7 @@ static int davinci_emac_remove(struct platform_device *pdev)
cpdma_ctlr_destroy(priv->dma);
unregister_netdev(ndev);
+ of_node_put(priv->phy_node);
free_netdev(ndev);
return 0;
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index acec4b565511..1aede726052c 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -902,6 +902,8 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
ret = lan78xx_update_flowcontrol(dev, ecmd.duplex, ladv, radv);
netif_carrier_on(dev->net);
+
+ tasklet_schedule(&dev->bh);
}
return ret;
@@ -1361,8 +1363,6 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev)
netif_dbg(dev, ifup, dev->net,
"MAC address set to random addr");
}
-
- tasklet_schedule(&dev->bh);
}
ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 916b9b12edd2..4644357d291a 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -2901,6 +2901,13 @@ static int ath10k_update_channel_list(struct ath10k *ar)
passive = channel->flags & IEEE80211_CHAN_NO_IR;
ch->passive = passive;
+ /* the firmware is ignoring the "radar" flag of the
+ * channel and is scanning actively using Probe Requests
+ * on "Radar detection"/DFS channels which are not
+ * marked as "available"
+ */
+ ch->passive |= ch->chan_radar;
+
ch->freq = channel->center_freq;
ch->band_center_freq1 = channel->center_freq;
ch->min_power = 0;
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
index 02eea3c3b5d3..c72eb4464de9 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
@@ -1424,6 +1424,11 @@ static struct sk_buff *ath10k_wmi_tlv_op_gen_init(struct ath10k *ar)
cfg->keep_alive_pattern_size = __cpu_to_le32(0);
cfg->max_tdls_concurrent_sleep_sta = __cpu_to_le32(1);
cfg->max_tdls_concurrent_buffer_sta = __cpu_to_le32(1);
+ cfg->wmi_send_separate = __cpu_to_le32(0);
+ cfg->num_ocb_vdevs = __cpu_to_le32(0);
+ cfg->num_ocb_channels = __cpu_to_le32(0);
+ cfg->num_ocb_schedules = __cpu_to_le32(0);
+ cfg->host_capab = __cpu_to_le32(0);
ath10k_wmi_put_host_mem_chunks(ar, chunks);
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.h b/drivers/net/wireless/ath/ath10k/wmi-tlv.h
index ad655c44afdb..f5031f3965c5 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.h
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.h
@@ -1209,6 +1209,11 @@ struct wmi_tlv_resource_config {
__le32 keep_alive_pattern_size;
__le32 max_tdls_concurrent_sleep_sta;
__le32 max_tdls_concurrent_buffer_sta;
+ __le32 wmi_send_separate;
+ __le32 num_ocb_vdevs;
+ __le32 num_ocb_channels;
+ __le32 num_ocb_schedules;
+ __le32 host_capab;
} __packed;
struct wmi_tlv_init_cmd {
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 68d0a5c9d437..0a4bd73caae5 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -86,8 +86,7 @@ struct netfront_cb {
/* IRQ name is queue name with "-tx" or "-rx" appended */
#define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
-static DECLARE_WAIT_QUEUE_HEAD(module_load_q);
-static DECLARE_WAIT_QUEUE_HEAD(module_unload_q);
+static DECLARE_WAIT_QUEUE_HEAD(module_wq);
struct netfront_stats {
u64 packets;
@@ -893,7 +892,11 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
BUG_ON(pull_to <= skb_headlen(skb));
__pskb_pull_tail(skb, pull_to - skb_headlen(skb));
}
- BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
+ if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
+ queue->rx.rsp_cons = ++cons;
+ kfree_skb(nskb);
+ return ~0U;
+ }
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
skb_frag_page(nfrag),
@@ -1030,6 +1033,8 @@ err:
skb->len += rx->status;
i = xennet_fill_frags(queue, skb, &tmpq);
+ if (unlikely(i == ~0U))
+ goto err;
if (rx->flags & XEN_NETRXF_csum_blank)
skb->ip_summed = CHECKSUM_PARTIAL;
@@ -1336,11 +1341,11 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
netif_carrier_off(netdev);
xenbus_switch_state(dev, XenbusStateInitialising);
- wait_event(module_load_q,
- xenbus_read_driver_state(dev->otherend) !=
- XenbusStateClosed &&
- xenbus_read_driver_state(dev->otherend) !=
- XenbusStateUnknown);
+ wait_event(module_wq,
+ xenbus_read_driver_state(dev->otherend) !=
+ XenbusStateClosed &&
+ xenbus_read_driver_state(dev->otherend) !=
+ XenbusStateUnknown);
return netdev;
exit:
@@ -1608,6 +1613,7 @@ static int xennet_init_queue(struct netfront_queue *queue)
{
unsigned short i;
int err = 0;
+ char *devid;
spin_lock_init(&queue->tx_lock);
spin_lock_init(&queue->rx_lock);
@@ -1615,8 +1621,9 @@ static int xennet_init_queue(struct netfront_queue *queue)
setup_timer(&queue->rx_refill_timer, rx_refill_timeout,
(unsigned long)queue);
- snprintf(queue->name, sizeof(queue->name), "%s-q%u",
- queue->info->netdev->name, queue->id);
+ devid = strrchr(queue->info->xbdev->nodename, '/') + 1;
+ snprintf(queue->name, sizeof(queue->name), "vif%s-q%u",
+ devid, queue->id);
/* Initialise tx_skbs as a free chain containing every entry. */
queue->tx_skb_freelist = 0;
@@ -2023,15 +2030,14 @@ static void netback_changed(struct xenbus_device *dev,
dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));
+ wake_up_all(&module_wq);
+
switch (backend_state) {
case XenbusStateInitialising:
case XenbusStateInitialised:
case XenbusStateReconfiguring:
case XenbusStateReconfigured:
- break;
-
case XenbusStateUnknown:
- wake_up_all(&module_unload_q);
break;
case XenbusStateInitWait:
@@ -2047,12 +2053,10 @@ static void netback_changed(struct xenbus_device *dev,
break;
case XenbusStateClosed:
- wake_up_all(&module_unload_q);
if (dev->state == XenbusStateClosed)
break;
/* Missed the backend's CLOSING state -- fallthrough */
case XenbusStateClosing:
- wake_up_all(&module_unload_q);
xenbus_frontend_closed(dev);
break;
}
@@ -2160,14 +2164,14 @@ static int xennet_remove(struct xenbus_device *dev)
if (xenbus_read_driver_state(dev->otherend) != XenbusStateClosed) {
xenbus_switch_state(dev, XenbusStateClosing);
- wait_event(module_unload_q,
+ wait_event(module_wq,
xenbus_read_driver_state(dev->otherend) ==
XenbusStateClosing ||
xenbus_read_driver_state(dev->otherend) ==
XenbusStateUnknown);
xenbus_switch_state(dev, XenbusStateClosed);
- wait_event(module_unload_q,
+ wait_event(module_wq,
xenbus_read_driver_state(dev->otherend) ==
XenbusStateClosed ||
xenbus_read_driver_state(dev->otherend) ==
diff --git a/drivers/parport/parport_sunbpp.c b/drivers/parport/parport_sunbpp.c
index 01cf1c1a841a..8de329546b82 100644
--- a/drivers/parport/parport_sunbpp.c
+++ b/drivers/parport/parport_sunbpp.c
@@ -286,12 +286,16 @@ static int bpp_probe(struct platform_device *op)
ops = kmemdup(&parport_sunbpp_ops, sizeof(struct parport_operations),
GFP_KERNEL);
- if (!ops)
+ if (!ops) {
+ err = -ENOMEM;
goto out_unmap;
+ }
dprintk(("register_port\n"));
- if (!(p = parport_register_port((unsigned long)base, irq, dma, ops)))
+ if (!(p = parport_register_port((unsigned long)base, irq, dma, ops))) {
+ err = -ENOMEM;
goto out_free_ops;
+ }
p->size = size;
p->dev = &op->dev;
diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c
index 379d08f76146..d0a4652bb9ac 100644
--- a/drivers/pci/host/pci-mvebu.c
+++ b/drivers/pci/host/pci-mvebu.c
@@ -1235,7 +1235,7 @@ static int mvebu_pcie_probe(struct platform_device *pdev)
pcie->realio.start = PCIBIOS_MIN_IO;
pcie->realio.end = min_t(resource_size_t,
IO_SPACE_LIMIT,
- resource_size(&pcie->io));
+ resource_size(&pcie->io) - 1);
} else
pcie->realio = pcie->io;
diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
index 8b50f4840f0b..3a5053093bfb 100644
--- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
+++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
@@ -291,31 +291,47 @@ static int pmic_gpio_config_get(struct pinctrl_dev *pctldev,
switch (param) {
case PIN_CONFIG_DRIVE_PUSH_PULL:
- arg = pad->buffer_type == PMIC_GPIO_OUT_BUF_CMOS;
+ if (pad->buffer_type != PMIC_GPIO_OUT_BUF_CMOS)
+ return -EINVAL;
+ arg = 1;
break;
case PIN_CONFIG_DRIVE_OPEN_DRAIN:
- arg = pad->buffer_type == PMIC_GPIO_OUT_BUF_OPEN_DRAIN_NMOS;
+ if (pad->buffer_type != PMIC_GPIO_OUT_BUF_OPEN_DRAIN_NMOS)
+ return -EINVAL;
+ arg = 1;
break;
case PIN_CONFIG_DRIVE_OPEN_SOURCE:
- arg = pad->buffer_type == PMIC_GPIO_OUT_BUF_OPEN_DRAIN_PMOS;
+ if (pad->buffer_type != PMIC_GPIO_OUT_BUF_OPEN_DRAIN_PMOS)
+ return -EINVAL;
+ arg = 1;
break;
case PIN_CONFIG_BIAS_PULL_DOWN:
- arg = pad->pullup == PMIC_GPIO_PULL_DOWN;
+ if (pad->pullup != PMIC_GPIO_PULL_DOWN)
+ return -EINVAL;
+ arg = 1;
break;
case PIN_CONFIG_BIAS_DISABLE:
- arg = pad->pullup = PMIC_GPIO_PULL_DISABLE;
+ if (pad->pullup != PMIC_GPIO_PULL_DISABLE)
+ return -EINVAL;
+ arg = 1;
break;
case PIN_CONFIG_BIAS_PULL_UP:
- arg = pad->pullup == PMIC_GPIO_PULL_UP_30;
+ if (pad->pullup != PMIC_GPIO_PULL_UP_30)
+ return -EINVAL;
+ arg = 1;
break;
case PIN_CONFIG_BIAS_HIGH_IMPEDANCE:
- arg = !pad->is_enabled;
+ if (pad->is_enabled)
+ return -EINVAL;
+ arg = 1;
break;
case PIN_CONFIG_POWER_SOURCE:
arg = pad->power_source;
break;
case PIN_CONFIG_INPUT_ENABLE:
- arg = pad->input_enabled;
+ if (!pad->input_enabled)
+ return -EINVAL;
+ arg = 1;
break;
case PIN_CONFIG_OUTPUT:
arg = pad->out_value;
diff --git a/drivers/platform/x86/alienware-wmi.c b/drivers/platform/x86/alienware-wmi.c
index 1e1e59423889..3df47c1b04ec 100644
--- a/drivers/platform/x86/alienware-wmi.c
+++ b/drivers/platform/x86/alienware-wmi.c
@@ -463,6 +463,7 @@ static acpi_status alienware_hdmi_command(struct hdmi_args *in_args,
if (obj && obj->type == ACPI_TYPE_INTEGER)
*out_data = (u32) obj->integer.value;
}
+ kfree(output.pointer);
return status;
}
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index 0e0403e024c5..852d2de7f69f 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -392,6 +392,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = {
{ KE_KEY, 0xC4, { KEY_KBDILLUMUP } },
{ KE_KEY, 0xC5, { KEY_KBDILLUMDOWN } },
{ KE_IGNORE, 0xC6, }, /* Ambient Light Sensor notification */
+ { KE_KEY, 0xFA, { KEY_PROG2 } }, /* Lid flip action */
{ KE_END, 0},
};
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index f774cb576ffa..1ff95b5a429d 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -34,6 +34,7 @@
#define TOSHIBA_ACPI_VERSION "0.23"
#define PROC_INTERFACE_VERSION 1
+#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -1472,7 +1473,7 @@ static const struct file_operations keys_proc_fops = {
.write = keys_proc_write,
};
-static int version_proc_show(struct seq_file *m, void *v)
+static int __maybe_unused version_proc_show(struct seq_file *m, void *v)
{
seq_printf(m, "driver: %s\n", TOSHIBA_ACPI_VERSION);
seq_printf(m, "proc_interface: %d\n", PROC_INTERFACE_VERSION);
diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c
index 6a41e66015b6..062dff1c902d 100644
--- a/drivers/pwm/pwm-tiehrpwm.c
+++ b/drivers/pwm/pwm-tiehrpwm.c
@@ -384,6 +384,8 @@ static void ehrpwm_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
aqcsfrc_mask = AQCSFRC_CSFA_MASK;
}
+ /* Update shadow register first before modifying active register */
+ ehrpwm_modify(pc->mmio_base, AQCSFRC, aqcsfrc_mask, aqcsfrc_val);
/*
* Changes to immediate action on Action Qualifier. This puts
* Action Qualifier control on PWM output from next TBCLK
diff --git a/drivers/rtc/rtc-bq4802.c b/drivers/rtc/rtc-bq4802.c
index bd170cb3361c..5747a54cbd42 100644
--- a/drivers/rtc/rtc-bq4802.c
+++ b/drivers/rtc/rtc-bq4802.c
@@ -164,6 +164,10 @@ static int bq4802_probe(struct platform_device *pdev)
} else if (p->r->flags & IORESOURCE_MEM) {
p->regs = devm_ioremap(&pdev->dev, p->r->start,
resource_size(p->r));
+ if (!p->regs){
+ err = -ENOMEM;
+ goto out;
+ }
p->read = bq4802_read_mem;
p->write = bq4802_write_mem;
} else {
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 21d174e9ebdb..dac2f6883e28 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -2101,8 +2101,11 @@ static int dasd_eckd_basic_to_ready(struct dasd_device *device)
static int dasd_eckd_online_to_ready(struct dasd_device *device)
{
- cancel_work_sync(&device->reload_device);
- cancel_work_sync(&device->kick_validate);
+ if (cancel_work_sync(&device->reload_device))
+ dasd_put_device(device);
+ if (cancel_work_sync(&device->kick_validate))
+ dasd_put_device(device);
+
return 0;
};
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 95c631125a20..09ac56317f1b 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -3505,13 +3505,14 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
qdio_flags = QDIO_FLAG_SYNC_OUTPUT;
if (atomic_read(&queue->set_pci_flags_count))
qdio_flags |= QDIO_FLAG_PCI_OUT;
+ atomic_add(count, &queue->used_buffers);
+
rc = do_QDIO(CARD_DDEV(queue->card), qdio_flags,
queue->queue_no, index, count);
if (queue->card->options.performance_stats)
queue->card->perf_stats.outbound_do_qdio_time +=
qeth_get_micros() -
queue->card->perf_stats.outbound_do_qdio_start_time;
- atomic_add(count, &queue->used_buffers);
if (rc) {
queue->card->stats.tx_errors += count;
/* ignore temporary SIGA errors without busy condition */
diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c
index fa844b0ff847..7bcf0dae3a65 100644
--- a/drivers/s390/net/qeth_core_sys.c
+++ b/drivers/s390/net/qeth_core_sys.c
@@ -419,6 +419,7 @@ static ssize_t qeth_dev_layer2_store(struct device *dev,
if (card->discipline) {
card->discipline->remove(card->gdev);
qeth_core_free_discipline(card);
+ card->options.layer2 = -1;
}
rc = qeth_core_load_discipline(card, newdis);
diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index 5466246c69b4..b78a2f3745f2 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -2045,6 +2045,7 @@ static int twa_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
if (twa_initialize_device_extension(tw_dev)) {
TW_PRINTK(tw_dev->host, TW_DRIVER, 0x25, "Failed to initialize device extension");
+ retval = -ENOMEM;
goto out_free_device_extension;
}
@@ -2067,6 +2068,7 @@ static int twa_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
tw_dev->base_addr = ioremap(mem_addr, mem_len);
if (!tw_dev->base_addr) {
TW_PRINTK(tw_dev->host, TW_DRIVER, 0x35, "Failed to ioremap");
+ retval = -ENOMEM;
goto out_release_mem_region;
}
@@ -2074,8 +2076,10 @@ static int twa_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
TW_DISABLE_INTERRUPTS(tw_dev);
/* Initialize the card */
- if (twa_reset_sequence(tw_dev, 0))
+ if (twa_reset_sequence(tw_dev, 0)) {
+ retval = -ENOMEM;
goto out_iounmap;
+ }
/* Set host specific parameters */
if ((pdev->device == PCI_DEVICE_ID_3WARE_9650SE) ||
diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c
index f8374850f714..f0a5536a9ff5 100644
--- a/drivers/scsi/3w-sas.c
+++ b/drivers/scsi/3w-sas.c
@@ -1600,6 +1600,7 @@ static int twl_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
if (twl_initialize_device_extension(tw_dev)) {
TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1a, "Failed to initialize device extension");
+ retval = -ENOMEM;
goto out_free_device_extension;
}
@@ -1614,6 +1615,7 @@ static int twl_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
tw_dev->base_addr = pci_iomap(pdev, 1, 0);
if (!tw_dev->base_addr) {
TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1c, "Failed to ioremap");
+ retval = -ENOMEM;
goto out_release_mem_region;
}
@@ -1623,6 +1625,7 @@ static int twl_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
/* Initialize the card */
if (twl_reset_sequence(tw_dev, 0)) {
TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1d, "Controller reset failed during probe");
+ retval = -ENOMEM;
goto out_iounmap;
}
diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index 14af38036287..308a4206b636 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -2278,6 +2278,7 @@ static int tw_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
if (tw_initialize_device_extension(tw_dev)) {
printk(KERN_WARNING "3w-xxxx: Failed to initialize device extension.");
+ retval = -ENOMEM;
goto out_free_device_extension;
}
@@ -2292,6 +2293,7 @@ static int tw_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
tw_dev->base_addr = pci_resource_start(pdev, 0);
if (!tw_dev->base_addr) {
printk(KERN_WARNING "3w-xxxx: Failed to get io address.");
+ retval = -ENOMEM;
goto out_release_mem_region;
}
diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c
index 662b2321d1b0..913ebb6d0d29 100644
--- a/drivers/scsi/aic94xx/aic94xx_init.c
+++ b/drivers/scsi/aic94xx/aic94xx_init.c
@@ -1031,8 +1031,10 @@ static int __init aic94xx_init(void)
aic94xx_transport_template =
sas_domain_attach_transport(&aic94xx_transport_functions);
- if (!aic94xx_transport_template)
+ if (!aic94xx_transport_template) {
+ err = -ENOMEM;
goto out_destroy_caches;
+ }
err = pci_register_driver(&aic94xx_pci_driver);
if (err)
diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index c872a2e54c4b..2603bee2ce07 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -220,7 +220,7 @@ static void davinci_spi_chipselect(struct spi_device *spi, int value)
pdata = &dspi->pdata;
/* program delay transfers if tx_delay is non zero */
- if (spicfg->wdelay)
+ if (spicfg && spicfg->wdelay)
spidat1 |= SPIDAT1_WDEL;
/*
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index 956fda17c2ae..700170e366a9 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -458,18 +458,6 @@ static struct ion_handle *ion_handle_get_by_id_nolock(struct ion_client *client,
return ERR_PTR(-EINVAL);
}
-struct ion_handle *ion_handle_get_by_id(struct ion_client *client,
- int id)
-{
- struct ion_handle *handle;
-
- mutex_lock(&client->lock);
- handle = ion_handle_get_by_id_nolock(client, id);
- mutex_unlock(&client->lock);
-
- return handle;
-}
-
static bool ion_handle_validate(struct ion_client *client,
struct ion_handle *handle)
{
@@ -1466,24 +1454,28 @@ static struct dma_buf_ops dma_buf_ops = {
.vunmap = ion_dma_buf_vunmap,
};
-struct dma_buf *ion_share_dma_buf(struct ion_client *client,
- struct ion_handle *handle)
+static struct dma_buf *__ion_share_dma_buf(struct ion_client *client,
+ struct ion_handle *handle,
+ bool lock_client)
{
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
struct ion_buffer *buffer;
struct dma_buf *dmabuf;
bool valid_handle;
- mutex_lock(&client->lock);
+ if (lock_client)
+ mutex_lock(&client->lock);
valid_handle = ion_handle_validate(client, handle);
if (!valid_handle) {
WARN(1, "%s: invalid handle passed to share.\n", __func__);
- mutex_unlock(&client->lock);
+ if (lock_client)
+ mutex_unlock(&client->lock);
return ERR_PTR(-EINVAL);
}
buffer = handle->buffer;
ion_buffer_get(buffer);
- mutex_unlock(&client->lock);
+ if (lock_client)
+ mutex_unlock(&client->lock);
exp_info.ops = &dma_buf_ops;
exp_info.size = buffer->size;
@@ -1498,14 +1490,21 @@ struct dma_buf *ion_share_dma_buf(struct ion_client *client,
return dmabuf;
}
+
+struct dma_buf *ion_share_dma_buf(struct ion_client *client,
+ struct ion_handle *handle)
+{
+ return __ion_share_dma_buf(client, handle, true);
+}
EXPORT_SYMBOL(ion_share_dma_buf);
-int ion_share_dma_buf_fd(struct ion_client *client, struct ion_handle *handle)
+static int __ion_share_dma_buf_fd(struct ion_client *client,
+ struct ion_handle *handle, bool lock_client)
{
struct dma_buf *dmabuf;
int fd;
- dmabuf = ion_share_dma_buf(client, handle);
+ dmabuf = __ion_share_dma_buf(client, handle, lock_client);
if (IS_ERR(dmabuf))
return PTR_ERR(dmabuf);
@@ -1515,8 +1514,19 @@ int ion_share_dma_buf_fd(struct ion_client *client, struct ion_handle *handle)
return fd;
}
+
+int ion_share_dma_buf_fd(struct ion_client *client, struct ion_handle *handle)
+{
+ return __ion_share_dma_buf_fd(client, handle, true);
+}
EXPORT_SYMBOL(ion_share_dma_buf_fd);
+static int ion_share_dma_buf_fd_nolock(struct ion_client *client,
+ struct ion_handle *handle)
+{
+ return __ion_share_dma_buf_fd(client, handle, false);
+}
+
struct ion_handle *ion_import_dma_buf(struct ion_client *client, int fd)
{
struct dma_buf *dmabuf;
@@ -1665,11 +1675,15 @@ static long ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct ion_handle *handle;
- handle = ion_handle_get_by_id(client, data.handle.handle);
- if (IS_ERR(handle))
+ mutex_lock(&client->lock);
+ handle = ion_handle_get_by_id_nolock(client, data.handle.handle);
+ if (IS_ERR(handle)) {
+ mutex_unlock(&client->lock);
return PTR_ERR(handle);
- data.fd.fd = ion_share_dma_buf_fd(client, handle);
- ion_handle_put(handle);
+ }
+ data.fd.fd = ion_share_dma_buf_fd_nolock(client, handle);
+ ion_handle_put_nolock(handle);
+ mutex_unlock(&client->lock);
if (data.fd.fd < 0)
ret = data.fd.fd;
break;
diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c
index 8f181caffca3..619c989c5f37 100644
--- a/drivers/staging/comedi/drivers/ni_mio_common.c
+++ b/drivers/staging/comedi/drivers/ni_mio_common.c
@@ -5275,11 +5275,11 @@ static int ni_E_init(struct comedi_device *dev,
/* Digital I/O (PFI) subdevice */
s = &dev->subdevices[NI_PFI_DIO_SUBDEV];
s->type = COMEDI_SUBD_DIO;
- s->subdev_flags = SDF_READABLE | SDF_WRITABLE | SDF_INTERNAL;
s->maxdata = 1;
if (devpriv->is_m_series) {
s->n_chan = 16;
s->insn_bits = ni_pfi_insn_bits;
+ s->subdev_flags = SDF_READABLE | SDF_WRITABLE | SDF_INTERNAL;
ni_writew(dev, s->state, NI_M_PFI_DO_REG);
for (i = 0; i < NUM_PFI_OUTPUT_SELECT_REGS; ++i) {
@@ -5288,6 +5288,7 @@ static int ni_E_init(struct comedi_device *dev,
}
} else {
s->n_chan = 10;
+ s->subdev_flags = SDF_INTERNAL;
}
s->insn_config = ni_pfi_insn_config;
diff --git a/drivers/staging/rts5208/rtsx_scsi.c b/drivers/staging/rts5208/rtsx_scsi.c
index 12a3893b98fd..ade29c4295b7 100644
--- a/drivers/staging/rts5208/rtsx_scsi.c
+++ b/drivers/staging/rts5208/rtsx_scsi.c
@@ -536,7 +536,7 @@ static int inquiry(struct scsi_cmnd *srb, struct rtsx_chip *chip)
if (sendbytes > 8) {
memcpy(buf, inquiry_buf, 8);
- memcpy(buf + 8, inquiry_string, sendbytes - 8);
+ strncpy(buf + 8, inquiry_string, sendbytes - 8);
if (pro_formatter_flag) {
/* Additional Length */
buf[4] = 0x33;
diff --git a/drivers/staging/rts5208/xd.c b/drivers/staging/rts5208/xd.c
index 10fea7bb8f30..3db4a2570b19 100644
--- a/drivers/staging/rts5208/xd.c
+++ b/drivers/staging/rts5208/xd.c
@@ -1252,7 +1252,7 @@ static int xd_copy_page(struct rtsx_chip *chip, u32 old_blk, u32 new_blk,
reg = 0;
rtsx_read_register(chip, XD_CTL, &reg);
if (reg & (XD_ECC1_ERROR | XD_ECC2_ERROR)) {
- wait_timeout(100);
+ mdelay(100);
if (detect_card_cd(chip,
XD_CARD) != STATUS_SUCCESS) {
diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c
index 47e249dccb5f..b380bc7ee10a 100644
--- a/drivers/target/iscsi/iscsi_target_auth.c
+++ b/drivers/target/iscsi/iscsi_target_auth.c
@@ -26,18 +26,6 @@
#include "iscsi_target_nego.h"
#include "iscsi_target_auth.h"
-static int chap_string_to_hex(unsigned char *dst, unsigned char *src, int len)
-{
- int j = DIV_ROUND_UP(len, 2), rc;
-
- rc = hex2bin(dst, src, j);
- if (rc < 0)
- pr_debug("CHAP string contains non hex digit symbols\n");
-
- dst[j] = '\0';
- return j;
-}
-
static void chap_binaryhex_to_asciihex(char *dst, char *src, int src_len)
{
int i;
@@ -241,9 +229,16 @@ static int chap_server_compute_md5(
pr_err("Could not find CHAP_R.\n");
goto out;
}
+ if (strlen(chap_r) != MD5_SIGNATURE_SIZE * 2) {
+ pr_err("Malformed CHAP_R\n");
+ goto out;
+ }
+ if (hex2bin(client_digest, chap_r, MD5_SIGNATURE_SIZE) < 0) {
+ pr_err("Malformed CHAP_R\n");
+ goto out;
+ }
pr_debug("[server] Got CHAP_R=%s\n", chap_r);
- chap_string_to_hex(client_digest, chap_r, strlen(chap_r));
tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm)) {
@@ -348,9 +343,7 @@ static int chap_server_compute_md5(
pr_err("Could not find CHAP_C.\n");
goto out;
}
- pr_debug("[server] Got CHAP_C=%s\n", challenge);
- challenge_len = chap_string_to_hex(challenge_binhex, challenge,
- strlen(challenge));
+ challenge_len = DIV_ROUND_UP(strlen(challenge), 2);
if (!challenge_len) {
pr_err("Unable to convert incoming challenge\n");
goto out;
@@ -359,6 +352,11 @@ static int chap_server_compute_md5(
pr_err("CHAP_C exceeds maximum binary size of 1024 bytes\n");
goto out;
}
+ if (hex2bin(challenge_binhex, challenge, challenge_len) < 0) {
+ pr_err("Malformed CHAP_C\n");
+ goto out;
+ }
+ pr_debug("[server] Got CHAP_C=%s\n", challenge);
/*
* During mutual authentication, the CHAP_C generated by the
* initiator must not match the original CHAP_C generated by
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 21f888ac550e..7199bac67333 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -306,6 +306,7 @@ void __transport_register_session(
{
const struct target_core_fabric_ops *tfo = se_tpg->se_tpg_tfo;
unsigned char buf[PR_REG_ISID_LEN];
+ unsigned long flags;
se_sess->se_tpg = se_tpg;
se_sess->fabric_sess_ptr = fabric_sess_ptr;
@@ -342,7 +343,7 @@ void __transport_register_session(
se_sess->sess_bin_isid = get_unaligned_be64(&buf[0]);
}
- spin_lock_irq(&se_nacl->nacl_sess_lock);
+ spin_lock_irqsave(&se_nacl->nacl_sess_lock, flags);
/*
* The se_nacl->nacl_sess pointer will be set to the
* last active I_T Nexus for each struct se_node_acl.
@@ -351,7 +352,7 @@ void __transport_register_session(
list_add_tail(&se_sess->sess_acl_list,
&se_nacl->acl_sess_list);
- spin_unlock_irq(&se_nacl->nacl_sess_lock);
+ spin_unlock_irqrestore(&se_nacl->nacl_sess_lock, flags);
}
list_add_tail(&se_sess->sess_list, &se_tpg->tpg_sess_list);
diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c
index 802eac7e561b..2b8f2e0a4224 100644
--- a/drivers/tty/rocket.c
+++ b/drivers/tty/rocket.c
@@ -1915,7 +1915,7 @@ static __init int register_PCI(int i, struct pci_dev *dev)
ByteIO_t UPCIRingInd = 0;
if (!dev || !pci_match_id(rocket_pci_ids, dev) ||
- pci_enable_device(dev))
+ pci_enable_device(dev) || i >= NUM_BOARDS)
return 0;
rcktpt_io_addr[i] = pci_resource_start(dev, 0);
diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c
index 97d5a74558a3..a86bc7afb3b2 100644
--- a/drivers/tty/vt/vt_ioctl.c
+++ b/drivers/tty/vt/vt_ioctl.c
@@ -31,6 +31,8 @@
#include <asm/io.h>
#include <asm/uaccess.h>
+#include <linux/nospec.h>
+
#include <linux/kbd_kern.h>
#include <linux/vt_kern.h>
#include <linux/kbd_diacr.h>
@@ -703,6 +705,8 @@ int vt_ioctl(struct tty_struct *tty,
if (vsa.console == 0 || vsa.console > MAX_NR_CONSOLES)
ret = -ENXIO;
else {
+ vsa.console = array_index_nospec(vsa.console,
+ MAX_NR_CONSOLES + 1);
vsa.console--;
console_lock();
ret = vc_allocate(vsa.console);
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index bcc1fc027311..b9823eb9c195 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -833,8 +833,6 @@ int __uio_register_device(struct module *owner,
if (ret)
goto err_uio_dev_add_attributes;
- info->uio_dev = idev;
-
if (info->irq && (info->irq != UIO_IRQ_CUSTOM)) {
/*
* Note that we deliberately don't use devm_request_irq
@@ -850,6 +848,7 @@ int __uio_register_device(struct module *owner,
goto err_request_irq;
}
+ info->uio_dev = idev;
return 0;
err_request_irq:
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 61ea87917433..4380e4f600ab 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -453,7 +453,7 @@ static int clear_wdm_read_flag(struct wdm_device *desc)
set_bit(WDM_RESPONDING, &desc->flags);
spin_unlock_irq(&desc->iuspin);
- rv = usb_submit_urb(desc->response, GFP_KERNEL);
+ rv = usb_submit_urb(desc->response, GFP_ATOMIC);
spin_lock_irq(&desc->iuspin);
if (rv) {
dev_err(&desc->intf->dev,
diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c
index 40378487e023..a5e3e410db4e 100644
--- a/drivers/usb/core/hcd-pci.c
+++ b/drivers/usb/core/hcd-pci.c
@@ -529,8 +529,6 @@ static int resume_common(struct device *dev, int event)
event == PM_EVENT_RESTORE);
if (retval) {
dev_err(dev, "PCI post-resume error %d!\n", retval);
- if (hcd->shared_hcd)
- usb_hc_died(hcd->shared_hcd);
usb_hc_died(hcd);
}
}
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index dfab6c1e1c18..588ef3accfd3 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -1303,6 +1303,11 @@ void usb_enable_interface(struct usb_device *dev,
* is submitted that needs that bandwidth. Some other operating systems
* allocate bandwidth early, when a configuration is chosen.
*
+ * xHCI reserves bandwidth and configures the alternate setting in
+ * usb_hcd_alloc_bandwidth(). If it fails the original interface altsetting
+ * may be disabled. Drivers cannot rely on any particular alternate
+ * setting being in effect after a failure.
+ *
* This call is synchronous, and may not be used in an interrupt context.
* Also, drivers must not change altsettings while urbs are scheduled for
* endpoints in that interface; all such urbs must first be completed
@@ -1338,6 +1343,12 @@ int usb_set_interface(struct usb_device *dev, int interface, int alternate)
alternate);
return -EINVAL;
}
+ /*
+ * usb3 hosts configure the interface in usb_hcd_alloc_bandwidth,
+ * including freeing dropped endpoint ring buffers.
+ * Make sure the interface endpoints are flushed before that
+ */
+ usb_disable_interface(dev, iface, false);
/* Make sure we have enough bandwidth for this alternate interface.
* Remove the current alt setting and add the new alt setting.
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 46169cc94b36..0c5704bda73d 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -37,6 +37,10 @@ static const struct usb_device_id usb_quirk_list[] = {
/* CBM - Flash disk */
{ USB_DEVICE(0x0204, 0x6025), .driver_info = USB_QUIRK_RESET_RESUME },
+ /* WORLDE Controller KS49 or Prodipe MIDI 49C USB controller */
+ { USB_DEVICE(0x0218, 0x0201), .driver_info =
+ USB_QUIRK_CONFIG_INTF_STRINGS },
+
/* WORLDE easy key (easykey.25) MIDI controller */
{ USB_DEVICE(0x0218, 0x0401), .driver_info =
USB_QUIRK_CONFIG_INTF_STRINGS },
@@ -270,6 +274,9 @@ static const struct usb_device_id usb_quirk_list[] = {
{ USB_DEVICE(0x2040, 0x7200), .driver_info =
USB_QUIRK_CONFIG_INTF_STRINGS },
+ /* DJI CineSSD */
+ { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM },
+
/* INTEL VALUE SSD */
{ USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c
index a47de8c31ce9..8efeadf30b4d 100644
--- a/drivers/usb/gadget/udc/net2280.c
+++ b/drivers/usb/gadget/udc/net2280.c
@@ -1542,11 +1542,14 @@ static int net2280_pullup(struct usb_gadget *_gadget, int is_on)
writel(tmp | BIT(USB_DETECT_ENABLE), &dev->usb->usbctl);
} else {
writel(tmp & ~BIT(USB_DETECT_ENABLE), &dev->usb->usbctl);
- stop_activity(dev, dev->driver);
+ stop_activity(dev, NULL);
}
spin_unlock_irqrestore(&dev->lock, flags);
+ if (!is_on && dev->driver)
+ dev->driver->disconnect(&dev->gadget);
+
return 0;
}
@@ -2425,8 +2428,11 @@ static void stop_activity(struct net2280 *dev, struct usb_gadget_driver *driver)
nuke(&dev->ep[i]);
/* report disconnect; the driver is already quiesced */
- if (driver)
+ if (driver) {
+ spin_unlock(&dev->lock);
driver->disconnect(&dev->gadget);
+ spin_lock(&dev->lock);
+ }
usb_reinit(dev);
}
@@ -3272,6 +3278,8 @@ next_endpoints:
BIT(PCI_RETRY_ABORT_INTERRUPT))
static void handle_stat1_irqs(struct net2280 *dev, u32 stat)
+__releases(dev->lock)
+__acquires(dev->lock)
{
struct net2280_ep *ep;
u32 tmp, num, mask, scratch;
@@ -3312,12 +3320,14 @@ static void handle_stat1_irqs(struct net2280 *dev, u32 stat)
if (disconnect || reset) {
stop_activity(dev, dev->driver);
ep0_start(dev);
+ spin_unlock(&dev->lock);
if (reset)
usb_gadget_udc_reset
(&dev->gadget, dev->driver);
else
(dev->driver->disconnect)
(&dev->gadget);
+ spin_lock(&dev->lock);
return;
}
}
@@ -3336,6 +3346,7 @@ static void handle_stat1_irqs(struct net2280 *dev, u32 stat)
tmp = BIT(SUSPEND_REQUEST_CHANGE_INTERRUPT);
if (stat & tmp) {
writel(tmp, &dev->regs->irqstat1);
+ spin_unlock(&dev->lock);
if (stat & BIT(SUSPEND_REQUEST_INTERRUPT)) {
if (dev->driver->suspend)
dev->driver->suspend(&dev->gadget);
@@ -3346,6 +3357,7 @@ static void handle_stat1_irqs(struct net2280 *dev, u32 stat)
dev->driver->resume(&dev->gadget);
/* at high speed, note erratum 0133 */
}
+ spin_lock(&dev->lock);
stat &= ~tmp;
}
diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c
index 692ccc69345e..d5434e7a3b2e 100644
--- a/drivers/usb/host/u132-hcd.c
+++ b/drivers/usb/host/u132-hcd.c
@@ -2565,7 +2565,7 @@ static int u132_get_frame(struct usb_hcd *hcd)
} else {
int frame = 0;
dev_err(&u132->platform_dev->dev, "TODO: u132_get_frame\n");
- msleep(100);
+ mdelay(100);
return frame;
}
}
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 1ec0c5d89c43..849a20ad8e17 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -3686,6 +3686,9 @@ void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
}
spin_lock_irqsave(&xhci->lock, flags);
+
+ virt_dev->udev = NULL;
+
/* Don't disable the slot if the host controller is dead. */
state = readl(&xhci->op_regs->status);
if (state == 0xffffffff || (xhci->xhc_state & XHCI_STATE_DYING) ||
diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c
index 442b6631162e..3d750671b85a 100644
--- a/drivers/usb/misc/uss720.c
+++ b/drivers/usb/misc/uss720.c
@@ -388,7 +388,7 @@ static unsigned char parport_uss720_frob_control(struct parport *pp, unsigned ch
mask &= 0x0f;
val &= 0x0f;
d = (priv->reg[1] & (~mask)) ^ val;
- if (set_1284_register(pp, 2, d, GFP_KERNEL))
+ if (set_1284_register(pp, 2, d, GFP_ATOMIC))
return 0;
priv->reg[1] = d;
return d & 0xf;
@@ -398,7 +398,7 @@ static unsigned char parport_uss720_read_status(struct parport *pp)
{
unsigned char ret;
- if (get_1284_register(pp, 1, &ret, GFP_KERNEL))
+ if (get_1284_register(pp, 1, &ret, GFP_ATOMIC))
return 0;
return ret & 0xf8;
}
diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c
index 512c84adcace..e8e8702d5adf 100644
--- a/drivers/usb/misc/yurex.c
+++ b/drivers/usb/misc/yurex.c
@@ -439,13 +439,13 @@ static ssize_t yurex_write(struct file *file, const char __user *user_buffer,
{
struct usb_yurex *dev;
int i, set = 0, retval = 0;
- char buffer[16];
+ char buffer[16 + 1];
char *data = buffer;
unsigned long long c, c2 = 0;
signed long timeout = 0;
DEFINE_WAIT(wait);
- count = min(sizeof(buffer), count);
+ count = min(sizeof(buffer) - 1, count);
dev = file->private_data;
/* verify that we actually have some data to write */
@@ -464,6 +464,7 @@ static ssize_t yurex_write(struct file *file, const char __user *user_buffer,
retval = -EFAULT;
goto error;
}
+ buffer[count] = 0;
memset(dev->cntl_buffer, CMD_PADDING, YUREX_BUF_SIZE);
switch (buffer[0]) {
diff --git a/drivers/usb/serial/io_ti.h b/drivers/usb/serial/io_ti.h
index 1bd67b24f916..bc9ff5ebd67c 100644
--- a/drivers/usb/serial/io_ti.h
+++ b/drivers/usb/serial/io_ti.h
@@ -178,7 +178,7 @@ struct ump_interrupt {
} __attribute__((packed));
-#define TIUMP_GET_PORT_FROM_CODE(c) (((c) >> 4) - 3)
+#define TIUMP_GET_PORT_FROM_CODE(c) (((c) >> 6) & 0x01)
#define TIUMP_GET_FUNC_FROM_CODE(c) ((c) & 0x0f)
#define TIUMP_INTERRUPT_CODE_LSR 0x03
#define TIUMP_INTERRUPT_CODE_MSR 0x04
diff --git a/drivers/usb/serial/ti_usb_3410_5052.h b/drivers/usb/serial/ti_usb_3410_5052.h
index 98f35c656c02..0cd247f75b8b 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.h
+++ b/drivers/usb/serial/ti_usb_3410_5052.h
@@ -227,7 +227,7 @@ struct ti_interrupt {
} __attribute__((packed));
/* Interrupt codes */
-#define TI_GET_PORT_FROM_CODE(c) (((c) >> 4) - 3)
+#define TI_GET_PORT_FROM_CODE(c) (((c) >> 6) & 0x01)
#define TI_GET_FUNC_FROM_CODE(c) ((c) & 0x0f)
#define TI_CODE_HARDWARE_ERROR 0xFF
#define TI_CODE_DATA_ERROR 0x03
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 1159bb81e63d..e42b5dec85d8 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -345,6 +345,15 @@ static int queuecommand_lck(struct scsi_cmnd *srb,
return 0;
}
+ if ((us->fflags & US_FL_NO_ATA_1X) &&
+ (srb->cmnd[0] == ATA_12 || srb->cmnd[0] == ATA_16)) {
+ memcpy(srb->sense_buffer, usb_stor_sense_invalidCDB,
+ sizeof(usb_stor_sense_invalidCDB));
+ srb->result = SAM_STAT_CHECK_CONDITION;
+ done(srb);
+ return 0;
+ }
+
/* enqueue the command and wake up the control thread */
srb->scsi_done = done;
us->srb = srb;
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index f5aa5ca66da2..6a7d814b72bf 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -2219,6 +2219,13 @@ UNUSUAL_DEV( 0x4146, 0xba01, 0x0100, 0x0100,
"Micro Mini 1GB",
USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NOT_LOCKABLE ),
+/* Reported-by: Tim Anderson <tsa@biglakesoftware.com> */
+UNUSUAL_DEV( 0x2ca3, 0x0031, 0x0000, 0x9999,
+ "DJI",
+ "CineSSD",
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_NO_ATA_1X),
+
/*
* Nick Bowler <nbowler@elliptictech.com>
* SCSI stack spams (otherwise harmless) error messages.
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index bf348a6e6517..c682f441282a 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1723,12 +1723,12 @@ static int do_register_framebuffer(struct fb_info *fb_info)
return 0;
}
-static int do_unregister_framebuffer(struct fb_info *fb_info)
+static int unbind_console(struct fb_info *fb_info)
{
struct fb_event event;
- int i, ret = 0;
+ int ret;
+ int i = fb_info->node;
- i = fb_info->node;
if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info)
return -EINVAL;
@@ -1743,17 +1743,29 @@ static int do_unregister_framebuffer(struct fb_info *fb_info)
unlock_fb_info(fb_info);
console_unlock();
+ return ret;
+}
+
+static int __unlink_framebuffer(struct fb_info *fb_info);
+
+static int do_unregister_framebuffer(struct fb_info *fb_info)
+{
+ struct fb_event event;
+ int ret;
+
+ ret = unbind_console(fb_info);
+
if (ret)
return -EINVAL;
pm_vt_switch_unregister(fb_info->dev);
- unlink_framebuffer(fb_info);
+ __unlink_framebuffer(fb_info);
if (fb_info->pixmap.addr &&
(fb_info->pixmap.flags & FB_PIXMAP_DEFAULT))
kfree(fb_info->pixmap.addr);
fb_destroy_modelist(&fb_info->modelist);
- registered_fb[i] = NULL;
+ registered_fb[fb_info->node] = NULL;
num_registered_fb--;
fb_cleanup_device(fb_info);
event.info = fb_info;
@@ -1766,7 +1778,7 @@ static int do_unregister_framebuffer(struct fb_info *fb_info)
return 0;
}
-int unlink_framebuffer(struct fb_info *fb_info)
+static int __unlink_framebuffer(struct fb_info *fb_info)
{
int i;
@@ -1778,6 +1790,20 @@ int unlink_framebuffer(struct fb_info *fb_info)
device_destroy(fb_class, MKDEV(FB_MAJOR, i));
fb_info->dev = NULL;
}
+
+ return 0;
+}
+
+int unlink_framebuffer(struct fb_info *fb_info)
+{
+ int ret;
+
+ ret = __unlink_framebuffer(fb_info);
+ if (ret)
+ return ret;
+
+ unbind_console(fb_info);
+
return 0;
}
EXPORT_SYMBOL(unlink_framebuffer);
diff --git a/drivers/video/fbdev/core/modedb.c b/drivers/video/fbdev/core/modedb.c
index 2510fa728d77..de119f11b78f 100644
--- a/drivers/video/fbdev/core/modedb.c
+++ b/drivers/video/fbdev/core/modedb.c
@@ -644,7 +644,7 @@ static int fb_try_mode(struct fb_var_screeninfo *var, struct fb_info *info,
*
* Valid mode specifiers for @mode_option:
*
- * <xres>x<yres>[M][R][-<bpp>][@<refresh>][i][m] or
+ * <xres>x<yres>[M][R][-<bpp>][@<refresh>][i][p][m] or
* <name>[-<bpp>][@<refresh>]
*
* with <xres>, <yres>, <bpp> and <refresh> decimal numbers and
@@ -653,10 +653,10 @@ static int fb_try_mode(struct fb_var_screeninfo *var, struct fb_info *info,
* If 'M' is present after yres (and before refresh/bpp if present),
* the function will compute the timings using VESA(tm) Coordinated
* Video Timings (CVT). If 'R' is present after 'M', will compute with
- * reduced blanking (for flatpanels). If 'i' is present, compute
- * interlaced mode. If 'm' is present, add margins equal to 1.8%
- * of xres rounded down to 8 pixels, and 1.8% of yres. The char
- * 'i' and 'm' must be after 'M' and 'R'. Example:
+ * reduced blanking (for flatpanels). If 'i' or 'p' are present, compute
+ * interlaced or progressive mode. If 'm' is present, add margins equal
+ * to 1.8% of xres rounded down to 8 pixels, and 1.8% of yres. The chars
+ * 'i', 'p' and 'm' must be after 'M' and 'R'. Example:
*
* 1024x768MR-8@60m - Reduced blank with margins at 60Hz.
*
@@ -697,7 +697,8 @@ int fb_find_mode(struct fb_var_screeninfo *var,
unsigned int namelen = strlen(name);
int res_specified = 0, bpp_specified = 0, refresh_specified = 0;
unsigned int xres = 0, yres = 0, bpp = default_bpp, refresh = 0;
- int yres_specified = 0, cvt = 0, rb = 0, interlace = 0;
+ int yres_specified = 0, cvt = 0, rb = 0;
+ int interlace_specified = 0, interlace = 0;
int margins = 0;
u32 best, diff, tdiff;
@@ -748,9 +749,17 @@ int fb_find_mode(struct fb_var_screeninfo *var,
if (!cvt)
margins = 1;
break;
+ case 'p':
+ if (!cvt) {
+ interlace = 0;
+ interlace_specified = 1;
+ }
+ break;
case 'i':
- if (!cvt)
+ if (!cvt) {
interlace = 1;
+ interlace_specified = 1;
+ }
break;
default:
goto done;
@@ -819,11 +828,21 @@ done:
if ((name_matches(db[i], name, namelen) ||
(res_specified && res_matches(db[i], xres, yres))) &&
!fb_try_mode(var, info, &db[i], bpp)) {
- if (refresh_specified && db[i].refresh == refresh)
- return 1;
+ const int db_interlace = (db[i].vmode &
+ FB_VMODE_INTERLACED ? 1 : 0);
+ int score = abs(db[i].refresh - refresh);
+
+ if (interlace_specified)
+ score += abs(db_interlace - interlace);
+
+ if (!interlace_specified ||
+ db_interlace == interlace)
+ if (refresh_specified &&
+ db[i].refresh == refresh)
+ return 1;
- if (abs(db[i].refresh - refresh) < diff) {
- diff = abs(db[i].refresh - refresh);
+ if (score < diff) {
+ diff = score;
best = i;
}
}
diff --git a/drivers/video/fbdev/goldfishfb.c b/drivers/video/fbdev/goldfishfb.c
index 88adb2970b44..39662b2e3537 100644
--- a/drivers/video/fbdev/goldfishfb.c
+++ b/drivers/video/fbdev/goldfishfb.c
@@ -368,6 +368,7 @@ static int goldfish_fb_remove(struct platform_device *pdev)
dma_free_coherent(&pdev->dev, framesize, (void *)fb->fb.screen_base,
fb->fb.fix.smem_start);
iounmap(fb->reg_base);
+ kfree(fb);
return 0;
}
diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c
index 393ae1bc07e8..a8a6f072fb78 100644
--- a/drivers/video/fbdev/omap/omapfb_main.c
+++ b/drivers/video/fbdev/omap/omapfb_main.c
@@ -977,7 +977,7 @@ int omapfb_register_client(struct omapfb_notifier_block *omapfb_nb,
{
int r;
- if ((unsigned)omapfb_nb->plane_idx > OMAPFB_PLANE_NUM)
+ if ((unsigned)omapfb_nb->plane_idx >= OMAPFB_PLANE_NUM)
return -EINVAL;
if (!notifier_inited) {
diff --git a/drivers/video/fbdev/via/viafbdev.c b/drivers/video/fbdev/via/viafbdev.c
index badee04ef496..71b5dca95bdb 100644
--- a/drivers/video/fbdev/via/viafbdev.c
+++ b/drivers/video/fbdev/via/viafbdev.c
@@ -19,6 +19,7 @@
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
+#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
@@ -1468,7 +1469,7 @@ static const struct file_operations viafb_vt1636_proc_fops = {
#endif /* CONFIG_FB_VIA_DIRECT_PROCFS */
-static int viafb_sup_odev_proc_show(struct seq_file *m, void *v)
+static int __maybe_unused viafb_sup_odev_proc_show(struct seq_file *m, void *v)
{
via_odev_to_seq(m, supported_odev_map[
viaparinfo->shared->chip_info.gfx_chip_name]);
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index e3d026ac382e..f35168ce426b 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -107,7 +107,7 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
{
struct kvec kvec = {.iov_base = (void *)value, .iov_len = value_len};
struct iov_iter from;
- int retval;
+ int retval, err;
iov_iter_kvec(&from, WRITE | ITER_KVEC, &kvec, 1, value_len);
@@ -128,7 +128,9 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
retval);
else
p9_client_write(fid, 0, &from, &retval);
- p9_client_clunk(fid);
+ err = p9_client_clunk(fid);
+ if (!retval && err)
+ retval = err;
return retval;
}
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 502d3892d8a4..d71e7ad4d382 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -18,6 +18,7 @@
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/list.h>
+#include <linux/magic.h>
/* This is the range of ioctl() numbers we claim as ours */
#define AUTOFS_IOC_FIRST AUTOFS_IOC_READY
@@ -135,7 +136,8 @@ struct autofs_sb_info {
static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
{
- return (struct autofs_sb_info *)(sb->s_fs_info);
+ return sb->s_magic != AUTOFS_SUPER_MAGIC ?
+ NULL : (struct autofs_sb_info *)(sb->s_fs_info);
}
static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry)
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index a3ae0b2aeb5a..1132fe71b312 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -18,7 +18,6 @@
#include <linux/pagemap.h>
#include <linux/parser.h>
#include <linux/bitops.h>
-#include <linux/magic.h>
#include "autofs_i.h"
#include <linux/module.h>
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b9065d672887..ef90667e048b 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1711,7 +1711,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
const struct user_regset *regset = &view->regsets[i];
do_thread_regset_writeback(t->task, regset);
if (regset->core_note_type && regset->get &&
- (!regset->active || regset->active(t->task, regset))) {
+ (!regset->active || regset->active(t->task, regset) > 0)) {
int ret;
size_t size = regset->n * regset->size;
void *data = kmalloc(size, GFP_KERNEL);
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 1e668fb7dd4c..176a27bc63aa 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -574,6 +574,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
btrfs_rm_dev_replace_unblocked(fs_info);
/*
+ * Increment dev_stats_ccnt so that btrfs_run_dev_stats() will
+ * update on-disk dev stats value during commit transaction
+ */
+ atomic_inc(&tgt_device->dev_stats_ccnt);
+
+ /*
* this is again a consistent state where no dev_replace procedure
* is running, the target device is part of the filesystem, the
* source device is not part of the filesystem anymore and its 1st
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d106b981d86f..ae6e3a30e61e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1011,8 +1011,9 @@ static int btree_writepages(struct address_space *mapping,
fs_info = BTRFS_I(mapping->host)->root->fs_info;
/* this is a bit racy, but that's ok */
- ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes,
- BTRFS_DIRTY_METADATA_THRESH);
+ ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes,
+ BTRFS_DIRTY_METADATA_THRESH,
+ fs_info->dirty_metadata_batch);
if (ret < 0)
return 0;
}
@@ -3987,8 +3988,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
if (flush_delayed)
btrfs_balance_delayed_items(root);
- ret = percpu_counter_compare(&root->fs_info->dirty_metadata_bytes,
- BTRFS_DIRTY_METADATA_THRESH);
+ ret = __percpu_counter_compare(&root->fs_info->dirty_metadata_bytes,
+ BTRFS_DIRTY_METADATA_THRESH,
+ root->fs_info->dirty_metadata_batch);
if (ret > 0) {
balance_dirty_pages_ratelimited(
root->fs_info->btree_inode->i_mapping);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 493c7354ec0b..a72f941ca750 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -10410,7 +10410,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
/* Don't want to race with allocators so take the groups_sem */
down_write(&space_info->groups_sem);
spin_lock(&block_group->lock);
- if (block_group->reserved ||
+ if (block_group->reserved || block_group->pinned ||
btrfs_block_group_used(&block_group->item) ||
block_group->ro ||
list_is_singular(&block_group->list)) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 88bee6703cc0..7de8d545f4d6 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3932,8 +3932,8 @@ retry:
if (wbc->sync_mode == WB_SYNC_ALL)
tag_pages_for_writeback(mapping, index, end);
while (!done && !nr_to_write_done && (index <= end) &&
- (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+ (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
+ tag))) {
unsigned i;
scanned = 1;
@@ -3943,11 +3943,6 @@ retry:
if (!PagePrivate(page))
continue;
- if (!wbc->range_cyclic && page->index > end) {
- done = 1;
- break;
- }
-
spin_lock(&mapping->private_lock);
if (!PagePrivate(page)) {
spin_unlock(&mapping->private_lock);
@@ -4076,8 +4071,8 @@ retry:
if (wbc->sync_mode == WB_SYNC_ALL)
tag_pages_for_writeback(mapping, index, end);
while (!done && !nr_to_write_done && (index <= end) &&
- (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+ (nr_pages = pagevec_lookup_range_tag(&pvec, mapping,
+ &index, end, tag))) {
unsigned i;
scanned = 1;
@@ -4101,12 +4096,6 @@ retry:
continue;
}
- if (!wbc->range_cyclic && page->index > end) {
- done = 1;
- unlock_page(page);
- continue;
- }
-
if (wbc->sync_mode != WB_SYNC_NONE) {
if (PageWriteback(page))
flush_fn(data);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 9ebe027cc4b7..cfe913d2d3df 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1318,18 +1318,19 @@ static void __del_reloc_root(struct btrfs_root *root)
struct mapping_node *node = NULL;
struct reloc_control *rc = root->fs_info->reloc_ctl;
- spin_lock(&rc->reloc_root_tree.lock);
- rb_node = tree_search(&rc->reloc_root_tree.rb_root,
- root->node->start);
- if (rb_node) {
- node = rb_entry(rb_node, struct mapping_node, rb_node);
- rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
+ if (rc) {
+ spin_lock(&rc->reloc_root_tree.lock);
+ rb_node = tree_search(&rc->reloc_root_tree.rb_root,
+ root->node->start);
+ if (rb_node) {
+ node = rb_entry(rb_node, struct mapping_node, rb_node);
+ rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
+ }
+ spin_unlock(&rc->reloc_root_tree.lock);
+ if (!node)
+ return;
+ BUG_ON((struct btrfs_root *)node->data != root);
}
- spin_unlock(&rc->reloc_root_tree.lock);
-
- if (!node)
- return;
- BUG_ON((struct btrfs_root *)node->data != root);
spin_lock(&root->fs_info->trans_lock);
list_del_init(&root->root_list);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 22bae2b434e2..26607401edfe 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -786,8 +786,7 @@ retry:
struct page **pages = NULL;
mempool_t *pool = NULL; /* Becomes non-null if mempool used */
struct page *page;
- int want;
- u64 offset, len;
+ u64 offset = 0, len = 0;
long writeback_stat;
next = 0;
@@ -796,14 +795,9 @@ retry:
get_more_pages:
first = -1;
- want = min(end - index,
- min((pgoff_t)PAGEVEC_SIZE,
- max_pages - (pgoff_t)locked_pages) - 1)
- + 1;
- pvec_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_DIRTY,
- want);
- dout("pagevec_lookup_tag got %d\n", pvec_pages);
+ pvec_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
+ end, PAGECACHE_TAG_DIRTY);
+ dout("pagevec_lookup_range_tag got %d\n", pvec_pages);
if (!pvec_pages && !locked_pages)
break;
for (i = 0; i < pvec_pages && locked_pages < max_pages; i++) {
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index bcbe42fb7e92..0e72a14228f8 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -285,6 +285,10 @@ static ssize_t cifs_stats_proc_write(struct file *file,
atomic_set(&totBufAllocCount, 0);
atomic_set(&totSmBufAllocCount, 0);
#endif /* CONFIG_CIFS_STATS2 */
+ spin_lock(&GlobalMid_Lock);
+ GlobalMaxActiveXid = 0;
+ GlobalCurrentXid = 0;
+ spin_unlock(&GlobalMid_Lock);
spin_lock(&cifs_tcp_ses_lock);
list_for_each(tmp1, &cifs_tcp_ses_list) {
server = list_entry(tmp1, struct TCP_Server_Info,
@@ -297,6 +301,10 @@ static ssize_t cifs_stats_proc_write(struct file *file,
struct cifs_tcon,
tcon_list);
atomic_set(&tcon->num_smbs_sent, 0);
+ spin_lock(&tcon->stat_lock);
+ tcon->bytes_read = 0;
+ tcon->bytes_written = 0;
+ spin_unlock(&tcon->stat_lock);
if (server->ops->clear_stats)
server->ops->clear_stats(tcon);
}
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 97d1a15873c5..57b039ebfb1f 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -373,8 +373,15 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level)
new_entry = old_entry + sizeof(FIND_FILE_STANDARD_INFO) +
pfData->FileNameLength;
- } else
- new_entry = old_entry + le32_to_cpu(pDirInfo->NextEntryOffset);
+ } else {
+ u32 next_offset = le32_to_cpu(pDirInfo->NextEntryOffset);
+
+ if (old_entry + next_offset < old_entry) {
+ cifs_dbg(VFS, "invalid offset %u\n", next_offset);
+ return NULL;
+ }
+ new_entry = old_entry + next_offset;
+ }
cifs_dbg(FYI, "new entry %p old entry %p\n", new_entry, old_entry);
/* validate that new_entry is not past end of SMB */
if (new_entry >= end_of_smb) {
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 76ccf20fbfb7..0e62bf1ebbd7 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -185,6 +185,13 @@ smb2_check_message(char *buf, unsigned int length)
return 0;
/*
+ * Some windows servers (win2016) will pad also the final
+ * PDU in a compound to 8 bytes.
+ */
+ if (((clc_len + 7) & ~7) == len)
+ return 0;
+
+ /*
* MacOS server pads after SMB2.1 write response with 3 bytes
* of junk. Other servers match RFC1001 len to actual
* SMB2/SMB3 frame length (header + smb2 response specific data)
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 5f5ba807b414..f7111bb88ec1 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -315,7 +315,7 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon,
smb2_hdr_assemble((struct smb2_hdr *) *request_buf, smb2_command, tcon);
if (tcon != NULL) {
-#ifdef CONFIG_CIFS_STATS2
+#ifdef CONFIG_CIFS_STATS
uint16_t com_code = le16_to_cpu(smb2_command);
cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_sent[com_code]);
#endif
@@ -2402,33 +2402,38 @@ num_entries(char *bufstart, char *end_of_buf, char **lastentry, size_t size)
int len;
unsigned int entrycount = 0;
unsigned int next_offset = 0;
- FILE_DIRECTORY_INFO *entryptr;
+ char *entryptr;
+ FILE_DIRECTORY_INFO *dir_info;
if (bufstart == NULL)
return 0;
- entryptr = (FILE_DIRECTORY_INFO *)bufstart;
+ entryptr = bufstart;
while (1) {
- entryptr = (FILE_DIRECTORY_INFO *)
- ((char *)entryptr + next_offset);
-
- if ((char *)entryptr + size > end_of_buf) {
+ if (entryptr + next_offset < entryptr ||
+ entryptr + next_offset > end_of_buf ||
+ entryptr + next_offset + size > end_of_buf) {
cifs_dbg(VFS, "malformed search entry would overflow\n");
break;
}
- len = le32_to_cpu(entryptr->FileNameLength);
- if ((char *)entryptr + len + size > end_of_buf) {
+ entryptr = entryptr + next_offset;
+ dir_info = (FILE_DIRECTORY_INFO *)entryptr;
+
+ len = le32_to_cpu(dir_info->FileNameLength);
+ if (entryptr + len < entryptr ||
+ entryptr + len > end_of_buf ||
+ entryptr + len + size > end_of_buf) {
cifs_dbg(VFS, "directory entry name would overflow frame end of buf %p\n",
end_of_buf);
break;
}
- *lastentry = (char *)entryptr;
+ *lastentry = entryptr;
entrycount++;
- next_offset = le32_to_cpu(entryptr->NextEntryOffset);
+ next_offset = le32_to_cpu(dir_info->NextEntryOffset);
if (!next_offset)
break;
}
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 0758d32ad01b..0f46cf550907 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -162,12 +162,8 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
}
req = skcipher_request_alloc(tfm, gfp_flags);
- if (!req) {
- printk_ratelimited(KERN_ERR
- "%s: crypto_request_alloc() failed\n",
- __func__);
+ if (!req)
return -ENOMEM;
- }
skcipher_request_set_callback(
req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
@@ -184,9 +180,10 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
skcipher_request_free(req);
if (res) {
- printk_ratelimited(KERN_ERR
- "%s: crypto_skcipher_encrypt() returned %d\n",
- __func__, res);
+ fscrypt_err(inode->i_sb,
+ "%scryption failed for inode %lu, block %llu: %d",
+ (rw == FS_DECRYPT ? "de" : "en"),
+ inode->i_ino, lblk_num, res);
return res;
}
return 0;
@@ -332,7 +329,6 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
return 0;
}
- /* this should eventually be an flag in d_flags */
spin_lock(&dentry->d_lock);
cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY;
spin_unlock(&dentry->d_lock);
@@ -359,7 +355,6 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
const struct dentry_operations fscrypt_d_ops = {
.d_revalidate = fscrypt_d_revalidate,
};
-EXPORT_SYMBOL(fscrypt_d_ops);
void fscrypt_restore_control_page(struct page *page)
{
@@ -428,13 +423,43 @@ fail:
return res;
}
+void fscrypt_msg(struct super_block *sb, const char *level,
+ const char *fmt, ...)
+{
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+ struct va_format vaf;
+ va_list args;
+
+ if (!__ratelimit(&rs))
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ if (sb)
+ printk("%sfscrypt (%s): %pV\n", level, sb->s_id, &vaf);
+ else
+ printk("%sfscrypt: %pV\n", level, &vaf);
+ va_end(args);
+}
+
/**
* fscrypt_init() - Set up for fs encryption.
*/
static int __init fscrypt_init(void)
{
+ /*
+ * Use an unbound workqueue to allow bios to be decrypted in parallel
+ * even when they happen to complete on the same CPU. This sacrifices
+ * locality, but it's worthwhile since decryption is CPU-intensive.
+ *
+ * Also use a high-priority workqueue to prioritize decryption work,
+ * which blocks reads from completing, over regular application tasks.
+ */
fscrypt_read_workqueue = alloc_workqueue("fscrypt_read_queue",
- WQ_HIGHPRI, 0);
+ WQ_UNBOUND | WQ_HIGHPRI,
+ num_online_cpus());
if (!fscrypt_read_workqueue)
goto fail;
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index b18fa323d1d9..1bdb9f226eec 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -58,11 +58,8 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname,
/* Set up the encryption request */
req = skcipher_request_alloc(tfm, GFP_NOFS);
- if (!req) {
- printk_ratelimited(KERN_ERR
- "%s: skcipher_request_alloc() failed\n", __func__);
+ if (!req)
return -ENOMEM;
- }
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
crypto_req_done, &wait);
@@ -73,8 +70,9 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname,
res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
skcipher_request_free(req);
if (res < 0) {
- printk_ratelimited(KERN_ERR
- "%s: Error (error code %d)\n", __func__, res);
+ fscrypt_err(inode->i_sb,
+ "Filename encryption failed for inode %lu: %d",
+ inode->i_ino, res);
return res;
}
@@ -95,23 +93,14 @@ static int fname_decrypt(struct inode *inode,
struct skcipher_request *req = NULL;
DECLARE_CRYPTO_WAIT(wait);
struct scatterlist src_sg, dst_sg;
- struct fscrypt_info *ci = inode->i_crypt_info;
- struct crypto_skcipher *tfm = ci->ci_ctfm;
+ struct crypto_skcipher *tfm = inode->i_crypt_info->ci_ctfm;
int res = 0;
char iv[FS_CRYPTO_BLOCK_SIZE];
- unsigned lim;
-
- lim = inode->i_sb->s_cop->max_namelen(inode);
- if (iname->len <= 0 || iname->len > lim)
- return -EIO;
/* Allocate request */
req = skcipher_request_alloc(tfm, GFP_NOFS);
- if (!req) {
- printk_ratelimited(KERN_ERR
- "%s: crypto_request_alloc() failed\n", __func__);
+ if (!req)
return -ENOMEM;
- }
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
crypto_req_done, &wait);
@@ -126,8 +115,9 @@ static int fname_decrypt(struct inode *inode,
res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait);
skcipher_request_free(req);
if (res < 0) {
- printk_ratelimited(KERN_ERR
- "%s: Error (error code %d)\n", __func__, res);
+ fscrypt_err(inode->i_sb,
+ "Filename decryption failed for inode %lu: %d",
+ inode->i_ino, res);
return res;
}
@@ -340,12 +330,12 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
return 0;
}
ret = fscrypt_get_encryption_info(dir);
- if (ret && ret != -EOPNOTSUPP)
+ if (ret)
return ret;
if (dir->i_crypt_info) {
if (!fscrypt_fname_encrypted_size(dir, iname->len,
- dir->i_sb->s_cop->max_namelen(dir),
+ dir->i_sb->s_cop->max_namelen,
&fname->crypto_buf.len))
return -ENAMETOOLONG;
fname->crypto_buf.name = kmalloc(fname->crypto_buf.len,
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index fe6f6524c1aa..ea372cd53ab6 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -17,15 +17,7 @@
/* Encryption parameters */
#define FS_IV_SIZE 16
-#define FS_AES_128_ECB_KEY_SIZE 16
-#define FS_AES_128_CBC_KEY_SIZE 16
-#define FS_AES_128_CTS_KEY_SIZE 16
-#define FS_AES_256_GCM_KEY_SIZE 32
-#define FS_AES_256_CBC_KEY_SIZE 32
-#define FS_AES_256_CTS_KEY_SIZE 32
-#define FS_AES_256_XTS_KEY_SIZE 64
-
-#define FS_KEY_DERIVATION_NONCE_SIZE 16
+#define FS_KEY_DERIVATION_NONCE_SIZE 16
/**
* Encryption context for inode
@@ -119,6 +111,15 @@ extern int fscrypt_do_page_crypto(const struct inode *inode,
gfp_t gfp_flags);
extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
gfp_t gfp_flags);
+extern const struct dentry_operations fscrypt_d_ops;
+
+extern void __printf(3, 4) __cold
+fscrypt_msg(struct super_block *sb, const char *level, const char *fmt, ...);
+
+#define fscrypt_warn(sb, fmt, ...) \
+ fscrypt_msg(sb, KERN_WARNING, fmt, ##__VA_ARGS__)
+#define fscrypt_err(sb, fmt, ...) \
+ fscrypt_msg(sb, KERN_ERR, fmt, ##__VA_ARGS__)
/* fname.c */
extern int fname_encrypt(struct inode *inode, const struct qstr *iname,
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index bc010e4609ef..b5328a0c6364 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -39,8 +39,9 @@ int fscrypt_file_open(struct inode *inode, struct file *filp)
dir = dget_parent(file_dentry(filp));
if (IS_ENCRYPTED(d_inode(dir)) &&
!fscrypt_has_permitted_context(d_inode(dir), inode)) {
- pr_warn_ratelimited("fscrypt: inconsistent encryption contexts: %lu/%lu",
- d_inode(dir)->i_ino, inode->i_ino);
+ fscrypt_warn(inode->i_sb,
+ "inconsistent encryption contexts: %lu/%lu",
+ d_inode(dir)->i_ino, inode->i_ino);
err = -EPERM;
}
dput(dir);
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 472f69188a96..382e828f2f9a 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -18,17 +18,16 @@
static struct crypto_shash *essiv_hash_tfm;
-/**
- * derive_key_aes() - Derive a key using AES-128-ECB
- * @deriving_key: Encryption key used for derivation.
- * @source_key: Source key to which to apply derivation.
- * @derived_raw_key: Derived raw key.
+/*
+ * Key derivation function. This generates the derived key by encrypting the
+ * master key with AES-128-ECB using the inode's nonce as the AES key.
*
- * Return: Zero on success; non-zero otherwise.
+ * The master key must be at least as long as the derived key. If the master
+ * key is longer, then only the first 'derived_keysize' bytes are used.
*/
-static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE],
- const struct fscrypt_key *source_key,
- u8 derived_raw_key[FS_MAX_KEY_SIZE])
+static int derive_key_aes(const u8 *master_key,
+ const struct fscrypt_context *ctx,
+ u8 *derived_key, unsigned int derived_keysize)
{
int res = 0;
struct skcipher_request *req = NULL;
@@ -50,14 +49,13 @@ static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE],
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
crypto_req_done, &wait);
- res = crypto_skcipher_setkey(tfm, deriving_key,
- FS_AES_128_ECB_KEY_SIZE);
+ res = crypto_skcipher_setkey(tfm, ctx->nonce, sizeof(ctx->nonce));
if (res < 0)
goto out;
- sg_init_one(&src_sg, source_key->raw, source_key->size);
- sg_init_one(&dst_sg, derived_raw_key, source_key->size);
- skcipher_request_set_crypt(req, &src_sg, &dst_sg, source_key->size,
+ sg_init_one(&src_sg, master_key, derived_keysize);
+ sg_init_one(&dst_sg, derived_key, derived_keysize);
+ skcipher_request_set_crypt(req, &src_sg, &dst_sg, derived_keysize,
NULL);
res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
out:
@@ -66,103 +64,147 @@ out:
return res;
}
-static int validate_user_key(struct fscrypt_info *crypt_info,
- struct fscrypt_context *ctx, u8 *raw_key,
- const char *prefix, int min_keysize)
+/*
+ * Search the current task's subscribed keyrings for a "logon" key with
+ * description prefix:descriptor, and if found acquire a read lock on it and
+ * return a pointer to its validated payload in *payload_ret.
+ */
+static struct key *
+find_and_lock_process_key(const char *prefix,
+ const u8 descriptor[FS_KEY_DESCRIPTOR_SIZE],
+ unsigned int min_keysize,
+ const struct fscrypt_key **payload_ret)
{
char *description;
- struct key *keyring_key;
- struct fscrypt_key *master_key;
+ struct key *key;
const struct user_key_payload *ukp;
- int res;
+ const struct fscrypt_key *payload;
description = kasprintf(GFP_NOFS, "%s%*phN", prefix,
- FS_KEY_DESCRIPTOR_SIZE,
- ctx->master_key_descriptor);
+ FS_KEY_DESCRIPTOR_SIZE, descriptor);
if (!description)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
- keyring_key = request_key(&key_type_logon, description, NULL);
+ key = request_key(&key_type_logon, description, NULL);
kfree(description);
- if (IS_ERR(keyring_key))
- return PTR_ERR(keyring_key);
- down_read(&keyring_key->sem);
-
- if (keyring_key->type != &key_type_logon) {
- printk_once(KERN_WARNING
- "%s: key type must be logon\n", __func__);
- res = -ENOKEY;
- goto out;
- }
- ukp = user_key_payload(keyring_key);
- if (!ukp) {
- /* key was revoked before we acquired its semaphore */
- res = -EKEYREVOKED;
- goto out;
+ if (IS_ERR(key))
+ return key;
+
+ down_read(&key->sem);
+ ukp = user_key_payload(key);
+
+ if (!ukp) /* was the key revoked before we acquired its semaphore? */
+ goto invalid;
+
+ payload = (const struct fscrypt_key *)ukp->data;
+
+ if (ukp->datalen != sizeof(struct fscrypt_key) ||
+ payload->size < 1 || payload->size > FS_MAX_KEY_SIZE) {
+ fscrypt_warn(NULL,
+ "key with description '%s' has invalid payload",
+ key->description);
+ goto invalid;
}
- if (ukp->datalen != sizeof(struct fscrypt_key)) {
- res = -EINVAL;
- goto out;
+
+ if (payload->size < min_keysize) {
+ fscrypt_warn(NULL,
+ "key with description '%s' is too short (got %u bytes, need %u+ bytes)",
+ key->description, payload->size, min_keysize);
+ goto invalid;
}
- master_key = (struct fscrypt_key *)ukp->data;
- BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE);
-
- if (master_key->size < min_keysize || master_key->size > FS_MAX_KEY_SIZE
- || master_key->size % AES_BLOCK_SIZE != 0) {
- printk_once(KERN_WARNING
- "%s: key size incorrect: %d\n",
- __func__, master_key->size);
- res = -ENOKEY;
- goto out;
+
+ *payload_ret = payload;
+ return key;
+
+invalid:
+ up_read(&key->sem);
+ key_put(key);
+ return ERR_PTR(-ENOKEY);
+}
+
+/* Find the master key, then derive the inode's actual encryption key */
+static int find_and_derive_key(const struct inode *inode,
+ const struct fscrypt_context *ctx,
+ u8 *derived_key, unsigned int derived_keysize)
+{
+ struct key *key;
+ const struct fscrypt_key *payload;
+ int err;
+
+ key = find_and_lock_process_key(FS_KEY_DESC_PREFIX,
+ ctx->master_key_descriptor,
+ derived_keysize, &payload);
+ if (key == ERR_PTR(-ENOKEY) && inode->i_sb->s_cop->key_prefix) {
+ key = find_and_lock_process_key(inode->i_sb->s_cop->key_prefix,
+ ctx->master_key_descriptor,
+ derived_keysize, &payload);
}
- res = derive_key_aes(ctx->nonce, master_key, raw_key);
-out:
- up_read(&keyring_key->sem);
- key_put(keyring_key);
- return res;
+ if (IS_ERR(key))
+ return PTR_ERR(key);
+ err = derive_key_aes(payload->raw, ctx, derived_key, derived_keysize);
+ up_read(&key->sem);
+ key_put(key);
+ return err;
}
-static const struct {
+static struct fscrypt_mode {
+ const char *friendly_name;
const char *cipher_str;
int keysize;
+ bool logged_impl_name;
} available_modes[] = {
- [FS_ENCRYPTION_MODE_AES_256_XTS] = { "xts(aes)",
- FS_AES_256_XTS_KEY_SIZE },
- [FS_ENCRYPTION_MODE_AES_256_CTS] = { "cts(cbc(aes))",
- FS_AES_256_CTS_KEY_SIZE },
- [FS_ENCRYPTION_MODE_AES_128_CBC] = { "cbc(aes)",
- FS_AES_128_CBC_KEY_SIZE },
- [FS_ENCRYPTION_MODE_AES_128_CTS] = { "cts(cbc(aes))",
- FS_AES_128_CTS_KEY_SIZE },
- [FS_ENCRYPTION_MODE_SPECK128_256_XTS] = { "xts(speck128)", 64 },
- [FS_ENCRYPTION_MODE_SPECK128_256_CTS] = { "cts(cbc(speck128))", 32 },
+ [FS_ENCRYPTION_MODE_AES_256_XTS] = {
+ .friendly_name = "AES-256-XTS",
+ .cipher_str = "xts(aes)",
+ .keysize = 64,
+ },
+ [FS_ENCRYPTION_MODE_AES_256_CTS] = {
+ .friendly_name = "AES-256-CTS-CBC",
+ .cipher_str = "cts(cbc(aes))",
+ .keysize = 32,
+ },
+ [FS_ENCRYPTION_MODE_AES_128_CBC] = {
+ .friendly_name = "AES-128-CBC",
+ .cipher_str = "cbc(aes)",
+ .keysize = 16,
+ },
+ [FS_ENCRYPTION_MODE_AES_128_CTS] = {
+ .friendly_name = "AES-128-CTS-CBC",
+ .cipher_str = "cts(cbc(aes))",
+ .keysize = 16,
+ },
+ [FS_ENCRYPTION_MODE_SPECK128_256_XTS] = {
+ .friendly_name = "Speck128/256-XTS",
+ .cipher_str = "xts(speck128)",
+ .keysize = 64,
+ },
+ [FS_ENCRYPTION_MODE_SPECK128_256_CTS] = {
+ .friendly_name = "Speck128/256-CTS-CBC",
+ .cipher_str = "cts(cbc(speck128))",
+ .keysize = 32,
+ },
};
-static int determine_cipher_type(struct fscrypt_info *ci, struct inode *inode,
- const char **cipher_str_ret, int *keysize_ret)
+static struct fscrypt_mode *
+select_encryption_mode(const struct fscrypt_info *ci, const struct inode *inode)
{
- u32 mode;
-
if (!fscrypt_valid_enc_modes(ci->ci_data_mode, ci->ci_filename_mode)) {
- pr_warn_ratelimited("fscrypt: inode %lu uses unsupported encryption modes (contents mode %d, filenames mode %d)\n",
- inode->i_ino,
- ci->ci_data_mode, ci->ci_filename_mode);
- return -EINVAL;
+ fscrypt_warn(inode->i_sb,
+ "inode %lu uses unsupported encryption modes (contents mode %d, filenames mode %d)",
+ inode->i_ino, ci->ci_data_mode,
+ ci->ci_filename_mode);
+ return ERR_PTR(-EINVAL);
}
- if (S_ISREG(inode->i_mode)) {
- mode = ci->ci_data_mode;
- } else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
- mode = ci->ci_filename_mode;
- } else {
- WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n",
- inode->i_ino, (inode->i_mode & S_IFMT));
- return -EINVAL;
- }
+ if (S_ISREG(inode->i_mode))
+ return &available_modes[ci->ci_data_mode];
+
+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+ return &available_modes[ci->ci_filename_mode];
- *cipher_str_ret = available_modes[mode].cipher_str;
- *keysize_ret = available_modes[mode].keysize;
- return 0;
+ WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n",
+ inode->i_ino, (inode->i_mode & S_IFMT));
+ return ERR_PTR(-EINVAL);
}
static void put_crypt_info(struct fscrypt_info *ci)
@@ -185,8 +227,9 @@ static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt)
tfm = crypto_alloc_shash("sha256", 0, 0);
if (IS_ERR(tfm)) {
- pr_warn_ratelimited("fscrypt: error allocating SHA-256 transform: %ld\n",
- PTR_ERR(tfm));
+ fscrypt_warn(NULL,
+ "error allocating SHA-256 transform: %ld",
+ PTR_ERR(tfm));
return PTR_ERR(tfm);
}
prev_tfm = cmpxchg(&essiv_hash_tfm, NULL, tfm);
@@ -246,8 +289,7 @@ int fscrypt_get_encryption_info(struct inode *inode)
struct fscrypt_info *crypt_info;
struct fscrypt_context ctx;
struct crypto_skcipher *ctfm;
- const char *cipher_str;
- int keysize;
+ struct fscrypt_mode *mode;
u8 *raw_key = NULL;
int res;
@@ -291,57 +333,59 @@ int fscrypt_get_encryption_info(struct inode *inode)
memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
sizeof(crypt_info->ci_master_key));
- res = determine_cipher_type(crypt_info, inode, &cipher_str, &keysize);
- if (res)
+ mode = select_encryption_mode(crypt_info, inode);
+ if (IS_ERR(mode)) {
+ res = PTR_ERR(mode);
goto out;
+ }
/*
* This cannot be a stack buffer because it is passed to the scatterlist
* crypto API as part of key derivation.
*/
res = -ENOMEM;
- raw_key = kmalloc(FS_MAX_KEY_SIZE, GFP_NOFS);
+ raw_key = kmalloc(mode->keysize, GFP_NOFS);
if (!raw_key)
goto out;
- res = validate_user_key(crypt_info, &ctx, raw_key, FS_KEY_DESC_PREFIX,
- keysize);
- if (res && inode->i_sb->s_cop->key_prefix) {
- int res2 = validate_user_key(crypt_info, &ctx, raw_key,
- inode->i_sb->s_cop->key_prefix,
- keysize);
- if (res2) {
- if (res2 == -ENOKEY)
- res = -ENOKEY;
- goto out;
- }
- } else if (res) {
+ res = find_and_derive_key(inode, &ctx, raw_key, mode->keysize);
+ if (res)
goto out;
- }
- ctfm = crypto_alloc_skcipher(cipher_str, 0, 0);
- if (!ctfm || IS_ERR(ctfm)) {
- res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
- pr_debug("%s: error %d (inode %lu) allocating crypto tfm\n",
- __func__, res, inode->i_ino);
+
+ ctfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0);
+ if (IS_ERR(ctfm)) {
+ res = PTR_ERR(ctfm);
+ fscrypt_warn(inode->i_sb,
+ "error allocating '%s' transform for inode %lu: %d",
+ mode->cipher_str, inode->i_ino, res);
goto out;
}
+ if (unlikely(!mode->logged_impl_name)) {
+ /*
+ * fscrypt performance can vary greatly depending on which
+ * crypto algorithm implementation is used. Help people debug
+ * performance problems by logging the ->cra_driver_name the
+ * first time a mode is used. Note that multiple threads can
+ * race here, but it doesn't really matter.
+ */
+ mode->logged_impl_name = true;
+ pr_info("fscrypt: %s using implementation \"%s\"\n",
+ mode->friendly_name,
+ crypto_skcipher_alg(ctfm)->base.cra_driver_name);
+ }
crypt_info->ci_ctfm = ctfm;
- crypto_skcipher_clear_flags(ctfm, ~0);
crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY);
- /*
- * if the provided key is longer than keysize, we use the first
- * keysize bytes of the derived key only
- */
- res = crypto_skcipher_setkey(ctfm, raw_key, keysize);
+ res = crypto_skcipher_setkey(ctfm, raw_key, mode->keysize);
if (res)
goto out;
if (S_ISREG(inode->i_mode) &&
crypt_info->ci_data_mode == FS_ENCRYPTION_MODE_AES_128_CBC) {
- res = init_essiv_generator(crypt_info, raw_key, keysize);
+ res = init_essiv_generator(crypt_info, raw_key, mode->keysize);
if (res) {
- pr_debug("%s: error %d (inode %lu) allocating essiv tfm\n",
- __func__, res, inode->i_ino);
+ fscrypt_warn(inode->i_sb,
+ "error initializing ESSIV generator for inode %lu: %d",
+ inode->i_ino, res);
goto out;
}
}
diff --git a/fs/dcache.c b/fs/dcache.c
index 0f3937dd29f6..b056cf8271a5 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -278,7 +278,8 @@ void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry
spin_unlock(&dentry->d_lock);
name->name = p->name;
} else {
- memcpy(name->inline_name, dentry->d_iname, DNAME_INLINE_LEN);
+ memcpy(name->inline_name, dentry->d_iname,
+ dentry->d_name.len + 1);
spin_unlock(&dentry->d_lock);
name->name = name->inline_name;
}
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 6d17f31a31d7..8eb768bbf5b5 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -74,7 +74,7 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
error_msg = "rec_len is too small for name_len";
else if (unlikely(((char *) de - buf) + rlen > size))
- error_msg = "directory entry across range";
+ error_msg = "directory entry overrun";
else if (unlikely(le32_to_cpu(de->inode) >
le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
error_msg = "inode out of bounds";
@@ -83,18 +83,16 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
if (filp)
ext4_error_file(filp, function, line, bh->b_blocknr,
- "bad entry in directory: %s - offset=%u(%u), "
- "inode=%u, rec_len=%d, name_len=%d",
- error_msg, (unsigned) (offset % size),
- offset, le32_to_cpu(de->inode),
- rlen, de->name_len);
+ "bad entry in directory: %s - offset=%u, "
+ "inode=%u, rec_len=%d, name_len=%d, size=%d",
+ error_msg, offset, le32_to_cpu(de->inode),
+ rlen, de->name_len, size);
else
ext4_error_inode(dir, function, line, bh->b_blocknr,
- "bad entry in directory: %s - offset=%u(%u), "
- "inode=%u, rec_len=%d, name_len=%d",
- error_msg, (unsigned) (offset % size),
- offset, le32_to_cpu(de->inode),
- rlen, de->name_len);
+ "bad entry in directory: %s - offset=%u, "
+ "inode=%u, rec_len=%d, name_len=%d, size=%d",
+ error_msg, offset, le32_to_cpu(de->inode),
+ rlen, de->name_len, size);
return 1;
}
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 72883bc6fc32..f1375d3ad5c1 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1770,6 +1770,7 @@ int empty_inline_dir(struct inode *dir, int *has_inline_data)
{
int err, inline_size;
struct ext4_iloc iloc;
+ size_t inline_len;
void *inline_pos;
unsigned int offset;
struct ext4_dir_entry_2 *de;
@@ -1797,8 +1798,9 @@ int empty_inline_dir(struct inode *dir, int *has_inline_data)
goto out;
}
+ inline_len = ext4_get_inline_size(dir);
offset = EXT4_INLINE_DOTDOT_SIZE;
- while (offset < dir->i_size) {
+ while (offset < inline_len) {
de = ext4_get_inline_entry(dir, &iloc, offset,
&inline_pos, &inline_size);
if (ext4_check_dir_entry(dir, NULL, de,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b06109979e94..920665b94f11 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2411,8 +2411,8 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
mpd->map.m_len = 0;
mpd->next_page = index;
while (index <= end) {
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+ nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
+ tag);
if (nr_pages == 0)
goto out;
@@ -2420,16 +2420,6 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
struct page *page = pvec.pages[i];
/*
- * At this point, the page may be truncated or
- * invalidated (changing page->mapping to NULL), or
- * even swizzled back from swapper_space to tmpfs file
- * mapping. However, page->index will not change
- * because we have a reference on the page.
- */
- if (page->index > end)
- goto out;
-
- /*
* Accumulated enough dirty pages? This doesn't apply
* to WB_SYNC_ALL mode. For integrity sync we have to
* keep going because someone may be concurrently
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 0a512aa81bf7..4c9d799955d1 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -48,7 +48,6 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
*/
sb_start_write(sb);
ext4_mmp_csum_set(sb, mmp);
- mark_buffer_dirty(bh);
lock_buffer(bh);
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index d2421fd38833..783280ebc2fe 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -18,6 +18,7 @@
int ext4_resize_begin(struct super_block *sb)
{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
int ret = 0;
if (!capable(CAP_SYS_RESOURCE))
@@ -28,7 +29,7 @@ int ext4_resize_begin(struct super_block *sb)
* because the user tools have no way of handling this. Probably a
* bad time to do it anyways.
*/
- if (EXT4_SB(sb)->s_sbh->b_blocknr !=
+ if (EXT4_B2C(sbi, sbi->s_sbh->b_blocknr) !=
le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
ext4_warning(sb, "won't resize using backup superblock at %llu",
(unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
@@ -1954,6 +1955,26 @@ retry:
}
}
+ /*
+ * Make sure the last group has enough space so that it's
+ * guaranteed to have enough space for all metadata blocks
+ * that it might need to hold. (We might not need to store
+ * the inode table blocks in the last block group, but there
+ * will be cases where this might be needed.)
+ */
+ if ((ext4_group_first_block_no(sb, n_group) +
+ ext4_group_overhead_blocks(sb, n_group) + 2 +
+ sbi->s_itb_per_group + sbi->s_cluster_ratio) >= n_blocks_count) {
+ n_blocks_count = ext4_group_first_block_no(sb, n_group);
+ n_group--;
+ n_blocks_count_retry = 0;
+ if (resize_inode) {
+ iput(resize_inode);
+ resize_inode = NULL;
+ }
+ goto retry;
+ }
+
/* extend the last group */
if (n_group == o_group)
add = n_blocks_count - o_blocks_count;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 71c9104a7779..959d734806cb 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4023,11 +4023,13 @@ no_journal:
block = ext4_count_free_clusters(sb);
ext4_free_blocks_count_set(sbi->s_es,
EXT4_C2B(sbi, block));
+ ext4_superblock_csum_set(sb);
err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
GFP_KERNEL);
if (!err) {
unsigned long freei = ext4_count_free_inodes(sb);
sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
+ ext4_superblock_csum_set(sb);
err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
GFP_KERNEL);
}
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 760d1ad22722..f7cdd3b536e3 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -24,10 +24,11 @@
#include <trace/events/f2fs.h>
static struct kmem_cache *ino_entry_slab;
-struct kmem_cache *inode_entry_slab;
+struct kmem_cache *f2fs_inode_entry_slab;
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
+ f2fs_build_fault_attr(sbi, 0, 0);
set_ckpt_flags(sbi, CP_ERROR_FLAG);
if (!end_io)
f2fs_flush_merged_writes(sbi);
@@ -36,7 +37,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
/*
* We guarantee no failure on the returned page.
*/
-struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
+struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
struct address_space *mapping = META_MAPPING(sbi);
struct page *page = NULL;
@@ -70,6 +71,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
.encrypted_page = NULL,
.is_meta = is_meta,
};
+ int err;
if (unlikely(!is_meta))
fio.op_flags &= ~REQ_META;
@@ -84,9 +86,10 @@ repeat:
fio.page = page;
- if (f2fs_submit_page_bio(&fio)) {
+ err = f2fs_submit_page_bio(&fio);
+ if (err) {
f2fs_put_page(page, 1);
- goto repeat;
+ return ERR_PTR(err);
}
lock_page(page);
@@ -95,29 +98,46 @@ repeat:
goto repeat;
}
- /*
- * if there is any IO error when accessing device, make our filesystem
- * readonly and make sure do not write checkpoint with non-uptodate
- * meta page.
- */
- if (unlikely(!PageUptodate(page)))
- f2fs_stop_checkpoint(sbi, false);
+ if (unlikely(!PageUptodate(page))) {
+ f2fs_put_page(page, 1);
+ return ERR_PTR(-EIO);
+ }
out:
return page;
}
-struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
+struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
return __get_meta_page(sbi, index, true);
}
+struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+ struct page *page;
+ int count = 0;
+
+retry:
+ page = __get_meta_page(sbi, index, true);
+ if (IS_ERR(page)) {
+ if (PTR_ERR(page) == -EIO &&
+ ++count <= DEFAULT_RETRY_IO_COUNT)
+ goto retry;
+
+ f2fs_stop_checkpoint(sbi, false);
+ f2fs_bug_on(sbi, 1);
+ }
+
+ return page;
+}
+
/* for POR only */
-struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
+struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
return __get_meta_page(sbi, index, false);
}
-bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
{
switch (type) {
case META_NAT:
@@ -137,8 +157,20 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
return false;
break;
case META_POR:
+ case DATA_GENERIC:
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
- blkaddr < MAIN_BLKADDR(sbi)))
+ blkaddr < MAIN_BLKADDR(sbi))) {
+ if (type == DATA_GENERIC) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "access invalid blkaddr:%u", blkaddr);
+ WARN_ON(1);
+ }
+ return false;
+ }
+ break;
+ case META_GENERIC:
+ if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
+ blkaddr >= MAIN_BLKADDR(sbi)))
return false;
break;
default:
@@ -151,7 +183,7 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
/*
* Readahead CP/NAT/SIT/SSA pages
*/
-int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
+int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync)
{
struct page *page;
@@ -174,7 +206,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
blk_start_plug(&plug);
for (; nrpages-- > 0; blkno++) {
- if (!is_valid_blkaddr(sbi, blkno, type))
+ if (!f2fs_is_valid_blkaddr(sbi, blkno, type))
goto out;
switch (type) {
@@ -218,7 +250,7 @@ out:
return blkno - start;
}
-void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
+void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
{
struct page *page;
bool readahead = false;
@@ -229,7 +261,7 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
f2fs_put_page(page, 0);
if (readahead)
- ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
+ f2fs_ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
}
static int __f2fs_write_meta_page(struct page *page,
@@ -240,17 +272,14 @@ static int __f2fs_write_meta_page(struct page *page,
trace_f2fs_writepage(page, META);
- if (unlikely(f2fs_cp_error(sbi))) {
- dec_page_count(sbi, F2FS_DIRTY_META);
- unlock_page(page);
- return 0;
- }
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto redirty_out;
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
goto redirty_out;
- write_meta_page(sbi, page, io_type);
+ f2fs_do_write_meta_page(sbi, page, io_type);
dec_page_count(sbi, F2FS_DIRTY_META);
if (wbc->for_reclaim)
@@ -295,7 +324,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
trace_f2fs_writepages(mapping->host, wbc, META);
diff = nr_pages_to_write(sbi, META, wbc);
- written = sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
+ written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
mutex_unlock(&sbi->cp_mutex);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
return 0;
@@ -306,13 +335,14 @@ skip_write:
return 0;
}
-long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
+long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
long nr_to_write, enum iostat_type io_type)
{
struct address_space *mapping = META_MAPPING(sbi);
- pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX;
+ pgoff_t index = 0, prev = ULONG_MAX;
struct pagevec pvec;
long nwritten = 0;
+ int nr_pages;
struct writeback_control wbc = {
.for_reclaim = 0,
};
@@ -322,13 +352,9 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
blk_start_plug(&plug);
- while (index <= end) {
- int i, nr_pages;
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_DIRTY,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
- if (unlikely(nr_pages == 0))
- break;
+ while ((nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+ PAGECACHE_TAG_DIRTY))) {
+ int i;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
@@ -459,20 +485,20 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
spin_unlock(&im->ino_lock);
}
-void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
+void f2fs_add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
/* add new dirty ino entry into list */
__add_ino_entry(sbi, ino, 0, type);
}
-void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
+void f2fs_remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
/* remove dirty ino entry from list */
__remove_ino_entry(sbi, ino, type);
}
/* mode should be APPEND_INO or UPDATE_INO */
-bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
+bool f2fs_exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
{
struct inode_management *im = &sbi->im[mode];
struct ino_entry *e;
@@ -483,7 +509,7 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
return e ? true : false;
}
-void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
+void f2fs_release_ino_entry(struct f2fs_sb_info *sbi, bool all)
{
struct ino_entry *e, *tmp;
int i;
@@ -502,13 +528,13 @@ void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
}
}
-void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
+void f2fs_set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
unsigned int devidx, int type)
{
__add_ino_entry(sbi, ino, devidx, type);
}
-bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
+bool f2fs_is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
unsigned int devidx, int type)
{
struct inode_management *im = &sbi->im[type];
@@ -523,20 +549,19 @@ bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
return is_dirty;
}
-int acquire_orphan_inode(struct f2fs_sb_info *sbi)
+int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
struct inode_management *im = &sbi->im[ORPHAN_INO];
int err = 0;
spin_lock(&im->ino_lock);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ORPHAN)) {
spin_unlock(&im->ino_lock);
f2fs_show_injection_info(FAULT_ORPHAN);
return -ENOSPC;
}
-#endif
+
if (unlikely(im->ino_num >= sbi->max_orphans))
err = -ENOSPC;
else
@@ -546,7 +571,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
return err;
}
-void release_orphan_inode(struct f2fs_sb_info *sbi)
+void f2fs_release_orphan_inode(struct f2fs_sb_info *sbi)
{
struct inode_management *im = &sbi->im[ORPHAN_INO];
@@ -556,14 +581,14 @@ void release_orphan_inode(struct f2fs_sb_info *sbi)
spin_unlock(&im->ino_lock);
}
-void add_orphan_inode(struct inode *inode)
+void f2fs_add_orphan_inode(struct inode *inode)
{
/* add new orphan ino entry into list */
__add_ino_entry(F2FS_I_SB(inode), inode->i_ino, 0, ORPHAN_INO);
- update_inode_page(inode);
+ f2fs_update_inode_page(inode);
}
-void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
+void f2fs_remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
/* remove orphan entry from orphan list */
__remove_ino_entry(sbi, ino, ORPHAN_INO);
@@ -573,12 +598,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
struct inode *inode;
struct node_info ni;
- int err = acquire_orphan_inode(sbi);
-
- if (err)
- goto err_out;
-
- __add_ino_entry(sbi, ino, 0, ORPHAN_INO);
+ int err;
inode = f2fs_iget_retry(sbi->sb, ino);
if (IS_ERR(inode)) {
@@ -591,23 +611,25 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
}
err = dquot_initialize(inode);
- if (err)
+ if (err) {
+ iput(inode);
goto err_out;
+ }
- dquot_initialize(inode);
clear_nlink(inode);
/* truncate all the data during iput */
iput(inode);
- get_node_info(sbi, ino, &ni);
+ err = f2fs_get_node_info(sbi, ino, &ni);
+ if (err)
+ goto err_out;
/* ENOMEM was fully retried in f2fs_evict_inode. */
if (ni.blk_addr != NULL_ADDR) {
err = -EIO;
goto err_out;
}
- __remove_ino_entry(sbi, ino, ORPHAN_INO);
return 0;
err_out:
@@ -618,7 +640,7 @@ err_out:
return err;
}
-int recover_orphan_inodes(struct f2fs_sb_info *sbi)
+int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
block_t start_blk, orphan_blocks, i, j;
unsigned int s_flags = sbi->sb->s_flags;
@@ -639,19 +661,28 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= MS_ACTIVE;
- /* Turn on quotas so that they are updated correctly */
+ /*
+ * Turn on quotas which were not enabled for read-only mounts if
+ * filesystem has quota feature, so that they are updated correctly.
+ */
quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY);
#endif
start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
- ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
+ f2fs_ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
for (i = 0; i < orphan_blocks; i++) {
- struct page *page = get_meta_page(sbi, start_blk + i);
+ struct page *page;
struct f2fs_orphan_block *orphan_blk;
+ page = f2fs_get_meta_page(sbi, start_blk + i);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto out;
+ }
+
orphan_blk = (struct f2fs_orphan_block *)page_address(page);
for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
@@ -699,7 +730,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
/* loop for each orphan inode entry and write them in Jornal block */
list_for_each_entry(orphan, head, list) {
if (!page) {
- page = grab_meta_page(sbi, start_blk++);
+ page = f2fs_grab_meta_page(sbi, start_blk++);
orphan_blk =
(struct f2fs_orphan_block *)page_address(page);
memset(orphan_blk, 0, sizeof(*orphan_blk));
@@ -741,11 +772,15 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
size_t crc_offset = 0;
__u32 crc = 0;
- *cp_page = get_meta_page(sbi, cp_addr);
+ *cp_page = f2fs_get_meta_page(sbi, cp_addr);
+ if (IS_ERR(*cp_page))
+ return PTR_ERR(*cp_page);
+
*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
if (crc_offset > (blk_size - sizeof(__le32))) {
+ f2fs_put_page(*cp_page, 1);
f2fs_msg(sbi->sb, KERN_WARNING,
"invalid crc_offset: %zu", crc_offset);
return -EINVAL;
@@ -753,6 +788,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
crc = cur_cp_crc(*cp_block);
if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
+ f2fs_put_page(*cp_page, 1);
f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
return -EINVAL;
}
@@ -772,14 +808,22 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
err = get_checkpoint_version(sbi, cp_addr, &cp_block,
&cp_page_1, version);
if (err)
- goto invalid_cp1;
+ return NULL;
+
+ if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
+ sbi->blocks_per_seg) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "invalid cp_pack_total_block_count:%u",
+ le32_to_cpu(cp_block->cp_pack_total_block_count));
+ goto invalid_cp;
+ }
pre_version = *version;
cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
err = get_checkpoint_version(sbi, cp_addr, &cp_block,
&cp_page_2, version);
if (err)
- goto invalid_cp2;
+ goto invalid_cp;
cur_version = *version;
if (cur_version == pre_version) {
@@ -787,14 +831,13 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
f2fs_put_page(cp_page_2, 1);
return cp_page_1;
}
-invalid_cp2:
f2fs_put_page(cp_page_2, 1);
-invalid_cp1:
+invalid_cp:
f2fs_put_page(cp_page_1, 1);
return NULL;
}
-int get_valid_checkpoint(struct f2fs_sb_info *sbi)
+int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
{
struct f2fs_checkpoint *cp_block;
struct f2fs_super_block *fsb = sbi->raw_super;
@@ -806,7 +849,8 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
block_t cp_blk_no;
int i;
- sbi->ckpt = f2fs_kzalloc(sbi, cp_blks * blk_size, GFP_KERNEL);
+ sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks),
+ GFP_KERNEL);
if (!sbi->ckpt)
return -ENOMEM;
/*
@@ -837,15 +881,15 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
memcpy(sbi->ckpt, cp_block, blk_size);
- /* Sanity checking of checkpoint */
- if (sanity_check_ckpt(sbi))
- goto free_fail_no_cp;
-
if (cur_page == cp1)
sbi->cur_cp_pack = 1;
else
sbi->cur_cp_pack = 2;
+ /* Sanity checking of checkpoint */
+ if (f2fs_sanity_check_ckpt(sbi))
+ goto free_fail_no_cp;
+
if (cp_blks <= 1)
goto done;
@@ -857,7 +901,9 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
void *sit_bitmap_ptr;
unsigned char *ckpt = (unsigned char *)sbi->ckpt;
- cur_page = get_meta_page(sbi, cp_blk_no + i);
+ cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i);
+ if (IS_ERR(cur_page))
+ goto free_fail_no_cp;
sit_bitmap_ptr = page_address(cur_page);
memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
f2fs_put_page(cur_page, 1);
@@ -902,7 +948,7 @@ static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
stat_dec_dirty_inode(F2FS_I_SB(inode), type);
}
-void update_dirty_page(struct inode *inode, struct page *page)
+void f2fs_update_dirty_page(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
@@ -921,7 +967,7 @@ void update_dirty_page(struct inode *inode, struct page *page)
f2fs_trace_pid(page);
}
-void remove_dirty_inode(struct inode *inode)
+void f2fs_remove_dirty_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
@@ -938,7 +984,7 @@ void remove_dirty_inode(struct inode *inode)
spin_unlock(&sbi->inode_lock[type]);
}
-int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
+int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
{
struct list_head *head;
struct inode *inode;
@@ -979,12 +1025,10 @@ retry:
iput(inode);
/* We need to give cpu to another writers. */
- if (ino == cur_ino) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ if (ino == cur_ino)
cond_resched();
- } else {
+ else
ino = cur_ino;
- }
} else {
/*
* We should submit bio, since it exists several
@@ -1021,7 +1065,7 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
/* it's on eviction */
if (is_inode_flag_set(inode, FI_DIRTY_INODE))
- update_inode_page(inode);
+ f2fs_update_inode_page(inode);
iput(inode);
}
}
@@ -1061,7 +1105,7 @@ retry_flush_dents:
/* write all the dirty dentry pages */
if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
f2fs_unlock_all(sbi);
- err = sync_dirty_inodes(sbi, DIR_INODE);
+ err = f2fs_sync_dirty_inodes(sbi, DIR_INODE);
if (err)
goto out;
cond_resched();
@@ -1089,7 +1133,9 @@ retry_flush_nodes:
if (get_pages(sbi, F2FS_DIRTY_NODES)) {
up_write(&sbi->node_write);
- err = sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO);
+ atomic_inc(&sbi->wb_sync_req[NODE]);
+ err = f2fs_sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO);
+ atomic_dec(&sbi->wb_sync_req[NODE]);
if (err) {
up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
@@ -1116,7 +1162,7 @@ static void unblock_operations(struct f2fs_sb_info *sbi)
f2fs_unlock_all(sbi);
}
-static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
+void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
DEFINE_WAIT(wait);
@@ -1126,6 +1172,9 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
if (!get_pages(sbi, F2FS_WB_CP_DATA))
break;
+ if (unlikely(f2fs_cp_error(sbi)))
+ break;
+
io_schedule_timeout(5*HZ);
}
finish_wait(&sbi->cp_wait, &wait);
@@ -1183,10 +1232,10 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi,
/*
* pagevec_lookup_tag and lock_page again will take
- * some extra time. Therefore, update_meta_pages and
- * sync_meta_pages are combined in this function.
+ * some extra time. Therefore, f2fs_update_meta_pages and
+ * f2fs_sync_meta_pages are combined in this function.
*/
- struct page *page = grab_meta_page(sbi, blk_addr);
+ struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
int err;
memcpy(page_address(page), src, PAGE_SIZE);
@@ -1199,8 +1248,12 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi,
/* writeout cp pack 2 page */
err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO);
- f2fs_bug_on(sbi, err);
+ if (unlikely(err && f2fs_cp_error(sbi))) {
+ f2fs_put_page(page, 1);
+ return;
+ }
+ f2fs_bug_on(sbi, err);
f2fs_put_page(page, 0);
/* submit checkpoint (with barrier if NOBARRIER is not set) */
@@ -1224,16 +1277,16 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META)) {
- sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
+ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ break;
}
/*
* modify checkpoint
* version number is already updated
*/
- ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
+ ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi, true));
ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
ckpt->cur_node_segno[i] =
@@ -1253,7 +1306,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
}
/* 2 cp + n data seg summary + orphan inode blocks */
- data_sum_blocks = npages_for_summary_flush(sbi, false);
+ data_sum_blocks = f2fs_npages_for_summary_flush(sbi, false);
spin_lock_irqsave(&sbi->cp_lock, flags);
if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
__set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
@@ -1298,22 +1351,23 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
for (i = 0; i < nm_i->nat_bits_blocks; i++)
- update_meta_page(sbi, nm_i->nat_bits +
+ f2fs_update_meta_page(sbi, nm_i->nat_bits +
(i << F2FS_BLKSIZE_BITS), blk + i);
/* Flush all the NAT BITS pages */
while (get_pages(sbi, F2FS_DIRTY_META)) {
- sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
+ f2fs_sync_meta_pages(sbi, META, LONG_MAX,
+ FS_CP_META_IO);
if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ break;
}
}
/* write out checkpoint buffer at block 0 */
- update_meta_page(sbi, ckpt, start_blk++);
+ f2fs_update_meta_page(sbi, ckpt, start_blk++);
for (i = 1; i < 1 + cp_payload_blks; i++)
- update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
+ f2fs_update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
start_blk++);
if (orphan_num) {
@@ -1321,7 +1375,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
start_blk += orphan_blocks;
}
- write_data_summaries(sbi, start_blk);
+ f2fs_write_data_summaries(sbi, start_blk);
start_blk += data_sum_blocks;
/* Record write statistics in the hot node summary */
@@ -1332,7 +1386,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);
if (__remain_node_summaries(cpc->reason)) {
- write_node_summaries(sbi, start_blk);
+ f2fs_write_node_summaries(sbi, start_blk);
start_blk += NR_CURSEG_NODE_TYPE;
}
@@ -1341,13 +1395,10 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
percpu_counter_set(&sbi->alloc_valid_block_count, 0);
/* Here, we have one bio having CP pack except cp pack 2 page */
- sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
+ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
/* wait for previous submitted meta pages writeback */
- wait_on_all_pages_writeback(sbi);
-
- if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ f2fs_wait_on_all_pages_writeback(sbi);
/* flush all device cache */
err = f2fs_flush_device_cache(sbi);
@@ -1356,12 +1407,19 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* barrier and flush checkpoint cp pack 2 page if it can */
commit_checkpoint(sbi, ckpt, start_blk);
- wait_on_all_pages_writeback(sbi);
+ f2fs_wait_on_all_pages_writeback(sbi);
- release_ino_entry(sbi, false);
+ /*
+ * invalidate intermediate page cache borrowed from meta inode
+ * which are used for migration of encrypted inode's blocks.
+ */
+ if (f2fs_sb_has_encrypt(sbi->sb))
+ invalidate_mapping_pages(META_MAPPING(sbi),
+ MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
- if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ f2fs_release_ino_entry(sbi, false);
+
+ f2fs_reset_fsync_node_info(sbi);
clear_sbi_flag(sbi, SBI_IS_DIRTY);
clear_sbi_flag(sbi, SBI_NEED_CP);
@@ -1377,13 +1435,13 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));
- return 0;
+ return unlikely(f2fs_cp_error(sbi)) ? -EIO : 0;
}
/*
* We guarantee that this checkpoint procedure will not fail.
*/
-int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned long long ckpt_ver;
@@ -1416,7 +1474,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* this is the case of multiple fstrims without any changes */
if (cpc->reason & CP_DISCARD) {
- if (!exist_trim_candidates(sbi, cpc)) {
+ if (!f2fs_exist_trim_candidates(sbi, cpc)) {
unblock_operations(sbi);
goto out;
}
@@ -1424,8 +1482,8 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (NM_I(sbi)->dirty_nat_cnt == 0 &&
SIT_I(sbi)->dirty_sentries == 0 &&
prefree_segments(sbi) == 0) {
- flush_sit_entries(sbi, cpc);
- clear_prefree_segments(sbi, cpc);
+ f2fs_flush_sit_entries(sbi, cpc);
+ f2fs_clear_prefree_segments(sbi, cpc);
unblock_operations(sbi);
goto out;
}
@@ -1440,15 +1498,15 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
/* write cached NAT/SIT entries to NAT/SIT area */
- flush_nat_entries(sbi, cpc);
- flush_sit_entries(sbi, cpc);
+ f2fs_flush_nat_entries(sbi, cpc);
+ f2fs_flush_sit_entries(sbi, cpc);
/* unlock all the fs_lock[] in do_checkpoint() */
err = do_checkpoint(sbi, cpc);
if (err)
- release_discard_addrs(sbi);
+ f2fs_release_discard_addrs(sbi);
else
- clear_prefree_segments(sbi, cpc);
+ f2fs_clear_prefree_segments(sbi, cpc);
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
@@ -1465,7 +1523,7 @@ out:
return err;
}
-void init_ino_entry_info(struct f2fs_sb_info *sbi)
+void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
{
int i;
@@ -1483,23 +1541,23 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
F2FS_ORPHANS_PER_BLOCK;
}
-int __init create_checkpoint_caches(void)
+int __init f2fs_create_checkpoint_caches(void)
{
ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
sizeof(struct ino_entry));
if (!ino_entry_slab)
return -ENOMEM;
- inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
+ f2fs_inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
sizeof(struct inode_entry));
- if (!inode_entry_slab) {
+ if (!f2fs_inode_entry_slab) {
kmem_cache_destroy(ino_entry_slab);
return -ENOMEM;
}
return 0;
}
-void destroy_checkpoint_caches(void)
+void f2fs_destroy_checkpoint_caches(void)
{
kmem_cache_destroy(ino_entry_slab);
- kmem_cache_destroy(inode_entry_slab);
+ kmem_cache_destroy(f2fs_inode_entry_slab);
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 3d846b027fa1..c62bec58f37e 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -48,6 +48,8 @@ static bool __is_cp_guaranteed(struct page *page)
if (inode->i_ino == F2FS_META_INO(sbi) ||
inode->i_ino == F2FS_NODE_INO(sbi) ||
S_ISDIR(inode->i_mode) ||
+ (S_ISREG(inode->i_mode) &&
+ is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
is_cold_data(page))
return true;
return false;
@@ -124,12 +126,10 @@ static bool f2fs_bio_post_read_required(struct bio *bio)
static void f2fs_read_end_io(struct bio *bio)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
f2fs_show_injection_info(FAULT_IO);
bio->bi_error = -EIO;
}
-#endif
if (f2fs_bio_post_read_required(bio)) {
struct bio_post_read_ctx *ctx = bio->bi_private;
@@ -175,6 +175,8 @@ static void f2fs_write_end_io(struct bio *bio)
page->index != nid_of_node(page));
dec_page_count(sbi, type);
+ if (f2fs_in_warm_node_list(sbi, page))
+ f2fs_del_fsync_node_entry(sbi, page);
clear_cold_data(page);
end_page_writeback(page);
}
@@ -244,7 +246,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
} else {
bio->bi_end_io = f2fs_write_end_io;
bio->bi_private = sbi;
- bio->bi_write_hint = io_type_to_rw_hint(sbi, type, temp);
+ bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, type, temp);
}
if (wbc)
wbc_init_bio(wbc, bio);
@@ -261,7 +263,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
if (type != DATA && type != NODE)
goto submit_io;
- if (f2fs_sb_has_blkzoned(sbi->sb) && current->plug)
+ if (test_opt(sbi, LFS) && current->plug)
blk_finish_plug(current->plug);
start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
@@ -438,7 +440,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
struct page *page = fio->encrypted_page ?
fio->encrypted_page : fio->page;
- verify_block_addr(fio, fio->new_blkaddr);
+ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
+ __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
+ return -EFAULT;
+
trace_f2fs_submit_page_bio(page, fio);
f2fs_trace_ios(fio, 0);
@@ -459,13 +464,12 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
return 0;
}
-int f2fs_submit_page_write(struct f2fs_io_info *fio)
+void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
struct page *bio_page;
- int err = 0;
f2fs_bug_on(sbi, is_read_io(fio->op));
@@ -475,7 +479,7 @@ next:
spin_lock(&io->io_lock);
if (list_empty(&io->io_list)) {
spin_unlock(&io->io_lock);
- goto out_fail;
+ goto out;
}
fio = list_first_entry(&io->io_list,
struct f2fs_io_info, list);
@@ -483,7 +487,7 @@ next:
spin_unlock(&io->io_lock);
}
- if (fio->old_blkaddr != NEW_ADDR)
+ if (__is_valid_data_blkaddr(fio->old_blkaddr))
verify_block_addr(fio, fio->old_blkaddr);
verify_block_addr(fio, fio->new_blkaddr);
@@ -502,9 +506,9 @@ alloc_new:
if (io->bio == NULL) {
if ((fio->type == DATA || fio->type == NODE) &&
fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
- err = -EAGAIN;
dec_page_count(sbi, WB_DATA_TYPE(bio_page));
- goto out_fail;
+ fio->retry = true;
+ goto skip;
}
io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc,
BIO_MAX_PAGES, false,
@@ -524,28 +528,30 @@ alloc_new:
f2fs_trace_ios(fio, 0);
trace_f2fs_submit_page_write(fio->page, fio);
-
+skip:
if (fio->in_list)
goto next;
-out_fail:
+out:
up_write(&io->io_rwsem);
- return err;
}
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
- unsigned nr_pages)
+ unsigned nr_pages, unsigned op_flag)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
struct bio_post_read_ctx *ctx;
unsigned int post_read_steps = 0;
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
+ return ERR_PTR(-EFAULT);
+
bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
if (!bio)
return ERR_PTR(-ENOMEM);
f2fs_target_device(sbi, blkaddr, bio);
bio->bi_end_io = f2fs_read_end_io;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
if (f2fs_encrypted_file(inode))
post_read_steps |= 1 << STEP_DECRYPT;
@@ -570,7 +576,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
block_t blkaddr)
{
- struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1);
+ struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);
if (IS_ERR(bio))
return PTR_ERR(bio);
@@ -603,7 +609,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn)
* ->node_page
* update block addresses in the node page
*/
-void set_data_blkaddr(struct dnode_of_data *dn)
+void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
{
f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
__set_data_blkaddr(dn);
@@ -614,12 +620,12 @@ void set_data_blkaddr(struct dnode_of_data *dn)
void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
dn->data_blkaddr = blkaddr;
- set_data_blkaddr(dn);
+ f2fs_set_data_blkaddr(dn);
f2fs_update_extent_cache(dn);
}
/* dn->ofs_in_node will be returned with up-to-date last block pointer */
-int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
+int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
int err;
@@ -653,12 +659,12 @@ int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
}
/* Should keep dn->ofs_in_node unchanged */
-int reserve_new_block(struct dnode_of_data *dn)
+int f2fs_reserve_new_block(struct dnode_of_data *dn)
{
unsigned int ofs_in_node = dn->ofs_in_node;
int ret;
- ret = reserve_new_blocks(dn, 1);
+ ret = f2fs_reserve_new_blocks(dn, 1);
dn->ofs_in_node = ofs_in_node;
return ret;
}
@@ -668,12 +674,12 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
bool need_put = dn->inode_page ? false : true;
int err;
- err = get_dnode_of_data(dn, index, ALLOC_NODE);
+ err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
if (err)
return err;
if (dn->data_blkaddr == NULL_ADDR)
- err = reserve_new_block(dn);
+ err = f2fs_reserve_new_block(dn);
if (err || need_put)
f2fs_put_dnode(dn);
return err;
@@ -692,7 +698,7 @@ int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
return f2fs_reserve_block(dn, index);
}
-struct page *get_read_data_page(struct inode *inode, pgoff_t index,
+struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
int op_flags, bool for_write)
{
struct address_space *mapping = inode->i_mapping;
@@ -711,7 +717,7 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index,
}
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
if (err)
goto put_err;
f2fs_put_dnode(&dn);
@@ -730,7 +736,8 @@ got_it:
* A new dentry page is allocated but not able to be written, since its
* new inode page couldn't be allocated due to -ENOSPC.
* In such the case, its blkaddr can be remained as NEW_ADDR.
- * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata.
+ * see, f2fs_add_link -> f2fs_get_new_data_page ->
+ * f2fs_init_inode_metadata.
*/
if (dn.data_blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_SIZE);
@@ -750,7 +757,7 @@ put_err:
return ERR_PTR(err);
}
-struct page *find_data_page(struct inode *inode, pgoff_t index)
+struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
{
struct address_space *mapping = inode->i_mapping;
struct page *page;
@@ -760,7 +767,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index)
return page;
f2fs_put_page(page, 0);
- page = get_read_data_page(inode, index, REQ_SYNC, false);
+ page = f2fs_get_read_data_page(inode, index, REQ_SYNC, false);
if (IS_ERR(page))
return page;
@@ -780,13 +787,13 @@ struct page *find_data_page(struct inode *inode, pgoff_t index)
* Because, the callers, functions in dir.c and GC, should be able to know
* whether this page exists or not.
*/
-struct page *get_lock_data_page(struct inode *inode, pgoff_t index,
+struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
bool for_write)
{
struct address_space *mapping = inode->i_mapping;
struct page *page;
repeat:
- page = get_read_data_page(inode, index, REQ_SYNC, for_write);
+ page = f2fs_get_read_data_page(inode, index, REQ_SYNC, for_write);
if (IS_ERR(page))
return page;
@@ -812,7 +819,7 @@ repeat:
* Note that, ipage is set only by make_empty_dir, and if any error occur,
* ipage should be released by this function.
*/
-struct page *get_new_data_page(struct inode *inode,
+struct page *f2fs_get_new_data_page(struct inode *inode,
struct page *ipage, pgoff_t index, bool new_i_size)
{
struct address_space *mapping = inode->i_mapping;
@@ -851,7 +858,7 @@ struct page *get_new_data_page(struct inode *inode,
/* if ipage exists, blkaddr should be NEW_ADDR */
f2fs_bug_on(F2FS_I_SB(inode), ipage);
- page = get_lock_data_page(inode, index, true);
+ page = f2fs_get_lock_data_page(inode, index, true);
if (IS_ERR(page))
return page;
}
@@ -867,6 +874,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct f2fs_summary sum;
struct node_info ni;
+ block_t old_blkaddr;
pgoff_t fofs;
blkcnt_t count = 1;
int err;
@@ -874,6 +882,10 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
return -EPERM;
+ err = f2fs_get_node_info(sbi, dn->nid, &ni);
+ if (err)
+ return err;
+
dn->data_blkaddr = datablock_addr(dn->inode,
dn->node_page, dn->ofs_in_node);
if (dn->data_blkaddr == NEW_ADDR)
@@ -883,15 +895,17 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
return err;
alloc:
- get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
-
- allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
+ old_blkaddr = dn->data_blkaddr;
+ f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
&sum, seg_type, NULL, false);
- set_data_blkaddr(dn);
+ if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
+ invalidate_mapping_pages(META_MAPPING(sbi),
+ old_blkaddr, old_blkaddr);
+ f2fs_set_data_blkaddr(dn);
/* update i_size */
- fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
+ fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
dn->ofs_in_node;
if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT))
f2fs_i_size_write(dn->inode,
@@ -929,7 +943,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
map.m_seg_type = NO_CHECK_TYPE;
if (direct_io) {
- map.m_seg_type = rw_hint_to_seg_type(iocb->ki_hint);
+ map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
flag = f2fs_force_buffered_io(inode, WRITE) ?
F2FS_GET_BLOCK_PRE_AIO :
F2FS_GET_BLOCK_PRE_DIO;
@@ -1019,7 +1033,7 @@ next_dnode:
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, pgofs, mode);
+ err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
if (err) {
if (flag == F2FS_GET_BLOCK_BMAP)
map->m_pblk = 0;
@@ -1027,10 +1041,10 @@ next_dnode:
err = 0;
if (map->m_next_pgofs)
*map->m_next_pgofs =
- get_next_page_offset(&dn, pgofs);
+ f2fs_get_next_page_offset(&dn, pgofs);
if (map->m_next_extent)
*map->m_next_extent =
- get_next_page_offset(&dn, pgofs);
+ f2fs_get_next_page_offset(&dn, pgofs);
}
goto unlock_out;
}
@@ -1043,7 +1057,13 @@ next_dnode:
next_block:
blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
+ err = -EFAULT;
+ goto sync_out;
+ }
+
+ if (!is_valid_data_blkaddr(sbi, blkaddr)) {
if (create) {
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
@@ -1116,7 +1136,7 @@ skip:
(pgofs == end || dn.ofs_in_node == end_offset)) {
dn.ofs_in_node = ofs_in_node;
- err = reserve_new_blocks(&dn, prealloc);
+ err = f2fs_reserve_new_blocks(&dn, prealloc);
if (err)
goto sync_out;
@@ -1235,7 +1255,7 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock,
{
return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_DEFAULT, NULL,
- rw_hint_to_seg_type(
+ f2fs_rw_hint_to_seg_type(
inode->i_write_hint));
}
@@ -1280,7 +1300,11 @@ static int f2fs_xattr_fiemap(struct inode *inode,
if (!page)
return -ENOMEM;
- get_node_info(sbi, inode->i_ino, &ni);
+ err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ if (err) {
+ f2fs_put_page(page, 1);
+ return err;
+ }
phys = (__u64)blk_to_logical(inode, ni.blk_addr);
offset = offsetof(struct f2fs_inode, i_addr) +
@@ -1307,7 +1331,11 @@ static int f2fs_xattr_fiemap(struct inode *inode,
if (!page)
return -ENOMEM;
- get_node_info(sbi, xnid, &ni);
+ err = f2fs_get_node_info(sbi, xnid, &ni);
+ if (err) {
+ f2fs_put_page(page, 1);
+ return err;
+ }
phys = (__u64)blk_to_logical(inode, ni.blk_addr);
len = inode->i_sb->s_blocksize;
@@ -1419,10 +1447,15 @@ out:
/*
* This function was originally taken from fs/mpage.c, and customized for f2fs.
* Major change was from block_size == page_size in f2fs by default.
+ *
+ * Note that the aops->readpages() function is ONLY used for read-ahead. If
+ * this function ever deviates from doing just read-ahead, it should either
+ * use ->readpage() or do the necessary surgery to decouple ->readpages()
+ * from read-ahead.
*/
static int f2fs_mpage_readpages(struct address_space *mapping,
struct list_head *pages, struct page *page,
- unsigned nr_pages)
+ unsigned nr_pages, bool is_readahead)
{
struct bio *bio = NULL;
sector_t last_block_in_bio = 0;
@@ -1492,6 +1525,10 @@ got_it:
SetPageUptodate(page);
goto confused;
}
+
+ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
+ DATA_GENERIC))
+ goto set_error_page;
} else {
zero_user_segment(page, 0, PAGE_SIZE);
if (!PageUptodate(page))
@@ -1511,7 +1548,8 @@ submit_and_realloc:
bio = NULL;
}
if (bio == NULL) {
- bio = f2fs_grab_read_bio(inode, block_nr, nr_pages);
+ bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
+ is_readahead ? REQ_RAHEAD : 0);
if (IS_ERR(bio)) {
bio = NULL;
goto set_error_page;
@@ -1555,7 +1593,7 @@ static int f2fs_read_data_page(struct file *file, struct page *page)
if (f2fs_has_inline_data(inode))
ret = f2fs_read_inline_data(inode, page);
if (ret == -EAGAIN)
- ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1);
+ ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false);
return ret;
}
@@ -1572,12 +1610,13 @@ static int f2fs_read_data_pages(struct file *file,
if (f2fs_has_inline_data(inode))
return 0;
- return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
+ return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
}
static int encrypt_one_page(struct f2fs_io_info *fio)
{
struct inode *inode = fio->page->mapping->host;
+ struct page *mpage;
gfp_t gfp_flags = GFP_NOFS;
if (!f2fs_encrypted_file(inode))
@@ -1589,17 +1628,25 @@ static int encrypt_one_page(struct f2fs_io_info *fio)
retry_encrypt:
fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
PAGE_SIZE, 0, fio->page->index, gfp_flags);
- if (!IS_ERR(fio->encrypted_page))
- return 0;
+ if (IS_ERR(fio->encrypted_page)) {
+ /* flush pending IOs and wait for a while in the ENOMEM case */
+ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
+ f2fs_flush_merged_writes(fio->sbi);
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ gfp_flags |= __GFP_NOFAIL;
+ goto retry_encrypt;
+ }
+ return PTR_ERR(fio->encrypted_page);
+ }
- /* flush pending IOs and wait for a while in the ENOMEM case */
- if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
- f2fs_flush_merged_writes(fio->sbi);
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- gfp_flags |= __GFP_NOFAIL;
- goto retry_encrypt;
+ mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
+ if (mpage) {
+ if (PageUptodate(mpage))
+ memcpy(page_address(mpage),
+ page_address(fio->encrypted_page), PAGE_SIZE);
+ f2fs_put_page(mpage, 1);
}
- return PTR_ERR(fio->encrypted_page);
+ return 0;
}
static inline bool check_inplace_update_policy(struct inode *inode,
@@ -1610,12 +1657,12 @@ static inline bool check_inplace_update_policy(struct inode *inode,
if (policy & (0x1 << F2FS_IPU_FORCE))
return true;
- if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi))
+ if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
return true;
if (policy & (0x1 << F2FS_IPU_UTIL) &&
utilization(sbi) > SM_I(sbi)->min_ipu_util)
return true;
- if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) &&
+ if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) &&
utilization(sbi) > SM_I(sbi)->min_ipu_util)
return true;
@@ -1636,7 +1683,7 @@ static inline bool check_inplace_update_policy(struct inode *inode,
return false;
}
-bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
+bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
{
if (f2fs_is_pinned_file(inode))
return true;
@@ -1648,7 +1695,7 @@ bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
return check_inplace_update_policy(inode, fio);
}
-bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
+bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -1671,27 +1718,19 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio)
{
struct inode *inode = fio->page->mapping->host;
- if (should_update_outplace(inode, fio))
+ if (f2fs_should_update_outplace(inode, fio))
return false;
- return should_update_inplace(inode, fio);
-}
-
-static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio)
-{
- if (fio->old_blkaddr == NEW_ADDR)
- return false;
- if (fio->old_blkaddr == NULL_ADDR)
- return false;
- return true;
+ return f2fs_should_update_inplace(inode, fio);
}
-int do_write_data_page(struct f2fs_io_info *fio)
+int f2fs_do_write_data_page(struct f2fs_io_info *fio)
{
struct page *page = fio->page;
struct inode *inode = page->mapping->host;
struct dnode_of_data dn;
struct extent_info ei = {0,0,0};
+ struct node_info ni;
bool ipu_force = false;
int err = 0;
@@ -1700,18 +1739,20 @@ int do_write_data_page(struct f2fs_io_info *fio)
f2fs_lookup_extent_cache(inode, page->index, &ei)) {
fio->old_blkaddr = ei.blk + page->index - ei.fofs;
- if (valid_ipu_blkaddr(fio)) {
- ipu_force = true;
- fio->need_lock = LOCK_DONE;
- goto got_it;
- }
+ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
+ DATA_GENERIC))
+ return -EFAULT;
+
+ ipu_force = true;
+ fio->need_lock = LOCK_DONE;
+ goto got_it;
}
/* Deadlock due to between page->lock and f2fs_lock_op */
if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
return -EAGAIN;
- err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
if (err)
goto out;
@@ -1723,11 +1764,18 @@ int do_write_data_page(struct f2fs_io_info *fio)
goto out_writepage;
}
got_it:
+ if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
+ !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
+ DATA_GENERIC)) {
+ err = -EFAULT;
+ goto out_writepage;
+ }
/*
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
- if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) {
+ if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
+ need_inplace_update(fio))) {
err = encrypt_one_page(fio);
if (err)
goto out_writepage;
@@ -1737,7 +1785,7 @@ got_it:
f2fs_put_dnode(&dn);
if (fio->need_lock == LOCK_REQ)
f2fs_unlock_op(fio->sbi);
- err = rewrite_data_page(fio);
+ err = f2fs_inplace_write_data(fio);
trace_f2fs_do_write_data_page(fio->page, IPU);
set_inode_flag(inode, FI_UPDATE_WRITE);
return err;
@@ -1751,6 +1799,12 @@ got_it:
fio->need_lock = LOCK_REQ;
}
+ err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
+ if (err)
+ goto out_writepage;
+
+ fio->version = ni.version;
+
err = encrypt_one_page(fio);
if (err)
goto out_writepage;
@@ -1759,7 +1813,7 @@ got_it:
ClearPageError(page);
/* LFS mode write path */
- write_data_page(&dn, fio);
+ f2fs_outplace_write_data(&dn, fio);
trace_f2fs_do_write_data_page(page, OPU);
set_inode_flag(inode, FI_APPEND_WRITE);
if (page->index == 0)
@@ -1805,6 +1859,12 @@ static int __write_data_page(struct page *page, bool *submitted,
/* we should bypass data pages to proceed the kworkder jobs */
if (unlikely(f2fs_cp_error(sbi))) {
mapping_set_error(page->mapping, -EIO);
+ /*
+ * don't drop any dirty dentry pages for keeping lastest
+ * directory structure.
+ */
+ if (S_ISDIR(inode->i_mode))
+ goto redirty_out;
goto out;
}
@@ -1829,13 +1889,13 @@ write:
/* we should not write 0'th page having journal header */
if (f2fs_is_volatile_file(inode) && (!page->index ||
(!wbc->for_reclaim &&
- available_free_memory(sbi, BASE_CHECK))))
+ f2fs_available_free_memory(sbi, BASE_CHECK))))
goto redirty_out;
/* Dentry blocks are controlled by checkpoint */
if (S_ISDIR(inode->i_mode)) {
fio.need_lock = LOCK_DONE;
- err = do_write_data_page(&fio);
+ err = f2fs_do_write_data_page(&fio);
goto done;
}
@@ -1854,10 +1914,10 @@ write:
}
if (err == -EAGAIN) {
- err = do_write_data_page(&fio);
+ err = f2fs_do_write_data_page(&fio);
if (err == -EAGAIN) {
fio.need_lock = LOCK_REQ;
- err = do_write_data_page(&fio);
+ err = f2fs_do_write_data_page(&fio);
}
}
@@ -1882,7 +1942,7 @@ out:
if (wbc->for_reclaim) {
f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA);
clear_inode_flag(inode, FI_HOT_DATA);
- remove_dirty_inode(inode);
+ f2fs_remove_dirty_inode(inode);
submitted = NULL;
}
@@ -1932,6 +1992,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
int ret = 0;
int done = 0;
struct pagevec pvec;
+ struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
int nr_pages;
pgoff_t uninitialized_var(writeback_index);
pgoff_t index;
@@ -1976,8 +2037,8 @@ retry:
while (!done && (index <= end)) {
int i;
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1);
+ nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
+ tag);
if (nr_pages == 0)
break;
@@ -1985,7 +2046,9 @@ retry:
struct page *page = pvec.pages[i];
bool submitted = false;
- if (page->index > end) {
+ /* give a priority to WB_SYNC threads */
+ if (atomic_read(&sbi->wb_sync_req[DATA]) &&
+ wbc->sync_mode == WB_SYNC_NONE) {
done = 1;
break;
}
@@ -2044,9 +2107,7 @@ continue_unlock:
last_idx = page->index;
}
- /* give a priority to WB_SYNC threads */
- if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) ||
- --wbc->nr_to_write <= 0) &&
+ if (--wbc->nr_to_write <= 0 &&
wbc->sync_mode == WB_SYNC_NONE) {
done = 1;
break;
@@ -2072,7 +2133,19 @@ continue_unlock:
return ret;
}
-int __f2fs_write_data_pages(struct address_space *mapping,
+static inline bool __should_serialize_io(struct inode *inode,
+ struct writeback_control *wbc)
+{
+ if (!S_ISREG(inode->i_mode))
+ return false;
+ if (wbc->sync_mode != WB_SYNC_ALL)
+ return true;
+ if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
+ return true;
+ return false;
+}
+
+static int __f2fs_write_data_pages(struct address_space *mapping,
struct writeback_control *wbc,
enum iostat_type io_type)
{
@@ -2080,6 +2153,7 @@ int __f2fs_write_data_pages(struct address_space *mapping,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct blk_plug plug;
int ret;
+ bool locked = false;
/* deal with chardevs and other special file */
if (!mapping->a_ops->writepage)
@@ -2095,7 +2169,7 @@ int __f2fs_write_data_pages(struct address_space *mapping,
if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
- available_free_memory(sbi, DIRTY_DENTS))
+ f2fs_available_free_memory(sbi, DIRTY_DENTS))
goto skip_write;
/* skip writing during file defragment */
@@ -2106,22 +2180,30 @@ int __f2fs_write_data_pages(struct address_space *mapping,
/* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
if (wbc->sync_mode == WB_SYNC_ALL)
- atomic_inc(&sbi->wb_sync_req);
- else if (atomic_read(&sbi->wb_sync_req))
+ atomic_inc(&sbi->wb_sync_req[DATA]);
+ else if (atomic_read(&sbi->wb_sync_req[DATA]))
goto skip_write;
+ if (__should_serialize_io(inode, wbc)) {
+ mutex_lock(&sbi->writepages);
+ locked = true;
+ }
+
blk_start_plug(&plug);
ret = f2fs_write_cache_pages(mapping, wbc, io_type);
blk_finish_plug(&plug);
+ if (locked)
+ mutex_unlock(&sbi->writepages);
+
if (wbc->sync_mode == WB_SYNC_ALL)
- atomic_dec(&sbi->wb_sync_req);
+ atomic_dec(&sbi->wb_sync_req[DATA]);
/*
* if some pages were truncated, we cannot guarantee its mapping->host
* to detect pending bios.
*/
- remove_dirty_inode(inode);
+ f2fs_remove_dirty_inode(inode);
return ret;
skip_write:
@@ -2146,10 +2228,14 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
loff_t i_size = i_size_read(inode);
if (to > i_size) {
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
+
truncate_pagecache(inode, i_size);
- truncate_blocks(inode, i_size, true);
+ f2fs_truncate_blocks(inode, i_size, true);
+
up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -2180,7 +2266,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
}
restart:
/* check inline_data */
- ipage = get_node_page(sbi, inode->i_ino);
+ ipage = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(ipage)) {
err = PTR_ERR(ipage);
goto unlock_out;
@@ -2190,7 +2276,7 @@ restart:
if (f2fs_has_inline_data(inode)) {
if (pos + len <= MAX_INLINE_DATA(inode)) {
- read_inline_data(page, ipage);
+ f2fs_do_read_inline_data(page, ipage);
set_inode_flag(inode, FI_DATA_EXIST);
if (inode->i_nlink)
set_inline_node(ipage);
@@ -2208,7 +2294,7 @@ restart:
dn.data_blkaddr = ei.blk + index - ei.fofs;
} else {
/* hole case */
- err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
if (err || dn.data_blkaddr == NULL_ADDR) {
f2fs_put_dnode(&dn);
__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
@@ -2254,8 +2340,9 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
}
trace_f2fs_write_begin(inode, pos, len, flags);
- if (f2fs_is_atomic_file(inode) &&
- !available_free_memory(sbi, INMEM_PAGES)) {
+ if ((f2fs_is_atomic_file(inode) &&
+ !f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
+ is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
err = -ENOMEM;
drop_atomic = true;
goto fail;
@@ -2339,7 +2426,7 @@ fail:
f2fs_put_page(page, 1);
f2fs_write_failed(mapping, pos + len);
if (drop_atomic)
- drop_inmem_pages_all(sbi);
+ f2fs_drop_inmem_pages_all(sbi, false);
return err;
}
@@ -2380,14 +2467,20 @@ unlock_out:
static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
loff_t offset)
{
- unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
-
- if (offset & blocksize_mask)
- return -EINVAL;
-
- if (iov_iter_alignment(iter) & blocksize_mask)
- return -EINVAL;
-
+ unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
+ unsigned blkbits = i_blkbits;
+ unsigned blocksize_mask = (1 << blkbits) - 1;
+ unsigned long align = offset | iov_iter_alignment(iter);
+ struct block_device *bdev = inode->i_sb->s_bdev;
+
+ if (align & blocksize_mask) {
+ if (bdev)
+ blkbits = blksize_bits(bdev_logical_block_size(bdev));
+ blocksize_mask = (1 << blkbits) - 1;
+ if (align & blocksize_mask)
+ return -EINVAL;
+ return 1;
+ }
return 0;
}
@@ -2405,7 +2498,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
err = check_direct_IO(inode, iter, offset);
if (err)
- return err;
+ return err < 0 ? err : 0;
if (f2fs_force_buffered_io(inode, rw))
return 0;
@@ -2437,17 +2530,17 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
iocb->ki_hint = WRITE_LIFE_NOT_SET;
- if (!down_read_trylock(&F2FS_I(inode)->dio_rwsem[rw])) {
+ if (!down_read_trylock(&F2FS_I(inode)->i_gc_rwsem[rw])) {
if (iocb->ki_flags & IOCB_NOWAIT) {
iocb->ki_hint = hint;
err = -EAGAIN;
goto out;
}
- down_read(&F2FS_I(inode)->dio_rwsem[rw]);
+ down_read(&F2FS_I(inode)->i_gc_rwsem[rw]);
}
err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);
- up_read(&F2FS_I(inode)->dio_rwsem[rw]);
+ up_read(&F2FS_I(inode)->i_gc_rwsem[rw]);
if (rw == WRITE) {
if (whint_mode == WHINT_MODE_OFF)
@@ -2490,13 +2583,13 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
dec_page_count(sbi, F2FS_DIRTY_NODES);
} else {
inode_dec_dirty_pages(inode);
- remove_dirty_inode(inode);
+ f2fs_remove_dirty_inode(inode);
}
}
/* This is atomic written page, keep Private */
if (IS_ATOMIC_WRITTEN_PAGE(page))
- return drop_inmem_page(inode, page);
+ return f2fs_drop_inmem_page(inode, page);
set_page_private(page, 0);
ClearPagePrivate(page);
@@ -2527,9 +2620,13 @@ static int f2fs_set_data_page_dirty(struct page *page)
if (!PageUptodate(page))
SetPageUptodate(page);
+ /* don't remain PG_checked flag which was set during GC */
+ if (is_cold_data(page))
+ clear_cold_data(page);
+
if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
- register_inmem_page(inode, page);
+ f2fs_register_inmem_page(inode, page);
return 1;
}
/*
@@ -2541,7 +2638,7 @@ static int f2fs_set_data_page_dirty(struct page *page)
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
- update_dirty_page(inode, page);
+ f2fs_update_dirty_page(inode, page);
return 1;
}
return 0;
@@ -2634,6 +2731,17 @@ const struct address_space_operations f2fs_dblock_aops = {
#endif
};
+void f2fs_clear_radix_tree_dirty_tag(struct page *page)
+{
+ struct address_space *mapping = page_mapping(page);
+ unsigned long flags;
+
+ spin_lock_irqsave(&mapping->tree_lock, flags);
+ radix_tree_tag_clear(&mapping->page_tree, page_index(page),
+ PAGECACHE_TAG_DIRTY);
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
+}
+
int __init f2fs_init_post_read_processing(void)
{
bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a66107b5cfff..214a968962a1 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -104,6 +104,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->avail_nids = NM_I(sbi)->available_nids;
si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID];
si->bg_gc = sbi->bg_gc;
+ si->skipped_atomic_files[BG_GC] = sbi->skipped_atomic_files[BG_GC];
+ si->skipped_atomic_files[FG_GC] = sbi->skipped_atomic_files[FG_GC];
si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
/ 2;
@@ -213,7 +215,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem += sizeof(struct f2fs_nm_info);
si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
- si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
+ si->base_mem += NM_I(sbi)->nat_blocks *
+ f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK);
si->base_mem += NM_I(sbi)->nat_blocks / 8;
si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);
@@ -342,6 +345,10 @@ static int stat_show(struct seq_file *s, void *v)
si->bg_data_blks);
seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks,
si->bg_node_blks);
+ seq_printf(s, "Skipped : atomic write %llu (%llu)\n",
+ si->skipped_atomic_files[BG_GC] +
+ si->skipped_atomic_files[FG_GC],
+ si->skipped_atomic_files[BG_GC]);
seq_puts(s, "\nExtent Cache:\n");
seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n",
si->hit_largest, si->hit_cached,
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 41d32171bd52..086639556705 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -60,12 +60,12 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
[S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK,
};
-void set_de_type(struct f2fs_dir_entry *de, umode_t mode)
+static void set_de_type(struct f2fs_dir_entry *de, umode_t mode)
{
de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
}
-unsigned char get_de_type(struct f2fs_dir_entry *de)
+unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de)
{
if (de->file_type < F2FS_FT_MAX)
return f2fs_filetype_table[de->file_type];
@@ -97,14 +97,14 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
make_dentry_ptr_block(NULL, &d, dentry_blk);
- de = find_target_dentry(fname, namehash, max_slots, &d);
+ de = f2fs_find_target_dentry(fname, namehash, max_slots, &d);
if (de)
*res_page = dentry_page;
return de;
}
-struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
+struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname,
f2fs_hash_t namehash, int *max_slots,
struct f2fs_dentry_ptr *d)
{
@@ -171,7 +171,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
for (; bidx < end_block; bidx++) {
/* no need to allocate new dentry pages to all the indices */
- dentry_page = find_data_page(dir, bidx);
+ dentry_page = f2fs_find_data_page(dir, bidx);
if (IS_ERR(dentry_page)) {
if (PTR_ERR(dentry_page) == -ENOENT) {
room = true;
@@ -210,7 +210,7 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
if (f2fs_has_inline_dentry(dir)) {
*res_page = NULL;
- de = find_in_inline_dir(dir, fname, res_page);
+ de = f2fs_find_in_inline_dir(dir, fname, res_page);
goto out;
}
@@ -319,7 +319,7 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
set_page_dirty(ipage);
}
-void do_make_empty_dir(struct inode *inode, struct inode *parent,
+void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
struct f2fs_dentry_ptr *d)
{
struct qstr dot = QSTR_INIT(".", 1);
@@ -340,23 +340,23 @@ static int make_empty_dir(struct inode *inode,
struct f2fs_dentry_ptr d;
if (f2fs_has_inline_dentry(inode))
- return make_empty_inline_dir(inode, parent, page);
+ return f2fs_make_empty_inline_dir(inode, parent, page);
- dentry_page = get_new_data_page(inode, page, 0, true);
+ dentry_page = f2fs_get_new_data_page(inode, page, 0, true);
if (IS_ERR(dentry_page))
return PTR_ERR(dentry_page);
dentry_blk = page_address(dentry_page);
make_dentry_ptr_block(NULL, &d, dentry_blk);
- do_make_empty_dir(inode, parent, &d);
+ f2fs_do_make_empty_dir(inode, parent, &d);
set_page_dirty(dentry_page);
f2fs_put_page(dentry_page, 1);
return 0;
}
-struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
+struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir,
const struct qstr *new_name, const struct qstr *orig_name,
struct page *dpage)
{
@@ -365,7 +365,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
int err;
if (is_inode_flag_set(inode, FI_NEW_INODE)) {
- page = new_inode_page(inode);
+ page = f2fs_new_inode_page(inode);
if (IS_ERR(page))
return page;
@@ -395,7 +395,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
goto put_error;
}
} else {
- page = get_node_page(F2FS_I_SB(dir), inode->i_ino);
+ page = f2fs_get_node_page(F2FS_I_SB(dir), inode->i_ino);
if (IS_ERR(page))
return page;
}
@@ -418,19 +418,19 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
* we should remove this inode from orphan list.
*/
if (inode->i_nlink == 0)
- remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino);
+ f2fs_remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino);
f2fs_i_links_write(inode, true);
}
return page;
put_error:
clear_nlink(inode);
- update_inode(inode, page);
+ f2fs_update_inode(inode, page);
f2fs_put_page(page, 1);
return ERR_PTR(err);
}
-void update_parent_metadata(struct inode *dir, struct inode *inode,
+void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode,
unsigned int current_depth)
{
if (inode && is_inode_flag_set(inode, FI_NEW_INODE)) {
@@ -448,7 +448,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode,
clear_inode_flag(inode, FI_INC_LINK);
}
-int room_for_filename(const void *bitmap, int slots, int max_slots)
+int f2fs_room_for_filename(const void *bitmap, int slots, int max_slots)
{
int bit_start = 0;
int zero_start, zero_end;
@@ -517,12 +517,11 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
}
start:
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) {
f2fs_show_injection_info(FAULT_DIR_DEPTH);
return -ENOSPC;
}
-#endif
+
if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
return -ENOSPC;
@@ -537,12 +536,12 @@ start:
(le32_to_cpu(dentry_hash) % nbucket));
for (block = bidx; block <= (bidx + nblock - 1); block++) {
- dentry_page = get_new_data_page(dir, NULL, block, true);
+ dentry_page = f2fs_get_new_data_page(dir, NULL, block, true);
if (IS_ERR(dentry_page))
return PTR_ERR(dentry_page);
dentry_blk = page_address(dentry_page);
- bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
+ bit_pos = f2fs_room_for_filename(&dentry_blk->dentry_bitmap,
slots, NR_DENTRY_IN_BLOCK);
if (bit_pos < NR_DENTRY_IN_BLOCK)
goto add_dentry;
@@ -558,7 +557,7 @@ add_dentry:
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
- page = init_inode_metadata(inode, dir, new_name,
+ page = f2fs_init_inode_metadata(inode, dir, new_name,
orig_name, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -576,7 +575,7 @@ add_dentry:
f2fs_put_page(page, 1);
}
- update_parent_metadata(dir, inode, current_depth);
+ f2fs_update_parent_metadata(dir, inode, current_depth);
fail:
if (inode)
up_write(&F2FS_I(inode)->i_sem);
@@ -586,7 +585,7 @@ fail:
return err;
}
-int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname,
+int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname,
struct inode *inode, nid_t ino, umode_t mode)
{
struct qstr new_name;
@@ -610,7 +609,7 @@ int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname,
* Caller should grab and release a rwsem by calling f2fs_lock_op() and
* f2fs_unlock_op().
*/
-int __f2fs_add_link(struct inode *dir, const struct qstr *name,
+int f2fs_do_add_link(struct inode *dir, const struct qstr *name,
struct inode *inode, nid_t ino, umode_t mode)
{
struct fscrypt_name fname;
@@ -639,7 +638,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
} else if (IS_ERR(page)) {
err = PTR_ERR(page);
} else {
- err = __f2fs_do_add_link(dir, &fname, inode, ino, mode);
+ err = f2fs_add_dentry(dir, &fname, inode, ino, mode);
}
fscrypt_free_filename(&fname);
return err;
@@ -651,7 +650,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
int err = 0;
down_write(&F2FS_I(inode)->i_sem);
- page = init_inode_metadata(inode, dir, NULL, NULL, NULL);
+ page = f2fs_init_inode_metadata(inode, dir, NULL, NULL, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
@@ -683,9 +682,9 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode)
up_write(&F2FS_I(inode)->i_sem);
if (inode->i_nlink == 0)
- add_orphan_inode(inode);
+ f2fs_add_orphan_inode(inode);
else
- release_orphan_inode(sbi);
+ f2fs_release_orphan_inode(sbi);
}
/*
@@ -698,14 +697,12 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
struct f2fs_dentry_block *dentry_blk;
unsigned int bit_pos;
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
- struct address_space *mapping = page_mapping(page);
- unsigned long flags;
int i;
f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
if (F2FS_OPTION(F2FS_I_SB(dir)).fsync_mode == FSYNC_MODE_STRICT)
- add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO);
+ f2fs_add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO);
if (f2fs_has_inline_dentry(dir))
return f2fs_delete_inline_entry(dentry, page, dir, inode);
@@ -731,17 +728,13 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
f2fs_drop_nlink(dir, inode);
if (bit_pos == NR_DENTRY_IN_BLOCK &&
- !truncate_hole(dir, page->index, page->index + 1)) {
- spin_lock_irqsave(&mapping->tree_lock, flags);
- radix_tree_tag_clear(&mapping->page_tree, page_index(page),
- PAGECACHE_TAG_DIRTY);
- spin_unlock_irqrestore(&mapping->tree_lock, flags);
-
+ !f2fs_truncate_hole(dir, page->index, page->index + 1)) {
+ f2fs_clear_radix_tree_dirty_tag(page);
clear_page_dirty_for_io(page);
ClearPagePrivate(page);
ClearPageUptodate(page);
inode_dec_dirty_pages(dir);
- remove_dirty_inode(dir);
+ f2fs_remove_dirty_inode(dir);
}
f2fs_put_page(page, 1);
}
@@ -758,7 +751,7 @@ bool f2fs_empty_dir(struct inode *dir)
return f2fs_empty_inline_dir(dir);
for (bidx = 0; bidx < nblock; bidx++) {
- dentry_page = get_lock_data_page(dir, bidx, false);
+ dentry_page = f2fs_get_lock_data_page(dir, bidx, false);
if (IS_ERR(dentry_page)) {
if (PTR_ERR(dentry_page) == -ENOENT)
continue;
@@ -806,7 +799,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
continue;
}
- d_type = get_de_type(de);
+ d_type = f2fs_get_de_type(de);
de_name.name = d->filename[bit_pos];
de_name.len = le16_to_cpu(de->name_len);
@@ -830,7 +823,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
return 1;
if (sbi->readdir_ra == 1)
- ra_node_page(sbi, le32_to_cpu(de->ino));
+ f2fs_ra_node_page(sbi, le32_to_cpu(de->ino));
bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
ctx->pos = start_pos + bit_pos;
@@ -880,7 +873,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
page_cache_sync_readahead(inode->i_mapping, ra, file, n,
min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
- dentry_page = get_lock_data_page(inode, n, false);
+ dentry_page = f2fs_get_lock_data_page(inode, n, false);
if (IS_ERR(dentry_page)) {
err = PTR_ERR(dentry_page);
if (err == -ENOENT) {
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index d5a861bf2b42..231b77ef5a53 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -49,7 +49,7 @@ static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root,
return NULL;
}
-struct rb_entry *__lookup_rb_tree(struct rb_root *root,
+struct rb_entry *f2fs_lookup_rb_tree(struct rb_root *root,
struct rb_entry *cached_re, unsigned int ofs)
{
struct rb_entry *re;
@@ -61,7 +61,7 @@ struct rb_entry *__lookup_rb_tree(struct rb_root *root,
return re;
}
-struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
+struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
struct rb_root *root, struct rb_node **parent,
unsigned int ofs)
{
@@ -92,7 +92,7 @@ struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
* in order to simpfy the insertion after.
* tree must stay unchanged between lookup and insertion.
*/
-struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root,
+struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root,
struct rb_entry *cached_re,
unsigned int ofs,
struct rb_entry **prev_entry,
@@ -159,7 +159,7 @@ lookup_neighbors:
return re;
}
-bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi,
+bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
struct rb_root *root)
{
#ifdef CONFIG_F2FS_CHECK_FS
@@ -390,7 +390,7 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
goto out;
}
- en = (struct extent_node *)__lookup_rb_tree(&et->root,
+ en = (struct extent_node *)f2fs_lookup_rb_tree(&et->root,
(struct rb_entry *)et->cached_en, pgofs);
if (!en)
goto out;
@@ -470,7 +470,7 @@ static struct extent_node *__insert_extent_tree(struct inode *inode,
goto do_insert;
}
- p = __lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs);
+ p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs);
do_insert:
en = __attach_extent_node(sbi, et, ei, parent, p);
if (!en)
@@ -520,7 +520,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
__drop_largest_extent(inode, fofs, len);
/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
- en = (struct extent_node *)__lookup_rb_tree_ret(&et->root,
+ en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
(struct rb_entry *)et->cached_en, fofs,
(struct rb_entry **)&prev_en,
(struct rb_entry **)&next_en,
@@ -773,7 +773,7 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn)
else
blkaddr = dn->data_blkaddr;
- fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
+ fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
dn->ofs_in_node;
f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1);
}
@@ -788,7 +788,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len);
}
-void init_extent_cache_info(struct f2fs_sb_info *sbi)
+void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
{
INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
mutex_init(&sbi->extent_tree_lock);
@@ -800,7 +800,7 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi)
atomic_set(&sbi->total_ext_node, 0);
}
-int __init create_extent_cache(void)
+int __init f2fs_create_extent_cache(void)
{
extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
sizeof(struct extent_tree));
@@ -815,7 +815,7 @@ int __init create_extent_cache(void)
return 0;
}
-void destroy_extent_cache(void)
+void f2fs_destroy_extent_cache(void)
{
kmem_cache_destroy(extent_node_slab);
kmem_cache_destroy(extent_tree_slab);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4587fbadc555..366b28a17950 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -26,6 +26,7 @@
#include <linux/quotaops.h>
#include <crypto/hash.h>
#include <linux/writeback.h>
+#include <linux/overflow.h>
#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_F2FS_FS_ENCRYPTION)
#include <linux/fscrypt.h>
@@ -42,7 +43,6 @@
} while (0)
#endif
-#ifdef CONFIG_F2FS_FAULT_INJECTION
enum {
FAULT_KMALLOC,
FAULT_KVMALLOC,
@@ -57,16 +57,20 @@ enum {
FAULT_TRUNCATE,
FAULT_IO,
FAULT_CHECKPOINT,
+ FAULT_DISCARD,
FAULT_MAX,
};
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+#define F2FS_ALL_FAULT_TYPE ((1 << FAULT_MAX) - 1)
+
struct f2fs_fault_info {
atomic_t inject_ops;
unsigned int inject_rate;
unsigned int inject_type;
};
-extern char *fault_name[FAULT_MAX];
+extern char *f2fs_fault_name[FAULT_MAX];
#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type)))
#endif
@@ -242,8 +246,8 @@ enum {
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
-#define DEF_MAX_DISCARD_LEN 512 /* Max. 2MB per discard */
#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */
+#define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */
#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */
#define DEF_DISCARD_URGENT_UTIL 80 /* do more discard over 80% */
#define DEF_CP_INTERVAL 60 /* 60 secs */
@@ -257,7 +261,7 @@ struct cp_control {
};
/*
- * For CP/NAT/SIT/SSA readahead
+ * indicate meta/data type
*/
enum {
META_CP,
@@ -265,6 +269,8 @@ enum {
META_SIT,
META_SSA,
META_POR,
+ DATA_GENERIC,
+ META_GENERIC,
};
/* for the list of ino */
@@ -289,6 +295,12 @@ struct inode_entry {
struct inode *inode; /* vfs inode pointer */
};
+struct fsync_node_entry {
+ struct list_head list; /* list head */
+ struct page *page; /* warm node page pointer */
+ unsigned int seq_id; /* sequence id */
+};
+
/* for the bitmap indicate blocks to be discarded */
struct discard_entry {
struct list_head list; /* list head */
@@ -305,9 +317,10 @@ struct discard_entry {
(MAX_PLIST_NUM - 1) : (blk_num - 1))
enum {
- D_PREP,
- D_SUBMIT,
- D_DONE,
+ D_PREP, /* initial */
+ D_PARTIAL, /* partially submitted */
+ D_SUBMIT, /* all submitted */
+ D_DONE, /* finished */
};
struct discard_info {
@@ -332,7 +345,10 @@ struct discard_cmd {
struct block_device *bdev; /* bdev */
unsigned short ref; /* reference count */
unsigned char state; /* state */
+ unsigned char issuing; /* issuing discard */
int error; /* bio error */
+ spinlock_t lock; /* for state/bio_ref updating */
+ unsigned short bio_ref; /* bio reference count */
};
enum {
@@ -346,11 +362,13 @@ enum {
struct discard_policy {
int type; /* type of discard */
unsigned int min_interval; /* used for candidates exist */
+ unsigned int mid_interval; /* used for device busy */
unsigned int max_interval; /* used for candidates not exist */
unsigned int max_requests; /* # of discards issued per round */
unsigned int io_aware_gran; /* minimum granularity discard not be aware of I/O */
bool io_aware; /* issue discard in idle time */
bool sync; /* submit discard with REQ_SYNC flag */
+ bool ordered; /* issue discard by lba order */
unsigned int granularity; /* discard granularity */
};
@@ -367,10 +385,12 @@ struct discard_cmd_control {
unsigned int max_discards; /* max. discards to be issued */
unsigned int discard_granularity; /* discard granularity */
unsigned int undiscard_blks; /* # of undiscard blocks */
+ unsigned int next_pos; /* next discard position */
atomic_t issued_discard; /* # of issued discard */
atomic_t issing_discard; /* # of issing discard */
atomic_t discard_cmd_cnt; /* # of cached cmd count */
struct rb_root root; /* root of discard rb-tree */
+ bool rbtree_check; /* config for consistence check */
};
/* for the list of fsync inodes, used only during recovery */
@@ -567,13 +587,12 @@ enum {
*/
};
+#define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */
+
#define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */
#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
-/* vector size for gang look-up from extent cache that consists of radix tree */
-#define EXT_TREE_VEC_SIZE 64
-
/* for in-memory extent cache entry */
#define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */
@@ -659,6 +678,8 @@ enum {
#define FADVISE_HOT_BIT 0x20
#define FADVISE_VERITY_BIT 0x40 /* reserved */
+#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
+
#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT)
#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT)
#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT)
@@ -678,15 +699,20 @@ enum {
#define DEF_DIR_LEVEL 0
+enum {
+ GC_FAILURE_PIN,
+ GC_FAILURE_ATOMIC,
+ MAX_GC_FAILURE
+};
+
struct f2fs_inode_info {
struct inode vfs_inode; /* serve a vfs inode */
unsigned long i_flags; /* keep an inode flags for ioctl */
unsigned char i_advise; /* use to give file attribute hints */
unsigned char i_dir_level; /* use for dentry level for large dir */
- union {
- unsigned int i_current_depth; /* only for directory depth */
- unsigned short i_gc_failures; /* only for regular file */
- };
+ unsigned int i_current_depth; /* only for directory depth */
+ /* for gc failure statistic */
+ unsigned int i_gc_failures[MAX_GC_FAILURE];
unsigned int i_pino; /* parent inode number */
umode_t i_acl_mode; /* keep file acl mode temporarily */
@@ -714,7 +740,9 @@ struct f2fs_inode_info {
struct task_struct *inmem_task; /* store inmemory task */
struct mutex inmem_lock; /* lock for inmemory pages */
struct extent_tree *extent_tree; /* cached extent_tree entry */
- struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */
+
+ /* avoid racing between foreground op and gc */
+ struct rw_semaphore i_gc_rwsem[2];
struct rw_semaphore i_mmap_sem;
struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */
@@ -750,22 +778,22 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
}
static inline bool __is_discard_mergeable(struct discard_info *back,
- struct discard_info *front)
+ struct discard_info *front, unsigned int max_len)
{
return (back->lstart + back->len == front->lstart) &&
- (back->len + front->len < DEF_MAX_DISCARD_LEN);
+ (back->len + front->len <= max_len);
}
static inline bool __is_discard_back_mergeable(struct discard_info *cur,
- struct discard_info *back)
+ struct discard_info *back, unsigned int max_len)
{
- return __is_discard_mergeable(back, cur);
+ return __is_discard_mergeable(back, cur, max_len);
}
static inline bool __is_discard_front_mergeable(struct discard_info *cur,
- struct discard_info *front)
+ struct discard_info *front, unsigned int max_len)
{
- return __is_discard_mergeable(cur, front);
+ return __is_discard_mergeable(cur, front, max_len);
}
static inline bool __is_extent_mergeable(struct extent_info *back,
@@ -820,6 +848,7 @@ struct f2fs_nm_info {
struct radix_tree_root nat_set_root;/* root of the nat set cache */
struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */
struct list_head nat_entries; /* cached nat entry list (clean) */
+ spinlock_t nat_list_lock; /* protect clean nat entry list */
unsigned int nat_cnt; /* the # of cached nat entries */
unsigned int dirty_nat_cnt; /* total num of nat entries in set */
unsigned int nat_blocks; /* # of nat blocks */
@@ -946,6 +975,7 @@ struct f2fs_sm_info {
unsigned int ipu_policy; /* in-place-update policy */
unsigned int min_ipu_util; /* in-place-update threshold */
unsigned int min_fsync_blocks; /* threshold for fsync */
+ unsigned int min_seq_blocks; /* threshold for sequential blocks */
unsigned int min_hot_blocks; /* threshold for hot block allocation */
unsigned int min_ssr_sections; /* threshold to trigger SSR allocation */
@@ -1064,8 +1094,10 @@ struct f2fs_io_info {
int need_lock; /* indicate we need to lock cp_rwsem */
bool in_list; /* indicate fio is in io_list */
bool is_meta; /* indicate borrow meta inode mapping or not */
+ bool retry; /* need to reallocate block address */
enum iostat_type io_type; /* io type */
struct writeback_control *io_wbc; /* writeback control */
+ unsigned char version; /* version of the node */
};
#define is_read_io(rw) ((rw) == READ)
@@ -1117,6 +1149,7 @@ enum {
SBI_POR_DOING, /* recovery is doing or not */
SBI_NEED_SB_WRITE, /* need to recover superblock */
SBI_NEED_CP, /* need to checkpoint */
+ SBI_IS_SHUTDOWN, /* shutdown by ioctl */
};
enum {
@@ -1126,6 +1159,13 @@ enum {
};
enum {
+ GC_NORMAL,
+ GC_IDLE_CB,
+ GC_IDLE_GREEDY,
+ GC_URGENT,
+};
+
+enum {
WHINT_MODE_OFF, /* not pass down write hints */
WHINT_MODE_USER, /* try to pass down hints given by users */
WHINT_MODE_FS, /* pass down hints with F2FS policy */
@@ -1156,6 +1196,7 @@ struct f2fs_sb_info {
struct rw_semaphore sb_lock; /* lock for raw super block */
int valid_super_block; /* valid super block no */
unsigned long s_flag; /* flags for sbi */
+ struct mutex writepages; /* mutex for writepages() */
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int blocks_per_blkz; /* F2FS blocks per zone */
@@ -1173,6 +1214,8 @@ struct f2fs_sb_info {
struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */
struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE];
/* bio ordering for NODE/DATA */
+ /* keep migration IO order for LFS mode */
+ struct rw_semaphore io_order_lock;
mempool_t *write_io_dummy; /* Dummy pages */
/* for checkpoint */
@@ -1190,6 +1233,11 @@ struct f2fs_sb_info {
struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
+ spinlock_t fsync_node_lock; /* for node entry lock */
+ struct list_head fsync_node_list; /* node list head */
+ unsigned int fsync_seg_id; /* sequence id */
+ unsigned int fsync_node_num; /* number of node entries */
+
/* for orphan inode, use 0'th array */
unsigned int max_orphans; /* max orphan inodes */
@@ -1243,7 +1291,7 @@ struct f2fs_sb_info {
struct percpu_counter alloc_valid_block_count;
/* writeback control */
- atomic_t wb_sync_req; /* count # of WB_SYNC threads */
+ atomic_t wb_sync_req[META]; /* count # of WB_SYNC threads */
/* valid inode count */
struct percpu_counter total_valid_inode_count;
@@ -1254,9 +1302,10 @@ struct f2fs_sb_info {
struct mutex gc_mutex; /* mutex for GC */
struct f2fs_gc_kthread *gc_thread; /* GC thread */
unsigned int cur_victim_sec; /* current victim section num */
-
- /* threshold for converting bg victims for fg */
- u64 fggc_threshold;
+ unsigned int gc_mode; /* current GC state */
+ /* for skip statistic */
+ unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
+ unsigned long long skipped_gc_rwsem; /* FG_GC only */
/* threshold for gc trials on pinned files */
u64 gc_pin_file_threshold;
@@ -1321,7 +1370,7 @@ struct f2fs_sb_info {
#ifdef CONFIG_F2FS_FAULT_INJECTION
#define f2fs_show_injection_info(type) \
printk("%sF2FS-fs : inject %s in %s of %pF\n", \
- KERN_INFO, fault_name[type], \
+ KERN_INFO, f2fs_fault_name[type], \
__func__, __builtin_return_address(0))
static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
{
@@ -1340,6 +1389,12 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
}
return false;
}
+#else
+#define f2fs_show_injection_info(type) do { } while (0)
+static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
+{
+ return false;
+}
#endif
/* For write statistics. Suppose sector size is 512 bytes,
@@ -1368,7 +1423,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi)
struct request_list *rl = &q->root_rl;
if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
- return 0;
+ return false;
return f2fs_time_over(sbi, REQ_TIME);
}
@@ -1647,18 +1702,6 @@ static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi)
}
/*
- * Check whether the given nid is within node id range.
- */
-static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
-{
- if (unlikely(nid < F2FS_ROOT_INO(sbi)))
- return -EINVAL;
- if (unlikely(nid >= NM_I(sbi)->max_nid))
- return -EINVAL;
- return 0;
-}
-
-/*
* Check whether the inode has blocks or not
*/
static inline int F2FS_HAS_BLOCKS(struct inode *inode)
@@ -1704,13 +1747,12 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
if (ret)
return ret;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_BLOCK)) {
f2fs_show_injection_info(FAULT_BLOCK);
release = *count;
goto enospc;
}
-#endif
+
/*
* let's increase this in prior to actual block count change in order
* for f2fs_sync_file to avoid data races when deciding checkpoint.
@@ -1734,18 +1776,20 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
sbi->total_valid_block_count -= diff;
if (!*count) {
spin_unlock(&sbi->stat_lock);
- percpu_counter_sub(&sbi->alloc_valid_block_count, diff);
goto enospc;
}
}
spin_unlock(&sbi->stat_lock);
- if (unlikely(release))
+ if (unlikely(release)) {
+ percpu_counter_sub(&sbi->alloc_valid_block_count, release);
dquot_release_reservation_block(inode, release);
+ }
f2fs_i_blocks_write(inode, *count, true, true);
return 0;
enospc:
+ percpu_counter_sub(&sbi->alloc_valid_block_count, release);
dquot_release_reservation_block(inode, release);
return -ENOSPC;
}
@@ -1917,12 +1961,10 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
return ret;
}
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_BLOCK)) {
f2fs_show_injection_info(FAULT_BLOCK);
goto enospc;
}
-#endif
spin_lock(&sbi->stat_lock);
@@ -2007,17 +2049,23 @@ static inline s64 valid_inode_count(struct f2fs_sb_info *sbi)
static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
pgoff_t index, bool for_write)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- struct page *page = find_lock_page(mapping, index);
+ struct page *page;
- if (page)
- return page;
+ if (IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) {
+ if (!for_write)
+ page = find_get_page_flags(mapping, index,
+ FGP_LOCK | FGP_ACCESSED);
+ else
+ page = find_lock_page(mapping, index);
+ if (page)
+ return page;
- if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) {
- f2fs_show_injection_info(FAULT_PAGE_ALLOC);
- return NULL;
+ if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) {
+ f2fs_show_injection_info(FAULT_PAGE_ALLOC);
+ return NULL;
+ }
}
-#endif
+
if (!for_write)
return grab_cache_page(mapping, index);
return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
@@ -2027,12 +2075,11 @@ static inline struct page *f2fs_pagecache_get_page(
struct address_space *mapping, pgoff_t index,
int fgp_flags, gfp_t gfp_mask)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) {
f2fs_show_injection_info(FAULT_PAGE_GET);
return NULL;
}
-#endif
+
return pagecache_get_page(mapping, index, fgp_flags, gfp_mask);
}
@@ -2097,12 +2144,11 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi,
bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages);
return bio;
}
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ALLOC_BIO)) {
f2fs_show_injection_info(FAULT_ALLOC_BIO);
return NULL;
}
-#endif
+
return bio_alloc(GFP_KERNEL, npages);
}
@@ -2216,9 +2262,60 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr)
*addr ^= mask;
}
-#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL))
-#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL)
-#define F2FS_FL_INHERITED (FS_PROJINHERIT_FL)
+/*
+ * Inode flags
+ */
+#define F2FS_SECRM_FL 0x00000001 /* Secure deletion */
+#define F2FS_UNRM_FL 0x00000002 /* Undelete */
+#define F2FS_COMPR_FL 0x00000004 /* Compress file */
+#define F2FS_SYNC_FL 0x00000008 /* Synchronous updates */
+#define F2FS_IMMUTABLE_FL 0x00000010 /* Immutable file */
+#define F2FS_APPEND_FL 0x00000020 /* writes to file may only append */
+#define F2FS_NODUMP_FL 0x00000040 /* do not dump file */
+#define F2FS_NOATIME_FL 0x00000080 /* do not update atime */
+/* Reserved for compression usage... */
+#define F2FS_DIRTY_FL 0x00000100
+#define F2FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
+#define F2FS_NOCOMPR_FL 0x00000400 /* Don't compress */
+#define F2FS_ENCRYPT_FL 0x00000800 /* encrypted file */
+/* End compression flags --- maybe not all used */
+#define F2FS_INDEX_FL 0x00001000 /* hash-indexed directory */
+#define F2FS_IMAGIC_FL 0x00002000 /* AFS directory */
+#define F2FS_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
+#define F2FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */
+#define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
+#define F2FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
+#define F2FS_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
+#define F2FS_EXTENTS_FL 0x00080000 /* Inode uses extents */
+#define F2FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
+#define F2FS_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
+#define F2FS_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
+#define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
+#define F2FS_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
+
+#define F2FS_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */
+#define F2FS_FL_USER_MODIFIABLE 0x204BC0FF /* User modifiable flags */
+
+/* Flags we can manipulate with through F2FS_IOC_FSSETXATTR */
+#define F2FS_FL_XFLAG_VISIBLE (F2FS_SYNC_FL | \
+ F2FS_IMMUTABLE_FL | \
+ F2FS_APPEND_FL | \
+ F2FS_NODUMP_FL | \
+ F2FS_NOATIME_FL | \
+ F2FS_PROJINHERIT_FL)
+
+/* Flags that should be inherited by new inodes from their parent. */
+#define F2FS_FL_INHERITED (F2FS_SECRM_FL | F2FS_UNRM_FL | F2FS_COMPR_FL |\
+ F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL |\
+ F2FS_NOCOMPR_FL | F2FS_JOURNAL_DATA_FL |\
+ F2FS_NOTAIL_FL | F2FS_DIRSYNC_FL |\
+ F2FS_PROJINHERIT_FL)
+
+/* Flags that are appropriate for regular files (all but dir-specific ones). */
+#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_TOPDIR_FL))
+
+/* Flags that are appropriate for non-directories/regular files. */
+#define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL)
static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags)
{
@@ -2261,6 +2358,7 @@ enum {
FI_EXTRA_ATTR, /* indicate file has extra attribute */
FI_PROJ_INHERIT, /* indicate file inherits projectid */
FI_PIN_FILE, /* indicate file should not be gced */
+ FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */
};
static inline void __mark_inode_dirty_flag(struct inode *inode,
@@ -2359,7 +2457,7 @@ static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth)
static inline void f2fs_i_gc_failures_write(struct inode *inode,
unsigned int count)
{
- F2FS_I(inode)->i_gc_failures = count;
+ F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = count;
f2fs_mark_inode_dirty_sync(inode, true);
}
@@ -2585,12 +2683,11 @@ static inline bool f2fs_may_extent_tree(struct inode *inode)
static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_KMALLOC)) {
f2fs_show_injection_info(FAULT_KMALLOC);
return NULL;
}
-#endif
+
return kmalloc(size, flags);
}
@@ -2623,12 +2720,11 @@ static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi,
static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_KVMALLOC)) {
f2fs_show_injection_info(FAULT_KVMALLOC);
return NULL;
}
-#endif
+
return kvmalloc(size, flags);
}
@@ -2648,7 +2744,7 @@ static inline int get_inline_xattr_addrs(struct inode *inode)
return F2FS_I(inode)->i_inline_xattr_size;
}
-#define get_inode_mode(i) \
+#define f2fs_get_inode_mode(i) \
((is_inode_flag_set(i, FI_ACL_MODE)) ? \
(F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
@@ -2687,18 +2783,51 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
spin_unlock(&sbi->iostat_lock);
}
+#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \
+ (!is_read_io(fio->op) || fio->is_meta))
+
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
+void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
+static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "invalid blkaddr: %u, type: %d, run fsck to fix.",
+ blkaddr, type);
+ f2fs_bug_on(sbi, 1);
+ }
+}
+
+static inline bool __is_valid_data_blkaddr(block_t blkaddr)
+{
+ if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
+ return false;
+ return true;
+}
+
+static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr)
+{
+ if (!__is_valid_data_blkaddr(blkaddr))
+ return false;
+ verify_blkaddr(sbi, blkaddr, DATA_GENERIC);
+ return true;
+}
+
/*
* file.c
*/
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
-void truncate_data_blocks(struct dnode_of_data *dn);
-int truncate_blocks(struct inode *inode, u64 from, bool lock);
+void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
+int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
int f2fs_truncate(struct inode *inode);
int f2fs_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
int f2fs_setattr(struct dentry *dentry, struct iattr *attr);
-int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end);
-void truncate_data_blocks_range(struct dnode_of_data *dn, int count);
+int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end);
+void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count);
int f2fs_precache_extents(struct inode *inode);
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
@@ -2712,38 +2841,37 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page);
void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page);
struct inode *f2fs_iget(struct super_block *sb, unsigned long ino);
struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino);
-int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink);
-void update_inode(struct inode *inode, struct page *node_page);
-void update_inode_page(struct inode *inode);
+int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink);
+void f2fs_update_inode(struct inode *inode, struct page *node_page);
+void f2fs_update_inode_page(struct inode *inode);
int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc);
void f2fs_evict_inode(struct inode *inode);
-void handle_failed_inode(struct inode *inode);
+void f2fs_handle_failed_inode(struct inode *inode);
/*
* namei.c
*/
-int update_extension_list(struct f2fs_sb_info *sbi, const char *name,
+int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
bool hot, bool set);
struct dentry *f2fs_get_parent(struct dentry *child);
/*
* dir.c
*/
-void set_de_type(struct f2fs_dir_entry *de, umode_t mode);
-unsigned char get_de_type(struct f2fs_dir_entry *de);
-struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
+unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de);
+struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname,
f2fs_hash_t namehash, int *max_slots,
struct f2fs_dentry_ptr *d);
int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
unsigned int start_pos, struct fscrypt_str *fstr);
-void do_make_empty_dir(struct inode *inode, struct inode *parent,
+void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
struct f2fs_dentry_ptr *d);
-struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
+struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir,
const struct qstr *new_name,
const struct qstr *orig_name, struct page *dpage);
-void update_parent_metadata(struct inode *dir, struct inode *inode,
+void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode,
unsigned int current_depth);
-int room_for_filename(const void *bitmap, int slots, int max_slots);
+int f2fs_room_for_filename(const void *bitmap, int slots, int max_slots);
void f2fs_drop_nlink(struct inode *dir, struct inode *inode);
struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
struct fscrypt_name *fname, struct page **res_page);
@@ -2760,9 +2888,9 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
const struct qstr *orig_name,
struct inode *inode, nid_t ino, umode_t mode);
-int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname,
+int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname,
struct inode *inode, nid_t ino, umode_t mode);
-int __f2fs_add_link(struct inode *dir, const struct qstr *name,
+int f2fs_do_add_link(struct inode *dir, const struct qstr *name,
struct inode *inode, nid_t ino, umode_t mode);
void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
struct inode *dir, struct inode *inode);
@@ -2771,7 +2899,7 @@ bool f2fs_empty_dir(struct inode *dir);
static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
{
- return __f2fs_add_link(d_inode(dentry->d_parent), &dentry->d_name,
+ return f2fs_do_add_link(d_inode(dentry->d_parent), &dentry->d_name,
inode, inode->i_ino, inode->i_mode);
}
@@ -2786,7 +2914,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
int f2fs_sync_fs(struct super_block *sb, int sync);
extern __printf(3, 4)
void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
-int sanity_check_ckpt(struct f2fs_sb_info *sbi);
+int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi);
/*
* hash.c
@@ -2800,138 +2928,154 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info,
struct dnode_of_data;
struct node_info;
-bool available_free_memory(struct f2fs_sb_info *sbi, int type);
-int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
-bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
-bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
-void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni);
-pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
-int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
-int truncate_inode_blocks(struct inode *inode, pgoff_t from);
-int truncate_xattr_node(struct inode *inode);
-int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino);
-int remove_inode_page(struct inode *inode);
-struct page *new_inode_page(struct inode *inode);
-struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs);
-void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid);
-struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
-struct page *get_node_page_ra(struct page *parent, int start);
-void move_node_page(struct page *node_page, int gc_type);
-int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
- struct writeback_control *wbc, bool atomic);
-int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc,
+int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid);
+bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type);
+bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page);
+void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi);
+void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page);
+void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi);
+int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
+bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
+bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
+int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
+ struct node_info *ni);
+pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
+int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
+int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from);
+int f2fs_truncate_xattr_node(struct inode *inode);
+int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
+ unsigned int seq_id);
+int f2fs_remove_inode_page(struct inode *inode);
+struct page *f2fs_new_inode_page(struct inode *inode);
+struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs);
+void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid);
+struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
+struct page *f2fs_get_node_page_ra(struct page *parent, int start);
+void f2fs_move_node_page(struct page *node_page, int gc_type);
+int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
+ struct writeback_control *wbc, bool atomic,
+ unsigned int *seq_id);
+int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
+ struct writeback_control *wbc,
bool do_balance, enum iostat_type io_type);
-void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
-bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
-void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
-void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
-int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink);
-void recover_inline_xattr(struct inode *inode, struct page *page);
-int recover_xattr_data(struct inode *inode, struct page *page);
-int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
-void restore_node_summary(struct f2fs_sb_info *sbi,
+int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
+bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
+void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
+void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
+int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink);
+void f2fs_recover_inline_xattr(struct inode *inode, struct page *page);
+int f2fs_recover_xattr_data(struct inode *inode, struct page *page);
+int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
+int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum);
-void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
-int build_node_manager(struct f2fs_sb_info *sbi);
-void destroy_node_manager(struct f2fs_sb_info *sbi);
-int __init create_node_manager_caches(void);
-void destroy_node_manager_caches(void);
+void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+int f2fs_build_node_manager(struct f2fs_sb_info *sbi);
+void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi);
+int __init f2fs_create_node_manager_caches(void);
+void f2fs_destroy_node_manager_caches(void);
/*
* segment.c
*/
-bool need_SSR(struct f2fs_sb_info *sbi);
-void register_inmem_page(struct inode *inode, struct page *page);
-void drop_inmem_pages_all(struct f2fs_sb_info *sbi);
-void drop_inmem_pages(struct inode *inode);
-void drop_inmem_page(struct inode *inode, struct page *page);
-int commit_inmem_pages(struct inode *inode);
+bool f2fs_need_SSR(struct f2fs_sb_info *sbi);
+void f2fs_register_inmem_page(struct inode *inode, struct page *page);
+void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure);
+void f2fs_drop_inmem_pages(struct inode *inode);
+void f2fs_drop_inmem_page(struct inode *inode, struct page *page);
+int f2fs_commit_inmem_pages(struct inode *inode);
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need);
void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi);
int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino);
-int create_flush_cmd_control(struct f2fs_sb_info *sbi);
+int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi);
int f2fs_flush_device_cache(struct f2fs_sb_info *sbi);
-void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
-void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
-bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
-void drop_discard_cmd(struct f2fs_sb_info *sbi);
-void stop_discard_thread(struct f2fs_sb_info *sbi);
+void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
+void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
+bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
+void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi);
+void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
-void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc);
-void release_discard_addrs(struct f2fs_sb_info *sbi);
-int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
-void allocate_new_segments(struct f2fs_sb_info *sbi);
+void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
+ struct cp_control *cpc);
+void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
+int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
+void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
-bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc);
-struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno);
-void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr);
-void write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
+bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
+ struct cp_control *cpc);
+struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno);
+void f2fs_update_meta_page(struct f2fs_sb_info *sbi, void *src,
+ block_t blk_addr);
+void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
enum iostat_type io_type);
-void write_node_page(unsigned int nid, struct f2fs_io_info *fio);
-void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio);
-int rewrite_data_page(struct f2fs_io_info *fio);
-void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio);
+void f2fs_outplace_write_data(struct dnode_of_data *dn,
+ struct f2fs_io_info *fio);
+int f2fs_inplace_write_data(struct f2fs_io_info *fio);
+void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
bool recover_curseg, bool recover_newaddr);
void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
block_t old_addr, block_t new_addr,
unsigned char version, bool recover_curseg,
bool recover_newaddr);
-void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio, bool add_list);
void f2fs_wait_on_page_writeback(struct page *page,
enum page_type type, bool ordered);
void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr);
-void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
-void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
-int lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
+void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
+void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
+int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
unsigned int val, int alloc);
-void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
-int build_segment_manager(struct f2fs_sb_info *sbi);
-void destroy_segment_manager(struct f2fs_sb_info *sbi);
-int __init create_segment_manager_caches(void);
-void destroy_segment_manager_caches(void);
-int rw_hint_to_seg_type(enum rw_hint hint);
-enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, enum page_type type,
- enum temp_type temp);
+void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+int f2fs_build_segment_manager(struct f2fs_sb_info *sbi);
+void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi);
+int __init f2fs_create_segment_manager_caches(void);
+void f2fs_destroy_segment_manager_caches(void);
+int f2fs_rw_hint_to_seg_type(enum rw_hint hint);
+enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
+ enum page_type type, enum temp_type temp);
/*
* checkpoint.c
*/
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
-struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
-struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
-struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
-bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type);
-int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
+struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
+struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
+struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index);
+struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
+int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync);
-void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index);
-long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
+void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index);
+long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
long nr_to_write, enum iostat_type io_type);
-void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
-void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
-void release_ino_entry(struct f2fs_sb_info *sbi, bool all);
-bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode);
-void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
+void f2fs_add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
+void f2fs_remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
+void f2fs_release_ino_entry(struct f2fs_sb_info *sbi, bool all);
+bool f2fs_exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode);
+void f2fs_set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
unsigned int devidx, int type);
-bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
+bool f2fs_is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
unsigned int devidx, int type);
int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi);
-int acquire_orphan_inode(struct f2fs_sb_info *sbi);
-void release_orphan_inode(struct f2fs_sb_info *sbi);
-void add_orphan_inode(struct inode *inode);
-void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino);
-int recover_orphan_inodes(struct f2fs_sb_info *sbi);
-int get_valid_checkpoint(struct f2fs_sb_info *sbi);
-void update_dirty_page(struct inode *inode, struct page *page);
-void remove_dirty_inode(struct inode *inode);
-int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
-int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
-void init_ino_entry_info(struct f2fs_sb_info *sbi);
-int __init create_checkpoint_caches(void);
-void destroy_checkpoint_caches(void);
+int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi);
+void f2fs_release_orphan_inode(struct f2fs_sb_info *sbi);
+void f2fs_add_orphan_inode(struct inode *inode);
+void f2fs_remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino);
+int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi);
+int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi);
+void f2fs_update_dirty_page(struct inode *inode, struct page *page);
+void f2fs_remove_dirty_inode(struct inode *inode);
+int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
+void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi);
+int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi);
+int __init f2fs_create_checkpoint_caches(void);
+void f2fs_destroy_checkpoint_caches(void);
/*
* data.c
@@ -2944,34 +3088,31 @@ void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
enum page_type type);
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi);
int f2fs_submit_page_bio(struct f2fs_io_info *fio);
-int f2fs_submit_page_write(struct f2fs_io_info *fio);
+void f2fs_submit_page_write(struct f2fs_io_info *fio);
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
block_t blk_addr, struct bio *bio);
int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr);
-void set_data_blkaddr(struct dnode_of_data *dn);
+void f2fs_set_data_blkaddr(struct dnode_of_data *dn);
void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr);
-int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
-int reserve_new_block(struct dnode_of_data *dn);
+int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
+int f2fs_reserve_new_block(struct dnode_of_data *dn);
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
-struct page *get_read_data_page(struct inode *inode, pgoff_t index,
+struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
int op_flags, bool for_write);
-struct page *find_data_page(struct inode *inode, pgoff_t index);
-struct page *get_lock_data_page(struct inode *inode, pgoff_t index,
+struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index);
+struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
bool for_write);
-struct page *get_new_data_page(struct inode *inode,
+struct page *f2fs_get_new_data_page(struct inode *inode,
struct page *ipage, pgoff_t index, bool new_i_size);
-int do_write_data_page(struct f2fs_io_info *fio);
+int f2fs_do_write_data_page(struct f2fs_io_info *fio);
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
int create, int flag);
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len);
-bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio);
-bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio);
-int __f2fs_write_data_pages(struct address_space *mapping,
- struct writeback_control *wbc,
- enum iostat_type io_type);
+bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio);
+bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio);
void f2fs_invalidate_page(struct page *page, unsigned int offset,
unsigned int length);
int f2fs_release_page(struct page *page, gfp_t wait);
@@ -2980,22 +3121,23 @@ int f2fs_migrate_page(struct address_space *mapping, struct page *newpage,
struct page *page, enum migrate_mode mode);
#endif
bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
+void f2fs_clear_radix_tree_dirty_tag(struct page *page);
/*
* gc.c
*/
-int start_gc_thread(struct f2fs_sb_info *sbi);
-void stop_gc_thread(struct f2fs_sb_info *sbi);
-block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
+int f2fs_start_gc_thread(struct f2fs_sb_info *sbi);
+void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
+block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
unsigned int segno);
-void build_gc_manager(struct f2fs_sb_info *sbi);
+void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
/*
* recovery.c
*/
-int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only);
-bool space_for_roll_forward(struct f2fs_sb_info *sbi);
+int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only);
+bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi);
/*
* debug.c
@@ -3033,6 +3175,7 @@ struct f2fs_stat_info {
int bg_node_segs, bg_data_segs;
int tot_blks, data_blks, node_blks;
int bg_data_blks, bg_node_blks;
+ unsigned long long skipped_atomic_files[2];
int curseg[NR_CURSEG_TYPE];
int cursec[NR_CURSEG_TYPE];
int curzone[NR_CURSEG_TYPE];
@@ -3199,29 +3342,31 @@ extern const struct inode_operations f2fs_dir_inode_operations;
extern const struct inode_operations f2fs_symlink_inode_operations;
extern const struct inode_operations f2fs_encrypted_symlink_inode_operations;
extern const struct inode_operations f2fs_special_inode_operations;
-extern struct kmem_cache *inode_entry_slab;
+extern struct kmem_cache *f2fs_inode_entry_slab;
/*
* inline.c
*/
bool f2fs_may_inline_data(struct inode *inode);
bool f2fs_may_inline_dentry(struct inode *inode);
-void read_inline_data(struct page *page, struct page *ipage);
-void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from);
+void f2fs_do_read_inline_data(struct page *page, struct page *ipage);
+void f2fs_truncate_inline_inode(struct inode *inode,
+ struct page *ipage, u64 from);
int f2fs_read_inline_data(struct inode *inode, struct page *page);
int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page);
int f2fs_convert_inline_inode(struct inode *inode);
int f2fs_write_inline_data(struct inode *inode, struct page *page);
-bool recover_inline_data(struct inode *inode, struct page *npage);
-struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
+bool f2fs_recover_inline_data(struct inode *inode, struct page *npage);
+struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
struct fscrypt_name *fname, struct page **res_page);
-int make_empty_inline_dir(struct inode *inode, struct inode *parent,
+int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
struct page *ipage);
int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
const struct qstr *orig_name,
struct inode *inode, nid_t ino, umode_t mode);
-void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
- struct inode *dir, struct inode *inode);
+void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry,
+ struct page *page, struct inode *dir,
+ struct inode *inode);
bool f2fs_empty_inline_dir(struct inode *dir);
int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
struct fscrypt_str *fstr);
@@ -3242,17 +3387,17 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
/*
* extent_cache.c
*/
-struct rb_entry *__lookup_rb_tree(struct rb_root *root,
+struct rb_entry *f2fs_lookup_rb_tree(struct rb_root *root,
struct rb_entry *cached_re, unsigned int ofs);
-struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
+struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
struct rb_root *root, struct rb_node **parent,
unsigned int ofs);
-struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root,
+struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root,
struct rb_entry *cached_re, unsigned int ofs,
struct rb_entry **prev_entry, struct rb_entry **next_entry,
struct rb_node ***insert_p, struct rb_node **insert_parent,
bool force);
-bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi,
+bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
struct rb_root *root);
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
@@ -3264,9 +3409,9 @@ bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
void f2fs_update_extent_cache(struct dnode_of_data *dn);
void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
pgoff_t fofs, block_t blkaddr, unsigned int len);
-void init_extent_cache_info(struct f2fs_sb_info *sbi);
-int __init create_extent_cache(void);
-void destroy_extent_cache(void);
+void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi);
+int __init f2fs_create_extent_cache(void);
+void f2fs_destroy_extent_cache(void);
/*
* sysfs.c
@@ -3365,7 +3510,7 @@ static inline bool f2fs_may_encrypt(struct inode *inode)
return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode));
#else
- return 0;
+ return false;
#endif
}
@@ -3376,4 +3521,11 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, int rw)
F2FS_I_SB(inode)->s_ndevs);
}
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
+ unsigned int type);
+#else
+#define f2fs_build_fault_attr(sbi, rate, type) do { } while (0)
+#endif
+
#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 40d03d58b390..8e381b6385e3 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -98,7 +98,8 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
/* page is wholly or partially inside EOF */
if (((loff_t)(page->index + 1) << PAGE_SHIFT) >
i_size_read(inode)) {
- unsigned offset;
+ loff_t offset;
+
offset = i_size_read(inode) & ~PAGE_MASK;
zero_user_segment(page, offset, PAGE_SIZE);
}
@@ -160,17 +161,18 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
cp_reason = CP_SB_NEED_CP;
else if (file_wrong_pino(inode))
cp_reason = CP_WRONG_PINO;
- else if (!space_for_roll_forward(sbi))
+ else if (!f2fs_space_for_roll_forward(sbi))
cp_reason = CP_NO_SPC_ROLL;
- else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
+ else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
cp_reason = CP_NODE_NEED_CP;
else if (test_opt(sbi, FASTBOOT))
cp_reason = CP_FASTBOOT_MODE;
else if (F2FS_OPTION(sbi).active_logs == 2)
cp_reason = CP_SPEC_LOG_NUM;
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT &&
- need_dentry_mark(sbi, inode->i_ino) &&
- exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO))
+ f2fs_need_dentry_mark(sbi, inode->i_ino) &&
+ f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
+ TRANS_DIR_INO))
cp_reason = CP_RECOVER_DIR;
return cp_reason;
@@ -181,7 +183,7 @@ static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
bool ret = false;
/* But we need to avoid that there are some inode updates */
- if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino))
+ if ((i && PageDirty(i)) || f2fs_need_inode_block_update(sbi, ino))
ret = true;
f2fs_put_page(i, 0);
return ret;
@@ -214,6 +216,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
.nr_to_write = LONG_MAX,
.for_reclaim = 0,
};
+ unsigned int seq_id = 0;
if (unlikely(f2fs_readonly(inode->i_sb)))
return 0;
@@ -241,14 +244,14 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
* if there is no written data, don't waste time to write recovery info.
*/
if (!is_inode_flag_set(inode, FI_APPEND_WRITE) &&
- !exist_written_data(sbi, ino, APPEND_INO)) {
+ !f2fs_exist_written_data(sbi, ino, APPEND_INO)) {
/* it may call write_inode just prior to fsync */
if (need_inode_page_update(sbi, ino))
goto go_write;
if (is_inode_flag_set(inode, FI_UPDATE_WRITE) ||
- exist_written_data(sbi, ino, UPDATE_INO))
+ f2fs_exist_written_data(sbi, ino, UPDATE_INO))
goto flush_out;
goto out;
}
@@ -275,7 +278,9 @@ go_write:
goto out;
}
sync_nodes:
- ret = fsync_node_pages(sbi, inode, &wbc, atomic);
+ atomic_inc(&sbi->wb_sync_req[NODE]);
+ ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
+ atomic_dec(&sbi->wb_sync_req[NODE]);
if (ret)
goto out;
@@ -285,7 +290,7 @@ sync_nodes:
goto out;
}
- if (need_inode_block_update(sbi, ino)) {
+ if (f2fs_need_inode_block_update(sbi, ino)) {
f2fs_mark_inode_dirty_sync(inode, true);
f2fs_write_inode(inode, NULL);
goto sync_nodes;
@@ -300,21 +305,21 @@ sync_nodes:
* given fsync mark.
*/
if (!atomic) {
- ret = wait_on_node_pages_writeback(sbi, ino);
+ ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id);
if (ret)
goto out;
}
/* once recovery info is written, don't need to tack this */
- remove_ino_entry(sbi, ino, APPEND_INO);
+ f2fs_remove_ino_entry(sbi, ino, APPEND_INO);
clear_inode_flag(inode, FI_APPEND_WRITE);
flush_out:
if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER)
ret = f2fs_issue_flush(sbi, inode->i_ino);
if (!ret) {
- remove_ino_entry(sbi, ino, UPDATE_INO);
+ f2fs_remove_ino_entry(sbi, ino, UPDATE_INO);
clear_inode_flag(inode, FI_UPDATE_WRITE);
- remove_ino_entry(sbi, ino, FLUSH_INO);
+ f2fs_remove_ino_entry(sbi, ino, FLUSH_INO);
}
f2fs_update_time(sbi, REQ_TIME);
out:
@@ -333,28 +338,29 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
static pgoff_t __get_first_dirty_index(struct address_space *mapping,
pgoff_t pgofs, int whence)
{
- struct pagevec pvec;
+ struct page *page;
int nr_pages;
if (whence != SEEK_DATA)
return 0;
/* find first dirty page index */
- pagevec_init(&pvec, 0);
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs,
- PAGECACHE_TAG_DIRTY, 1);
- pgofs = nr_pages ? pvec.pages[0]->index : ULONG_MAX;
- pagevec_release(&pvec);
+ nr_pages = find_get_pages_tag(mapping, &pgofs, PAGECACHE_TAG_DIRTY,
+ 1, &page);
+ if (!nr_pages)
+ return ULONG_MAX;
+ pgofs = page->index;
+ put_page(page);
return pgofs;
}
-static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
- int whence)
+static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
+ pgoff_t dirty, pgoff_t pgofs, int whence)
{
switch (whence) {
case SEEK_DATA:
if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
- (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR))
+ is_valid_data_blkaddr(sbi, blkaddr))
return true;
break;
case SEEK_HOLE:
@@ -394,13 +400,13 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE);
if (err && err != -ENOENT) {
goto fail;
} else if (err == -ENOENT) {
/* direct node does not exists */
if (whence == SEEK_DATA) {
- pgofs = get_next_page_offset(&dn, pgofs);
+ pgofs = f2fs_get_next_page_offset(&dn, pgofs);
continue;
} else {
goto found;
@@ -414,10 +420,19 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
dn.ofs_in_node++, pgofs++,
data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
block_t blkaddr;
+
blkaddr = datablock_addr(dn.inode,
dn.node_page, dn.ofs_in_node);
- if (__found_offset(blkaddr, dirty, pgofs, whence)) {
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
+ blkaddr, DATA_GENERIC)) {
+ f2fs_put_dnode(&dn);
+ goto fail;
+ }
+
+ if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
+ pgofs, whence)) {
f2fs_put_dnode(&dn);
goto found;
}
@@ -488,7 +503,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
return dquot_file_open(inode, filp);
}
-void truncate_data_blocks_range(struct dnode_of_data *dn, int count)
+void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct f2fs_node *raw_node;
@@ -504,12 +519,18 @@ void truncate_data_blocks_range(struct dnode_of_data *dn, int count)
for (; count > 0; count--, addr++, dn->ofs_in_node++) {
block_t blkaddr = le32_to_cpu(*addr);
+
if (blkaddr == NULL_ADDR)
continue;
dn->data_blkaddr = NULL_ADDR;
- set_data_blkaddr(dn);
- invalidate_blocks(sbi, blkaddr);
+ f2fs_set_data_blkaddr(dn);
+
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
+ continue;
+
+ f2fs_invalidate_blocks(sbi, blkaddr);
if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN);
nr_free++;
@@ -521,7 +542,7 @@ void truncate_data_blocks_range(struct dnode_of_data *dn, int count)
* once we invalidate valid blkaddr in range [ofs, ofs + count],
* we will invalidate all blkaddr in the whole range.
*/
- fofs = start_bidx_of_node(ofs_of_node(dn->node_page),
+ fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page),
dn->inode) + ofs;
f2fs_update_extent_cache_range(dn, fofs, 0, len);
dec_valid_block_count(sbi, dn->inode, nr_free);
@@ -533,15 +554,15 @@ void truncate_data_blocks_range(struct dnode_of_data *dn, int count)
dn->ofs_in_node, nr_free);
}
-void truncate_data_blocks(struct dnode_of_data *dn)
+void f2fs_truncate_data_blocks(struct dnode_of_data *dn)
{
- truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
+ f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
}
static int truncate_partial_data_page(struct inode *inode, u64 from,
bool cache_only)
{
- unsigned offset = from & (PAGE_SIZE - 1);
+ loff_t offset = from & (PAGE_SIZE - 1);
pgoff_t index = from >> PAGE_SHIFT;
struct address_space *mapping = inode->i_mapping;
struct page *page;
@@ -557,7 +578,7 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
return 0;
}
- page = get_lock_data_page(inode, index, true);
+ page = f2fs_get_lock_data_page(inode, index, true);
if (IS_ERR(page))
return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page);
truncate_out:
@@ -572,7 +593,7 @@ truncate_out:
return 0;
}
-int truncate_blocks(struct inode *inode, u64 from, bool lock)
+int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct dnode_of_data dn;
@@ -591,21 +612,21 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
if (lock)
f2fs_lock_op(sbi);
- ipage = get_node_page(sbi, inode->i_ino);
+ ipage = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(ipage)) {
err = PTR_ERR(ipage);
goto out;
}
if (f2fs_has_inline_data(inode)) {
- truncate_inline_inode(inode, ipage, from);
+ f2fs_truncate_inline_inode(inode, ipage, from);
f2fs_put_page(ipage, 1);
truncate_page = true;
goto out;
}
set_new_dnode(&dn, inode, ipage, NULL, 0);
- err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
+ err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
if (err) {
if (err == -ENOENT)
goto free_next;
@@ -618,13 +639,13 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
f2fs_bug_on(sbi, count < 0);
if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
- truncate_data_blocks_range(&dn, count);
+ f2fs_truncate_data_blocks_range(&dn, count);
free_from += count;
}
f2fs_put_dnode(&dn);
free_next:
- err = truncate_inode_blocks(inode, free_from);
+ err = f2fs_truncate_inode_blocks(inode, free_from);
out:
if (lock)
f2fs_unlock_op(sbi);
@@ -650,12 +671,11 @@ int f2fs_truncate(struct inode *inode)
trace_f2fs_truncate(inode);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) {
f2fs_show_injection_info(FAULT_TRUNCATE);
return -EIO;
}
-#endif
+
/* we should check inline_data size */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
@@ -663,7 +683,7 @@ int f2fs_truncate(struct inode *inode)
return err;
}
- err = truncate_blocks(inode, i_size_read(inode), true);
+ err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
if (err)
return err;
@@ -689,16 +709,16 @@ int f2fs_getattr(struct vfsmount *mnt,
stat->btime.tv_nsec = fi->i_crtime.tv_nsec;
}
- flags = fi->i_flags & (FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL);
- if (flags & FS_APPEND_FL)
+ flags = fi->i_flags & F2FS_FL_USER_VISIBLE;
+ if (flags & F2FS_APPEND_FL)
stat->attributes |= STATX_ATTR_APPEND;
- if (flags & FS_COMPR_FL)
+ if (flags & F2FS_COMPR_FL)
stat->attributes |= STATX_ATTR_COMPRESSED;
if (f2fs_encrypted_inode(inode))
stat->attributes |= STATX_ATTR_ENCRYPTED;
- if (flags & FS_IMMUTABLE_FL)
+ if (flags & F2FS_IMMUTABLE_FL)
stat->attributes |= STATX_ATTR_IMMUTABLE;
- if (flags & FS_NODUMP_FL)
+ if (flags & F2FS_NODUMP_FL)
stat->attributes |= STATX_ATTR_NODUMP;
stat->attributes_mask |= (STATX_ATTR_APPEND |
@@ -779,22 +799,26 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
}
if (attr->ia_valid & ATTR_SIZE) {
- if (attr->ia_size <= i_size_read(inode)) {
- down_write(&F2FS_I(inode)->i_mmap_sem);
- truncate_setsize(inode, attr->ia_size);
+ bool to_smaller = (attr->ia_size <= i_size_read(inode));
+
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+
+ truncate_setsize(inode, attr->ia_size);
+
+ if (to_smaller)
err = f2fs_truncate(inode);
- up_write(&F2FS_I(inode)->i_mmap_sem);
- if (err)
- return err;
- } else {
- /*
- * do not trim all blocks after i_size if target size is
- * larger than i_size.
- */
- down_write(&F2FS_I(inode)->i_mmap_sem);
- truncate_setsize(inode, attr->ia_size);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ /*
+ * do not trim all blocks after i_size if target size is
+ * larger than i_size.
+ */
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ if (err)
+ return err;
+
+ if (!to_smaller) {
/* should convert inline inode here */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
@@ -814,7 +838,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
__setattr_copy(inode, attr);
if (attr->ia_valid & ATTR_MODE) {
- err = posix_acl_chmod(inode, get_inode_mode(inode));
+ err = posix_acl_chmod(inode, f2fs_get_inode_mode(inode));
if (err || is_inode_flag_set(inode, FI_ACL_MODE)) {
inode->i_mode = F2FS_I(inode)->i_acl_mode;
clear_inode_flag(inode, FI_ACL_MODE);
@@ -856,7 +880,7 @@ static int fill_zero(struct inode *inode, pgoff_t index,
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
- page = get_new_data_page(inode, NULL, index, false);
+ page = f2fs_get_new_data_page(inode, NULL, index, false);
f2fs_unlock_op(sbi);
if (IS_ERR(page))
@@ -869,7 +893,7 @@ static int fill_zero(struct inode *inode, pgoff_t index,
return 0;
}
-int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
+int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
{
int err;
@@ -878,10 +902,11 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
pgoff_t end_offset, count;
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
if (err) {
if (err == -ENOENT) {
- pg_start = get_next_page_offset(&dn, pg_start);
+ pg_start = f2fs_get_next_page_offset(&dn,
+ pg_start);
continue;
}
return err;
@@ -892,7 +917,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);
- truncate_data_blocks_range(&dn, count);
+ f2fs_truncate_data_blocks_range(&dn, count);
f2fs_put_dnode(&dn);
pg_start += count;
@@ -943,14 +968,19 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
blk_start = (loff_t)pg_start << PAGE_SHIFT;
blk_end = (loff_t)pg_end << PAGE_SHIFT;
+
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
+
truncate_inode_pages_range(mapping, blk_start,
blk_end - 1);
f2fs_lock_op(sbi);
- ret = truncate_hole(inode, pg_start, pg_end);
+ ret = f2fs_truncate_hole(inode, pg_start, pg_end);
f2fs_unlock_op(sbi);
+
up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -966,7 +996,7 @@ static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr,
next_dnode:
set_new_dnode(&dn, inode, NULL, NULL, 0);
- ret = get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
+ ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
if (ret && ret != -ENOENT) {
return ret;
} else if (ret == -ENOENT) {
@@ -983,7 +1013,7 @@ next_dnode:
for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
*blkaddr = datablock_addr(dn.inode,
dn.node_page, dn.ofs_in_node);
- if (!is_checkpointed_data(sbi, *blkaddr)) {
+ if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) {
if (test_opt(sbi, LFS)) {
f2fs_put_dnode(&dn);
@@ -1016,10 +1046,10 @@ static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr,
continue;
set_new_dnode(&dn, inode, NULL, NULL, 0);
- ret = get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA);
+ ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA);
if (ret) {
dec_valid_block_count(sbi, inode, 1);
- invalidate_blocks(sbi, *blkaddr);
+ f2fs_invalidate_blocks(sbi, *blkaddr);
} else {
f2fs_update_data_blkaddr(&dn, *blkaddr);
}
@@ -1049,18 +1079,23 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
pgoff_t ilen;
set_new_dnode(&dn, dst_inode, NULL, NULL, 0);
- ret = get_dnode_of_data(&dn, dst + i, ALLOC_NODE);
+ ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE);
if (ret)
return ret;
- get_node_info(sbi, dn.nid, &ni);
+ ret = f2fs_get_node_info(sbi, dn.nid, &ni);
+ if (ret) {
+ f2fs_put_dnode(&dn);
+ return ret;
+ }
+
ilen = min((pgoff_t)
ADDRS_PER_PAGE(dn.node_page, dst_inode) -
dn.ofs_in_node, len - i);
do {
dn.data_blkaddr = datablock_addr(dn.inode,
dn.node_page, dn.ofs_in_node);
- truncate_data_blocks_range(&dn, 1);
+ f2fs_truncate_data_blocks_range(&dn, 1);
if (do_replace[i]) {
f2fs_i_blocks_write(src_inode,
@@ -1083,10 +1118,11 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
} else {
struct page *psrc, *pdst;
- psrc = get_lock_data_page(src_inode, src + i, true);
+ psrc = f2fs_get_lock_data_page(src_inode,
+ src + i, true);
if (IS_ERR(psrc))
return PTR_ERR(psrc);
- pdst = get_new_data_page(dst_inode, NULL, dst + i,
+ pdst = f2fs_get_new_data_page(dst_inode, NULL, dst + i,
true);
if (IS_ERR(pdst)) {
f2fs_put_page(psrc, 1);
@@ -1097,7 +1133,8 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
f2fs_put_page(pdst, 1);
f2fs_put_page(psrc, 1);
- ret = truncate_hole(src_inode, src + i, src + i + 1);
+ ret = f2fs_truncate_hole(src_inode,
+ src + i, src + i + 1);
if (ret)
return ret;
i++;
@@ -1119,12 +1156,14 @@ static int __exchange_data_block(struct inode *src_inode,
olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len);
src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode),
- sizeof(block_t) * olen, GFP_KERNEL);
+ array_size(olen, sizeof(block_t)),
+ GFP_KERNEL);
if (!src_blkaddr)
return -ENOMEM;
do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode),
- sizeof(int) * olen, GFP_KERNEL);
+ array_size(olen, sizeof(int)),
+ GFP_KERNEL);
if (!do_replace) {
kvfree(src_blkaddr);
return -ENOMEM;
@@ -1150,31 +1189,39 @@ static int __exchange_data_block(struct inode *src_inode,
return 0;
roll_back:
- __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, len);
+ __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen);
kvfree(src_blkaddr);
kvfree(do_replace);
return ret;
}
-static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
+static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+ pgoff_t start = offset >> PAGE_SHIFT;
+ pgoff_t end = (offset + len) >> PAGE_SHIFT;
int ret;
f2fs_balance_fs(sbi, true);
- f2fs_lock_op(sbi);
- f2fs_drop_extent_tree(inode);
+ /* avoid gc operation during block exchange */
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+ f2fs_lock_op(sbi);
+ f2fs_drop_extent_tree(inode);
+ truncate_pagecache(inode, offset);
ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
f2fs_unlock_op(sbi);
+
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
{
- pgoff_t pg_start, pg_end;
loff_t new_size;
int ret;
@@ -1189,37 +1236,27 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
if (ret)
return ret;
- pg_start = offset >> PAGE_SHIFT;
- pg_end = (offset + len) >> PAGE_SHIFT;
-
- /* avoid gc operation during block exchange */
- down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
-
- down_write(&F2FS_I(inode)->i_mmap_sem);
/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret)
- goto out_unlock;
-
- truncate_pagecache(inode, offset);
+ return ret;
- ret = f2fs_do_collapse(inode, pg_start, pg_end);
+ ret = f2fs_do_collapse(inode, offset, len);
if (ret)
- goto out_unlock;
+ return ret;
/* write out all moved pages, if possible */
+ down_write(&F2FS_I(inode)->i_mmap_sem);
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
new_size = i_size_read(inode) - len;
truncate_pagecache(inode, new_size);
- ret = truncate_blocks(inode, new_size, true);
+ ret = f2fs_truncate_blocks(inode, new_size, true);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
if (!ret)
f2fs_i_size_write(inode, new_size);
-out_unlock:
- up_write(&F2FS_I(inode)->i_mmap_sem);
- up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
return ret;
}
@@ -1239,7 +1276,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
}
dn->ofs_in_node = ofs_in_node;
- ret = reserve_new_blocks(dn, count);
+ ret = f2fs_reserve_new_blocks(dn, count);
if (ret)
return ret;
@@ -1248,7 +1285,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
dn->data_blkaddr = datablock_addr(dn->inode,
dn->node_page, dn->ofs_in_node);
/*
- * reserve_new_blocks will not guarantee entire block
+ * f2fs_reserve_new_blocks will not guarantee entire block
* allocation.
*/
if (dn->data_blkaddr == NULL_ADDR) {
@@ -1256,9 +1293,9 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
break;
}
if (dn->data_blkaddr != NEW_ADDR) {
- invalidate_blocks(sbi, dn->data_blkaddr);
+ f2fs_invalidate_blocks(sbi, dn->data_blkaddr);
dn->data_blkaddr = NEW_ADDR;
- set_data_blkaddr(dn);
+ f2fs_set_data_blkaddr(dn);
}
}
@@ -1285,12 +1322,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
- down_write(&F2FS_I(inode)->i_mmap_sem);
ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
if (ret)
- goto out_sem;
-
- truncate_pagecache_range(inode, offset, offset + len - 1);
+ return ret;
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1302,7 +1336,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = fill_zero(inode, pg_start, off_start,
off_end - off_start);
if (ret)
- goto out_sem;
+ return ret;
new_size = max_t(loff_t, new_size, offset + len);
} else {
@@ -1310,7 +1344,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = fill_zero(inode, pg_start++, off_start,
PAGE_SIZE - off_start);
if (ret)
- goto out_sem;
+ return ret;
new_size = max_t(loff_t, new_size,
(loff_t)pg_start << PAGE_SHIFT);
@@ -1321,12 +1355,21 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
unsigned int end_offset;
pgoff_t end;
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+
+ truncate_pagecache_range(inode,
+ (loff_t)index << PAGE_SHIFT,
+ ((loff_t)pg_end << PAGE_SHIFT) - 1);
+
f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
- ret = get_dnode_of_data(&dn, index, ALLOC_NODE);
+ ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
if (ret) {
f2fs_unlock_op(sbi);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
@@ -1335,7 +1378,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = f2fs_do_zero_range(&dn, index, end);
f2fs_put_dnode(&dn);
+
f2fs_unlock_op(sbi);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
f2fs_balance_fs(sbi, dn.node_changed);
@@ -1363,9 +1409,6 @@ out:
else
f2fs_i_size_write(inode, new_size);
}
-out_sem:
- up_write(&F2FS_I(inode)->i_mmap_sem);
-
return ret;
}
@@ -1394,26 +1437,27 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_balance_fs(sbi, true);
- /* avoid gc operation during block exchange */
- down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
-
down_write(&F2FS_I(inode)->i_mmap_sem);
- ret = truncate_blocks(inode, i_size_read(inode), true);
+ ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
if (ret)
- goto out;
+ return ret;
/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret)
- goto out;
-
- truncate_pagecache(inode, offset);
+ return ret;
pg_start = offset >> PAGE_SHIFT;
pg_end = (offset + len) >> PAGE_SHIFT;
delta = pg_end - pg_start;
idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+ /* avoid gc operation during block exchange */
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+ truncate_pagecache(inode, offset);
+
while (!ret && idx > pg_start) {
nr = idx - pg_start;
if (nr > delta)
@@ -1427,16 +1471,17 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
idx + delta, nr, false);
f2fs_unlock_op(sbi);
}
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
/* write out all moved pages, if possible */
+ down_write(&F2FS_I(inode)->i_mmap_sem);
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
if (!ret)
f2fs_i_size_write(inode, new_size);
-out:
- up_write(&F2FS_I(inode)->i_mmap_sem);
- up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
return ret;
}
@@ -1479,7 +1524,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
last_off = map.m_lblk + map.m_len - 1;
/* update new size to the failed position */
- new_size = (last_off == pg_end) ? offset + len:
+ new_size = (last_off == pg_end) ? offset + len :
(loff_t)(last_off + 1) << PAGE_SHIFT;
} else {
new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end;
@@ -1559,13 +1604,13 @@ static int f2fs_release_file(struct inode *inode, struct file *filp)
/* some remained atomic pages should discarded */
if (f2fs_is_atomic_file(inode))
- drop_inmem_pages(inode);
+ f2fs_drop_inmem_pages(inode);
if (f2fs_is_volatile_file(inode)) {
- clear_inode_flag(inode, FI_VOLATILE_FILE);
- stat_dec_volatile_write(inode);
set_inode_flag(inode, FI_DROP_CACHE);
filemap_fdatawrite(inode->i_mapping);
clear_inode_flag(inode, FI_DROP_CACHE);
+ clear_inode_flag(inode, FI_VOLATILE_FILE);
+ stat_dec_volatile_write(inode);
}
return 0;
}
@@ -1582,7 +1627,7 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id)
*/
if (f2fs_is_atomic_file(inode) &&
F2FS_I(inode)->inmem_task == current)
- drop_inmem_pages(inode);
+ f2fs_drop_inmem_pages(inode);
return 0;
}
@@ -1590,7 +1635,15 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_inode_info *fi = F2FS_I(inode);
- unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE;
+ unsigned int flags = fi->i_flags;
+
+ if (f2fs_encrypted_inode(inode))
+ flags |= F2FS_ENCRYPT_FL;
+ if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
+ flags |= F2FS_INLINE_DATA_FL;
+
+ flags &= F2FS_FL_USER_VISIBLE;
+
return put_user(flags, (int __user *)arg);
}
@@ -1624,15 +1677,15 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
oldflags = fi->i_flags;
- if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
+ if ((flags ^ oldflags) & (F2FS_APPEND_FL | F2FS_IMMUTABLE_FL)) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
ret = -EPERM;
goto unlock_out;
}
}
- flags = flags & FS_FL_USER_MODIFIABLE;
- flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
+ flags = flags & (F2FS_FL_USER_MODIFIABLE);
+ flags |= oldflags & ~(F2FS_FL_USER_MODIFIABLE);
fi->i_flags = flags;
inode->i_ctime = current_time(inode);
@@ -1668,31 +1721,35 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
inode_lock(inode);
- if (f2fs_is_atomic_file(inode))
+ if (f2fs_is_atomic_file(inode)) {
+ if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST))
+ ret = -EINVAL;
goto out;
+ }
ret = f2fs_convert_inline_inode(inode);
if (ret)
goto out;
- set_inode_flag(inode, FI_ATOMIC_FILE);
- set_inode_flag(inode, FI_HOT_DATA);
- f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (!get_dirty_pages(inode))
- goto inc_stat;
+ goto skip_flush;
f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
"Unexpected flush for atomic writes: ino=%lu, npages=%u",
inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
if (ret) {
- clear_inode_flag(inode, FI_ATOMIC_FILE);
- clear_inode_flag(inode, FI_HOT_DATA);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
+skip_flush:
+ set_inode_flag(inode, FI_ATOMIC_FILE);
+ clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
-inc_stat:
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
F2FS_I(inode)->inmem_task = current;
stat_inc_atomic_write(inode);
stat_update_max_atomic_write(inode);
@@ -1714,29 +1771,34 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
if (ret)
return ret;
- inode_lock(inode);
+ f2fs_balance_fs(F2FS_I_SB(inode), true);
- down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+ inode_lock(inode);
- if (f2fs_is_volatile_file(inode))
+ if (f2fs_is_volatile_file(inode)) {
+ ret = -EINVAL;
goto err_out;
+ }
if (f2fs_is_atomic_file(inode)) {
- ret = commit_inmem_pages(inode);
+ ret = f2fs_commit_inmem_pages(inode);
if (ret)
goto err_out;
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
if (!ret) {
clear_inode_flag(inode, FI_ATOMIC_FILE);
- clear_inode_flag(inode, FI_HOT_DATA);
+ F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
stat_dec_atomic_write(inode);
}
} else {
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
}
err_out:
- up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+ if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
+ clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
+ ret = -EINVAL;
+ }
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
@@ -1821,13 +1883,15 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
inode_lock(inode);
if (f2fs_is_atomic_file(inode))
- drop_inmem_pages(inode);
+ f2fs_drop_inmem_pages(inode);
if (f2fs_is_volatile_file(inode)) {
clear_inode_flag(inode, FI_VOLATILE_FILE);
stat_dec_volatile_write(inode);
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
}
+ clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
+
inode_unlock(inode);
mnt_drop_write_file(filp);
@@ -1841,7 +1905,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct super_block *sb = sbi->sb;
__u32 in;
- int ret;
+ int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -1849,9 +1913,11 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
if (get_user(in, (__u32 __user *)arg))
return -EFAULT;
- ret = mnt_want_write_file(filp);
- if (ret)
- return ret;
+ if (in != F2FS_GOING_DOWN_FULLSYNC) {
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+ }
switch (in) {
case F2FS_GOING_DOWN_FULLSYNC:
@@ -1862,6 +1928,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
}
if (sb) {
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
thaw_bdev(sb->s_bdev, sb);
}
break;
@@ -1871,28 +1938,32 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
if (ret)
goto out;
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_NOSYNC:
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_METAFLUSH:
- sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
+ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
default:
ret = -EINVAL;
goto out;
}
- stop_gc_thread(sbi);
- stop_discard_thread(sbi);
+ f2fs_stop_gc_thread(sbi);
+ f2fs_stop_discard_thread(sbi);
- drop_discard_cmd(sbi);
+ f2fs_drop_discard_cmd(sbi);
clear_opt(sbi, DISCARD);
f2fs_update_time(sbi, REQ_TIME);
out:
- mnt_drop_write_file(filp);
+ if (in != F2FS_GOING_DOWN_FULLSYNC)
+ mnt_drop_write_file(filp);
return ret;
}
@@ -2051,15 +2122,15 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
if (f2fs_readonly(sbi->sb))
return -EROFS;
+ end = range.start + range.len;
+ if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) {
+ return -EINVAL;
+ }
+
ret = mnt_want_write_file(filp);
if (ret)
return ret;
- end = range.start + range.len;
- if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) {
- ret = -EINVAL;
- goto out;
- }
do_more:
if (!range.sync) {
if (!mutex_trylock(&sbi->gc_mutex)) {
@@ -2108,7 +2179,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
struct inode *inode = file_inode(filp);
struct f2fs_map_blocks map = { .m_next_extent = NULL,
.m_seg_type = NO_CHECK_TYPE };
- struct extent_info ei = {0,0,0};
+ struct extent_info ei = {0, 0, 0};
pgoff_t pg_start, pg_end, next_pgofs;
unsigned int blk_per_seg = sbi->blocks_per_seg;
unsigned int total = 0, sec_num;
@@ -2117,7 +2188,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
int err;
/* if in-place-update policy is enabled, don't waste time here */
- if (should_update_inplace(inode, NULL))
+ if (f2fs_should_update_inplace(inode, NULL))
return -EINVAL;
pg_start = range->start >> PAGE_SHIFT;
@@ -2212,7 +2283,7 @@ do_map:
while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) {
struct page *page;
- page = get_lock_data_page(inode, idx, true);
+ page = f2fs_get_lock_data_page(inode, idx, true);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto clear_out;
@@ -2323,15 +2394,10 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
}
inode_lock(src);
- down_write(&F2FS_I(src)->dio_rwsem[WRITE]);
if (src != dst) {
ret = -EBUSY;
if (!inode_trylock(dst))
goto out;
- if (!down_write_trylock(&F2FS_I(dst)->dio_rwsem[WRITE])) {
- inode_unlock(dst);
- goto out;
- }
}
ret = -EINVAL;
@@ -2376,6 +2442,14 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
goto out_unlock;
f2fs_balance_fs(sbi, true);
+
+ down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
+ if (src != dst) {
+ ret = -EBUSY;
+ if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
+ goto out_src;
+ }
+
f2fs_lock_op(sbi);
ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS,
pos_out >> F2FS_BLKSIZE_BITS,
@@ -2388,13 +2462,15 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
f2fs_i_size_write(dst, dst_osize);
}
f2fs_unlock_op(sbi);
+
+ if (src != dst)
+ up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
+out_src:
+ up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
out_unlock:
- if (src != dst) {
- up_write(&F2FS_I(dst)->dio_rwsem[WRITE]);
+ if (src != dst)
inode_unlock(dst);
- }
out:
- up_write(&F2FS_I(src)->dio_rwsem[WRITE]);
inode_unlock(src);
return ret;
}
@@ -2521,12 +2597,14 @@ int f2fs_pin_file_control(struct inode *inode, bool inc)
/* Use i_gc_failures for normal file as a risk signal. */
if (inc)
- f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1);
+ f2fs_i_gc_failures_write(inode,
+ fi->i_gc_failures[GC_FAILURE_PIN] + 1);
- if (fi->i_gc_failures > sbi->gc_pin_file_threshold) {
+ if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) {
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: Enable GC = ino %lx after %x GC trials\n",
- __func__, inode->i_ino, fi->i_gc_failures);
+ __func__, inode->i_ino,
+ fi->i_gc_failures[GC_FAILURE_PIN]);
clear_inode_flag(inode, FI_PIN_FILE);
return -EAGAIN;
}
@@ -2557,14 +2635,14 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
inode_lock(inode);
- if (should_update_outplace(inode, NULL)) {
+ if (f2fs_should_update_outplace(inode, NULL)) {
ret = -EINVAL;
goto out;
}
if (!pin) {
clear_inode_flag(inode, FI_PIN_FILE);
- F2FS_I(inode)->i_gc_failures = 1;
+ f2fs_i_gc_failures_write(inode, 0);
goto done;
}
@@ -2577,7 +2655,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
goto out;
set_inode_flag(inode, FI_PIN_FILE);
- ret = F2FS_I(inode)->i_gc_failures;
+ ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
done:
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
out:
@@ -2592,7 +2670,7 @@ static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg)
__u32 pin = 0;
if (is_inode_flag_set(inode, FI_PIN_FILE))
- pin = F2FS_I(inode)->i_gc_failures;
+ pin = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
return put_user(pin, (u32 __user *)arg);
}
@@ -2616,9 +2694,9 @@ int f2fs_precache_extents(struct inode *inode)
while (map.m_lblk < end) {
map.m_len = end - map.m_lblk;
- down_write(&fi->dio_rwsem[WRITE]);
+ down_write(&fi->i_gc_rwsem[WRITE]);
err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE);
- up_write(&fi->dio_rwsem[WRITE]);
+ up_write(&fi->i_gc_rwsem[WRITE]);
if (err)
return err;
@@ -2694,7 +2772,6 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- struct blk_plug plug;
ssize_t ret;
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
@@ -2724,6 +2801,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
iov_iter_count(from)) ||
f2fs_has_inline_data(inode) ||
f2fs_force_buffered_io(inode, WRITE)) {
+ clear_inode_flag(inode,
+ FI_NO_PREALLOC);
inode_unlock(inode);
return -EAGAIN;
}
@@ -2739,9 +2818,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
return err;
}
}
- blk_start_plug(&plug);
ret = __generic_file_write_iter(iocb, from);
- blk_finish_plug(&plug);
clear_inode_flag(inode, FI_NO_PREALLOC);
/* if we couldn't write data, we should deallocate blocks. */
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index d28d31cbd7d2..ada8b8056cd0 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -53,12 +53,10 @@ static int gc_thread_func(void *data)
continue;
}
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
}
-#endif
if (!sb_start_write_trylock(sbi->sb))
continue;
@@ -76,7 +74,7 @@ static int gc_thread_func(void *data)
* invalidated soon after by user update or deletion.
* So, I'd like to wait some time to collect dirty segments.
*/
- if (gc_th->gc_urgent) {
+ if (sbi->gc_mode == GC_URGENT) {
wait_ms = gc_th->urgent_sleep_time;
mutex_lock(&sbi->gc_mutex);
goto do_gc;
@@ -114,7 +112,7 @@ next:
return 0;
}
-int start_gc_thread(struct f2fs_sb_info *sbi)
+int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
{
struct f2fs_gc_kthread *gc_th;
dev_t dev = sbi->sb->s_bdev->bd_dev;
@@ -131,8 +129,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;
- gc_th->gc_idle = 0;
- gc_th->gc_urgent = 0;
gc_th->gc_wake= 0;
sbi->gc_thread = gc_th;
@@ -148,7 +144,7 @@ out:
return err;
}
-void stop_gc_thread(struct f2fs_sb_info *sbi)
+void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
{
struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
if (!gc_th)
@@ -158,21 +154,19 @@ void stop_gc_thread(struct f2fs_sb_info *sbi)
sbi->gc_thread = NULL;
}
-static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
+static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
{
int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
- if (!gc_th)
- return gc_mode;
-
- if (gc_th->gc_idle) {
- if (gc_th->gc_idle == 1)
- gc_mode = GC_CB;
- else if (gc_th->gc_idle == 2)
- gc_mode = GC_GREEDY;
- }
- if (gc_th->gc_urgent)
+ switch (sbi->gc_mode) {
+ case GC_IDLE_CB:
+ gc_mode = GC_CB;
+ break;
+ case GC_IDLE_GREEDY:
+ case GC_URGENT:
gc_mode = GC_GREEDY;
+ break;
+ }
return gc_mode;
}
@@ -187,7 +181,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
p->max_search = dirty_i->nr_dirty[type];
p->ofs_unit = 1;
} else {
- p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
+ p->gc_mode = select_gc_type(sbi, gc_type);
p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
p->max_search = dirty_i->nr_dirty[DIRTY];
p->ofs_unit = sbi->segs_per_sec;
@@ -195,7 +189,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
/* we need to check every dirty segments in the FG_GC case */
if (gc_type != FG_GC &&
- (sbi->gc_thread && !sbi->gc_thread->gc_urgent) &&
+ (sbi->gc_mode != GC_URGENT) &&
p->max_search > sbi->max_victim_search)
p->max_search = sbi->max_victim_search;
@@ -234,10 +228,6 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
if (sec_usage_check(sbi, secno))
continue;
-
- if (no_fggc_candidate(sbi, secno))
- continue;
-
clear_bit(secno, dirty_i->victim_secmap);
return GET_SEG_FROM_SEC(sbi, secno);
}
@@ -377,9 +367,6 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
goto next;
if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
goto next;
- if (gc_type == FG_GC && p.alloc_mode == LFS &&
- no_fggc_candidate(sbi, secno))
- goto next;
cost = get_gc_cost(sbi, segno, &p);
@@ -440,7 +427,7 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode)
iput(inode);
return;
}
- new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
+ new_ie = f2fs_kmem_cache_alloc(f2fs_inode_entry_slab, GFP_NOFS);
new_ie->inode = inode;
f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie);
@@ -454,7 +441,7 @@ static void put_gc_inode(struct gc_inode_list *gc_list)
radix_tree_delete(&gc_list->iroot, ie->inode->i_ino);
iput(ie->inode);
list_del(&ie->list);
- kmem_cache_free(inode_entry_slab, ie);
+ kmem_cache_free(f2fs_inode_entry_slab, ie);
}
}
@@ -484,12 +471,16 @@ static void gc_node_segment(struct f2fs_sb_info *sbi,
block_t start_addr;
int off;
int phase = 0;
+ bool fggc = (gc_type == FG_GC);
start_addr = START_BLOCK(sbi, segno);
next_step:
entry = sum;
+ if (fggc && phase == 2)
+ atomic_inc(&sbi->wb_sync_req[NODE]);
+
for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
nid_t nid = le32_to_cpu(entry->nid);
struct page *node_page;
@@ -503,39 +494,46 @@ next_step:
continue;
if (phase == 0) {
- ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
+ f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
META_NAT, true);
continue;
}
if (phase == 1) {
- ra_node_page(sbi, nid);
+ f2fs_ra_node_page(sbi, nid);
continue;
}
/* phase == 2 */
- node_page = get_node_page(sbi, nid);
+ node_page = f2fs_get_node_page(sbi, nid);
if (IS_ERR(node_page))
continue;
- /* block may become invalid during get_node_page */
+ /* block may become invalid during f2fs_get_node_page */
if (check_valid_map(sbi, segno, off) == 0) {
f2fs_put_page(node_page, 1);
continue;
}
- get_node_info(sbi, nid, &ni);
+ if (f2fs_get_node_info(sbi, nid, &ni)) {
+ f2fs_put_page(node_page, 1);
+ continue;
+ }
+
if (ni.blk_addr != start_addr + off) {
f2fs_put_page(node_page, 1);
continue;
}
- move_node_page(node_page, gc_type);
+ f2fs_move_node_page(node_page, gc_type);
stat_inc_node_blk_count(sbi, 1, gc_type);
}
if (++phase < 3)
goto next_step;
+
+ if (fggc)
+ atomic_dec(&sbi->wb_sync_req[NODE]);
}
/*
@@ -545,7 +543,7 @@ next_step:
* as indirect or double indirect node blocks, are given, it must be a caller's
* bug.
*/
-block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode)
+block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode)
{
unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
unsigned int bidx;
@@ -576,11 +574,14 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
nid = le32_to_cpu(sum->nid);
ofs_in_node = le16_to_cpu(sum->ofs_in_node);
- node_page = get_node_page(sbi, nid);
+ node_page = f2fs_get_node_page(sbi, nid);
if (IS_ERR(node_page))
return false;
- get_node_info(sbi, nid, dni);
+ if (f2fs_get_node_info(sbi, nid, dni)) {
+ f2fs_put_page(node_page, 1);
+ return false;
+ }
if (sum->version != dni->version) {
f2fs_msg(sbi->sb, KERN_WARNING,
@@ -598,12 +599,78 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
return true;
}
+static int ra_data_block(struct inode *inode, pgoff_t index)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct address_space *mapping = inode->i_mapping;
+ struct dnode_of_data dn;
+ struct page *page;
+ struct extent_info ei = {0, 0, 0};
+ struct f2fs_io_info fio = {
+ .sbi = sbi,
+ .ino = inode->i_ino,
+ .type = DATA,
+ .temp = COLD,
+ .op = REQ_OP_READ,
+ .op_flags = 0,
+ .encrypted_page = NULL,
+ .in_list = false,
+ .retry = false,
+ };
+ int err;
+
+ page = f2fs_grab_cache_page(mapping, index, true);
+ if (!page)
+ return -ENOMEM;
+
+ if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+ dn.data_blkaddr = ei.blk + index - ei.fofs;
+ goto got_it;
+ }
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
+ if (err)
+ goto put_page;
+ f2fs_put_dnode(&dn);
+
+ if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
+ DATA_GENERIC))) {
+ err = -EFAULT;
+ goto put_page;
+ }
+got_it:
+ /* read page */
+ fio.page = page;
+ fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
+
+ fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi),
+ dn.data_blkaddr,
+ FGP_LOCK | FGP_CREAT, GFP_NOFS);
+ if (!fio.encrypted_page) {
+ err = -ENOMEM;
+ goto put_page;
+ }
+
+ err = f2fs_submit_page_bio(&fio);
+ if (err)
+ goto put_encrypted_page;
+ f2fs_put_page(fio.encrypted_page, 0);
+ f2fs_put_page(page, 1);
+ return 0;
+put_encrypted_page:
+ f2fs_put_page(fio.encrypted_page, 1);
+put_page:
+ f2fs_put_page(page, 1);
+ return err;
+}
+
/*
* Move data block via META_MAPPING while keeping locked data page.
* This can be used to move blocks, aka LBAs, directly on disk.
*/
static void move_data_block(struct inode *inode, block_t bidx,
- unsigned int segno, int off)
+ int gc_type, unsigned int segno, int off)
{
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
@@ -614,13 +681,15 @@ static void move_data_block(struct inode *inode, block_t bidx,
.op_flags = REQ_SYNC,
.encrypted_page = NULL,
.in_list = false,
+ .retry = false,
};
struct dnode_of_data dn;
struct f2fs_summary sum;
struct node_info ni;
- struct page *page;
+ struct page *page, *mpage;
block_t newaddr;
int err;
+ bool lfs_mode = test_opt(fio.sbi, LFS);
/* do not read out */
page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
@@ -630,8 +699,11 @@ static void move_data_block(struct inode *inode, block_t bidx,
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
- if (f2fs_is_atomic_file(inode))
+ if (f2fs_is_atomic_file(inode)) {
+ F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++;
+ F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++;
goto out;
+ }
if (f2fs_is_pinned_file(inode)) {
f2fs_pin_file_control(inode, true);
@@ -639,7 +711,7 @@ static void move_data_block(struct inode *inode, block_t bidx,
}
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
if (err)
goto out;
@@ -654,14 +726,20 @@ static void move_data_block(struct inode *inode, block_t bidx,
*/
f2fs_wait_on_page_writeback(page, DATA, true);
- get_node_info(fio.sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
+ if (err)
+ goto put_out;
+
set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
/* read page */
fio.page = page;
fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
- allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
+ if (lfs_mode)
+ down_write(&fio.sbi->io_order_lock);
+
+ f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
&sum, CURSEG_COLD_DATA, NULL, false);
fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
@@ -671,6 +749,23 @@ static void move_data_block(struct inode *inode, block_t bidx,
goto recover_block;
}
+ mpage = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
+ fio.old_blkaddr, FGP_LOCK, GFP_NOFS);
+ if (mpage) {
+ bool updated = false;
+
+ if (PageUptodate(mpage)) {
+ memcpy(page_address(fio.encrypted_page),
+ page_address(mpage), PAGE_SIZE);
+ updated = true;
+ }
+ f2fs_put_page(mpage, 1);
+ invalidate_mapping_pages(META_MAPPING(fio.sbi),
+ fio.old_blkaddr, fio.old_blkaddr);
+ if (updated)
+ goto write_page;
+ }
+
err = f2fs_submit_page_bio(&fio);
if (err)
goto put_page_out;
@@ -687,6 +782,7 @@ static void move_data_block(struct inode *inode, block_t bidx,
goto put_page_out;
}
+write_page:
set_page_dirty(fio.encrypted_page);
f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
if (clear_page_dirty_for_io(fio.encrypted_page))
@@ -701,8 +797,8 @@ static void move_data_block(struct inode *inode, block_t bidx,
fio.op = REQ_OP_WRITE;
fio.op_flags = REQ_SYNC | REQ_NOIDLE;
fio.new_blkaddr = newaddr;
- err = f2fs_submit_page_write(&fio);
- if (err) {
+ f2fs_submit_page_write(&fio);
+ if (fio.retry) {
if (PageWriteback(fio.encrypted_page))
end_page_writeback(fio.encrypted_page);
goto put_page_out;
@@ -717,8 +813,10 @@ static void move_data_block(struct inode *inode, block_t bidx,
put_page_out:
f2fs_put_page(fio.encrypted_page, 1);
recover_block:
+ if (lfs_mode)
+ up_write(&fio.sbi->io_order_lock);
if (err)
- __f2fs_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr,
+ f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr,
true, true);
put_out:
f2fs_put_dnode(&dn);
@@ -731,15 +829,18 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
{
struct page *page;
- page = get_lock_data_page(inode, bidx, true);
+ page = f2fs_get_lock_data_page(inode, bidx, true);
if (IS_ERR(page))
return;
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
- if (f2fs_is_atomic_file(inode))
+ if (f2fs_is_atomic_file(inode)) {
+ F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++;
+ F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++;
goto out;
+ }
if (f2fs_is_pinned_file(inode)) {
if (gc_type == FG_GC)
f2fs_pin_file_control(inode, true);
@@ -773,15 +874,20 @@ retry:
f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page)) {
inode_dec_dirty_pages(inode);
- remove_dirty_inode(inode);
+ f2fs_remove_dirty_inode(inode);
}
set_cold_data(page);
- err = do_write_data_page(&fio);
- if (err == -ENOMEM && is_dirty) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- goto retry;
+ err = f2fs_do_write_data_page(&fio);
+ if (err) {
+ clear_cold_data(page);
+ if (err == -ENOMEM) {
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto retry;
+ }
+ if (is_dirty)
+ set_page_dirty(page);
}
}
out:
@@ -825,13 +931,13 @@ next_step:
continue;
if (phase == 0) {
- ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
+ f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
META_NAT, true);
continue;
}
if (phase == 1) {
- ra_node_page(sbi, nid);
+ f2fs_ra_node_page(sbi, nid);
continue;
}
@@ -840,7 +946,7 @@ next_step:
continue;
if (phase == 2) {
- ra_node_page(sbi, dni.ino);
+ f2fs_ra_node_page(sbi, dni.ino);
continue;
}
@@ -851,23 +957,31 @@ next_step:
if (IS_ERR(inode) || is_bad_inode(inode))
continue;
- /* if inode uses special I/O path, let's go phase 3 */
- if (f2fs_post_read_required(inode)) {
- add_gc_inode(gc_list, inode);
+ if (!down_write_trylock(
+ &F2FS_I(inode)->i_gc_rwsem[WRITE])) {
+ iput(inode);
+ sbi->skipped_gc_rwsem++;
continue;
}
- if (!down_write_trylock(
- &F2FS_I(inode)->dio_rwsem[WRITE])) {
- iput(inode);
+ start_bidx = f2fs_start_bidx_of_node(nofs, inode) +
+ ofs_in_node;
+
+ if (f2fs_post_read_required(inode)) {
+ int err = ra_data_block(inode, start_bidx);
+
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ if (err) {
+ iput(inode);
+ continue;
+ }
+ add_gc_inode(gc_list, inode);
continue;
}
- start_bidx = start_bidx_of_node(nofs, inode);
- data_page = get_read_data_page(inode,
- start_bidx + ofs_in_node, REQ_RAHEAD,
- true);
- up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+ data_page = f2fs_get_read_data_page(inode,
+ start_bidx, REQ_RAHEAD, true);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (IS_ERR(data_page)) {
iput(inode);
continue;
@@ -885,11 +999,12 @@ next_step:
bool locked = false;
if (S_ISREG(inode->i_mode)) {
- if (!down_write_trylock(&fi->dio_rwsem[READ]))
+ if (!down_write_trylock(&fi->i_gc_rwsem[READ]))
continue;
if (!down_write_trylock(
- &fi->dio_rwsem[WRITE])) {
- up_write(&fi->dio_rwsem[READ]);
+ &fi->i_gc_rwsem[WRITE])) {
+ sbi->skipped_gc_rwsem++;
+ up_write(&fi->i_gc_rwsem[READ]);
continue;
}
locked = true;
@@ -898,17 +1013,18 @@ next_step:
inode_dio_wait(inode);
}
- start_bidx = start_bidx_of_node(nofs, inode)
+ start_bidx = f2fs_start_bidx_of_node(nofs, inode)
+ ofs_in_node;
if (f2fs_post_read_required(inode))
- move_data_block(inode, start_bidx, segno, off);
+ move_data_block(inode, start_bidx, gc_type,
+ segno, off);
else
move_data_page(inode, start_bidx, gc_type,
segno, off);
if (locked) {
- up_write(&fi->dio_rwsem[WRITE]);
- up_write(&fi->dio_rwsem[READ]);
+ up_write(&fi->i_gc_rwsem[WRITE]);
+ up_write(&fi->i_gc_rwsem[READ]);
}
stat_inc_data_blk_count(sbi, 1, gc_type);
@@ -947,12 +1063,12 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
/* readahead multi ssa blocks those have contiguous address */
if (sbi->segs_per_sec > 1)
- ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
+ f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
sbi->segs_per_sec, META_SSA, true);
/* reference all summary page */
while (segno < end_segno) {
- sum_page = get_sum_page(sbi, segno++);
+ sum_page = f2fs_get_sum_page(sbi, segno++);
unlock_page(sum_page);
}
@@ -971,7 +1087,13 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
goto next;
sum = page_address(sum_page);
- f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer)));
+ if (type != GET_SUM_TYPE((&sum->footer))) {
+ f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent segment (%u) "
+ "type [%d, %d] in SSA and SIT",
+ segno, type, GET_SUM_TYPE((&sum->footer)));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ goto next;
+ }
/*
* this is to avoid deadlock:
@@ -1018,6 +1140,9 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
.ilist = LIST_HEAD_INIT(gc_list.ilist),
.iroot = RADIX_TREE_INIT(GFP_NOFS),
};
+ unsigned long long last_skipped = sbi->skipped_atomic_files[FG_GC];
+ unsigned long long first_skipped;
+ unsigned int skipped_round = 0, round = 0;
trace_f2fs_gc_begin(sbi->sb, sync, background,
get_pages(sbi, F2FS_DIRTY_NODES),
@@ -1029,6 +1154,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
prefree_segments(sbi));
cpc.reason = __get_cp_reason(sbi);
+ sbi->skipped_gc_rwsem = 0;
+ first_skipped = last_skipped;
gc_more:
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) {
ret = -EINVAL;
@@ -1046,7 +1173,7 @@ gc_more:
* secure free segments which doesn't need fggc any more.
*/
if (prefree_segments(sbi)) {
- ret = write_checkpoint(sbi, &cpc);
+ ret = f2fs_write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
}
@@ -1069,17 +1196,36 @@ gc_more:
sec_freed++;
total_freed += seg_freed;
+ if (gc_type == FG_GC) {
+ if (sbi->skipped_atomic_files[FG_GC] > last_skipped ||
+ sbi->skipped_gc_rwsem)
+ skipped_round++;
+ last_skipped = sbi->skipped_atomic_files[FG_GC];
+ round++;
+ }
+
if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;
- if (!sync) {
- if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
+ if (sync)
+ goto stop;
+
+ if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
+ if (skipped_round <= MAX_SKIP_GC_COUNT ||
+ skipped_round * 2 < round) {
segno = NULL_SEGNO;
goto gc_more;
}
+ if (first_skipped < last_skipped &&
+ (last_skipped - first_skipped) >
+ sbi->skipped_gc_rwsem) {
+ f2fs_drop_inmem_pages_all(sbi, true);
+ segno = NULL_SEGNO;
+ goto gc_more;
+ }
if (gc_type == FG_GC)
- ret = write_checkpoint(sbi, &cpc);
+ ret = f2fs_write_checkpoint(sbi, &cpc);
}
stop:
SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0;
@@ -1103,19 +1249,10 @@ stop:
return ret;
}
-void build_gc_manager(struct f2fs_sb_info *sbi)
+void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
{
- u64 main_count, resv_count, ovp_count;
-
DIRTY_I(sbi)->v_ops = &default_v_ops;
- /* threshold of # of valid blocks in a section for victims of FG_GC */
- main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg;
- resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg;
- ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
-
- sbi->fggc_threshold = div64_u64((main_count - ovp_count) *
- BLKS_PER_SEC(sbi), (main_count - resv_count));
sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES;
/* give warm/cold data area from slower device */
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index b0045d4c8d1e..c8619e408009 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -36,8 +36,6 @@ struct f2fs_gc_kthread {
unsigned int no_gc_sleep_time;
/* for changing gc mode */
- unsigned int gc_idle;
- unsigned int gc_urgent;
unsigned int gc_wake;
};
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index ac951ee9b20b..202bfff31622 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -43,7 +43,7 @@ bool f2fs_may_inline_dentry(struct inode *inode)
return true;
}
-void read_inline_data(struct page *page, struct page *ipage)
+void f2fs_do_read_inline_data(struct page *page, struct page *ipage)
{
struct inode *inode = page->mapping->host;
void *src_addr, *dst_addr;
@@ -65,7 +65,8 @@ void read_inline_data(struct page *page, struct page *ipage)
SetPageUptodate(page);
}
-void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from)
+void f2fs_truncate_inline_inode(struct inode *inode,
+ struct page *ipage, u64 from)
{
void *addr;
@@ -97,7 +98,7 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
path, current->comm);
}
- ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
+ ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(ipage)) {
trace_android_fs_dataread_end(inode, page_offset(page),
PAGE_SIZE);
@@ -115,7 +116,7 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
if (page->index)
zero_user_segment(page, 0, PAGE_SIZE);
else
- read_inline_data(page, ipage);
+ f2fs_do_read_inline_data(page, ipage);
if (!PageUptodate(page))
SetPageUptodate(page);
@@ -138,6 +139,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
.encrypted_page = NULL,
.io_type = FS_DATA_IO,
};
+ struct node_info ni;
int dirty, err;
if (!f2fs_exist_data(dn->inode))
@@ -147,9 +149,27 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
if (err)
return err;
+ err = f2fs_get_node_info(fio.sbi, dn->nid, &ni);
+ if (err) {
+ f2fs_put_dnode(dn);
+ return err;
+ }
+
+ fio.version = ni.version;
+
+ if (unlikely(dn->data_blkaddr != NEW_ADDR)) {
+ f2fs_put_dnode(dn);
+ set_sbi_flag(fio.sbi, SBI_NEED_FSCK);
+ f2fs_msg(fio.sbi->sb, KERN_WARNING,
+ "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
+ "run fsck to fix.",
+ __func__, dn->inode->i_ino, dn->data_blkaddr);
+ return -EINVAL;
+ }
+
f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page));
- read_inline_data(page, dn->inode_page);
+ f2fs_do_read_inline_data(page, dn->inode_page);
set_page_dirty(page);
/* clear dirty state */
@@ -160,18 +180,18 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
ClearPageError(page);
fio.old_blkaddr = dn->data_blkaddr;
set_inode_flag(dn->inode, FI_HOT_DATA);
- write_data_page(dn, &fio);
+ f2fs_outplace_write_data(dn, &fio);
f2fs_wait_on_page_writeback(page, DATA, true);
if (dirty) {
inode_dec_dirty_pages(dn->inode);
- remove_dirty_inode(dn->inode);
+ f2fs_remove_dirty_inode(dn->inode);
}
/* this converted inline_data should be recovered. */
set_inode_flag(dn->inode, FI_APPEND_WRITE);
/* clear inline data and flag after data writeback */
- truncate_inline_inode(dn->inode, dn->inode_page, 0);
+ f2fs_truncate_inline_inode(dn->inode, dn->inode_page, 0);
clear_inline_node(dn->inode_page);
clear_out:
stat_dec_inline_inode(dn->inode);
@@ -196,7 +216,7 @@ int f2fs_convert_inline_inode(struct inode *inode)
f2fs_lock_op(sbi);
- ipage = get_node_page(sbi, inode->i_ino);
+ ipage = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(ipage)) {
err = PTR_ERR(ipage);
goto out;
@@ -222,12 +242,10 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
{
void *src_addr, *dst_addr;
struct dnode_of_data dn;
- struct address_space *mapping = page_mapping(page);
- unsigned long flags;
int err;
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE);
if (err)
return err;
@@ -245,10 +263,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
kunmap_atomic(src_addr);
set_page_dirty(dn.inode_page);
- spin_lock_irqsave(&mapping->tree_lock, flags);
- radix_tree_tag_clear(&mapping->page_tree, page_index(page),
- PAGECACHE_TAG_DIRTY);
- spin_unlock_irqrestore(&mapping->tree_lock, flags);
+ f2fs_clear_radix_tree_dirty_tag(page);
set_inode_flag(inode, FI_APPEND_WRITE);
set_inode_flag(inode, FI_DATA_EXIST);
@@ -258,7 +273,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
return 0;
}
-bool recover_inline_data(struct inode *inode, struct page *npage)
+bool f2fs_recover_inline_data(struct inode *inode, struct page *npage)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode *ri = NULL;
@@ -279,7 +294,7 @@ bool recover_inline_data(struct inode *inode, struct page *npage)
if (f2fs_has_inline_data(inode) &&
ri && (ri->i_inline & F2FS_INLINE_DATA)) {
process_inline:
- ipage = get_node_page(sbi, inode->i_ino);
+ ipage = f2fs_get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
f2fs_wait_on_page_writeback(ipage, NODE, true);
@@ -297,20 +312,20 @@ process_inline:
}
if (f2fs_has_inline_data(inode)) {
- ipage = get_node_page(sbi, inode->i_ino);
+ ipage = f2fs_get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
- truncate_inline_inode(inode, ipage, 0);
+ f2fs_truncate_inline_inode(inode, ipage, 0);
clear_inode_flag(inode, FI_INLINE_DATA);
f2fs_put_page(ipage, 1);
} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
- if (truncate_blocks(inode, 0, false))
+ if (f2fs_truncate_blocks(inode, 0, false))
return false;
goto process_inline;
}
return false;
}
-struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
+struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
struct fscrypt_name *fname, struct page **res_page)
{
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
@@ -321,7 +336,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
void *inline_dentry;
f2fs_hash_t namehash;
- ipage = get_node_page(sbi, dir->i_ino);
+ ipage = f2fs_get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage)) {
*res_page = ipage;
return NULL;
@@ -332,7 +347,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
inline_dentry = inline_data_addr(dir, ipage);
make_dentry_ptr_inline(dir, &d, inline_dentry);
- de = find_target_dentry(fname, namehash, NULL, &d);
+ de = f2fs_find_target_dentry(fname, namehash, NULL, &d);
unlock_page(ipage);
if (de)
*res_page = ipage;
@@ -342,7 +357,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
return de;
}
-int make_empty_inline_dir(struct inode *inode, struct inode *parent,
+int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
struct page *ipage)
{
struct f2fs_dentry_ptr d;
@@ -351,7 +366,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent,
inline_dentry = inline_data_addr(inode, ipage);
make_dentry_ptr_inline(inode, &d, inline_dentry);
- do_make_empty_dir(inode, parent, &d);
+ f2fs_do_make_empty_dir(inode, parent, &d);
set_page_dirty(ipage);
@@ -385,8 +400,18 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
if (err)
goto out;
+ if (unlikely(dn.data_blkaddr != NEW_ADDR)) {
+ f2fs_put_dnode(&dn);
+ set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK);
+ f2fs_msg(F2FS_P_SB(page)->sb, KERN_WARNING,
+ "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
+ "run fsck to fix.",
+ __func__, dir->i_ino, dn.data_blkaddr);
+ err = -EINVAL;
+ goto out;
+ }
+
f2fs_wait_on_page_writeback(page, DATA, true);
- zero_user_segment(page, MAX_INLINE_DATA(dir), PAGE_SIZE);
dentry_blk = page_address(page);
@@ -410,7 +435,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
set_page_dirty(page);
/* clear inline dir and flag after data writeback */
- truncate_inline_inode(dir, ipage, 0);
+ f2fs_truncate_inline_inode(dir, ipage, 0);
stat_dec_inline_dir(dir);
clear_inode_flag(dir, FI_INLINE_DENTRY);
@@ -453,7 +478,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
new_name.len = le16_to_cpu(de->name_len);
ino = le32_to_cpu(de->ino);
- fake_mode = get_de_type(de) << S_SHIFT;
+ fake_mode = f2fs_get_de_type(de) << S_SHIFT;
err = f2fs_add_regular_entry(dir, &new_name, NULL, NULL,
ino, fake_mode);
@@ -465,8 +490,8 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
return 0;
punch_dentry_pages:
truncate_inode_pages(&dir->i_data, 0);
- truncate_blocks(dir, 0, false);
- remove_dirty_inode(dir);
+ f2fs_truncate_blocks(dir, 0, false);
+ f2fs_remove_dirty_inode(dir);
return err;
}
@@ -484,7 +509,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
}
memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA(dir));
- truncate_inline_inode(dir, ipage, 0);
+ f2fs_truncate_inline_inode(dir, ipage, 0);
unlock_page(ipage);
@@ -500,6 +525,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
return 0;
recover:
lock_page(ipage);
+ f2fs_wait_on_page_writeback(ipage, NODE, true);
memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir));
f2fs_i_depth_write(dir, 0);
f2fs_i_size_write(dir, MAX_INLINE_DATA(dir));
@@ -533,14 +559,14 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
struct page *page = NULL;
int err = 0;
- ipage = get_node_page(sbi, dir->i_ino);
+ ipage = f2fs_get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage))
return PTR_ERR(ipage);
inline_dentry = inline_data_addr(dir, ipage);
make_dentry_ptr_inline(dir, &d, inline_dentry);
- bit_pos = room_for_filename(d.bitmap, slots, d.max);
+ bit_pos = f2fs_room_for_filename(d.bitmap, slots, d.max);
if (bit_pos >= d.max) {
err = f2fs_convert_inline_dir(dir, ipage, inline_dentry);
if (err)
@@ -551,7 +577,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
- page = init_inode_metadata(inode, dir, new_name,
+ page = f2fs_init_inode_metadata(inode, dir, new_name,
orig_name, ipage);
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -572,7 +598,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
f2fs_put_page(page, 1);
}
- update_parent_metadata(dir, inode, 0);
+ f2fs_update_parent_metadata(dir, inode, 0);
fail:
if (inode)
up_write(&F2FS_I(inode)->i_sem);
@@ -618,7 +644,7 @@ bool f2fs_empty_inline_dir(struct inode *dir)
void *inline_dentry;
struct f2fs_dentry_ptr d;
- ipage = get_node_page(sbi, dir->i_ino);
+ ipage = f2fs_get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage))
return false;
@@ -649,7 +675,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
if (ctx->pos == d.max)
return 0;
- ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
+ ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(ipage))
return PTR_ERR(ipage);
@@ -675,7 +701,7 @@ int f2fs_inline_data_fiemap(struct inode *inode,
struct page *ipage;
int err = 0;
- ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
+ ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(ipage))
return PTR_ERR(ipage);
@@ -691,7 +717,10 @@ int f2fs_inline_data_fiemap(struct inode *inode,
ilen = start + len;
ilen -= start;
- get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
+ err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
+ if (err)
+ goto out;
+
byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits;
byteaddr += (char *)inline_data_addr(inode, ipage) -
(char *)F2FS_INODE(ipage);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 51846fc54fbd..292f787a65e2 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -36,15 +36,15 @@ void f2fs_set_inode_flags(struct inode *inode)
unsigned int flags = F2FS_I(inode)->i_flags;
unsigned int new_fl = 0;
- if (flags & FS_SYNC_FL)
+ if (flags & F2FS_SYNC_FL)
new_fl |= S_SYNC;
- if (flags & FS_APPEND_FL)
+ if (flags & F2FS_APPEND_FL)
new_fl |= S_APPEND;
- if (flags & FS_IMMUTABLE_FL)
+ if (flags & F2FS_IMMUTABLE_FL)
new_fl |= S_IMMUTABLE;
- if (flags & FS_NOATIME_FL)
+ if (flags & F2FS_NOATIME_FL)
new_fl |= S_NOATIME;
- if (flags & FS_DIRSYNC_FL)
+ if (flags & F2FS_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
if (f2fs_encrypted_inode(inode))
new_fl |= S_ENCRYPTED;
@@ -68,13 +68,16 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
}
}
-static bool __written_first_block(struct f2fs_inode *ri)
+static int __written_first_block(struct f2fs_sb_info *sbi,
+ struct f2fs_inode *ri)
{
block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]);
- if (addr != NEW_ADDR && addr != NULL_ADDR)
- return true;
- return false;
+ if (!__is_valid_data_blkaddr(addr))
+ return 1;
+ if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC))
+ return -EFAULT;
+ return 0;
}
static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -117,15 +120,15 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage)
static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page)
{
struct f2fs_inode *ri = &F2FS_NODE(page)->i;
- int extra_isize = le32_to_cpu(ri->i_extra_isize);
if (!f2fs_sb_has_inode_chksum(sbi->sb))
return false;
- if (!RAW_IS_INODE(F2FS_NODE(page)) || !(ri->i_inline & F2FS_EXTRA_ATTR))
+ if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR))
return false;
- if (!F2FS_FITS_IN_INODE(ri, extra_isize, i_inode_checksum))
+ if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize),
+ i_inode_checksum))
return false;
return true;
@@ -159,8 +162,15 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page)
struct f2fs_inode *ri;
__u32 provided, calculated;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)))
+ return true;
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (!f2fs_enable_inode_chksum(sbi, page))
+#else
if (!f2fs_enable_inode_chksum(sbi, page) ||
PageDirty(page) || PageWriteback(page))
+#endif
return true;
ri = &F2FS_NODE(page)->i;
@@ -185,6 +195,101 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page)
ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page));
}
+static bool sanity_check_inode(struct inode *inode, struct page *node_page)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ unsigned long long iblocks;
+
+ iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
+ if (!iblocks) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, "
+ "run fsck to fix.",
+ __func__, inode->i_ino, iblocks);
+ return false;
+ }
+
+ if (ino_of_node(node_page) != nid_of_node(node_page)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: corrupted inode footer i_ino=%lx, ino,nid: "
+ "[%u, %u] run fsck to fix.",
+ __func__, inode->i_ino,
+ ino_of_node(node_page), nid_of_node(node_page));
+ return false;
+ }
+
+ if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)
+ && !f2fs_has_extra_attr(inode)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: corrupted inode ino=%lx, run fsck to fix.",
+ __func__, inode->i_ino);
+ return false;
+ }
+
+ if (f2fs_has_extra_attr(inode) &&
+ !f2fs_sb_has_extra_attr(sbi->sb)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx) is with extra_attr, "
+ "but extra_attr feature is off",
+ __func__, inode->i_ino);
+ return false;
+ }
+
+ if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
+ fi->i_extra_isize % sizeof(__le32)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, "
+ "max: %zu",
+ __func__, inode->i_ino, fi->i_extra_isize,
+ F2FS_TOTAL_EXTRA_ATTR_SIZE);
+ return false;
+ }
+
+ if (F2FS_I(inode)->extent_tree) {
+ struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
+
+ if (ei->len &&
+ (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) ||
+ !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
+ DATA_GENERIC))) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx) extent info [%u, %u, %u] "
+ "is incorrect, run fsck to fix",
+ __func__, inode->i_ino,
+ ei->blk, ei->fofs, ei->len);
+ return false;
+ }
+ }
+
+ if (f2fs_has_inline_data(inode) &&
+ (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx, mode=%u) should not have "
+ "inline_data, run fsck to fix",
+ __func__, inode->i_ino, inode->i_mode);
+ return false;
+ }
+
+ if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx, mode=%u) should not have "
+ "inline_dentry, run fsck to fix",
+ __func__, inode->i_ino, inode->i_mode);
+ return false;
+ }
+
+ return true;
+}
+
static int do_read_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -192,16 +297,13 @@ static int do_read_inode(struct inode *inode)
struct page *node_page;
struct f2fs_inode *ri;
projid_t i_projid;
+ int err;
/* Check if ino is within scope */
- if (check_nid_range(sbi, inode->i_ino)) {
- f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu",
- (unsigned long) inode->i_ino);
- WARN_ON(1);
+ if (f2fs_check_nid_range(sbi, inode->i_ino))
return -EINVAL;
- }
- node_page = get_node_page(sbi, inode->i_ino);
+ node_page = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(node_page))
return PTR_ERR(node_page);
@@ -221,8 +323,11 @@ static int do_read_inode(struct inode *inode)
inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
inode->i_generation = le32_to_cpu(ri->i_generation);
-
- fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
+ if (S_ISDIR(inode->i_mode))
+ fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
+ else if (S_ISREG(inode->i_mode))
+ fi->i_gc_failures[GC_FAILURE_PIN] =
+ le16_to_cpu(ri->i_gc_failures);
fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
fi->i_flags = le32_to_cpu(ri->i_flags);
fi->flags = 0;
@@ -239,7 +344,6 @@ static int do_read_inode(struct inode *inode)
le16_to_cpu(ri->i_extra_isize) : 0;
if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
- f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size);
} else if (f2fs_has_inline_xattr(inode) ||
f2fs_has_inline_dentry(inode)) {
@@ -255,6 +359,11 @@ static int do_read_inode(struct inode *inode)
fi->i_inline_xattr_size = 0;
}
+ if (!sanity_check_inode(inode, node_page)) {
+ f2fs_put_page(node_page, 1);
+ return -EINVAL;
+ }
+
/* check data exist */
if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
__recover_inline_status(inode, node_page);
@@ -262,13 +371,20 @@ static int do_read_inode(struct inode *inode)
/* get rdev by using inline_info */
__get_inode_rdev(inode, ri);
- if (__written_first_block(ri))
- set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
+ if (S_ISREG(inode->i_mode)) {
+ err = __written_first_block(sbi, ri);
+ if (err < 0) {
+ f2fs_put_page(node_page, 1);
+ return err;
+ }
+ if (!err)
+ set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
+ }
- if (!need_inode_block_update(sbi, inode->i_ino))
+ if (!f2fs_need_inode_block_update(sbi, inode->i_ino))
fi->last_disk_size = inode->i_size;
- if (fi->i_flags & FS_PROJINHERIT_FL)
+ if (fi->i_flags & F2FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi->sb) &&
@@ -320,10 +436,10 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
make_now:
if (ino == F2FS_NODE_INO(sbi)) {
inode->i_mapping->a_ops = &f2fs_node_aops;
- mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
} else if (ino == F2FS_META_INO(sbi)) {
inode->i_mapping->a_ops = &f2fs_meta_aops;
- mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
} else if (S_ISREG(inode->i_mode)) {
inode->i_op = &f2fs_file_inode_operations;
inode->i_fop = &f2fs_file_operations;
@@ -373,7 +489,7 @@ retry:
return inode;
}
-void update_inode(struct inode *inode, struct page *node_page)
+void f2fs_update_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_inode *ri;
struct extent_tree *et = F2FS_I(inode)->extent_tree;
@@ -408,7 +524,12 @@ void update_inode(struct inode *inode, struct page *node_page)
ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
- ri->i_current_depth = cpu_to_le32(F2FS_I(inode)->i_current_depth);
+ if (S_ISDIR(inode->i_mode))
+ ri->i_current_depth =
+ cpu_to_le32(F2FS_I(inode)->i_current_depth);
+ else if (S_ISREG(inode->i_mode))
+ ri->i_gc_failures =
+ cpu_to_le16(F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]);
ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid);
ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
@@ -452,14 +573,18 @@ void update_inode(struct inode *inode, struct page *node_page)
F2FS_I(inode)->i_disk_time[1] = inode->i_ctime;
F2FS_I(inode)->i_disk_time[2] = inode->i_mtime;
F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page);
+#endif
}
-void update_inode_page(struct inode *inode)
+void f2fs_update_inode_page(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *node_page;
retry:
- node_page = get_node_page(sbi, inode->i_ino);
+ node_page = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(node_page)) {
int err = PTR_ERR(node_page);
if (err == -ENOMEM) {
@@ -470,7 +595,7 @@ retry:
}
return;
}
- update_inode(inode, node_page);
+ f2fs_update_inode(inode, node_page);
f2fs_put_page(node_page, 1);
}
@@ -489,7 +614,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
* We need to balance fs here to prevent from producing dirty node pages
* during the urgent cleaning time when running out of free sections.
*/
- update_inode_page(inode);
+ f2fs_update_inode_page(inode);
if (wbc && wbc->nr_to_write)
f2fs_balance_fs(sbi, true);
return 0;
@@ -506,7 +631,7 @@ void f2fs_evict_inode(struct inode *inode)
/* some remaining atomic pages should be discarded */
if (f2fs_is_atomic_file(inode))
- drop_inmem_pages(inode);
+ f2fs_drop_inmem_pages(inode);
trace_f2fs_evict_inode(inode);
truncate_inode_pages_final(&inode->i_data);
@@ -516,7 +641,7 @@ void f2fs_evict_inode(struct inode *inode)
goto out_clear;
f2fs_bug_on(sbi, get_dirty_pages(inode));
- remove_dirty_inode(inode);
+ f2fs_remove_dirty_inode(inode);
f2fs_destroy_extent_tree(inode);
@@ -525,9 +650,9 @@ void f2fs_evict_inode(struct inode *inode)
dquot_initialize(inode);
- remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
- remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
- remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);
+ f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
+ f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
+ f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);
sb_start_intwrite(inode->i_sb);
set_inode_flag(inode, FI_NO_ALLOC);
@@ -536,15 +661,14 @@ retry:
if (F2FS_HAS_BLOCKS(inode))
err = f2fs_truncate(inode);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
f2fs_show_injection_info(FAULT_EVICT_INODE);
err = -EIO;
}
-#endif
+
if (!err) {
f2fs_lock_op(sbi);
- err = remove_inode_page(inode);
+ err = f2fs_remove_inode_page(inode);
f2fs_unlock_op(sbi);
if (err == -ENOENT)
err = 0;
@@ -557,7 +681,7 @@ retry:
}
if (err)
- update_inode_page(inode);
+ f2fs_update_inode_page(inode);
dquot_free_inode(inode);
sb_end_intwrite(inode->i_sb);
no_delete:
@@ -580,16 +704,19 @@ no_delete:
invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
if (inode->i_nlink) {
if (is_inode_flag_set(inode, FI_APPEND_WRITE))
- add_ino_entry(sbi, inode->i_ino, APPEND_INO);
+ f2fs_add_ino_entry(sbi, inode->i_ino, APPEND_INO);
if (is_inode_flag_set(inode, FI_UPDATE_WRITE))
- add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
+ f2fs_add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
}
if (is_inode_flag_set(inode, FI_FREE_NID)) {
- alloc_nid_failed(sbi, inode->i_ino);
+ f2fs_alloc_nid_failed(sbi, inode->i_ino);
clear_inode_flag(inode, FI_FREE_NID);
} else {
- f2fs_bug_on(sbi, err &&
- !exist_written_data(sbi, inode->i_ino, ORPHAN_INO));
+ /*
+ * If xattr nid is corrupted, we can reach the error condition
+ * err && !f2fs_exist_written_data(sbi, inode->i_ino, ORPHAN_INO).
+ * In that case, f2fs_check_nid_range() is enough to give a clue.
+ */
}
out_clear:
fscrypt_put_encryption_info(inode, NULL);
@@ -597,10 +724,11 @@ out_clear:
}
/* caller should call f2fs_lock_op() */
-void handle_failed_inode(struct inode *inode)
+void f2fs_handle_failed_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct node_info ni;
+ int err;
/*
* clear nlink of inode in order to release resource of inode
@@ -612,7 +740,7 @@ void handle_failed_inode(struct inode *inode)
* we must call this to avoid the inode remaining dirty, which would result
* in a panic when flushing dirty inodes in gdirty_list.
*/
- update_inode_page(inode);
+ f2fs_update_inode_page(inode);
f2fs_inode_synced(inode);
/* don't make bad inode, since it becomes a regular file. */
@@ -623,22 +751,29 @@ void handle_failed_inode(struct inode *inode)
* so we can prevent losing this orphan when encountering a checkpoint
* followed by a sudden power-off.
*/
- get_node_info(sbi, inode->i_ino, &ni);
+ err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ if (err) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "May loss orphan inode, run fsck to fix.");
+ goto out;
+ }
if (ni.blk_addr != NULL_ADDR) {
- int err = acquire_orphan_inode(sbi);
+ err = f2fs_acquire_orphan_inode(sbi);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"Too many orphan inodes, run fsck to fix.");
} else {
- add_orphan_inode(inode);
+ f2fs_add_orphan_inode(inode);
}
- alloc_nid_done(sbi, inode->i_ino);
+ f2fs_alloc_nid_done(sbi, inode->i_ino);
} else {
set_inode_flag(inode, FI_FREE_NID);
}
+out:
f2fs_unlock_op(sbi);
/* iput will drop the inode object */
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 0355891dbbf8..05193a103e80 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -37,7 +37,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
return ERR_PTR(-ENOMEM);
f2fs_lock_op(sbi);
- if (!alloc_nid(sbi, &ino)) {
+ if (!f2fs_alloc_nid(sbi, &ino)) {
f2fs_unlock_op(sbi);
err = -ENOSPC;
goto fail;
@@ -54,6 +54,9 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
F2FS_I(inode)->i_crtime = current_time(inode);
inode->i_generation = sbi->s_next_generation++;
+ if (S_ISDIR(inode->i_mode))
+ F2FS_I(inode)->i_current_depth = 1;
+
err = insert_inode_locked(inode);
if (err) {
err = -EINVAL;
@@ -61,7 +64,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
}
if (f2fs_sb_has_project_quota(sbi->sb) &&
- (F2FS_I(dir)->i_flags & FS_PROJINHERIT_FL))
+ (F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL))
F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid;
else
F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns,
@@ -116,9 +119,9 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED);
if (S_ISDIR(inode->i_mode))
- F2FS_I(inode)->i_flags |= FS_INDEX_FL;
+ F2FS_I(inode)->i_flags |= F2FS_INDEX_FL;
- if (F2FS_I(inode)->i_flags & FS_PROJINHERIT_FL)
+ if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
trace_f2fs_new_inode(inode, 0);
@@ -193,7 +196,7 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *
up_read(&sbi->sb_lock);
}
-int update_extension_list(struct f2fs_sb_info *sbi, const char *name,
+int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
bool hot, bool set)
{
__u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list;
@@ -243,7 +246,7 @@ int update_extension_list(struct f2fs_sb_info *sbi, const char *name,
return -EINVAL;
if (hot) {
- strncpy(extlist[count], name, strlen(name));
+ memcpy(extlist[count], name, strlen(name));
sbi->raw_super->hot_ext_count = hot_count + 1;
} else {
char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN];
@@ -251,7 +254,7 @@ int update_extension_list(struct f2fs_sb_info *sbi, const char *name,
memcpy(buf, &extlist[cold_count],
F2FS_EXTENSION_LEN * hot_count);
memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN);
- strncpy(extlist[cold_count], name, strlen(name));
+ memcpy(extlist[cold_count], name, strlen(name));
memcpy(&extlist[cold_count + 1], buf,
F2FS_EXTENSION_LEN * hot_count);
sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1);
@@ -292,7 +295,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
goto out;
f2fs_unlock_op(sbi);
- alloc_nid_done(sbi, ino);
+ f2fs_alloc_nid_done(sbi, ino);
d_instantiate_new(dentry, inode);
@@ -302,7 +305,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
f2fs_balance_fs(sbi, true);
return 0;
out:
- handle_failed_inode(inode);
+ f2fs_handle_failed_inode(inode);
return err;
}
@@ -397,7 +400,7 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
err = PTR_ERR(page);
goto out;
} else {
- err = __f2fs_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR);
+ err = f2fs_do_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR);
if (err)
goto out;
}
@@ -408,7 +411,7 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
else if (IS_ERR(page))
err = PTR_ERR(page);
else
- err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR);
+ err = f2fs_do_add_link(dir, &dotdot, NULL, pino, S_IFDIR);
out:
if (!err)
clear_inode_flag(dir, FI_INLINE_DOTS);
@@ -520,7 +523,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
- err = acquire_orphan_inode(sbi);
+ err = f2fs_acquire_orphan_inode(sbi);
if (err) {
f2fs_unlock_op(sbi);
f2fs_put_page(page, 0);
@@ -582,9 +585,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
- goto out_handle_failed_inode;
+ goto out_f2fs_handle_failed_inode;
f2fs_unlock_op(sbi);
- alloc_nid_done(sbi, inode->i_ino);
+ f2fs_alloc_nid_done(sbi, inode->i_ino);
err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link);
if (err)
@@ -617,8 +620,8 @@ err_out:
f2fs_balance_fs(sbi, true);
goto out_free_encrypted_link;
-out_handle_failed_inode:
- handle_failed_inode(inode);
+out_f2fs_handle_failed_inode:
+ f2fs_handle_failed_inode(inode);
out_free_encrypted_link:
if (disk_link.name != (unsigned char *)symname)
kfree(disk_link.name);
@@ -654,7 +657,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out_fail;
f2fs_unlock_op(sbi);
- alloc_nid_done(sbi, inode->i_ino);
+ f2fs_alloc_nid_done(sbi, inode->i_ino);
d_instantiate_new(dentry, inode);
@@ -666,7 +669,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
out_fail:
clear_inode_flag(inode, FI_INC_LINK);
- handle_failed_inode(inode);
+ f2fs_handle_failed_inode(inode);
return err;
}
@@ -705,7 +708,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
goto out;
f2fs_unlock_op(sbi);
- alloc_nid_done(sbi, inode->i_ino);
+ f2fs_alloc_nid_done(sbi, inode->i_ino);
d_instantiate_new(dentry, inode);
@@ -715,7 +718,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
f2fs_balance_fs(sbi, true);
return 0;
out:
- handle_failed_inode(inode);
+ f2fs_handle_failed_inode(inode);
return err;
}
@@ -744,7 +747,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
}
f2fs_lock_op(sbi);
- err = acquire_orphan_inode(sbi);
+ err = f2fs_acquire_orphan_inode(sbi);
if (err)
goto out;
@@ -756,8 +759,8 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
* add this non-linked tmpfile to orphan list, in this way we could
* remove all unused data of tmpfile after abnormal power-off.
*/
- add_orphan_inode(inode);
- alloc_nid_done(sbi, inode->i_ino);
+ f2fs_add_orphan_inode(inode);
+ f2fs_alloc_nid_done(sbi, inode->i_ino);
if (whiteout) {
f2fs_i_links_write(inode, false);
@@ -773,9 +776,9 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
return 0;
release_out:
- release_orphan_inode(sbi);
+ f2fs_release_orphan_inode(sbi);
out:
- handle_failed_inode(inode);
+ f2fs_handle_failed_inode(inode);
return err;
}
@@ -882,7 +885,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_lock_op(sbi);
- err = acquire_orphan_inode(sbi);
+ err = f2fs_acquire_orphan_inode(sbi);
if (err)
goto put_out_dir;
@@ -896,9 +899,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
up_write(&F2FS_I(new_inode)->i_sem);
if (!new_inode->i_nlink)
- add_orphan_inode(new_inode);
+ f2fs_add_orphan_inode(new_inode);
else
- release_orphan_inode(sbi);
+ f2fs_release_orphan_inode(sbi);
} else {
f2fs_balance_fs(sbi, true);
@@ -966,8 +969,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_put_page(old_dir_page, 0);
f2fs_i_links_write(old_dir, false);
}
- if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
- add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
+ if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) {
+ f2fs_add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
+ if (S_ISDIR(old_inode->i_mode))
+ f2fs_add_ino_entry(sbi, old_inode->i_ino,
+ TRANS_DIR_INO);
+ }
f2fs_unlock_op(sbi);
@@ -1118,8 +1125,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_mark_inode_dirty_sync(new_dir, false);
if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) {
- add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO);
- add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
+ f2fs_add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO);
+ f2fs_add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
}
f2fs_unlock_op(sbi);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 16aee2a7b8a9..1af0805915b4 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -23,13 +23,29 @@
#include "trace.h"
#include <trace/events/f2fs.h>
-#define on_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock)
+#define on_f2fs_build_free_nids(nmi) mutex_is_locked(&(nmi)->build_lock) /* tests the build_lock of the nm_info passed in */
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
static struct kmem_cache *nat_entry_set_slab;
+static struct kmem_cache *fsync_node_entry_slab;
-bool available_free_memory(struct f2fs_sb_info *sbi, int type)
+/*
+ * Check whether the given nid is within node id range.
+ */
+int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
+{
+ if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: out-of-range nid=%x, run fsck to fix.",
+ __func__, nid);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct sysinfo val;
@@ -87,44 +103,33 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
static void clear_node_page_dirty(struct page *page)
{
- struct address_space *mapping = page->mapping;
- unsigned int long flags;
-
if (PageDirty(page)) {
- spin_lock_irqsave(&mapping->tree_lock, flags);
- radix_tree_tag_clear(&mapping->page_tree,
- page_index(page),
- PAGECACHE_TAG_DIRTY);
- spin_unlock_irqrestore(&mapping->tree_lock, flags);
-
+ f2fs_clear_radix_tree_dirty_tag(page);
clear_page_dirty_for_io(page);
- dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
+ dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
}
ClearPageUptodate(page);
}
static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
- pgoff_t index = current_nat_addr(sbi, nid);
- return get_meta_page(sbi, index);
+ return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid));
}
static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
struct page *src_page;
struct page *dst_page;
- pgoff_t src_off;
pgoff_t dst_off;
void *src_addr;
void *dst_addr;
struct f2fs_nm_info *nm_i = NM_I(sbi);
- src_off = current_nat_addr(sbi, nid);
- dst_off = next_nat_addr(sbi, src_off);
+ dst_off = next_nat_addr(sbi, current_nat_addr(sbi, nid));
/* get current nat block page with lock */
- src_page = get_meta_page(sbi, src_off);
- dst_page = grab_meta_page(sbi, dst_off);
+ src_page = get_current_nat_page(sbi, nid);
+ dst_page = f2fs_grab_meta_page(sbi, dst_off);
f2fs_bug_on(sbi, PageDirty(src_page));
src_addr = page_address(src_page);
@@ -169,14 +174,30 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
if (raw_ne)
node_info_from_raw_nat(&ne->ni, raw_ne);
+
+ spin_lock(&nm_i->nat_list_lock);
list_add_tail(&ne->list, &nm_i->nat_entries);
+ spin_unlock(&nm_i->nat_list_lock);
+
nm_i->nat_cnt++;
return ne;
}
static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
- return radix_tree_lookup(&nm_i->nat_root, n);
+ struct nat_entry *ne;
+
+ ne = radix_tree_lookup(&nm_i->nat_root, n);
+
+ /* move a recently accessed nat entry to the tail of the lru list */
+ if (ne && !get_nat_flag(ne, IS_DIRTY)) {
+ spin_lock(&nm_i->nat_list_lock);
+ if (!list_empty(&ne->list))
+ list_move_tail(&ne->list, &nm_i->nat_entries);
+ spin_unlock(&nm_i->nat_list_lock);
+ }
+
+ return ne;
}
static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
@@ -187,7 +208,6 @@ static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
{
- list_del(&e->list);
radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
nm_i->nat_cnt--;
__free_nat_entry(e);
@@ -238,16 +258,21 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
nm_i->dirty_nat_cnt++;
set_nat_flag(ne, IS_DIRTY, true);
refresh_list:
+ spin_lock(&nm_i->nat_list_lock);
if (new_ne)
list_del_init(&ne->list);
else
list_move_tail(&ne->list, &head->entry_list);
+ spin_unlock(&nm_i->nat_list_lock);
}
static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
struct nat_entry_set *set, struct nat_entry *ne)
{
+ spin_lock(&nm_i->nat_list_lock);
list_move_tail(&ne->list, &nm_i->nat_entries);
+ spin_unlock(&nm_i->nat_list_lock);
+
set_nat_flag(ne, IS_DIRTY, false);
set->entry_cnt--;
nm_i->dirty_nat_cnt--;
@@ -260,7 +285,73 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
start, nr);
}
-int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
+bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page)
+{
+ return NODE_MAPPING(sbi) == page->mapping &&
+ IS_DNODE(page) && is_cold_node(page);
+}
+
+void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi)
+{
+ spin_lock_init(&sbi->fsync_node_lock);
+ INIT_LIST_HEAD(&sbi->fsync_node_list);
+ sbi->fsync_seg_id = 0;
+ sbi->fsync_node_num = 0;
+}
+
+static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi,
+ struct page *page)
+{
+ struct fsync_node_entry *fn;
+ unsigned long flags;
+ unsigned int seq_id;
+
+ fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, GFP_NOFS);
+
+ get_page(page);
+ fn->page = page;
+ INIT_LIST_HEAD(&fn->list);
+
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ list_add_tail(&fn->list, &sbi->fsync_node_list);
+ fn->seq_id = sbi->fsync_seg_id++;
+ seq_id = fn->seq_id;
+ sbi->fsync_node_num++;
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+
+ return seq_id;
+}
+
+void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page)
+{
+ struct fsync_node_entry *fn;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ list_for_each_entry(fn, &sbi->fsync_node_list, list) {
+ if (fn->page == page) {
+ list_del(&fn->list);
+ sbi->fsync_node_num--;
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+ kmem_cache_free(fsync_node_entry_slab, fn);
+ put_page(page);
+ return;
+ }
+ }
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+ f2fs_bug_on(sbi, 1);
+}
+
+void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ sbi->fsync_seg_id = 0;
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+}
+
+int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
@@ -277,7 +368,7 @@ int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
return need;
}
-bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
+bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
@@ -291,7 +382,7 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
return is_cp;
}
-bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
+bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
@@ -364,8 +455,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
new_blkaddr == NULL_ADDR);
f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
new_blkaddr == NEW_ADDR);
- f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR &&
- nat_get_blkaddr(e) != NULL_ADDR &&
+ f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
new_blkaddr == NEW_ADDR);
/* increment version no as node is removed */
@@ -376,7 +466,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
/* change address */
nat_set_blkaddr(e, new_blkaddr);
- if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
+ if (!is_valid_data_blkaddr(sbi, new_blkaddr))
set_nat_flag(e, IS_CHECKPOINTED, false);
__set_nat_cache_dirty(nm_i, e);
@@ -391,7 +481,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
up_write(&nm_i->nat_tree_lock);
}
-int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
+int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
int nr = nr_shrink;
@@ -399,13 +489,25 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
if (!down_write_trylock(&nm_i->nat_tree_lock))
return 0;
- while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
+ spin_lock(&nm_i->nat_list_lock);
+ while (nr_shrink) {
struct nat_entry *ne;
+
+ if (list_empty(&nm_i->nat_entries))
+ break;
+
ne = list_first_entry(&nm_i->nat_entries,
struct nat_entry, list);
+ list_del(&ne->list);
+ spin_unlock(&nm_i->nat_list_lock);
+
__del_from_nat_cache(nm_i, ne);
nr_shrink--;
+
+ spin_lock(&nm_i->nat_list_lock);
}
+ spin_unlock(&nm_i->nat_list_lock);
+
up_write(&nm_i->nat_tree_lock);
return nr - nr_shrink;
}
@@ -413,7 +515,8 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
/*
* Returns 0 on success, or a negative errno if reading the NAT block fails.
*/
-void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
+int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
+ struct node_info *ni)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -436,14 +539,14 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
ni->blk_addr = nat_get_blkaddr(e);
ni->version = nat_get_version(e);
up_read(&nm_i->nat_tree_lock);
- return;
+ return 0;
}
memset(&ne, 0, sizeof(struct f2fs_nat_entry));
/* Check current segment summary */
down_read(&curseg->journal_rwsem);
- i = lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
+ i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
if (i >= 0) {
ne = nat_in_journal(journal, i);
node_info_from_raw_nat(ni, &ne);
@@ -458,7 +561,10 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
index = current_nat_addr(sbi, nid);
up_read(&nm_i->nat_tree_lock);
- page = get_meta_page(sbi, index);
+ page = f2fs_get_meta_page(sbi, index);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
nat_blk = (struct f2fs_nat_block *)page_address(page);
ne = nat_blk->entries[nid - start_nid];
node_info_from_raw_nat(ni, &ne);
@@ -466,12 +572,13 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
cache:
/* cache nat entry */
cache_nat_entry(sbi, nid, &ne);
+ return 0;
}
/*
* readahead MAX_RA_NODE number of node pages.
*/
-static void ra_node_pages(struct page *parent, int start, int n)
+static void f2fs_ra_node_pages(struct page *parent, int start, int n)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
struct blk_plug plug;
@@ -485,13 +592,13 @@ static void ra_node_pages(struct page *parent, int start, int n)
end = min(end, NIDS_PER_BLOCK);
for (i = start; i < end; i++) {
nid = get_nid(parent, i, false);
- ra_node_page(sbi, nid);
+ f2fs_ra_node_page(sbi, nid);
}
blk_finish_plug(&plug);
}
-pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
+pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
{
const long direct_index = ADDRS_PER_INODE(dn->inode);
const long direct_blks = ADDRS_PER_BLOCK;
@@ -606,7 +713,7 @@ got:
* f2fs_unlock_op() only if ro is not set RDONLY_NODE.
* In the case of RDONLY_NODE, we don't need to care about mutex.
*/
-int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
+int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct page *npage[4];
@@ -625,7 +732,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
npage[0] = dn->inode_page;
if (!npage[0]) {
- npage[0] = get_node_page(sbi, nids[0]);
+ npage[0] = f2fs_get_node_page(sbi, nids[0]);
if (IS_ERR(npage[0]))
return PTR_ERR(npage[0]);
}
@@ -649,24 +756,24 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
if (!nids[i] && mode == ALLOC_NODE) {
/* alloc new node */
- if (!alloc_nid(sbi, &(nids[i]))) {
+ if (!f2fs_alloc_nid(sbi, &(nids[i]))) {
err = -ENOSPC;
goto release_pages;
}
dn->nid = nids[i];
- npage[i] = new_node_page(dn, noffset[i]);
+ npage[i] = f2fs_new_node_page(dn, noffset[i]);
if (IS_ERR(npage[i])) {
- alloc_nid_failed(sbi, nids[i]);
+ f2fs_alloc_nid_failed(sbi, nids[i]);
err = PTR_ERR(npage[i]);
goto release_pages;
}
set_nid(parent, offset[i - 1], nids[i], i == 1);
- alloc_nid_done(sbi, nids[i]);
+ f2fs_alloc_nid_done(sbi, nids[i]);
done = true;
} else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
- npage[i] = get_node_page_ra(parent, offset[i - 1]);
+ npage[i] = f2fs_get_node_page_ra(parent, offset[i - 1]);
if (IS_ERR(npage[i])) {
err = PTR_ERR(npage[i]);
goto release_pages;
@@ -681,7 +788,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
}
if (!done) {
- npage[i] = get_node_page(sbi, nids[i]);
+ npage[i] = f2fs_get_node_page(sbi, nids[i]);
if (IS_ERR(npage[i])) {
err = PTR_ERR(npage[i]);
f2fs_put_page(npage[0], 0);
@@ -715,20 +822,23 @@ release_out:
return err;
}
-static void truncate_node(struct dnode_of_data *dn)
+static int truncate_node(struct dnode_of_data *dn)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct node_info ni;
+ int err;
- get_node_info(sbi, dn->nid, &ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &ni);
+ if (err)
+ return err;
/* Deallocate node address */
- invalidate_blocks(sbi, ni.blk_addr);
+ f2fs_invalidate_blocks(sbi, ni.blk_addr);
dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino);
set_node_addr(sbi, &ni, NULL_ADDR, false);
if (dn->nid == dn->inode->i_ino) {
- remove_orphan_inode(sbi, dn->nid);
+ f2fs_remove_orphan_inode(sbi, dn->nid);
dec_valid_inode_count(sbi);
f2fs_inode_synced(dn->inode);
}
@@ -743,17 +853,20 @@ static void truncate_node(struct dnode_of_data *dn)
dn->node_page = NULL;
trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
+
+ return 0;
}
static int truncate_dnode(struct dnode_of_data *dn)
{
struct page *page;
+ int err;
if (dn->nid == 0)
return 1;
/* get direct node */
- page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
+ page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid);
if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
return 1;
else if (IS_ERR(page))
@@ -762,8 +875,11 @@ static int truncate_dnode(struct dnode_of_data *dn)
/* Make dnode_of_data for parameter */
dn->node_page = page;
dn->ofs_in_node = 0;
- truncate_data_blocks(dn);
- truncate_node(dn);
+ f2fs_truncate_data_blocks(dn);
+ err = truncate_node(dn);
+ if (err)
+ return err;
+
return 1;
}
@@ -783,13 +899,13 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);
- page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
+ page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid);
if (IS_ERR(page)) {
trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
return PTR_ERR(page);
}
- ra_node_pages(page, ofs, NIDS_PER_BLOCK);
+ f2fs_ra_node_pages(page, ofs, NIDS_PER_BLOCK);
rn = F2FS_NODE(page);
if (depth < 3) {
@@ -828,7 +944,9 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
if (!ofs) {
/* remove current indirect node */
dn->node_page = page;
- truncate_node(dn);
+ ret = truncate_node(dn);
+ if (ret)
+ goto out_err;
freed++;
} else {
f2fs_put_page(page, 1);
@@ -859,7 +977,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
/* get indirect nodes in the path */
for (i = 0; i < idx + 1; i++) {
/* reference count'll be increased */
- pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]);
+ pages[i] = f2fs_get_node_page(F2FS_I_SB(dn->inode), nid[i]);
if (IS_ERR(pages[i])) {
err = PTR_ERR(pages[i]);
idx = i - 1;
@@ -868,7 +986,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
}
- ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK);
+ f2fs_ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK);
/* free direct nodes linked to a partial indirect node */
for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
@@ -886,7 +1004,9 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
if (offset[idx + 1] == 0) {
dn->node_page = pages[idx];
dn->nid = nid[idx];
- truncate_node(dn);
+ err = truncate_node(dn);
+ if (err)
+ goto fail;
} else {
f2fs_put_page(pages[idx], 1);
}
@@ -905,7 +1025,7 @@ fail:
/*
* All the block addresses of data and nodes should be nullified.
*/
-int truncate_inode_blocks(struct inode *inode, pgoff_t from)
+int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int err = 0, cont = 1;
@@ -921,7 +1041,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
if (level < 0)
return level;
- page = get_node_page(sbi, inode->i_ino);
+ page = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(page)) {
trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
return PTR_ERR(page);
@@ -1001,24 +1121,30 @@ fail:
}
/* caller must lock inode page */
-int truncate_xattr_node(struct inode *inode)
+int f2fs_truncate_xattr_node(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
nid_t nid = F2FS_I(inode)->i_xattr_nid;
struct dnode_of_data dn;
struct page *npage;
+ int err;
if (!nid)
return 0;
- npage = get_node_page(sbi, nid);
+ npage = f2fs_get_node_page(sbi, nid);
if (IS_ERR(npage))
return PTR_ERR(npage);
+ set_new_dnode(&dn, inode, NULL, npage, nid);
+ err = truncate_node(&dn);
+ if (err) {
+ f2fs_put_page(npage, 1);
+ return err;
+ }
+
f2fs_i_xnid_write(inode, 0);
- set_new_dnode(&dn, inode, NULL, npage, nid);
- truncate_node(&dn);
return 0;
}
@@ -1026,17 +1152,17 @@ int truncate_xattr_node(struct inode *inode)
* Caller should grab and release a rwsem by calling f2fs_lock_op() and
* f2fs_unlock_op().
*/
-int remove_inode_page(struct inode *inode)
+int f2fs_remove_inode_page(struct inode *inode)
{
struct dnode_of_data dn;
int err;
set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
- err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE);
if (err)
return err;
- err = truncate_xattr_node(inode);
+ err = f2fs_truncate_xattr_node(inode);
if (err) {
f2fs_put_dnode(&dn);
return err;
@@ -1045,18 +1171,26 @@ int remove_inode_page(struct inode *inode)
/* remove potential inline_data blocks */
if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode))
- truncate_data_blocks_range(&dn, 1);
+ f2fs_truncate_data_blocks_range(&dn, 1);
/* 0 is possible, after f2fs_new_inode() has failed */
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
+ f2fs_put_dnode(&dn);
+ return -EIO;
+ }
f2fs_bug_on(F2FS_I_SB(inode),
inode->i_blocks != 0 && inode->i_blocks != 8);
/* will put inode & node pages */
- truncate_node(&dn);
+ err = truncate_node(&dn);
+ if (err) {
+ f2fs_put_dnode(&dn);
+ return err;
+ }
return 0;
}
-struct page *new_inode_page(struct inode *inode)
+struct page *f2fs_new_inode_page(struct inode *inode)
{
struct dnode_of_data dn;
@@ -1064,10 +1198,10 @@ struct page *new_inode_page(struct inode *inode)
set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
/* caller should f2fs_put_page(page, 1); */
- return new_node_page(&dn, 0);
+ return f2fs_new_node_page(&dn, 0);
}
-struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
+struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct node_info new_ni;
@@ -1085,7 +1219,11 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
goto fail;
#ifdef CONFIG_F2FS_CHECK_FS
- get_node_info(sbi, dn->nid, &new_ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &new_ni);
+ if (err) {
+ dec_valid_node_count(sbi, dn->inode, !ofs);
+ goto fail;
+ }
f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
#endif
new_ni.nid = dn->nid;
@@ -1133,13 +1271,21 @@ static int read_node_page(struct page *page, int op_flags)
.page = page,
.encrypted_page = NULL,
};
+ int err;
- if (PageUptodate(page))
+ if (PageUptodate(page)) {
+#ifdef CONFIG_F2FS_CHECK_FS
+ f2fs_bug_on(sbi, !f2fs_inode_chksum_verify(sbi, page));
+#endif
return LOCKED_PAGE;
+ }
- get_node_info(sbi, page->index, &ni);
+ err = f2fs_get_node_info(sbi, page->index, &ni);
+ if (err)
+ return err;
- if (unlikely(ni.blk_addr == NULL_ADDR)) {
+ if (unlikely(ni.blk_addr == NULL_ADDR) ||
+ is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) {
ClearPageUptodate(page);
return -ENOENT;
}
@@ -1151,14 +1297,15 @@ static int read_node_page(struct page *page, int op_flags)
/*
* Readahead a node page
*/
-void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
+void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
{
struct page *apage;
int err;
if (!nid)
return;
- f2fs_bug_on(sbi, check_nid_range(sbi, nid));
+ if (f2fs_check_nid_range(sbi, nid))
+ return;
rcu_read_lock();
apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid);
@@ -1182,7 +1329,8 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
if (!nid)
return ERR_PTR(-ENOENT);
- f2fs_bug_on(sbi, check_nid_range(sbi, nid));
+ if (f2fs_check_nid_range(sbi, nid))
+ return ERR_PTR(-EINVAL);
repeat:
page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
if (!page)
@@ -1198,7 +1346,7 @@ repeat:
}
if (parent)
- ra_node_pages(parent, start + 1, MAX_RA_NODE);
+ f2fs_ra_node_pages(parent, start + 1, MAX_RA_NODE);
lock_page(page);
@@ -1232,12 +1380,12 @@ out_err:
return page;
}
-struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
+struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
{
return __get_node_page(sbi, nid, NULL, 0);
}
-struct page *get_node_page_ra(struct page *parent, int start)
+struct page *f2fs_get_node_page_ra(struct page *parent, int start)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
nid_t nid = get_nid(parent, start, false);
@@ -1272,7 +1420,7 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
ret = f2fs_write_inline_data(inode, page);
inode_dec_dirty_pages(inode);
- remove_dirty_inode(inode);
+ f2fs_remove_dirty_inode(inode);
if (ret)
set_page_dirty(page);
page_out:
@@ -1283,21 +1431,17 @@ iput_out:
static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
{
- pgoff_t index, end;
+ pgoff_t index;
struct pagevec pvec;
struct page *last_page = NULL;
+ int nr_pages;
pagevec_init(&pvec, 0);
index = 0;
- end = ULONG_MAX;
-
- while (index <= end) {
- int i, nr_pages;
- nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
- PAGECACHE_TAG_DIRTY,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
- if (nr_pages == 0)
- break;
+
+ while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
+ PAGECACHE_TAG_DIRTY))) {
+ int i;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
@@ -1343,7 +1487,7 @@ continue_unlock:
static int __write_node_page(struct page *page, bool atomic, bool *submitted,
struct writeback_control *wbc, bool do_balance,
- enum iostat_type io_type)
+ enum iostat_type io_type, unsigned int *seq_id)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
nid_t nid;
@@ -1360,22 +1504,27 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
.io_type = io_type,
.io_wbc = wbc,
};
+ unsigned int seq;
trace_f2fs_writepage(page, NODE);
- if (unlikely(f2fs_cp_error(sbi))) {
- dec_page_count(sbi, F2FS_DIRTY_NODES);
- unlock_page(page);
- return 0;
- }
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto redirty_out;
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
+ if (wbc->sync_mode == WB_SYNC_NONE &&
+ IS_DNODE(page) && is_cold_node(page))
+ goto redirty_out;
+
/* get old block addr of this node page */
nid = nid_of_node(page);
f2fs_bug_on(sbi, page->index != nid);
+ if (f2fs_get_node_info(sbi, nid, &ni))
+ goto redirty_out;
+
if (wbc->for_reclaim) {
if (!down_read_trylock(&sbi->node_write))
goto redirty_out;
@@ -1383,8 +1532,6 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
down_read(&sbi->node_write);
}
- get_node_info(sbi, nid, &ni);
-
/* This page is already truncated */
if (unlikely(ni.blk_addr == NULL_ADDR)) {
ClearPageUptodate(page);
@@ -1394,13 +1541,24 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
return 0;
}
+ if (__is_valid_data_blkaddr(ni.blk_addr) &&
+ !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC))
+ goto redirty_out;
+
if (atomic && !test_opt(sbi, NOBARRIER))
fio.op_flags |= WRITE_FLUSH_FUA;
set_page_writeback(page);
ClearPageError(page);
+
+ if (f2fs_in_warm_node_list(sbi, page)) {
+ seq = f2fs_add_fsync_node_entry(sbi, page);
+ if (seq_id)
+ *seq_id = seq;
+ }
+
fio.old_blkaddr = ni.blk_addr;
- write_node_page(nid, &fio);
+ f2fs_do_write_node_page(nid, &fio);
set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
dec_page_count(sbi, F2FS_DIRTY_NODES);
up_read(&sbi->node_write);
@@ -1429,7 +1587,7 @@ redirty_out:
return AOP_WRITEPAGE_ACTIVATE;
}
-void move_node_page(struct page *node_page, int gc_type)
+void f2fs_move_node_page(struct page *node_page, int gc_type)
{
if (gc_type == FG_GC) {
struct writeback_control wbc = {
@@ -1446,7 +1604,7 @@ void move_node_page(struct page *node_page, int gc_type)
goto out_page;
if (__write_node_page(node_page, false, NULL,
- &wbc, false, FS_GC_NODE_IO))
+ &wbc, false, FS_GC_NODE_IO, NULL))
unlock_page(node_page);
goto release_page;
} else {
@@ -1463,19 +1621,22 @@ release_page:
static int f2fs_write_node_page(struct page *page,
struct writeback_control *wbc)
{
- return __write_node_page(page, false, NULL, wbc, false, FS_NODE_IO);
+ return __write_node_page(page, false, NULL, wbc, false,
+ FS_NODE_IO, NULL);
}
-int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
- struct writeback_control *wbc, bool atomic)
+int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
+ struct writeback_control *wbc, bool atomic,
+ unsigned int *seq_id)
{
- pgoff_t index, end;
+ pgoff_t index;
pgoff_t last_idx = ULONG_MAX;
struct pagevec pvec;
int ret = 0;
struct page *last_page = NULL;
bool marked = false;
nid_t ino = inode->i_ino;
+ int nr_pages;
if (atomic) {
last_page = last_fsync_dnode(sbi, ino);
@@ -1485,15 +1646,10 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
retry:
pagevec_init(&pvec, 0);
index = 0;
- end = ULONG_MAX;
-
- while (index <= end) {
- int i, nr_pages;
- nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
- PAGECACHE_TAG_DIRTY,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
- if (nr_pages == 0)
- break;
+
+ while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
+ PAGECACHE_TAG_DIRTY))) {
+ int i;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
@@ -1537,9 +1693,9 @@ continue_unlock:
if (IS_INODE(page)) {
if (is_inode_flag_set(inode,
FI_DIRTY_INODE))
- update_inode(inode, page);
+ f2fs_update_inode(inode, page);
set_dentry_mark(page,
- need_dentry_mark(sbi, ino));
+ f2fs_need_dentry_mark(sbi, ino));
}
/* may be written by other thread */
if (!PageDirty(page))
@@ -1552,7 +1708,7 @@ continue_unlock:
ret = __write_node_page(page, atomic &&
page == last_page,
&submitted, wbc, true,
- FS_NODE_IO);
+ FS_NODE_IO, seq_id);
if (ret) {
unlock_page(page);
f2fs_put_page(last_page, 0);
@@ -1589,33 +1745,37 @@ out:
return ret ? -EIO: 0;
}
-int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc,
+int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
+ struct writeback_control *wbc,
bool do_balance, enum iostat_type io_type)
{
- pgoff_t index, end;
+ pgoff_t index;
struct pagevec pvec;
int step = 0;
int nwritten = 0;
int ret = 0;
+ int nr_pages, done = 0;
pagevec_init(&pvec, 0);
next_step:
index = 0;
- end = ULONG_MAX;
-
- while (index <= end) {
- int i, nr_pages;
- nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
- PAGECACHE_TAG_DIRTY,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
- if (nr_pages == 0)
- break;
+
+ while (!done && (nr_pages = pagevec_lookup_tag(&pvec,
+ NODE_MAPPING(sbi), &index, PAGECACHE_TAG_DIRTY))) {
+ int i;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
bool submitted = false;
+ /* give a priority to WB_SYNC threads */
+ if (atomic_read(&sbi->wb_sync_req[NODE]) &&
+ wbc->sync_mode == WB_SYNC_NONE) {
+ done = 1;
+ break;
+ }
+
/*
* flushing sequence with step:
* 0. indirect nodes
@@ -1631,7 +1791,9 @@ next_step:
!is_cold_node(page)))
continue;
lock_node:
- if (!trylock_page(page))
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ lock_page(page);
+ else if (!trylock_page(page))
continue;
if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
@@ -1663,7 +1825,7 @@ continue_unlock:
set_dentry_mark(page, 0);
ret = __write_node_page(page, false, &submitted,
- wbc, do_balance, io_type);
+ wbc, do_balance, io_type, NULL);
if (ret)
unlock_page(page);
else if (submitted)
@@ -1682,10 +1844,12 @@ continue_unlock:
}
if (step < 2) {
+ if (wbc->sync_mode == WB_SYNC_NONE && step == 1)
+ goto out;
step++;
goto next_step;
}
-
+out:
if (nwritten)
f2fs_submit_merged_write(sbi, NODE);
@@ -1694,37 +1858,40 @@ continue_unlock:
return ret;
}
-int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
+int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
+ unsigned int seq_id)
{
- pgoff_t index = 0, end = ULONG_MAX;
- struct pagevec pvec;
+ struct fsync_node_entry *fn;
+ struct page *page;
+ struct list_head *head = &sbi->fsync_node_list;
+ unsigned long flags;
+ unsigned int cur_seq_id = 0;
int ret2 = 0, ret = 0;
- pagevec_init(&pvec, 0);
-
- while (index <= end) {
- int i, nr_pages;
- nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
- PAGECACHE_TAG_WRITEBACK,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
- if (nr_pages == 0)
+ while (seq_id && cur_seq_id < seq_id) {
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ if (list_empty(head)) {
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+ break;
+ }
+ fn = list_first_entry(head, struct fsync_node_entry, list);
+ if (fn->seq_id > seq_id) {
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
break;
+ }
+ cur_seq_id = fn->seq_id;
+ page = fn->page;
+ get_page(page);
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
+ f2fs_wait_on_page_writeback(page, NODE, true);
+ if (TestClearPageError(page))
+ ret = -EIO;
- /* until radix tree lookup accepts end_index */
- if (unlikely(page->index > end))
- continue;
+ put_page(page);
- if (ino && ino_of_node(page) == ino) {
- f2fs_wait_on_page_writeback(page, NODE, true);
- if (TestClearPageError(page))
- ret = -EIO;
- }
- }
- pagevec_release(&pvec);
- cond_resched();
+ if (ret)
+ break;
}
if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags)))
@@ -1733,6 +1900,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
ret2 = -EIO;
if (!ret)
ret = ret2;
+
return ret;
}
@@ -1753,14 +1921,21 @@ static int f2fs_write_node_pages(struct address_space *mapping,
if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
goto skip_write;
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ atomic_inc(&sbi->wb_sync_req[NODE]);
+ else if (atomic_read(&sbi->wb_sync_req[NODE]))
+ goto skip_write;
+
trace_f2fs_writepages(mapping->host, wbc, NODE);
diff = nr_pages_to_write(sbi, NODE, wbc);
- wbc->sync_mode = WB_SYNC_NONE;
blk_start_plug(&plug);
- sync_node_pages(sbi, wbc, true, FS_NODE_IO);
+ f2fs_sync_node_pages(sbi, wbc, true, FS_NODE_IO);
blk_finish_plug(&plug);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
+
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ atomic_dec(&sbi->wb_sync_req[NODE]);
return 0;
skip_write:
@@ -1775,6 +1950,10 @@ static int f2fs_set_node_page_dirty(struct page *page)
if (!PageUptodate(page))
SetPageUptodate(page);
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (IS_INODE(page))
+ f2fs_inode_chksum_set(F2FS_P_SB(page), page);
+#endif
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
@@ -1906,20 +2085,20 @@ static bool add_free_nid(struct f2fs_sb_info *sbi,
* Thread A Thread B
* - f2fs_create
* - f2fs_new_inode
- * - alloc_nid
+ * - f2fs_alloc_nid
* - __insert_nid_to_list(PREALLOC_NID)
* - f2fs_balance_fs_bg
- * - build_free_nids
- * - __build_free_nids
+ * - f2fs_build_free_nids
+ * - __f2fs_build_free_nids
* - scan_nat_page
* - add_free_nid
* - __lookup_nat_cache
* - f2fs_add_link
- * - init_inode_metadata
- * - new_inode_page
- * - new_node_page
+ * - f2fs_init_inode_metadata
+ * - f2fs_new_inode_page
+ * - f2fs_new_node_page
* - set_node_addr
- * - alloc_nid_done
+ * - f2fs_alloc_nid_done
* - __remove_nid_from_list(PREALLOC_NID)
* - __insert_nid_to_list(FREE_NID)
*/
@@ -1969,7 +2148,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
kmem_cache_free(free_nid_slab, i);
}
-static void scan_nat_page(struct f2fs_sb_info *sbi,
+static int scan_nat_page(struct f2fs_sb_info *sbi,
struct page *nat_page, nid_t start_nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1987,7 +2166,10 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
break;
blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
- f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
+
+ if (blk_addr == NEW_ADDR)
+ return -EINVAL;
+
if (blk_addr == NULL_ADDR) {
add_free_nid(sbi, start_nid, true, true);
} else {
@@ -1996,6 +2178,8 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
spin_unlock(&NM_I(sbi)->nid_list_lock);
}
}
+
+ return 0;
}
static void scan_curseg_cache(struct f2fs_sb_info *sbi)
@@ -2051,10 +2235,11 @@ out:
up_read(&nm_i->nat_tree_lock);
}
-static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
+static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
+ bool sync, bool mount)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- int i = 0;
+ int i = 0, ret;
nid_t nid = nm_i->next_scan_nid;
if (unlikely(nid >= nm_i->max_nid))
@@ -2062,21 +2247,21 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
/* Enough entries */
if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
- return;
+ return 0;
- if (!sync && !available_free_memory(sbi, FREE_NIDS))
- return;
+ if (!sync && !f2fs_available_free_memory(sbi, FREE_NIDS))
+ return 0;
if (!mount) {
/* try to find free nids in free_nid_bitmap */
scan_free_nid_bits(sbi);
if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
- return;
+ return 0;
}
/* readahead nat pages to be scanned */
- ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
+ f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
META_NAT, true);
down_read(&nm_i->nat_tree_lock);
@@ -2086,8 +2271,16 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
nm_i->nat_block_bitmap)) {
struct page *page = get_current_nat_page(sbi, nid);
- scan_nat_page(sbi, page, nid);
+ ret = scan_nat_page(sbi, page, nid);
f2fs_put_page(page, 1);
+
+ if (ret) {
+ up_read(&nm_i->nat_tree_lock);
+ f2fs_bug_on(sbi, !mount);
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "NAT is corrupt, run fsck to fix it");
+ return -EINVAL;
+ }
}
nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
@@ -2106,15 +2299,21 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
up_read(&nm_i->nat_tree_lock);
- ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
+ f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
nm_i->ra_nid_pages, META_NAT, false);
+
+ return 0;
}
-void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
+int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
+ int ret;
+
mutex_lock(&NM_I(sbi)->build_lock);
- __build_free_nids(sbi, sync, mount);
+ ret = __f2fs_build_free_nids(sbi, sync, mount);
mutex_unlock(&NM_I(sbi)->build_lock);
+
+ return ret;
}
/*
@@ -2122,17 +2321,16 @@ void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
* from second parameter of this function.
* The returned nid could be used ino as well as nid when inode is created.
*/
-bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
+bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i = NULL;
retry:
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
f2fs_show_injection_info(FAULT_ALLOC_NID);
return false;
}
-#endif
+
spin_lock(&nm_i->nid_list_lock);
if (unlikely(nm_i->available_nids == 0)) {
@@ -2140,8 +2338,8 @@ retry:
return false;
}
- /* We should not use stale free nids created by build_free_nids */
- if (nm_i->nid_cnt[FREE_NID] && !on_build_free_nids(nm_i)) {
+ /* We should not use stale free nids created by f2fs_build_free_nids */
+ if (nm_i->nid_cnt[FREE_NID] && !on_f2fs_build_free_nids(nm_i)) {
f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
i = list_first_entry(&nm_i->free_nid_list,
struct free_nid, list);
@@ -2158,14 +2356,14 @@ retry:
spin_unlock(&nm_i->nid_list_lock);
/* Let's scan nat pages and its caches to get free nids */
- build_free_nids(sbi, true, false);
+ f2fs_build_free_nids(sbi, true, false);
goto retry;
}
/*
- * alloc_nid() should be called prior to this function.
+ * f2fs_alloc_nid() should be called prior to this function.
*/
-void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
+void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
@@ -2180,9 +2378,9 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
}
/*
- * alloc_nid() should be called prior to this function.
+ * f2fs_alloc_nid() should be called prior to this function.
*/
-void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
+void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
@@ -2195,7 +2393,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
i = __lookup_free_nid_list(nm_i, nid);
f2fs_bug_on(sbi, !i);
- if (!available_free_memory(sbi, FREE_NIDS)) {
+ if (!f2fs_available_free_memory(sbi, FREE_NIDS)) {
__remove_free_nid(sbi, i, PREALLOC_NID);
need_free = true;
} else {
@@ -2212,7 +2410,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
kmem_cache_free(free_nid_slab, i);
}
-int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
+int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i, *next;
@@ -2240,14 +2438,14 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
return nr - nr_shrink;
}
-void recover_inline_xattr(struct inode *inode, struct page *page)
+void f2fs_recover_inline_xattr(struct inode *inode, struct page *page)
{
void *src_addr, *dst_addr;
size_t inline_size;
struct page *ipage;
struct f2fs_inode *ri;
- ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
+ ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage));
ri = F2FS_INODE(page);
@@ -2265,11 +2463,11 @@ void recover_inline_xattr(struct inode *inode, struct page *page)
f2fs_wait_on_page_writeback(ipage, NODE, true);
memcpy(dst_addr, src_addr, inline_size);
update_inode:
- update_inode(inode, ipage);
+ f2fs_update_inode(inode, ipage);
f2fs_put_page(ipage, 1);
}
-int recover_xattr_data(struct inode *inode, struct page *page)
+int f2fs_recover_xattr_data(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
@@ -2277,30 +2475,34 @@ int recover_xattr_data(struct inode *inode, struct page *page)
struct dnode_of_data dn;
struct node_info ni;
struct page *xpage;
+ int err;
if (!prev_xnid)
goto recover_xnid;
/* 1: invalidate the previous xattr nid */
- get_node_info(sbi, prev_xnid, &ni);
- invalidate_blocks(sbi, ni.blk_addr);
+ err = f2fs_get_node_info(sbi, prev_xnid, &ni);
+ if (err)
+ return err;
+
+ f2fs_invalidate_blocks(sbi, ni.blk_addr);
dec_valid_node_count(sbi, inode, false);
set_node_addr(sbi, &ni, NULL_ADDR, false);
recover_xnid:
/* 2: update xattr nid in inode */
- if (!alloc_nid(sbi, &new_xnid))
+ if (!f2fs_alloc_nid(sbi, &new_xnid))
return -ENOSPC;
set_new_dnode(&dn, inode, NULL, NULL, new_xnid);
- xpage = new_node_page(&dn, XATTR_NODE_OFFSET);
+ xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET);
if (IS_ERR(xpage)) {
- alloc_nid_failed(sbi, new_xnid);
+ f2fs_alloc_nid_failed(sbi, new_xnid);
return PTR_ERR(xpage);
}
- alloc_nid_done(sbi, new_xnid);
- update_inode_page(inode);
+ f2fs_alloc_nid_done(sbi, new_xnid);
+ f2fs_update_inode_page(inode);
/* 3: update and set xattr node page dirty */
memcpy(F2FS_NODE(xpage), F2FS_NODE(page), VALID_XATTR_BLOCK_SIZE);
@@ -2311,14 +2513,17 @@ recover_xnid:
return 0;
}
-int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
+int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
{
struct f2fs_inode *src, *dst;
nid_t ino = ino_of_node(page);
struct node_info old_ni, new_ni;
struct page *ipage;
+ int err;
- get_node_info(sbi, ino, &old_ni);
+ err = f2fs_get_node_info(sbi, ino, &old_ni);
+ if (err)
+ return err;
if (unlikely(old_ni.blk_addr != NULL_ADDR))
return -EINVAL;
@@ -2372,7 +2577,7 @@ retry:
return 0;
}
-void restore_node_summary(struct f2fs_sb_info *sbi,
+int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum)
{
struct f2fs_node *rn;
@@ -2389,10 +2594,13 @@ void restore_node_summary(struct f2fs_sb_info *sbi,
nrpages = min(last_offset - i, BIO_MAX_PAGES);
/* readahead node pages */
- ra_meta_pages(sbi, addr, nrpages, META_POR, true);
+ f2fs_ra_meta_pages(sbi, addr, nrpages, META_POR, true);
for (idx = addr; idx < addr + nrpages; idx++) {
- struct page *page = get_tmp_page(sbi, idx);
+ struct page *page = f2fs_get_tmp_page(sbi, idx);
+
+ if (IS_ERR(page))
+ return PTR_ERR(page);
rn = F2FS_NODE(page);
sum_entry->nid = rn->footer.nid;
@@ -2405,6 +2613,7 @@ void restore_node_summary(struct f2fs_sb_info *sbi,
invalidate_mapping_pages(META_MAPPING(sbi), addr,
addr + nrpages);
}
+ return 0;
}
static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
@@ -2534,7 +2743,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR);
if (to_journal) {
- offset = lookup_journal_in_cursum(journal,
+ offset = f2fs_lookup_journal_in_cursum(journal,
NAT_JOURNAL, nid, 1);
f2fs_bug_on(sbi, offset < 0);
raw_ne = &nat_in_journal(journal, offset);
@@ -2571,7 +2780,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
/*
* This function is called during the checkpointing process.
*/
-void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -2582,6 +2791,13 @@ void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
nid_t set_idx = 0;
LIST_HEAD(sets);
+ /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */
+ if (enabled_nat_bits(sbi, cpc)) {
+ down_write(&nm_i->nat_tree_lock);
+ remove_nats_in_journal(sbi);
+ up_write(&nm_i->nat_tree_lock);
+ }
+
if (!nm_i->dirty_nat_cnt)
return;
@@ -2634,7 +2850,13 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
nm_i->nat_bits_blocks;
for (i = 0; i < nm_i->nat_bits_blocks; i++) {
- struct page *page = get_meta_page(sbi, nat_bits_addr++);
+ struct page *page;
+
+ page = f2fs_get_meta_page(sbi, nat_bits_addr++);
+ if (IS_ERR(page)) {
+ disable_nat_bits(sbi, true);
+ return PTR_ERR(page);
+ }
memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
page_address(page), F2FS_BLKSIZE);
@@ -2718,6 +2940,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
INIT_LIST_HEAD(&nm_i->nat_entries);
+ spin_lock_init(&nm_i->nat_list_lock);
mutex_init(&nm_i->build_lock);
spin_lock_init(&nm_i->nid_list_lock);
@@ -2753,15 +2976,17 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
struct f2fs_nm_info *nm_i = NM_I(sbi);
int i;
- nm_i->free_nid_bitmap = f2fs_kzalloc(sbi, nm_i->nat_blocks *
- sizeof(unsigned char *), GFP_KERNEL);
+ nm_i->free_nid_bitmap =
+ f2fs_kzalloc(sbi, array_size(sizeof(unsigned char *),
+ nm_i->nat_blocks),
+ GFP_KERNEL);
if (!nm_i->free_nid_bitmap)
return -ENOMEM;
for (i = 0; i < nm_i->nat_blocks; i++) {
nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi,
- NAT_ENTRY_BITMAP_SIZE_ALIGNED, GFP_KERNEL);
- if (!nm_i->free_nid_bitmap)
+ f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK), GFP_KERNEL);
+ if (!nm_i->free_nid_bitmap[i])
return -ENOMEM;
}
@@ -2770,14 +2995,16 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
if (!nm_i->nat_block_bitmap)
return -ENOMEM;
- nm_i->free_nid_count = f2fs_kvzalloc(sbi, nm_i->nat_blocks *
- sizeof(unsigned short), GFP_KERNEL);
+ nm_i->free_nid_count =
+ f2fs_kvzalloc(sbi, array_size(sizeof(unsigned short),
+ nm_i->nat_blocks),
+ GFP_KERNEL);
if (!nm_i->free_nid_count)
return -ENOMEM;
return 0;
}
-int build_node_manager(struct f2fs_sb_info *sbi)
+int f2fs_build_node_manager(struct f2fs_sb_info *sbi)
{
int err;
@@ -2797,11 +3024,10 @@ int build_node_manager(struct f2fs_sb_info *sbi)
/* load free nid status from nat_bits table */
load_free_nid_bitmap(sbi);
- build_free_nids(sbi, true, true);
- return 0;
+ return f2fs_build_free_nids(sbi, true, true);
}
-void destroy_node_manager(struct f2fs_sb_info *sbi)
+void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i, *next_i;
@@ -2833,8 +3059,13 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
unsigned idx;
nid = nat_get_nid(natvec[found - 1]) + 1;
- for (idx = 0; idx < found; idx++)
+ for (idx = 0; idx < found; idx++) {
+ spin_lock(&nm_i->nat_list_lock);
+ list_del(&natvec[idx]->list);
+ spin_unlock(&nm_i->nat_list_lock);
+
__del_from_nat_cache(nm_i, natvec[idx]);
+ }
}
f2fs_bug_on(sbi, nm_i->nat_cnt);
@@ -2873,7 +3104,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
kfree(nm_i);
}
-int __init create_node_manager_caches(void)
+int __init f2fs_create_node_manager_caches(void)
{
nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
sizeof(struct nat_entry));
@@ -2889,8 +3120,15 @@ int __init create_node_manager_caches(void)
sizeof(struct nat_entry_set));
if (!nat_entry_set_slab)
goto destroy_free_nid;
+
+ fsync_node_entry_slab = f2fs_kmem_cache_create("fsync_node_entry",
+ sizeof(struct fsync_node_entry));
+ if (!fsync_node_entry_slab)
+ goto destroy_nat_entry_set;
return 0;
+destroy_nat_entry_set:
+ kmem_cache_destroy(nat_entry_set_slab);
destroy_free_nid:
kmem_cache_destroy(free_nid_slab);
destroy_nat_entry:
@@ -2899,8 +3137,9 @@ fail:
return -ENOMEM;
}
-void destroy_node_manager_caches(void)
+void f2fs_destroy_node_manager_caches(void)
{
+ kmem_cache_destroy(fsync_node_entry_slab);
kmem_cache_destroy(nat_entry_set_slab);
kmem_cache_destroy(free_nid_slab);
kmem_cache_destroy(nat_entry_slab);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index b95e49e4a928..0f4db7a61254 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -135,6 +135,11 @@ static inline bool excess_cached_nats(struct f2fs_sb_info *sbi)
return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD;
}
+static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi)
+{
+ return get_pages(sbi, F2FS_DIRTY_NODES) >= sbi->blocks_per_seg * 8;
+}
+
enum mem_type {
FREE_NIDS, /* indicates the free nid list */
NAT_ENTRIES, /* indicates the cached nat entry */
@@ -444,6 +449,10 @@ static inline void set_mark(struct page *page, int mark, int type)
else
flag &= ~(0x1 << type);
rn->footer.flag = cpu_to_le32(flag);
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ f2fs_inode_chksum_set(F2FS_P_SB(page), page);
+#endif
}
#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT)
#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 4ddc2262baf1..501bb0fdda1b 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -47,7 +47,7 @@
static struct kmem_cache *fsync_entry_slab;
-bool space_for_roll_forward(struct f2fs_sb_info *sbi)
+bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi)
{
s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);
@@ -162,7 +162,7 @@ retry:
goto out_put;
}
- err = acquire_orphan_inode(F2FS_I_SB(inode));
+ err = f2fs_acquire_orphan_inode(F2FS_I_SB(inode));
if (err) {
iput(einode);
goto out_put;
@@ -173,7 +173,7 @@ retry:
} else if (IS_ERR(page)) {
err = PTR_ERR(page);
} else {
- err = __f2fs_do_add_link(dir, &fname, inode,
+ err = f2fs_add_dentry(dir, &fname, inode,
inode->i_ino, inode->i_mode);
}
if (err == -ENOMEM)
@@ -204,8 +204,6 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
set_inode_flag(inode, FI_DATA_EXIST);
else
clear_inode_flag(inode, FI_DATA_EXIST);
- if (!(ri->i_inline & F2FS_INLINE_DOTS))
- clear_inode_flag(inode, FI_INLINE_DOTS);
}
static void recover_inode(struct inode *inode, struct page *page)
@@ -243,8 +241,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
struct page *page = NULL;
block_t blkaddr;
unsigned int loop_cnt = 0;
- unsigned int free_blocks = sbi->user_block_count -
- valid_user_blocks(sbi);
+ unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg -
+ valid_user_blocks(sbi);
int err = 0;
/* get node pages in the current segment */
@@ -254,10 +252,14 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
while (1) {
struct fsync_inode_entry *entry;
- if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
return 0;
- page = get_tmp_page(sbi, blkaddr);
+ page = f2fs_get_tmp_page(sbi, blkaddr);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ break;
+ }
if (!is_recoverable_dnode(page))
break;
@@ -271,7 +273,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
if (!check_only &&
IS_INODE(page) && is_dent_dnode(page)) {
- err = recover_inode_page(sbi, page);
+ err = f2fs_recover_inode_page(sbi, page);
if (err)
break;
quota_inode = true;
@@ -312,7 +314,7 @@ next:
blkaddr = next_blkaddr_of_node(page);
f2fs_put_page(page, 1);
- ra_meta_pages_cond(sbi, blkaddr);
+ f2fs_ra_meta_pages_cond(sbi, blkaddr);
}
f2fs_put_page(page, 1);
return err;
@@ -355,7 +357,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
}
}
- sum_page = get_sum_page(sbi, segno);
+ sum_page = f2fs_get_sum_page(sbi, segno);
sum_node = (struct f2fs_summary_block *)page_address(sum_page);
sum = sum_node->entries[blkoff];
f2fs_put_page(sum_page, 1);
@@ -375,7 +377,7 @@ got_it:
}
/* Get the node page */
- node_page = get_node_page(sbi, nid);
+ node_page = f2fs_get_node_page(sbi, nid);
if (IS_ERR(node_page))
return PTR_ERR(node_page);
@@ -400,7 +402,8 @@ got_it:
inode = dn->inode;
}
- bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);
+ bidx = f2fs_start_bidx_of_node(offset, inode) +
+ le16_to_cpu(sum.ofs_in_node);
/*
* if inode page is locked, unlock temporarily, but its reference
@@ -410,11 +413,11 @@ got_it:
unlock_page(dn->inode_page);
set_new_dnode(&tdn, inode, NULL, NULL, 0);
- if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
+ if (f2fs_get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
goto out;
if (tdn.data_blkaddr == blkaddr)
- truncate_data_blocks_range(&tdn, 1);
+ f2fs_truncate_data_blocks_range(&tdn, 1);
f2fs_put_dnode(&tdn);
out:
@@ -427,7 +430,7 @@ out:
truncate_out:
if (datablock_addr(tdn.inode, tdn.node_page,
tdn.ofs_in_node) == blkaddr)
- truncate_data_blocks_range(&tdn, 1);
+ f2fs_truncate_data_blocks_range(&tdn, 1);
if (dn->inode->i_ino == nid && !dn->inode_page_locked)
unlock_page(dn->inode_page);
return 0;
@@ -443,25 +446,25 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
/* step 1: recover xattr */
if (IS_INODE(page)) {
- recover_inline_xattr(inode, page);
+ f2fs_recover_inline_xattr(inode, page);
} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
- err = recover_xattr_data(inode, page);
+ err = f2fs_recover_xattr_data(inode, page);
if (!err)
recovered++;
goto out;
}
/* step 2: recover inline data */
- if (recover_inline_data(inode, page))
+ if (f2fs_recover_inline_data(inode, page))
goto out;
/* step 3: recover data indices */
- start = start_bidx_of_node(ofs_of_node(page), inode);
+ start = f2fs_start_bidx_of_node(ofs_of_node(page), inode);
end = start + ADDRS_PER_PAGE(page, inode);
set_new_dnode(&dn, inode, NULL, NULL, 0);
retry_dn:
- err = get_dnode_of_data(&dn, start, ALLOC_NODE);
+ err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE);
if (err) {
if (err == -ENOMEM) {
congestion_wait(BLK_RW_ASYNC, HZ/50);
@@ -472,7 +475,10 @@ retry_dn:
f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
- get_node_info(sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(sbi, dn.nid, &ni);
+ if (err)
+ goto err;
+
f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
@@ -488,7 +494,7 @@ retry_dn:
/* dest is invalid, just invalidate src block */
if (dest == NULL_ADDR) {
- truncate_data_blocks_range(&dn, 1);
+ f2fs_truncate_data_blocks_range(&dn, 1);
continue;
}
@@ -502,20 +508,19 @@ retry_dn:
* and then reserve one new block in dnode page.
*/
if (dest == NEW_ADDR) {
- truncate_data_blocks_range(&dn, 1);
- reserve_new_block(&dn);
+ f2fs_truncate_data_blocks_range(&dn, 1);
+ f2fs_reserve_new_block(&dn);
continue;
}
/* dest is valid block, try to recover from src to dest */
- if (is_valid_blkaddr(sbi, dest, META_POR)) {
+ if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
if (src == NULL_ADDR) {
- err = reserve_new_block(&dn);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- while (err)
- err = reserve_new_block(&dn);
-#endif
+ err = f2fs_reserve_new_block(&dn);
+ while (err &&
+ IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION))
+ err = f2fs_reserve_new_block(&dn);
/* We should not get -ENOSPC */
f2fs_bug_on(sbi, err);
if (err)
@@ -569,12 +574,16 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
while (1) {
struct fsync_inode_entry *entry;
- if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
break;
- ra_meta_pages_cond(sbi, blkaddr);
+ f2fs_ra_meta_pages_cond(sbi, blkaddr);
- page = get_tmp_page(sbi, blkaddr);
+ page = f2fs_get_tmp_page(sbi, blkaddr);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ break;
+ }
if (!is_recoverable_dnode(page)) {
f2fs_put_page(page, 1);
@@ -612,11 +621,11 @@ next:
f2fs_put_page(page, 1);
}
if (!err)
- allocate_new_segments(sbi);
+ f2fs_allocate_new_segments(sbi);
return err;
}
-int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
+int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
struct list_head inode_list;
struct list_head dir_list;
@@ -629,7 +638,8 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
#endif
if (s_flags & MS_RDONLY) {
- f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "recover fsync data on readonly fs");
sbi->sb->s_flags &= ~MS_RDONLY;
}
@@ -691,7 +701,7 @@ skip:
struct cp_control cpc = {
.reason = CP_RECOVERY,
};
- err = write_checkpoint(sbi, &cpc);
+ err = f2fs_write_checkpoint(sbi, &cpc);
}
kmem_cache_destroy(fsync_entry_slab);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 1c7b75a760da..fdc17721e41e 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -169,7 +169,7 @@ found:
return result - size + __reverse_ffz(tmp);
}
-bool need_SSR(struct f2fs_sb_info *sbi)
+bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
{
int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
@@ -177,14 +177,14 @@ bool need_SSR(struct f2fs_sb_info *sbi)
if (test_opt(sbi, LFS))
return false;
- if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
+ if (sbi->gc_mode == GC_URGENT)
return true;
return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
}
-void register_inmem_page(struct inode *inode, struct page *page)
+void f2fs_register_inmem_page(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
@@ -230,6 +230,8 @@ static int __revoke_inmem_pages(struct inode *inode,
lock_page(page);
+ f2fs_wait_on_page_writeback(page, DATA, true);
+
if (recover) {
struct dnode_of_data dn;
struct node_info ni;
@@ -237,7 +239,8 @@ static int __revoke_inmem_pages(struct inode *inode,
trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
retry:
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, page->index,
+ LOOKUP_NODE);
if (err) {
if (err == -ENOMEM) {
congestion_wait(BLK_RW_ASYNC, HZ/50);
@@ -247,9 +250,15 @@ retry:
err = -EAGAIN;
goto next;
}
- get_node_info(sbi, dn.nid, &ni);
+
+ err = f2fs_get_node_info(sbi, dn.nid, &ni);
+ if (err) {
+ f2fs_put_dnode(&dn);
+ return err;
+ }
+
if (cur->old_addr == NEW_ADDR) {
- invalidate_blocks(sbi, dn.data_blkaddr);
+ f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
f2fs_update_data_blkaddr(&dn, NEW_ADDR);
} else
f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
@@ -271,7 +280,7 @@ next:
return err;
}
-void drop_inmem_pages_all(struct f2fs_sb_info *sbi)
+void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
{
struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
struct inode *inode;
@@ -287,15 +296,23 @@ next:
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
if (inode) {
- drop_inmem_pages(inode);
+ if (gc_failure) {
+ if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
+ goto drop;
+ goto skip;
+ }
+drop:
+ set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
+ f2fs_drop_inmem_pages(inode);
iput(inode);
}
+skip:
congestion_wait(BLK_RW_ASYNC, HZ/50);
cond_resched();
goto next;
}
-void drop_inmem_pages(struct inode *inode)
+void f2fs_drop_inmem_pages(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
@@ -309,11 +326,11 @@ void drop_inmem_pages(struct inode *inode)
mutex_unlock(&fi->inmem_lock);
clear_inode_flag(inode, FI_ATOMIC_FILE);
- clear_inode_flag(inode, FI_HOT_DATA);
+ fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
stat_dec_atomic_write(inode);
}
-void drop_inmem_page(struct inode *inode, struct page *page)
+void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -328,7 +345,7 @@ void drop_inmem_page(struct inode *inode, struct page *page)
break;
}
- f2fs_bug_on(sbi, !cur || cur->page != page);
+ f2fs_bug_on(sbi, list_empty(head) || cur->page != page);
list_del(&cur->list);
mutex_unlock(&fi->inmem_lock);
@@ -343,8 +360,7 @@ void drop_inmem_page(struct inode *inode, struct page *page)
trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
}
-static int __commit_inmem_pages(struct inode *inode,
- struct list_head *revoke_list)
+static int __f2fs_commit_inmem_pages(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
@@ -357,9 +373,12 @@ static int __commit_inmem_pages(struct inode *inode,
.op_flags = REQ_SYNC | REQ_PRIO,
.io_type = FS_DATA_IO,
};
+ struct list_head revoke_list;
pgoff_t last_idx = ULONG_MAX;
int err = 0;
+ INIT_LIST_HEAD(&revoke_list);
+
list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
struct page *page = cur->page;
@@ -371,14 +390,14 @@ static int __commit_inmem_pages(struct inode *inode,
f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page)) {
inode_dec_dirty_pages(inode);
- remove_dirty_inode(inode);
+ f2fs_remove_dirty_inode(inode);
}
retry:
fio.page = page;
fio.old_blkaddr = NULL_ADDR;
fio.encrypted_page = NULL;
fio.need_lock = LOCK_DONE;
- err = do_write_data_page(&fio);
+ err = f2fs_do_write_data_page(&fio);
if (err) {
if (err == -ENOMEM) {
congestion_wait(BLK_RW_ASYNC, HZ/50);
@@ -393,50 +412,48 @@ retry:
last_idx = page->index;
}
unlock_page(page);
- list_move_tail(&cur->list, revoke_list);
+ list_move_tail(&cur->list, &revoke_list);
}
if (last_idx != ULONG_MAX)
f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA);
- if (!err)
- __revoke_inmem_pages(inode, revoke_list, false, false);
+ if (err) {
+ /*
+ * try to revoke all committed pages, but still we could fail
+ * due to no memory or other reason, if that happened, EAGAIN
+ * will be returned, which means in such case, transaction is
+ * already not integrity, caller should use journal to do the
+ * recovery or rewrite & commit last transaction. For other
+ * error number, revoking was done by filesystem itself.
+ */
+ err = __revoke_inmem_pages(inode, &revoke_list, false, true);
+
+ /* drop all uncommitted pages */
+ __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
+ } else {
+ __revoke_inmem_pages(inode, &revoke_list, false, false);
+ }
return err;
}
-int commit_inmem_pages(struct inode *inode)
+int f2fs_commit_inmem_pages(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
- struct list_head revoke_list;
int err;
- INIT_LIST_HEAD(&revoke_list);
f2fs_balance_fs(sbi, true);
- f2fs_lock_op(sbi);
+ down_write(&fi->i_gc_rwsem[WRITE]);
+
+ f2fs_lock_op(sbi);
set_inode_flag(inode, FI_ATOMIC_COMMIT);
mutex_lock(&fi->inmem_lock);
- err = __commit_inmem_pages(inode, &revoke_list);
- if (err) {
- int ret;
- /*
- * try to revoke all committed pages, but still we could fail
- * due to no memory or other reason, if that happened, EAGAIN
- * will be returned, which means in such case, transaction is
- * already not integrity, caller should use journal to do the
- * recovery or rewrite & commit last transaction. For other
- * error number, revoking was done by filesystem itself.
- */
- ret = __revoke_inmem_pages(inode, &revoke_list, false, true);
- if (ret)
- err = ret;
+ err = __f2fs_commit_inmem_pages(inode);
- /* drop all uncommitted pages */
- __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
- }
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (!list_empty(&fi->inmem_ilist))
list_del_init(&fi->inmem_ilist);
@@ -446,6 +463,8 @@ int commit_inmem_pages(struct inode *inode)
clear_inode_flag(inode, FI_ATOMIC_COMMIT);
f2fs_unlock_op(sbi);
+ up_write(&fi->i_gc_rwsem[WRITE]);
+
return err;
}
@@ -455,12 +474,10 @@ int commit_inmem_pages(struct inode *inode)
*/
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
}
-#endif
/* balance_fs_bg is able to be pending */
if (need && excess_cached_nats(sbi))
@@ -482,32 +499,34 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
return;
/* try to shrink extent cache when there is no enough memory */
- if (!available_free_memory(sbi, EXTENT_CACHE))
+ if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
/* check the # of cached NAT entries */
- if (!available_free_memory(sbi, NAT_ENTRIES))
- try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
+ if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
+ f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
- if (!available_free_memory(sbi, FREE_NIDS))
- try_to_free_nids(sbi, MAX_FREE_NIDS);
+ if (!f2fs_available_free_memory(sbi, FREE_NIDS))
+ f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
else
- build_free_nids(sbi, false, false);
+ f2fs_build_free_nids(sbi, false, false);
- if (!is_idle(sbi) && !excess_dirty_nats(sbi))
+ if (!is_idle(sbi) &&
+ (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
return;
/* checkpoint is the only way to shrink partial cached entries */
- if (!available_free_memory(sbi, NAT_ENTRIES) ||
- !available_free_memory(sbi, INO_ENTRIES) ||
+ if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
+ !f2fs_available_free_memory(sbi, INO_ENTRIES) ||
excess_prefree_segs(sbi) ||
excess_dirty_nats(sbi) ||
+ excess_dirty_nodes(sbi) ||
f2fs_time_over(sbi, CP_TIME)) {
if (test_opt(sbi, DATA_FLUSH)) {
struct blk_plug plug;
blk_start_plug(&plug);
- sync_dirty_inodes(sbi, FILE_INODE);
+ f2fs_sync_dirty_inodes(sbi, FILE_INODE);
blk_finish_plug(&plug);
}
f2fs_sync_fs(sbi->sb, true);
@@ -540,7 +559,7 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
return __submit_flush_wait(sbi, sbi->sb->s_bdev);
for (i = 0; i < sbi->s_ndevs; i++) {
- if (!is_dirty_device(sbi, ino, i, FLUSH_INO))
+ if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
continue;
ret = __submit_flush_wait(sbi, FDEV(i).bdev);
if (ret)
@@ -651,7 +670,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
return cmd.ret;
}
-int create_flush_cmd_control(struct f2fs_sb_info *sbi)
+int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
{
dev_t dev = sbi->sb->s_bdev->bd_dev;
struct flush_cmd_control *fcc;
@@ -688,7 +707,7 @@ init_thread:
return err;
}
-void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
+void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
@@ -822,9 +841,12 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
dc->len = len;
dc->ref = 0;
dc->state = D_PREP;
+ dc->issuing = 0;
dc->error = 0;
init_completion(&dc->wait);
list_add_tail(&dc->list, pend_list);
+ spin_lock_init(&dc->lock);
+ dc->bio_ref = 0;
atomic_inc(&dcc->discard_cmd_cnt);
dcc->undiscard_blks += len;
@@ -851,7 +873,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc,
struct discard_cmd *dc)
{
if (dc->state == D_DONE)
- atomic_dec(&dcc->issing_discard);
+ atomic_sub(dc->issuing, &dcc->issing_discard);
list_del(&dc->list);
rb_erase(&dc->rb_node, &dcc->root);
@@ -866,9 +888,17 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_cmd *dc)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ unsigned long flags;
trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
+ spin_lock_irqsave(&dc->lock, flags);
+ if (dc->bio_ref) {
+ spin_unlock_irqrestore(&dc->lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&dc->lock, flags);
+
f2fs_bug_on(sbi, dc->ref);
if (dc->error == -EOPNOTSUPP)
@@ -884,10 +914,17 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
static void f2fs_submit_discard_endio(struct bio *bio)
{
struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
+ unsigned long flags;
dc->error = bio->bi_error;
- dc->state = D_DONE;
- complete_all(&dc->wait);
+
+ spin_lock_irqsave(&dc->lock, flags);
+ dc->bio_ref--;
+ if (!dc->bio_ref && dc->state == D_SUBMIT) {
+ dc->state = D_DONE;
+ complete_all(&dc->wait);
+ }
+ spin_unlock_irqrestore(&dc->lock, flags);
bio_put(bio);
}
@@ -1006,6 +1043,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
/* common policy */
dpolicy->type = discard_type;
dpolicy->sync = true;
+ dpolicy->ordered = false;
dpolicy->granularity = granularity;
dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
@@ -1013,62 +1051,136 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
if (discard_type == DPOLICY_BG) {
dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+ dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
dpolicy->io_aware = true;
dpolicy->sync = false;
+ dpolicy->ordered = true;
if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
dpolicy->granularity = 1;
dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
}
} else if (discard_type == DPOLICY_FORCE) {
dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+ dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
dpolicy->io_aware = false;
} else if (discard_type == DPOLICY_FSTRIM) {
dpolicy->io_aware = false;
} else if (discard_type == DPOLICY_UMOUNT) {
+ dpolicy->max_requests = UINT_MAX;
dpolicy->io_aware = false;
}
}
-
+static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
+ struct block_device *bdev, block_t lstart,
+ block_t start, block_t len);
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
-static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
+static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
- struct discard_cmd *dc)
+ struct discard_cmd *dc,
+ unsigned int *issued)
{
+ struct block_device *bdev = dc->bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+ unsigned int max_discard_blocks =
+ SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
&(dcc->fstrim_list) : &(dcc->wait_list);
- struct bio *bio = NULL;
int flag = dpolicy->sync ? REQ_SYNC : 0;
+ block_t lstart, start, len, total_len;
+ int err = 0;
if (dc->state != D_PREP)
- return;
+ return 0;
- trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len);
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+ return 0;
- dc->error = __blkdev_issue_discard(dc->bdev,
- SECTOR_FROM_BLOCK(dc->start),
- SECTOR_FROM_BLOCK(dc->len),
- GFP_NOFS, 0, &bio);
- if (!dc->error) {
- /* should keep before submission to avoid D_DONE right away */
- dc->state = D_SUBMIT;
- atomic_inc(&dcc->issued_discard);
- atomic_inc(&dcc->issing_discard);
- if (bio) {
- bio->bi_private = dc;
- bio->bi_end_io = f2fs_submit_discard_endio;
- submit_bio(flag, bio);
- list_move_tail(&dc->list, wait_list);
- __check_sit_bitmap(sbi, dc->start, dc->start + dc->len);
+ trace_f2fs_issue_discard(bdev, dc->start, dc->len);
- f2fs_update_iostat(sbi, FS_DISCARD, 1);
+ lstart = dc->lstart;
+ start = dc->start;
+ len = dc->len;
+ total_len = len;
+
+ dc->len = 0;
+
+ while (total_len && *issued < dpolicy->max_requests && !err) {
+ struct bio *bio = NULL;
+ unsigned long flags;
+ bool last = true;
+
+ if (len > max_discard_blocks) {
+ len = max_discard_blocks;
+ last = false;
}
- } else {
- __remove_discard_cmd(sbi, dc);
+
+ (*issued)++;
+ if (*issued == dpolicy->max_requests)
+ last = true;
+
+ dc->len += len;
+
+ if (time_to_inject(sbi, FAULT_DISCARD)) {
+ f2fs_show_injection_info(FAULT_DISCARD);
+ err = -EIO;
+ goto submit;
+ }
+ err = __blkdev_issue_discard(bdev,
+ SECTOR_FROM_BLOCK(start),
+ SECTOR_FROM_BLOCK(len),
+ GFP_NOFS, 0, &bio);
+submit:
+ if (err) {
+ spin_lock_irqsave(&dc->lock, flags);
+ if (dc->state == D_PARTIAL)
+ dc->state = D_SUBMIT;
+ spin_unlock_irqrestore(&dc->lock, flags);
+
+ break;
+ }
+
+ f2fs_bug_on(sbi, !bio);
+
+ /*
+ * should keep before submission to avoid D_DONE
+ * right away
+ */
+ spin_lock_irqsave(&dc->lock, flags);
+ if (last)
+ dc->state = D_SUBMIT;
+ else
+ dc->state = D_PARTIAL;
+ dc->bio_ref++;
+ spin_unlock_irqrestore(&dc->lock, flags);
+
+ atomic_inc(&dcc->issing_discard);
+ dc->issuing++;
+ list_move_tail(&dc->list, wait_list);
+
+ /* sanity check on discard range */
+ __check_sit_bitmap(sbi, start, start + len);
+
+ bio->bi_private = dc;
+ bio->bi_end_io = f2fs_submit_discard_endio;
+ submit_bio(flag, bio);
+
+ atomic_inc(&dcc->issued_discard);
+
+ f2fs_update_iostat(sbi, FS_DISCARD, 1);
+
+ lstart += len;
+ start += len;
+ total_len -= len;
+ len = total_len;
}
+
+ if (!err && len)
+ __update_discard_tree_range(sbi, bdev, lstart, start, len);
+ return err;
}
static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
@@ -1088,7 +1200,7 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
goto do_insert;
}
- p = __lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart);
+ p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart);
do_insert:
dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p);
if (!dc)
@@ -1149,11 +1261,12 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
struct discard_cmd *dc;
struct discard_info di = {0};
struct rb_node **insert_p = NULL, *insert_parent = NULL;
+ struct request_queue *q = bdev_get_queue(bdev);
+ unsigned int max_discard_blocks =
+ SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
block_t end = lstart + len;
- mutex_lock(&dcc->cmd_lock);
-
- dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
+ dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, lstart,
(struct rb_entry **)&prev_dc,
(struct rb_entry **)&next_dc,
@@ -1192,7 +1305,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
if (prev_dc && prev_dc->state == D_PREP &&
prev_dc->bdev == bdev &&
- __is_discard_back_mergeable(&di, &prev_dc->di)) {
+ __is_discard_back_mergeable(&di, &prev_dc->di,
+ max_discard_blocks)) {
prev_dc->di.len += di.len;
dcc->undiscard_blks += di.len;
__relocate_discard_cmd(dcc, prev_dc);
@@ -1203,7 +1317,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
if (next_dc && next_dc->state == D_PREP &&
next_dc->bdev == bdev &&
- __is_discard_front_mergeable(&di, &next_dc->di)) {
+ __is_discard_front_mergeable(&di, &next_dc->di,
+ max_discard_blocks)) {
next_dc->di.lstart = di.lstart;
next_dc->di.len += di.len;
next_dc->di.start = di.start;
@@ -1226,8 +1341,6 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
node = rb_next(&prev_dc->rb_node);
next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
}
-
- mutex_unlock(&dcc->cmd_lock);
}
static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
@@ -1242,10 +1355,72 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
blkstart -= FDEV(devi).start_blk;
}
+ mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
+ mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
return 0;
}
+static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
+ struct discard_policy *dpolicy)
+{
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
+ struct rb_node **insert_p = NULL, *insert_parent = NULL;
+ struct discard_cmd *dc;
+ struct blk_plug plug;
+ unsigned int pos = dcc->next_pos;
+ unsigned int issued = 0;
+ bool io_interrupted = false;
+
+ mutex_lock(&dcc->cmd_lock);
+ dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
+ NULL, pos,
+ (struct rb_entry **)&prev_dc,
+ (struct rb_entry **)&next_dc,
+ &insert_p, &insert_parent, true);
+ if (!dc)
+ dc = next_dc;
+
+ blk_start_plug(&plug);
+
+ while (dc) {
+ struct rb_node *node;
+ int err = 0;
+
+ if (dc->state != D_PREP)
+ goto next;
+
+ if (dpolicy->io_aware && !is_idle(sbi)) {
+ io_interrupted = true;
+ break;
+ }
+
+ dcc->next_pos = dc->lstart + dc->len;
+ err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
+
+ if (issued >= dpolicy->max_requests)
+ break;
+next:
+ node = rb_next(&dc->rb_node);
+ if (err)
+ __remove_discard_cmd(sbi, dc);
+ dc = rb_entry_safe(node, struct discard_cmd, rb_node);
+ }
+
+ blk_finish_plug(&plug);
+
+ if (!dc)
+ dcc->next_pos = 0;
+
+ mutex_unlock(&dcc->cmd_lock);
+
+ if (!issued && io_interrupted)
+ issued = -1;
+
+ return issued;
+}
+
static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
{
@@ -1253,18 +1428,24 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct list_head *pend_list;
struct discard_cmd *dc, *tmp;
struct blk_plug plug;
- int i, iter = 0, issued = 0;
+ int i, issued = 0;
bool io_interrupted = false;
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
if (i + 1 < dpolicy->granularity)
break;
+
+ if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
+ return __issue_discard_cmd_orderly(sbi, dpolicy);
+
pend_list = &dcc->pend_list[i];
mutex_lock(&dcc->cmd_lock);
if (list_empty(pend_list))
goto next;
- f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
+ if (unlikely(dcc->rbtree_check))
+ f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
+ &dcc->root));
blk_start_plug(&plug);
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
@@ -1272,20 +1453,19 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
!is_idle(sbi)) {
io_interrupted = true;
- goto skip;
+ break;
}
- __submit_discard_cmd(sbi, dpolicy, dc);
- issued++;
-skip:
- if (++iter >= dpolicy->max_requests)
+ __submit_discard_cmd(sbi, dpolicy, dc, &issued);
+
+ if (issued >= dpolicy->max_requests)
break;
}
blk_finish_plug(&plug);
next:
mutex_unlock(&dcc->cmd_lock);
- if (iter >= dpolicy->max_requests)
+ if (issued >= dpolicy->max_requests || io_interrupted)
break;
}
@@ -1317,7 +1497,7 @@ static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
return dropped;
}
-void drop_discard_cmd(struct f2fs_sb_info *sbi)
+void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
{
__drop_discard_cmd(sbi);
}
@@ -1383,21 +1563,22 @@ next:
return trimmed;
}
-static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
+static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
{
struct discard_policy dp;
+ unsigned int discard_blks;
- if (dpolicy) {
- __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
- return;
- }
+ if (dpolicy)
+ return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
/* wait all */
__init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
- __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+ discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
__init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
- __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+ discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+
+ return discard_blks;
}
/* This should be covered by global mutex, &sit_i->sentry_lock */
@@ -1408,7 +1589,8 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
bool need_wait = false;
mutex_lock(&dcc->cmd_lock);
- dc = (struct discard_cmd *)__lookup_rb_tree(&dcc->root, NULL, blkaddr);
+ dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
+ NULL, blkaddr);
if (dc) {
if (dc->state == D_PREP) {
__punch_discard_cmd(sbi, dc, blkaddr);
@@ -1423,7 +1605,7 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
__wait_one_discard_bio(sbi, dc);
}
-void stop_discard_thread(struct f2fs_sb_info *sbi)
+void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
@@ -1449,6 +1631,8 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
/* just to make sure there is no pending discard commands */
__wait_all_discard_cmd(sbi, NULL);
+
+ f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
return dropped;
}
@@ -1471,25 +1655,32 @@ static int issue_discard_thread(void *data)
kthread_should_stop() || freezing(current) ||
dcc->discard_wake,
msecs_to_jiffies(wait_ms));
+
+ if (dcc->discard_wake)
+ dcc->discard_wake = 0;
+
if (try_to_freeze())
continue;
if (f2fs_readonly(sbi->sb))
continue;
if (kthread_should_stop())
return 0;
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
+ wait_ms = dpolicy.max_interval;
+ continue;
+ }
- if (dcc->discard_wake)
- dcc->discard_wake = 0;
-
- if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
+ if (sbi->gc_mode == GC_URGENT)
__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
sb_start_intwrite(sbi->sb);
issued = __issue_discard_cmd(sbi, &dpolicy);
- if (issued) {
+ if (issued > 0) {
__wait_all_discard_cmd(sbi, &dpolicy);
wait_ms = dpolicy.min_interval;
+ } else if (issued == -1){
+ wait_ms = dpolicy.mid_interval;
} else {
wait_ms = dpolicy.max_interval;
}
@@ -1658,20 +1849,24 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
return false;
}
-void release_discard_addrs(struct f2fs_sb_info *sbi)
+static void release_discard_addr(struct discard_entry *entry)
+{
+ list_del(&entry->list);
+ kmem_cache_free(discard_entry_slab, entry);
+}
+
+void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
{
struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
struct discard_entry *entry, *this;
/* drop caches */
- list_for_each_entry_safe(entry, this, head, list) {
- list_del(&entry->list);
- kmem_cache_free(discard_entry_slab, entry);
- }
+ list_for_each_entry_safe(entry, this, head, list)
+ release_discard_addr(entry);
}
/*
- * Should call clear_prefree_segments after checkpoint is done.
+ * Should call f2fs_clear_prefree_segments after checkpoint is done.
*/
static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
{
@@ -1684,7 +1879,8 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
mutex_unlock(&dirty_i->seglist_lock);
}
-void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
+ struct cp_control *cpc)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *head = &dcc->entry_list;
@@ -1694,21 +1890,30 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
unsigned int start = 0, end = -1;
unsigned int secno, start_segno;
bool force = (cpc->reason & CP_DISCARD);
+ bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
mutex_lock(&dirty_i->seglist_lock);
while (1) {
int i;
+
+ if (need_align && end != -1)
+ end--;
start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
if (start >= MAIN_SEGS(sbi))
break;
end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
start + 1);
- for (i = start; i < end; i++)
- clear_bit(i, prefree_map);
+ if (need_align) {
+ start = rounddown(start, sbi->segs_per_sec);
+ end = roundup(end, sbi->segs_per_sec);
+ }
- dirty_i->nr_dirty[PRE] -= end - start;
+ for (i = start; i < end; i++) {
+ if (test_and_clear_bit(i, prefree_map))
+ dirty_i->nr_dirty[PRE]--;
+ }
if (!test_opt(sbi, DISCARD))
continue;
@@ -1767,9 +1972,8 @@ skip:
if (cur_pos < sbi->blocks_per_seg)
goto find_next;
- list_del(&entry->list);
+ release_discard_addr(entry);
dcc->nr_discards -= total_len;
- kmem_cache_free(discard_entry_slab, entry);
}
wake_up_discard_thread(sbi, false);
@@ -1803,7 +2007,9 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
dcc->nr_discards = 0;
dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
dcc->undiscard_blks = 0;
+ dcc->next_pos = 0;
dcc->root = RB_ROOT;
+ dcc->rbtree_check = false;
init_waitqueue_head(&dcc->discard_wait_queue);
SM_I(sbi)->dcc_info = dcc;
@@ -1827,7 +2033,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
if (!dcc)
return;
- stop_discard_thread(sbi);
+ f2fs_stop_discard_thread(sbi);
kfree(dcc);
SM_I(sbi)->dcc_info = NULL;
@@ -1874,8 +2080,9 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
(new_vblocks > sbi->blocks_per_seg)));
se->valid_blocks = new_vblocks;
- se->mtime = get_mtime(sbi);
- SIT_I(sbi)->max_mtime = se->mtime;
+ se->mtime = get_mtime(sbi, false);
+ if (se->mtime > SIT_I(sbi)->max_mtime)
+ SIT_I(sbi)->max_mtime = se->mtime;
/* Update valid block bitmap */
if (del > 0) {
@@ -1943,7 +2150,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
get_sec_entry(sbi, segno)->valid_blocks += del;
}
-void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
+void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
{
unsigned int segno = GET_SEGNO(sbi, addr);
struct sit_info *sit_i = SIT_I(sbi);
@@ -1952,6 +2159,8 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
if (addr == NEW_ADDR)
return;
+ invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
+
/* add it into sit main buffer */
down_write(&sit_i->sentry_lock);
@@ -1963,14 +2172,14 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
up_write(&sit_i->sentry_lock);
}
-bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
+bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
{
struct sit_info *sit_i = SIT_I(sbi);
unsigned int segno, offset;
struct seg_entry *se;
bool is_cp = false;
- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
+ if (!is_valid_data_blkaddr(sbi, blkaddr))
return true;
down_read(&sit_i->sentry_lock);
@@ -2002,7 +2211,7 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
/*
* Calculate the number of current summary pages for writing
*/
-int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
+int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
{
int valid_sum_count = 0;
int i, sum_in_page;
@@ -2032,14 +2241,15 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
/*
* Caller should put this summary page
*/
-struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
+struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
- return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
+ return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
}
-void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
+void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
+ void *src, block_t blk_addr)
{
- struct page *page = grab_meta_page(sbi, blk_addr);
+ struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
memcpy(page_address(page), src, PAGE_SIZE);
set_page_dirty(page);
@@ -2049,18 +2259,19 @@ void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
static void write_sum_page(struct f2fs_sb_info *sbi,
struct f2fs_summary_block *sum_blk, block_t blk_addr)
{
- update_meta_page(sbi, (void *)sum_blk, blk_addr);
+ f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
}
static void write_current_sum_page(struct f2fs_sb_info *sbi,
int type, block_t blk_addr)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
- struct page *page = grab_meta_page(sbi, blk_addr);
+ struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
struct f2fs_summary_block *src = curseg->sum_blk;
struct f2fs_summary_block *dst;
dst = (struct f2fs_summary_block *)page_address(page);
+ memset(dst, 0, PAGE_SIZE);
mutex_lock(&curseg->curseg_mutex);
@@ -2300,7 +2511,7 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
curseg->alloc_type = SSR;
__next_free_blkoff(sbi, curseg, 0);
- sum_page = get_sum_page(sbi, new_segno);
+ sum_page = f2fs_get_sum_page(sbi, new_segno);
sum_node = (struct f2fs_summary_block *)page_address(sum_page);
memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
f2fs_put_page(sum_page, 1);
@@ -2314,7 +2525,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
int i, cnt;
bool reversed = false;
- /* need_SSR() already forces to do this */
+ /* f2fs_need_SSR() already forces to do this */
if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
curseg->next_segno = segno;
return 1;
@@ -2366,7 +2577,7 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
new_curseg(sbi, type, false);
else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
new_curseg(sbi, type, false);
- else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
+ else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
change_curseg(sbi, type);
else
new_curseg(sbi, type, false);
@@ -2374,7 +2585,7 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
stat_inc_seg_type(sbi, curseg);
}
-void allocate_new_segments(struct f2fs_sb_info *sbi)
+void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
{
struct curseg_info *curseg;
unsigned int old_segno;
@@ -2396,7 +2607,8 @@ static const struct segment_allocation default_salloc_ops = {
.allocate_segment = allocate_segment_by_default,
};
-bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
+ struct cp_control *cpc)
{
__u64 trim_start = cpc->trim_start;
bool has_candidate = false;
@@ -2414,7 +2626,7 @@ bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
return has_candidate;
}
-static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
+static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
unsigned int start, unsigned int end)
{
@@ -2424,14 +2636,17 @@ static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
struct discard_cmd *dc;
struct blk_plug plug;
int issued;
+ unsigned int trimmed = 0;
next:
issued = 0;
mutex_lock(&dcc->cmd_lock);
- f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
+ if (unlikely(dcc->rbtree_check))
+ f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
+ &dcc->root));
- dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
+ dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, start,
(struct rb_entry **)&prev_dc,
(struct rb_entry **)&next_dc,
@@ -2443,6 +2658,7 @@ next:
while (dc && dc->lstart <= end) {
struct rb_node *node;
+ int err = 0;
if (dc->len < dpolicy->granularity)
goto skip;
@@ -2452,19 +2668,24 @@ next:
goto skip;
}
- __submit_discard_cmd(sbi, dpolicy, dc);
+ err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
- if (++issued >= dpolicy->max_requests) {
+ if (issued >= dpolicy->max_requests) {
start = dc->lstart + dc->len;
+ if (err)
+ __remove_discard_cmd(sbi, dc);
+
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
- __wait_all_discard_cmd(sbi, NULL);
+ trimmed += __wait_all_discard_cmd(sbi, NULL);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto next;
}
skip:
node = rb_next(&dc->rb_node);
+ if (err)
+ __remove_discard_cmd(sbi, dc);
dc = rb_entry_safe(node, struct discard_cmd, rb_node);
if (fatal_signal_pending(current))
@@ -2473,6 +2694,8 @@ skip:
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
+
+ return trimmed;
}
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
@@ -2485,23 +2708,28 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
struct discard_policy dpolicy;
unsigned long long trimmed = 0;
int err = 0;
+ bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
return -EINVAL;
- if (end <= MAIN_BLKADDR(sbi))
+ if (end < MAIN_BLKADDR(sbi))
goto out;
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
f2fs_msg(sbi->sb, KERN_WARNING,
"Found FS corruption, run fsck to fix.");
- goto out;
+ return -EIO;
}
/* start/end segment number in main_area */
start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
GET_SEGNO(sbi, end);
+ if (need_align) {
+ start_segno = rounddown(start_segno, sbi->segs_per_sec);
+ end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
+ }
cpc.reason = CP_DISCARD;
cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
@@ -2512,29 +2740,32 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
goto out;
mutex_lock(&sbi->gc_mutex);
- err = write_checkpoint(sbi, &cpc);
+ err = f2fs_write_checkpoint(sbi, &cpc);
mutex_unlock(&sbi->gc_mutex);
if (err)
goto out;
- start_block = START_BLOCK(sbi, start_segno);
- end_block = START_BLOCK(sbi, end_segno + 1);
-
- __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
- __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block);
-
/*
* We filed discard candidates, but actually we don't need to wait for
* all of them, since they'll be issued in idle time along with runtime
* discard option. User configuration looks like using runtime discard
* or periodic fstrim instead of it.
*/
- if (!test_opt(sbi, DISCARD)) {
- trimmed = __wait_discard_cmd_range(sbi, &dpolicy,
+ if (test_opt(sbi, DISCARD))
+ goto out;
+
+ start_block = START_BLOCK(sbi, start_segno);
+ end_block = START_BLOCK(sbi, end_segno + 1);
+
+ __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
+ trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
+ start_block, end_block);
+
+ trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
start_block, end_block);
- range->len = F2FS_BLK_TO_BYTES(trimmed);
- }
out:
+ if (!err)
+ range->len = F2FS_BLK_TO_BYTES(trimmed);
return err;
}
@@ -2546,7 +2777,7 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
return false;
}
-int rw_hint_to_seg_type(enum rw_hint hint)
+int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
{
switch (hint) {
case WRITE_LIFE_SHORT:
@@ -2619,7 +2850,7 @@ int rw_hint_to_seg_type(enum rw_hint hint)
* WRITE_LIFE_LONG " WRITE_LIFE_LONG
*/
-enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi,
+enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
enum page_type type, enum temp_type temp)
{
if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
@@ -2686,9 +2917,11 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
if (is_cold_data(fio->page) || file_is_cold(inode))
return CURSEG_COLD_DATA;
if (file_is_hot(inode) ||
- is_inode_flag_set(inode, FI_HOT_DATA))
+ is_inode_flag_set(inode, FI_HOT_DATA) ||
+ f2fs_is_atomic_file(inode) ||
+ f2fs_is_volatile_file(inode))
return CURSEG_HOT_DATA;
- /* rw_hint_to_seg_type(inode->i_write_hint); */
+ /* f2fs_rw_hint_to_seg_type(inode->i_write_hint); */
return CURSEG_WARM_DATA;
} else {
if (IS_DNODE(fio->page))
@@ -2725,7 +2958,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
return type;
}
-void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio, bool add_list)
@@ -2785,6 +3018,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
INIT_LIST_HEAD(&fio->list);
fio->in_list = true;
+ fio->retry = false;
io = sbi->write_io[fio->type] + fio->temp;
spin_lock(&io->io_lock);
list_add_tail(&fio->list, &io->io_list);
@@ -2807,7 +3041,7 @@ static void update_device_state(struct f2fs_io_info *fio)
devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
/* update device state for fsync */
- set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
+ f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
/* update device state for checkpoint */
if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
@@ -2820,23 +3054,31 @@ static void update_device_state(struct f2fs_io_info *fio)
static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
int type = __get_segment_type(fio);
- int err;
+ bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA);
+ if (keep_order)
+ down_read(&fio->sbi->io_order_lock);
reallocate:
- allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
+ f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
&fio->new_blkaddr, sum, type, fio, true);
+ if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
+ invalidate_mapping_pages(META_MAPPING(fio->sbi),
+ fio->old_blkaddr, fio->old_blkaddr);
/* writeout dirty page into bdev */
- err = f2fs_submit_page_write(fio);
- if (err == -EAGAIN) {
+ f2fs_submit_page_write(fio);
+ if (fio->retry) {
fio->old_blkaddr = fio->new_blkaddr;
goto reallocate;
- } else if (!err) {
- update_device_state(fio);
}
+
+ update_device_state(fio);
+
+ if (keep_order)
+ up_read(&fio->sbi->io_order_lock);
}
-void write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
+void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
enum iostat_type io_type)
{
struct f2fs_io_info fio = {
@@ -2862,7 +3104,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
}
-void write_node_page(unsigned int nid, struct f2fs_io_info *fio)
+void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
{
struct f2fs_summary sum;
@@ -2872,22 +3114,21 @@ void write_node_page(unsigned int nid, struct f2fs_io_info *fio)
f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
}
-void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio)
+void f2fs_outplace_write_data(struct dnode_of_data *dn,
+ struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
struct f2fs_summary sum;
- struct node_info ni;
f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
- get_node_info(sbi, dn->nid, &ni);
- set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
+ set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
do_write_page(&sum, fio);
f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
}
-int rewrite_data_page(struct f2fs_io_info *fio)
+int f2fs_inplace_write_data(struct f2fs_io_info *fio)
{
int err;
struct f2fs_sb_info *sbi = fio->sbi;
@@ -2922,7 +3163,7 @@ static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
return i;
}
-void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
bool recover_curseg, bool recover_newaddr)
{
@@ -2977,8 +3218,11 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (!recover_curseg || recover_newaddr)
update_sit_entry(sbi, new_blkaddr, 1);
- if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
+ if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
+ invalidate_mapping_pages(META_MAPPING(sbi),
+ old_blkaddr, old_blkaddr);
update_sit_entry(sbi, old_blkaddr, -1);
+ }
locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
@@ -3007,7 +3251,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
set_summary(&sum, dn->nid, dn->ofs_in_node, version);
- __f2fs_replace_block(sbi, &sum, old_addr, new_addr,
+ f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
recover_curseg, recover_newaddr);
f2fs_update_data_blkaddr(dn, new_addr);
@@ -3032,7 +3276,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
{
struct page *cpage;
- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
+ if (!is_valid_data_blkaddr(sbi, blkaddr))
return;
cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
@@ -3042,7 +3286,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
}
}
-static void read_compacted_summaries(struct f2fs_sb_info *sbi)
+static int read_compacted_summaries(struct f2fs_sb_info *sbi)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct curseg_info *seg_i;
@@ -3053,7 +3297,9 @@ static void read_compacted_summaries(struct f2fs_sb_info *sbi)
start = start_sum_block(sbi);
- page = get_meta_page(sbi, start++);
+ page = f2fs_get_meta_page(sbi, start++);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
kaddr = (unsigned char *)page_address(page);
/* Step 1: restore nat cache */
@@ -3093,12 +3339,15 @@ static void read_compacted_summaries(struct f2fs_sb_info *sbi)
f2fs_put_page(page, 1);
page = NULL;
- page = get_meta_page(sbi, start++);
+ page = f2fs_get_meta_page(sbi, start++);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
kaddr = (unsigned char *)page_address(page);
offset = 0;
}
}
f2fs_put_page(page, 1);
+ return 0;
}
static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
@@ -3110,6 +3359,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
unsigned short blk_off;
unsigned int segno = 0;
block_t blk_addr = 0;
+ int err = 0;
/* get segment number and block addr */
if (IS_DATASEG(type)) {
@@ -3132,7 +3382,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
blk_addr = GET_SUM_BLOCK(sbi, segno);
}
- new = get_meta_page(sbi, blk_addr);
+ new = f2fs_get_meta_page(sbi, blk_addr);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
sum = (struct f2fs_summary_block *)page_address(new);
if (IS_NODESEG(type)) {
@@ -3144,7 +3396,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
ns->ofs_in_node = 0;
}
} else {
- restore_node_summary(sbi, segno, sum);
+ err = f2fs_restore_node_summary(sbi, segno, sum);
+ if (err)
+ goto out;
}
}
@@ -3164,8 +3418,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
curseg->alloc_type = ckpt->alloc_type[type];
curseg->next_blkoff = blk_off;
mutex_unlock(&curseg->curseg_mutex);
+out:
f2fs_put_page(new, 1);
- return 0;
+ return err;
}
static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
@@ -3176,19 +3431,21 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
int err;
if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
- int npages = npages_for_summary_flush(sbi, true);
+ int npages = f2fs_npages_for_summary_flush(sbi, true);
if (npages >= 2)
- ra_meta_pages(sbi, start_sum_block(sbi), npages,
+ f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
META_CP, true);
/* restore for compacted data summary */
- read_compacted_summaries(sbi);
+ err = read_compacted_summaries(sbi);
+ if (err)
+ return err;
type = CURSEG_HOT_NODE;
}
if (__exist_node_summaries(sbi))
- ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
+ f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
NR_CURSEG_TYPE - type, META_CP, true);
for (; type <= CURSEG_COLD_NODE; type++) {
@@ -3214,8 +3471,9 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
int written_size = 0;
int i, j;
- page = grab_meta_page(sbi, blkaddr++);
+ page = f2fs_grab_meta_page(sbi, blkaddr++);
kaddr = (unsigned char *)page_address(page);
+ memset(kaddr, 0, PAGE_SIZE);
/* Step 1: write nat cache */
seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -3238,8 +3496,9 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
for (j = 0; j < blkoff; j++) {
if (!page) {
- page = grab_meta_page(sbi, blkaddr++);
+ page = f2fs_grab_meta_page(sbi, blkaddr++);
kaddr = (unsigned char *)page_address(page);
+ memset(kaddr, 0, PAGE_SIZE);
written_size = 0;
}
summary = (struct f2fs_summary *)(kaddr + written_size);
@@ -3274,7 +3533,7 @@ static void write_normal_summaries(struct f2fs_sb_info *sbi,
write_current_sum_page(sbi, i, blkaddr + (i - type));
}
-void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
+void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
write_compacted_summaries(sbi, start_blk);
@@ -3282,12 +3541,12 @@ void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
}
-void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
+void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
}
-int lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
+int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
unsigned int val, int alloc)
{
int i;
@@ -3312,7 +3571,7 @@ int lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
unsigned int segno)
{
- return get_meta_page(sbi, current_sit_addr(sbi, segno));
+ return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
}
static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
@@ -3325,7 +3584,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
src_off = current_sit_addr(sbi, start);
dst_off = next_sit_addr(sbi, src_off);
- page = grab_meta_page(sbi, dst_off);
+ page = f2fs_grab_meta_page(sbi, dst_off);
seg_info_to_sit_page(sbi, page, start);
set_page_dirty(page);
@@ -3421,7 +3680,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
* CP calls this function, which flushes SIT entries including sit_journal,
* and moves prefree segs to free segs.
*/
-void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct sit_info *sit_i = SIT_I(sbi);
unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
@@ -3480,6 +3739,11 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
int offset, sit_offset;
se = get_seg_entry(sbi, segno);
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
+ SIT_VBLOCK_MAP_SIZE))
+ f2fs_bug_on(sbi, 1);
+#endif
/* add discard candidates */
if (!(cpc->reason & CP_DISCARD)) {
@@ -3488,17 +3752,21 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
}
if (to_journal) {
- offset = lookup_journal_in_cursum(journal,
+ offset = f2fs_lookup_journal_in_cursum(journal,
SIT_JOURNAL, segno, 1);
f2fs_bug_on(sbi, offset < 0);
segno_in_journal(journal, offset) =
cpu_to_le32(segno);
seg_info_to_raw_sit(se,
&sit_in_journal(journal, offset));
+ check_block_count(sbi, segno,
+ &sit_in_journal(journal, offset));
} else {
sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
seg_info_to_raw_sit(se,
&raw_sit->entries[sit_offset]);
+ check_block_count(sbi, segno,
+ &raw_sit->entries[sit_offset]);
}
__clear_bit(segno, bitmap);
@@ -3546,8 +3814,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
SM_I(sbi)->sit_info = sit_i;
- sit_i->sentries = f2fs_kvzalloc(sbi, MAIN_SEGS(sbi) *
- sizeof(struct seg_entry), GFP_KERNEL);
+ sit_i->sentries =
+ f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
+ MAIN_SEGS(sbi)),
+ GFP_KERNEL);
if (!sit_i->sentries)
return -ENOMEM;
@@ -3587,8 +3857,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
return -ENOMEM;
if (sbi->segs_per_sec > 1) {
- sit_i->sec_entries = f2fs_kvzalloc(sbi, MAIN_SECS(sbi) *
- sizeof(struct sec_entry), GFP_KERNEL);
+ sit_i->sec_entries =
+ f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
+ MAIN_SECS(sbi)),
+ GFP_KERNEL);
if (!sit_i->sec_entries)
return -ENOMEM;
}
@@ -3664,7 +3936,8 @@ static int build_curseg(struct f2fs_sb_info *sbi)
struct curseg_info *array;
int i;
- array = f2fs_kzalloc(sbi, sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL);
+ array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
+ GFP_KERNEL);
if (!array)
return -ENOMEM;
@@ -3697,9 +3970,10 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
unsigned int i, start, end;
unsigned int readed, start_blk = 0;
int err = 0;
+ block_t total_node_blocks = 0;
do {
- readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
+ readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
META_SIT, true);
start = start_blk * sit_i->sents_per_block;
@@ -3719,6 +3993,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
if (err)
return err;
seg_info_from_raw_sit(se, &sit);
+ if (IS_NODESEG(se->type))
+ total_node_blocks += se->valid_blocks;
/* build discard map only one time */
if (f2fs_discard_en(sbi)) {
@@ -3747,15 +4023,28 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
unsigned int old_valid_blocks;
start = le32_to_cpu(segno_in_journal(journal, i));
+ if (start >= MAIN_SEGS(sbi)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong journal entry on segno %u",
+ start);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ err = -EINVAL;
+ break;
+ }
+
se = &sit_i->sentries[start];
sit = sit_in_journal(journal, i);
old_valid_blocks = se->valid_blocks;
+ if (IS_NODESEG(se->type))
+ total_node_blocks -= old_valid_blocks;
err = check_block_count(sbi, start, &sit);
if (err)
break;
seg_info_from_raw_sit(se, &sit);
+ if (IS_NODESEG(se->type))
+ total_node_blocks += se->valid_blocks;
if (f2fs_discard_en(sbi)) {
if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
@@ -3764,16 +4053,28 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
} else {
memcpy(se->discard_map, se->cur_valid_map,
SIT_VBLOCK_MAP_SIZE);
- sbi->discard_blks += old_valid_blocks -
- se->valid_blocks;
+ sbi->discard_blks += old_valid_blocks;
+ sbi->discard_blks -= se->valid_blocks;
}
}
- if (sbi->segs_per_sec > 1)
+ if (sbi->segs_per_sec > 1) {
get_sec_entry(sbi, start)->valid_blocks +=
- se->valid_blocks - old_valid_blocks;
+ se->valid_blocks;
+ get_sec_entry(sbi, start)->valid_blocks -=
+ old_valid_blocks;
+ }
}
up_read(&curseg->journal_rwsem);
+
+ if (!err && total_node_blocks != valid_node_count(sbi)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "SIT is corrupted node# %u vs %u",
+ total_node_blocks, valid_node_count(sbi));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ err = -EINVAL;
+ }
+
return err;
}
@@ -3872,7 +4173,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
down_write(&sit_i->sentry_lock);
- sit_i->min_mtime = LLONG_MAX;
+ sit_i->min_mtime = ULLONG_MAX;
for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
unsigned int i;
@@ -3886,11 +4187,11 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
if (sit_i->min_mtime > mtime)
sit_i->min_mtime = mtime;
}
- sit_i->max_mtime = get_mtime(sbi);
+ sit_i->max_mtime = get_mtime(sbi, false);
up_write(&sit_i->sentry_lock);
}
-int build_segment_manager(struct f2fs_sb_info *sbi)
+int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@@ -3919,6 +4220,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
+ sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
sm_info->min_ssr_sections = reserved_sections(sbi);
@@ -3927,7 +4229,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
init_rwsem(&sm_info->curseg_lock);
if (!f2fs_readonly(sbi->sb)) {
- err = create_flush_cmd_control(sbi);
+ err = f2fs_create_flush_cmd_control(sbi);
if (err)
return err;
}
@@ -4052,13 +4354,13 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
kfree(sit_i);
}
-void destroy_segment_manager(struct f2fs_sb_info *sbi)
+void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_sm_info *sm_info = SM_I(sbi);
if (!sm_info)
return;
- destroy_flush_cmd_control(sbi, true);
+ f2fs_destroy_flush_cmd_control(sbi, true);
destroy_discard_cmd_control(sbi);
destroy_dirty_segmap(sbi);
destroy_curseg(sbi);
@@ -4068,7 +4370,7 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
kfree(sm_info);
}
-int __init create_segment_manager_caches(void)
+int __init f2fs_create_segment_manager_caches(void)
{
discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
sizeof(struct discard_entry));
@@ -4101,7 +4403,7 @@ fail:
return -ENOMEM;
}
-void destroy_segment_manager_caches(void)
+void f2fs_destroy_segment_manager_caches(void)
{
kmem_cache_destroy(sit_entry_set_slab);
kmem_cache_destroy(discard_cmd_slab);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 96a2d57ba8a4..b3d9e317ff0c 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -85,7 +85,7 @@
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))
#define GET_SEGNO(sbi, blk_addr) \
- ((((blk_addr) == NULL_ADDR) || ((blk_addr) == NEW_ADDR)) ? \
+ ((!is_valid_data_blkaddr(sbi, blk_addr)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define BLKS_PER_SEC(sbi) \
@@ -215,6 +215,8 @@ struct segment_allocation {
#define IS_DUMMY_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
+#define MAX_SKIP_GC_COUNT 16
+
struct inmem_pages {
struct list_head list;
struct page *page;
@@ -375,6 +377,7 @@ static inline void seg_info_to_sit_page(struct f2fs_sb_info *sbi,
int i;
raw_sit = (struct f2fs_sit_block *)page_address(page);
+ memset(raw_sit, 0, PAGE_SIZE);
for (i = 0; i < end - start; i++) {
rs = &raw_sit->entries[i];
se = get_seg_entry(sbi, start + i);
@@ -445,6 +448,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
if (test_and_clear_bit(segno, free_i->free_segmap)) {
free_i->free_segments++;
+ if (IS_CURSEC(sbi, secno))
+ goto skip_free;
next = find_next_bit(free_i->free_segmap,
start_segno + sbi->segs_per_sec, start_segno);
if (next >= start_segno + sbi->segs_per_sec) {
@@ -452,6 +457,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
free_i->free_sections++;
}
}
+skip_free:
spin_unlock(&free_i->segmap_lock);
}
@@ -642,13 +648,10 @@ static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
{
struct f2fs_sb_info *sbi = fio->sbi;
- if (PAGE_TYPE_OF_BIO(fio->type) == META &&
- (!is_read_io(fio->op) || fio->is_meta))
- BUG_ON(blk_addr < SEG0_BLKADDR(sbi) ||
- blk_addr >= MAIN_BLKADDR(sbi));
+ if (__is_meta_io(fio))
+ verify_blkaddr(sbi, blk_addr, META_GENERIC);
else
- BUG_ON(blk_addr < MAIN_BLKADDR(sbi) ||
- blk_addr >= MAX_BLKADDR(sbi));
+ verify_blkaddr(sbi, blk_addr, DATA_GENERIC);
}
/*
@@ -742,11 +745,23 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start)
#endif
}
-static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi)
+static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi,
+ bool base_time)
{
struct sit_info *sit_i = SIT_I(sbi);
- return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec -
- sit_i->mounted_time;
+ time64_t diff, now = ktime_get_real_seconds();
+
+ if (now >= sit_i->mounted_time)
+ return sit_i->elapsed_time + now - sit_i->mounted_time;
+
+ /* system time is set to the past */
+ if (!base_time) {
+ diff = sit_i->mounted_time - now;
+ if (sit_i->elapsed_time >= diff)
+ return sit_i->elapsed_time - diff;
+ return 0;
+ }
+ return sit_i->elapsed_time;
}
static inline void set_summary(struct f2fs_summary *sum, nid_t nid,
@@ -770,15 +785,6 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
- (base + 1) + type;
}
-static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi,
- unsigned int secno)
-{
- if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) >
- sbi->fggc_threshold)
- return true;
- return false;
-}
-
static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
{
if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno))
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 0b5664a1a6cc..36cfd816c160 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -109,11 +109,11 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
/* shrink clean nat cache entries */
if (freed < nr)
- freed += try_to_free_nats(sbi, nr - freed);
+ freed += f2fs_try_to_free_nats(sbi, nr - freed);
/* shrink free nids cache entries */
if (freed < nr)
- freed += try_to_free_nids(sbi, nr - freed);
+ freed += f2fs_try_to_free_nids(sbi, nr - freed);
spin_lock(&f2fs_list_lock);
p = p->next;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 177e773b8fc2..6b71328cbc6f 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -41,7 +41,7 @@ static struct kmem_cache *f2fs_inode_cachep;
#ifdef CONFIG_F2FS_FAULT_INJECTION
-char *fault_name[FAULT_MAX] = {
+char *f2fs_fault_name[FAULT_MAX] = {
[FAULT_KMALLOC] = "kmalloc",
[FAULT_KVMALLOC] = "kvmalloc",
[FAULT_PAGE_ALLOC] = "page alloc",
@@ -55,20 +55,24 @@ char *fault_name[FAULT_MAX] = {
[FAULT_TRUNCATE] = "truncate fail",
[FAULT_IO] = "IO error",
[FAULT_CHECKPOINT] = "checkpoint error",
+ [FAULT_DISCARD] = "discard error",
};
-static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi,
- unsigned int rate)
+void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
+ unsigned int type)
{
struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info;
if (rate) {
atomic_set(&ffi->inject_ops, 0);
ffi->inject_rate = rate;
- ffi->inject_type = (1 << FAULT_MAX) - 1;
- } else {
- memset(ffi, 0, sizeof(struct f2fs_fault_info));
}
+
+ if (type)
+ ffi->inject_type = type;
+
+ if (!rate && !type)
+ memset(ffi, 0, sizeof(struct f2fs_fault_info));
}
#endif
@@ -113,6 +117,7 @@ enum {
Opt_mode,
Opt_io_size_bits,
Opt_fault_injection,
+ Opt_fault_type,
Opt_lazytime,
Opt_nolazytime,
Opt_quota,
@@ -170,6 +175,7 @@ static match_table_t f2fs_tokens = {
{Opt_mode, "mode=%s"},
{Opt_io_size_bits, "io_bits=%u"},
{Opt_fault_injection, "fault_injection=%u"},
+ {Opt_fault_type, "fault_type=%u"},
{Opt_lazytime, "lazytime"},
{Opt_nolazytime, "nolazytime"},
{Opt_quota, "quota"},
@@ -348,12 +354,6 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
"QUOTA feature is enabled, so ignore jquota_fmt");
F2FS_OPTION(sbi).s_jquota_fmt = 0;
}
- if (f2fs_sb_has_quota_ino(sbi->sb) && f2fs_readonly(sbi->sb)) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "Filesystem with quota feature cannot be mounted RDWR "
- "without CONFIG_QUOTA");
- return -1;
- }
return 0;
}
#endif
@@ -607,7 +607,18 @@ static int parse_options(struct super_block *sb, char *options)
if (args->from && match_int(args, &arg))
return -EINVAL;
#ifdef CONFIG_F2FS_FAULT_INJECTION
- f2fs_build_fault_attr(sbi, arg);
+ f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE);
+ set_opt(sbi, FAULT_INJECTION);
+#else
+ f2fs_msg(sb, KERN_INFO,
+ "FAULT_INJECTION was not selected");
+#endif
+ break;
+ case Opt_fault_type:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ f2fs_build_fault_attr(sbi, 0, arg);
set_opt(sbi, FAULT_INJECTION);
#else
f2fs_msg(sb, KERN_INFO,
@@ -776,6 +787,19 @@ static int parse_options(struct super_block *sb, char *options)
#ifdef CONFIG_QUOTA
if (f2fs_check_quota_options(sbi))
return -EINVAL;
+#else
+ if (f2fs_sb_has_quota_ino(sbi->sb) && !f2fs_readonly(sbi->sb)) {
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "Filesystem with quota feature cannot be mounted RDWR "
+ "without CONFIG_QUOTA");
+ return -EINVAL;
+ }
+ if (f2fs_sb_has_project_quota(sbi->sb) && !f2fs_readonly(sbi->sb)) {
+ f2fs_msg(sb, KERN_ERR,
+ "Filesystem with project quota feature cannot be "
+ "mounted RDWR without CONFIG_QUOTA");
+ return -EINVAL;
+ }
#endif
if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
@@ -831,15 +855,14 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
/* Initialize f2fs-specific inode info */
atomic_set(&fi->dirty_pages, 0);
- fi->i_current_depth = 1;
init_rwsem(&fi->i_sem);
INIT_LIST_HEAD(&fi->dirty_list);
INIT_LIST_HEAD(&fi->gdirty_list);
INIT_LIST_HEAD(&fi->inmem_ilist);
INIT_LIST_HEAD(&fi->inmem_pages);
mutex_init(&fi->inmem_lock);
- init_rwsem(&fi->dio_rwsem[READ]);
- init_rwsem(&fi->dio_rwsem[WRITE]);
+ init_rwsem(&fi->i_gc_rwsem[READ]);
+ init_rwsem(&fi->i_gc_rwsem[WRITE]);
init_rwsem(&fi->i_mmap_sem);
init_rwsem(&fi->i_xattr_sem);
@@ -867,7 +890,7 @@ static int f2fs_drop_inode(struct inode *inode)
/* some remained atomic pages should discarded */
if (f2fs_is_atomic_file(inode))
- drop_inmem_pages(inode);
+ f2fs_drop_inmem_pages(inode);
/* should remain fi->extent_tree for writepage */
f2fs_destroy_extent_node(inode);
@@ -1004,7 +1027,7 @@ static void f2fs_put_super(struct super_block *sb)
struct cp_control cpc = {
.reason = CP_UMOUNT,
};
- write_checkpoint(sbi, &cpc);
+ f2fs_write_checkpoint(sbi, &cpc);
}
/* be sure to wait for any on-going discard commands */
@@ -1014,17 +1037,17 @@ static void f2fs_put_super(struct super_block *sb)
struct cp_control cpc = {
.reason = CP_UMOUNT | CP_TRIMMED,
};
- write_checkpoint(sbi, &cpc);
+ f2fs_write_checkpoint(sbi, &cpc);
}
- /* write_checkpoint can update stat informaion */
+ /* f2fs_write_checkpoint can update stat informaion */
f2fs_destroy_stats(sbi);
/*
* normally superblock is clean, so we need to release this.
* In addition, EIO will skip do checkpoint, we need this as well.
*/
- release_ino_entry(sbi, true);
+ f2fs_release_ino_entry(sbi, true);
f2fs_leave_shrinker(sbi);
mutex_unlock(&sbi->umount_mutex);
@@ -1032,12 +1055,16 @@ static void f2fs_put_super(struct super_block *sb)
/* our cp_error case, we can wait for any writeback page */
f2fs_flush_merged_writes(sbi);
+ f2fs_wait_on_all_pages_writeback(sbi);
+
+ f2fs_bug_on(sbi, sbi->fsync_node_num);
+
iput(sbi->node_inode);
iput(sbi->meta_inode);
/* destroy f2fs internal modules */
- destroy_node_manager(sbi);
- destroy_segment_manager(sbi);
+ f2fs_destroy_node_manager(sbi);
+ f2fs_destroy_segment_manager(sbi);
kfree(sbi->ckpt);
@@ -1080,7 +1107,7 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
cpc.reason = __get_cp_reason(sbi);
mutex_lock(&sbi->gc_mutex);
- err = write_checkpoint(sbi, &cpc);
+ err = f2fs_write_checkpoint(sbi, &cpc);
mutex_unlock(&sbi->gc_mutex);
}
f2fs_trace_ios(NULL, 1);
@@ -1314,9 +1341,12 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
if (F2FS_IO_SIZE_BITS(sbi))
seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (test_opt(sbi, FAULT_INJECTION))
+ if (test_opt(sbi, FAULT_INJECTION)) {
seq_printf(seq, ",fault_injection=%u",
F2FS_OPTION(sbi).fault_info.inject_rate);
+ seq_printf(seq, ",fault_type=%u",
+ F2FS_OPTION(sbi).fault_info.inject_type);
+ }
#endif
#ifdef CONFIG_QUOTA
if (test_opt(sbi, QUOTA))
@@ -1347,6 +1377,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",fsync_mode=%s", "posix");
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
seq_printf(seq, ",fsync_mode=%s", "strict");
+ else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_NOBARRIER)
+ seq_printf(seq, ",fsync_mode=%s", "nobarrier");
return 0;
}
@@ -1359,7 +1391,8 @@ static void default_options(struct f2fs_sb_info *sbi)
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
F2FS_OPTION(sbi).test_dummy_encryption = false;
- sbi->readdir_ra = 1;
+ F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
+ F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
set_opt(sbi, BG_GC);
set_opt(sbi, INLINE_XATTR);
@@ -1369,12 +1402,12 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, NOHEAP);
sbi->sb->s_flags |= MS_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
- if (f2fs_sb_has_blkzoned(sbi->sb)) {
- set_opt_mode(sbi, F2FS_MOUNT_LFS);
+ if (blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev)))
set_opt(sbi, DISCARD);
- } else {
+ if (f2fs_sb_has_blkzoned(sbi->sb))
+ set_opt_mode(sbi, F2FS_MOUNT_LFS);
+ else
set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
- }
#ifdef CONFIG_F2FS_FS_XATTR
set_opt(sbi, XATTR_USER);
@@ -1383,9 +1416,7 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, POSIX_ACL);
#endif
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- f2fs_build_fault_attr(sbi, 0);
-#endif
+ f2fs_build_fault_attr(sbi, 0, 0);
}
#ifdef CONFIG_QUOTA
@@ -1484,11 +1515,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
*/
if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) {
if (sbi->gc_thread) {
- stop_gc_thread(sbi);
+ f2fs_stop_gc_thread(sbi);
need_restart_gc = true;
}
} else if (!sbi->gc_thread) {
- err = start_gc_thread(sbi);
+ err = f2fs_start_gc_thread(sbi);
if (err)
goto restore_opts;
need_stop_gc = true;
@@ -1511,9 +1542,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
*/
if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
clear_opt(sbi, FLUSH_MERGE);
- destroy_flush_cmd_control(sbi, false);
+ f2fs_destroy_flush_cmd_control(sbi, false);
} else {
- err = create_flush_cmd_control(sbi);
+ err = f2fs_create_flush_cmd_control(sbi);
if (err)
goto restore_gc;
}
@@ -1531,11 +1562,11 @@ skip:
return 0;
restore_gc:
if (need_restart_gc) {
- if (start_gc_thread(sbi))
+ if (f2fs_start_gc_thread(sbi))
f2fs_msg(sbi->sb, KERN_WARNING,
"background gc thread has stopped");
} else if (need_stop_gc) {
- stop_gc_thread(sbi);
+ f2fs_stop_gc_thread(sbi);
}
restore_opts:
#ifdef CONFIG_QUOTA
@@ -1807,7 +1838,7 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
inode = d_inode(path->dentry);
inode_lock(inode);
- F2FS_I(inode)->i_flags |= FS_NOATIME_FL | FS_IMMUTABLE_FL;
+ F2FS_I(inode)->i_flags |= F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL;
inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
S_NOATIME | S_IMMUTABLE);
inode_unlock(inode);
@@ -1831,7 +1862,7 @@ static int f2fs_quota_off(struct super_block *sb, int type)
goto out_put;
inode_lock(inode);
- F2FS_I(inode)->i_flags &= ~(FS_NOATIME_FL | FS_IMMUTABLE_FL);
+ F2FS_I(inode)->i_flags &= ~(F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL);
inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
inode_unlock(inode);
f2fs_mark_inode_dirty_sync(inode, false);
@@ -1940,19 +1971,13 @@ static bool f2fs_dummy_context(struct inode *inode)
return DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(inode));
}
-static unsigned f2fs_max_namelen(struct inode *inode)
-{
- return S_ISLNK(inode->i_mode) ?
- inode->i_sb->s_blocksize : F2FS_NAME_LEN;
-}
-
static const struct fscrypt_operations f2fs_cryptops = {
.key_prefix = "f2fs:",
.get_context = f2fs_get_context,
.set_context = f2fs_set_context,
.dummy_context = f2fs_dummy_context,
.empty_dir = f2fs_empty_dir,
- .max_namelen = f2fs_max_namelen,
+ .max_namelen = F2FS_NAME_LEN,
};
#endif
@@ -1962,7 +1987,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct inode *inode;
- if (check_nid_range(sbi, ino))
+ if (f2fs_check_nid_range(sbi, ino))
return ERR_PTR(-ESTALE);
/*
@@ -2145,6 +2170,8 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
struct buffer_head *bh)
{
+ block_t segment_count, segs_per_sec, secs_per_zone;
+ block_t total_sections, blocks_per_seg;
struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
(bh->b_data + F2FS_SUPER_OFFSET);
struct super_block *sb = sbi->sb;
@@ -2201,6 +2228,72 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return 1;
}
+ segment_count = le32_to_cpu(raw_super->segment_count);
+ segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
+ secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
+ total_sections = le32_to_cpu(raw_super->section_count);
+
+ /* blocks_per_seg should be 512, given the above check */
+ blocks_per_seg = 1 << le32_to_cpu(raw_super->log_blocks_per_seg);
+
+ if (segment_count > F2FS_MAX_SEGMENT ||
+ segment_count < F2FS_MIN_SEGMENTS) {
+ f2fs_msg(sb, KERN_INFO,
+ "Invalid segment count (%u)",
+ segment_count);
+ return 1;
+ }
+
+ if (total_sections > segment_count ||
+ total_sections < F2FS_MIN_SEGMENTS ||
+ segs_per_sec > segment_count || !segs_per_sec) {
+ f2fs_msg(sb, KERN_INFO,
+ "Invalid segment/section count (%u, %u x %u)",
+ segment_count, total_sections, segs_per_sec);
+ return 1;
+ }
+
+ if ((segment_count / segs_per_sec) < total_sections) {
+ f2fs_msg(sb, KERN_INFO,
+ "Small segment_count (%u < %u * %u)",
+ segment_count, segs_per_sec, total_sections);
+ return 1;
+ }
+
+ if (segment_count > (le32_to_cpu(raw_super->block_count) >> 9)) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong segment_count / block_count (%u > %u)",
+ segment_count, le32_to_cpu(raw_super->block_count));
+ return 1;
+ }
+
+ if (secs_per_zone > total_sections || !secs_per_zone) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong secs_per_zone / total_sections (%u, %u)",
+ secs_per_zone, total_sections);
+ return 1;
+ }
+ if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION ||
+ raw_super->hot_ext_count > F2FS_MAX_EXTENSION ||
+ (le32_to_cpu(raw_super->extension_count) +
+ raw_super->hot_ext_count) > F2FS_MAX_EXTENSION) {
+ f2fs_msg(sb, KERN_INFO,
+ "Corrupted extension count (%u + %u > %u)",
+ le32_to_cpu(raw_super->extension_count),
+ raw_super->hot_ext_count,
+ F2FS_MAX_EXTENSION);
+ return 1;
+ }
+
+ if (le32_to_cpu(raw_super->cp_payload) >
+ (blocks_per_seg - F2FS_CP_PACKS)) {
+ f2fs_msg(sb, KERN_INFO,
+ "Insane cp_payload (%u > %u)",
+ le32_to_cpu(raw_super->cp_payload),
+ blocks_per_seg - F2FS_CP_PACKS);
+ return 1;
+ }
+
/* check reserved ino info */
if (le32_to_cpu(raw_super->node_ino) != 1 ||
le32_to_cpu(raw_super->meta_ino) != 2 ||
@@ -2213,13 +2306,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return 1;
}
- if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid segment count (%u)",
- le32_to_cpu(raw_super->segment_count));
- return 1;
- }
-
/* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
if (sanity_check_area_boundary(sbi, bh))
return 1;
@@ -2227,19 +2313,27 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return 0;
}
-int sanity_check_ckpt(struct f2fs_sb_info *sbi)
+int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
{
unsigned int total, fsmeta;
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned int ovp_segments, reserved_segments;
unsigned int main_segs, blocks_per_seg;
+ unsigned int sit_segs, nat_segs;
+ unsigned int sit_bitmap_size, nat_bitmap_size;
+ unsigned int log_blocks_per_seg;
+ unsigned int segment_count_main;
+ unsigned int cp_pack_start_sum, cp_payload;
+ block_t user_block_count;
int i;
total = le32_to_cpu(raw_super->segment_count);
fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
- fsmeta += le32_to_cpu(raw_super->segment_count_sit);
- fsmeta += le32_to_cpu(raw_super->segment_count_nat);
+ sit_segs = le32_to_cpu(raw_super->segment_count_sit);
+ fsmeta += sit_segs;
+ nat_segs = le32_to_cpu(raw_super->segment_count_nat);
+ fsmeta += nat_segs;
fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);
fsmeta += le32_to_cpu(raw_super->segment_count_ssa);
@@ -2256,6 +2350,16 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
return 1;
}
+ user_block_count = le64_to_cpu(ckpt->user_block_count);
+ segment_count_main = le32_to_cpu(raw_super->segment_count_main);
+ log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
+ if (!user_block_count || user_block_count >=
+ segment_count_main << log_blocks_per_seg) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong user_block_count: %u", user_block_count);
+ return 1;
+ }
+
main_segs = le32_to_cpu(raw_super->segment_count_main);
blocks_per_seg = sbi->blocks_per_seg;
@@ -2270,6 +2374,28 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
return 1;
}
+ sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
+ nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize);
+
+ if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 ||
+ nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong bitmap size: sit: %u, nat:%u",
+ sit_bitmap_size, nat_bitmap_size);
+ return 1;
+ }
+
+ cp_pack_start_sum = __start_sum_addr(sbi);
+ cp_payload = __cp_payload(sbi);
+ if (cp_pack_start_sum < cp_payload + 1 ||
+ cp_pack_start_sum > blocks_per_seg - 1 -
+ NR_CURSEG_TYPE) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong cp_pack_start_sum: %u",
+ cp_pack_start_sum);
+ return 1;
+ }
+
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
return 1;
@@ -2308,13 +2434,15 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
for (i = 0; i < NR_COUNT_TYPE; i++)
atomic_set(&sbi->nr_pages[i], 0);
- atomic_set(&sbi->wb_sync_req, 0);
+ for (i = 0; i < META; i++)
+ atomic_set(&sbi->wb_sync_req[i], 0);
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
for (i = 0; i < NR_PAGE_TYPE - 1; i++)
for (j = HOT; j < NR_TEMP_TYPE; j++)
mutex_init(&sbi->wio_mutex[i][j]);
+ init_rwsem(&sbi->io_order_lock);
spin_lock_init(&sbi->cp_lock);
sbi->dirty_device = 0;
@@ -2369,8 +2497,10 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
#define F2FS_REPORT_NR_ZONES 4096
- zones = f2fs_kzalloc(sbi, sizeof(struct blk_zone) *
- F2FS_REPORT_NR_ZONES, GFP_KERNEL);
+ zones = f2fs_kzalloc(sbi,
+ array_size(F2FS_REPORT_NR_ZONES,
+ sizeof(struct blk_zone)),
+ GFP_KERNEL);
if (!zones)
return -ENOMEM;
@@ -2514,8 +2644,10 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
* Initialize multiple devices information, or single
* zoned block device information.
*/
- sbi->devs = f2fs_kzalloc(sbi, sizeof(struct f2fs_dev_info) *
- max_devices, GFP_KERNEL);
+ sbi->devs = f2fs_kzalloc(sbi,
+ array_size(max_devices,
+ sizeof(struct f2fs_dev_info)),
+ GFP_KERNEL);
if (!sbi->devs)
return -ENOMEM;
@@ -2601,6 +2733,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
sm_i->dcc_info->discard_granularity = 1;
sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
}
+
+ sbi->readdir_ra = 1;
}
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
@@ -2650,9 +2784,6 @@ try_onemore:
sb->s_fs_info = sbi;
sbi->raw_super = raw_super;
- F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
- F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
-
/* precompute checksum seed for metadata */
if (f2fs_sb_has_inode_chksum(sb))
sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
@@ -2721,6 +2852,7 @@ try_onemore:
/* init f2fs-specific super block info */
sbi->valid_super_block = valid_super_block;
mutex_init(&sbi->gc_mutex);
+ mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
init_rwsem(&sbi->node_write);
init_rwsem(&sbi->node_change);
@@ -2737,9 +2869,11 @@ try_onemore:
int n = (i == META) ? 1: NR_TEMP_TYPE;
int j;
- sbi->write_io[i] = f2fs_kmalloc(sbi,
- n * sizeof(struct f2fs_bio_info),
- GFP_KERNEL);
+ sbi->write_io[i] =
+ f2fs_kmalloc(sbi,
+ array_size(n,
+ sizeof(struct f2fs_bio_info)),
+ GFP_KERNEL);
if (!sbi->write_io[i]) {
err = -ENOMEM;
goto free_options;
@@ -2779,7 +2913,7 @@ try_onemore:
goto free_io_dummy;
}
- err = get_valid_checkpoint(sbi);
+ err = f2fs_get_valid_checkpoint(sbi);
if (err) {
f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint");
goto free_meta_inode;
@@ -2809,18 +2943,20 @@ try_onemore:
spin_lock_init(&sbi->inode_lock[i]);
}
- init_extent_cache_info(sbi);
+ f2fs_init_extent_cache_info(sbi);
+
+ f2fs_init_ino_entry_info(sbi);
- init_ino_entry_info(sbi);
+ f2fs_init_fsync_node_info(sbi);
/* setup f2fs internal modules */
- err = build_segment_manager(sbi);
+ err = f2fs_build_segment_manager(sbi);
if (err) {
f2fs_msg(sb, KERN_ERR,
"Failed to initialize F2FS segment manager");
goto free_sm;
}
- err = build_node_manager(sbi);
+ err = f2fs_build_node_manager(sbi);
if (err) {
f2fs_msg(sb, KERN_ERR,
"Failed to initialize F2FS node manager");
@@ -2838,7 +2974,7 @@ try_onemore:
sbi->kbytes_written =
le64_to_cpu(seg_i->journal->info.kbytes_written);
- build_gc_manager(sbi);
+ f2fs_build_gc_manager(sbi);
/* get an inode for node space */
sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
@@ -2859,10 +2995,11 @@ try_onemore:
err = PTR_ERR(root);
goto free_stats;
}
- if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
+ if (!S_ISDIR(root->i_mode) || !root->i_blocks ||
+ !root->i_size || !root->i_nlink) {
iput(root);
err = -EINVAL;
- goto free_node_inode;
+ goto free_stats;
}
sb->s_root = d_make_root(root); /* allocate root dentry */
@@ -2876,10 +3013,7 @@ try_onemore:
goto free_root_inode;
#ifdef CONFIG_QUOTA
- /*
- * Turn on quotas which were not enabled for read-only mounts if
- * filesystem has quota feature, so that they are updated correctly.
- */
+ /* Enable quota usage during mount */
if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) {
err = f2fs_enable_quotas(sb);
if (err) {
@@ -2890,7 +3024,7 @@ try_onemore:
}
#endif
/* if there are nt orphan nodes free them */
- err = recover_orphan_inodes(sbi);
+ err = f2fs_recover_orphan_inodes(sbi);
if (err)
goto free_meta;
@@ -2912,7 +3046,7 @@ try_onemore:
if (!retry)
goto skip_recovery;
- err = recover_fsync_data(sbi, false);
+ err = f2fs_recover_fsync_data(sbi, false);
if (err < 0) {
need_fsck = true;
f2fs_msg(sb, KERN_ERR,
@@ -2920,7 +3054,7 @@ try_onemore:
goto free_meta;
}
} else {
- err = recover_fsync_data(sbi, true);
+ err = f2fs_recover_fsync_data(sbi, true);
if (!f2fs_readonly(sb) && err > 0) {
err = -EINVAL;
@@ -2930,7 +3064,7 @@ try_onemore:
}
}
skip_recovery:
- /* recover_fsync_data() cleared this already */
+ /* f2fs_recover_fsync_data() cleared this already */
clear_sbi_flag(sbi, SBI_POR_DOING);
/*
@@ -2939,7 +3073,7 @@ skip_recovery:
*/
if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) {
/* After POR, we can run background GC thread.*/
- err = start_gc_thread(sbi);
+ err = f2fs_start_gc_thread(sbi);
if (err)
goto free_meta;
}
@@ -2970,10 +3104,10 @@ free_meta:
#endif
f2fs_sync_inode_meta(sbi);
/*
- * Some dirty meta pages can be produced by recover_orphan_inodes()
+ * Some dirty meta pages can be produced by f2fs_recover_orphan_inodes()
* failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg()
- * followed by write_checkpoint() through f2fs_write_node_pages(), which
- * falls into an infinite loop in sync_meta_pages().
+ * followed by f2fs_write_checkpoint() through f2fs_write_node_pages(), which
+ * falls into an infinite loop in f2fs_sync_meta_pages().
*/
truncate_inode_pages_final(META_MAPPING(sbi));
#ifdef CONFIG_QUOTA
@@ -2986,13 +3120,13 @@ free_root_inode:
free_stats:
f2fs_destroy_stats(sbi);
free_node_inode:
- release_ino_entry(sbi, true);
+ f2fs_release_ino_entry(sbi, true);
truncate_inode_pages_final(NODE_MAPPING(sbi));
iput(sbi->node_inode);
free_nm:
- destroy_node_manager(sbi);
+ f2fs_destroy_node_manager(sbi);
free_sm:
- destroy_segment_manager(sbi);
+ f2fs_destroy_segment_manager(sbi);
free_devices:
destroy_device_list(sbi);
kfree(sbi->ckpt);
@@ -3037,9 +3171,19 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
static void kill_f2fs_super(struct super_block *sb)
{
if (sb->s_root) {
- set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE);
- stop_gc_thread(F2FS_SB(sb));
- stop_discard_thread(F2FS_SB(sb));
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+
+ set_sbi_flag(sbi, SBI_IS_CLOSE);
+ f2fs_stop_gc_thread(sbi);
+ f2fs_stop_discard_thread(sbi);
+
+ if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
+ !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+ struct cp_control cpc = {
+ .reason = CP_UMOUNT,
+ };
+ f2fs_write_checkpoint(sbi, &cpc);
+ }
}
kill_block_super(sb);
}
@@ -3088,16 +3232,16 @@ static int __init init_f2fs_fs(void)
err = init_inodecache();
if (err)
goto fail;
- err = create_node_manager_caches();
+ err = f2fs_create_node_manager_caches();
if (err)
goto free_inodecache;
- err = create_segment_manager_caches();
+ err = f2fs_create_segment_manager_caches();
if (err)
goto free_node_manager_caches;
- err = create_checkpoint_caches();
+ err = f2fs_create_checkpoint_caches();
if (err)
goto free_segment_manager_caches;
- err = create_extent_cache();
+ err = f2fs_create_extent_cache();
if (err)
goto free_checkpoint_caches;
err = f2fs_init_sysfs();
@@ -3126,13 +3270,13 @@ free_shrinker:
free_sysfs:
f2fs_exit_sysfs();
free_extent_cache:
- destroy_extent_cache();
+ f2fs_destroy_extent_cache();
free_checkpoint_caches:
- destroy_checkpoint_caches();
+ f2fs_destroy_checkpoint_caches();
free_segment_manager_caches:
- destroy_segment_manager_caches();
+ f2fs_destroy_segment_manager_caches();
free_node_manager_caches:
- destroy_node_manager_caches();
+ f2fs_destroy_node_manager_caches();
free_inodecache:
destroy_inodecache();
fail:
@@ -3146,10 +3290,10 @@ static void __exit exit_f2fs_fs(void)
unregister_filesystem(&f2fs_fs_type);
unregister_shrinker(&f2fs_shrinker_info);
f2fs_exit_sysfs();
- destroy_extent_cache();
- destroy_checkpoint_caches();
- destroy_segment_manager_caches();
- destroy_node_manager_caches();
+ f2fs_destroy_extent_cache();
+ f2fs_destroy_checkpoint_caches();
+ f2fs_destroy_segment_manager_caches();
+ f2fs_destroy_node_manager_caches();
destroy_inodecache();
f2fs_destroy_trace_ios();
}
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 2c53de9251be..30fd016afeb3 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -9,6 +9,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+#include <linux/compiler.h>
#include <linux/proc_fs.h>
#include <linux/f2fs_fs.h>
#include <linux/seq_file.h>
@@ -147,13 +148,13 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
int len = 0, i;
len += snprintf(buf + len, PAGE_SIZE - len,
- "cold file extenstion:\n");
+ "cold file extension:\n");
for (i = 0; i < cold_count; i++)
len += snprintf(buf + len, PAGE_SIZE - len, "%s\n",
extlist[i]);
len += snprintf(buf + len, PAGE_SIZE - len,
- "hot file extenstion:\n");
+ "hot file extension:\n");
for (i = cold_count; i < cold_count + hot_count; i++)
len += snprintf(buf + len, PAGE_SIZE - len, "%s\n",
extlist[i]);
@@ -165,7 +166,7 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
}
-static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
+static ssize_t __sbi_store(struct f2fs_attr *a,
struct f2fs_sb_info *sbi,
const char *buf, size_t count)
{
@@ -201,13 +202,13 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
down_write(&sbi->sb_lock);
- ret = update_extension_list(sbi, name, hot, set);
+ ret = f2fs_update_extension_list(sbi, name, hot, set);
if (ret)
goto out;
ret = f2fs_commit_super(sbi, false);
if (ret)
- update_extension_list(sbi, name, hot, !set);
+ f2fs_update_extension_list(sbi, name, hot, !set);
out:
up_write(&sbi->sb_lock);
return ret ? ret : count;
@@ -248,17 +249,54 @@ out:
if (!strcmp(a->attr.name, "trim_sections"))
return -EINVAL;
+ if (!strcmp(a->attr.name, "gc_urgent")) {
+ if (t >= 1) {
+ sbi->gc_mode = GC_URGENT;
+ if (sbi->gc_thread) {
+ sbi->gc_thread->gc_wake = 1;
+ wake_up_interruptible_all(
+ &sbi->gc_thread->gc_wait_queue_head);
+ wake_up_discard_thread(sbi, true);
+ }
+ } else {
+ sbi->gc_mode = GC_NORMAL;
+ }
+ return count;
+ }
+ if (!strcmp(a->attr.name, "gc_idle")) {
+ if (t == GC_IDLE_CB)
+ sbi->gc_mode = GC_IDLE_CB;
+ else if (t == GC_IDLE_GREEDY)
+ sbi->gc_mode = GC_IDLE_GREEDY;
+ else
+ sbi->gc_mode = GC_NORMAL;
+ return count;
+ }
+
*ui = t;
if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
f2fs_reset_iostat(sbi);
- if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
- sbi->gc_thread->gc_wake = 1;
- wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
- wake_up_discard_thread(sbi, true);
+ return count;
+}
+
+static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi,
+ const char *buf, size_t count)
+{
+ ssize_t ret;
+ bool gc_entry = (!strcmp(a->attr.name, "gc_urgent") ||
+ a->struct_type == GC_THREAD);
+
+ if (gc_entry) {
+ if (!down_read_trylock(&sbi->sb->s_umount))
+ return -EAGAIN;
}
+ ret = __sbi_store(a, sbi, buf, count);
+ if (gc_entry)
+ up_read(&sbi->sb->s_umount);
- return count;
+ return ret;
}
static ssize_t f2fs_attr_show(struct kobject *kobj,
@@ -349,8 +387,8 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent_sleep_time,
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent, gc_urgent);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity);
@@ -359,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
@@ -411,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
ATTR_LIST(min_fsync_blocks),
+ ATTR_LIST(min_seq_blocks),
ATTR_LIST(min_hot_blocks),
ATTR_LIST(min_ssr_sections),
ATTR_LIST(max_victim_search),
@@ -482,7 +522,8 @@ static struct kobject f2fs_feat = {
.kset = &f2fs_kset,
};
-static int segment_info_seq_show(struct seq_file *seq, void *offset)
+static int __maybe_unused segment_info_seq_show(struct seq_file *seq,
+ void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -509,7 +550,8 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
return 0;
}
-static int segment_bits_seq_show(struct seq_file *seq, void *offset)
+static int __maybe_unused segment_bits_seq_show(struct seq_file *seq,
+ void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -533,7 +575,8 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset)
return 0;
}
-static int iostat_info_seq_show(struct seq_file *seq, void *offset)
+static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
+ void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -575,6 +618,28 @@ static int iostat_info_seq_show(struct seq_file *seq, void *offset)
return 0;
}
+static int __maybe_unused victim_bits_seq_show(struct seq_file *seq,
+ void *offset)
+{
+ struct super_block *sb = seq->private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ int i;
+
+ seq_puts(seq, "format: victim_secmap bitmaps\n");
+
+ for (i = 0; i < MAIN_SECS(sbi); i++) {
+ if ((i % 10) == 0)
+ seq_printf(seq, "%-10d", i);
+ seq_printf(seq, "%d", test_bit(i, dirty_i->victim_secmap) ? 1 : 0);
+ if ((i % 10) == 9 || i == (MAIN_SECS(sbi) - 1))
+ seq_putc(seq, '\n');
+ else
+ seq_putc(seq, ' ');
+ }
+ return 0;
+}
+
#define F2FS_PROC_FILE_DEF(_name) \
static int _name##_open_fs(struct inode *inode, struct file *file) \
{ \
@@ -591,6 +656,7 @@ static const struct file_operations f2fs_seq_##_name##_fops = { \
F2FS_PROC_FILE_DEF(segment_info);
F2FS_PROC_FILE_DEF(segment_bits);
F2FS_PROC_FILE_DEF(iostat_info);
+F2FS_PROC_FILE_DEF(victim_bits);
int __init f2fs_init_sysfs(void)
{
@@ -641,6 +707,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
&f2fs_seq_segment_bits_fops, sb);
proc_create_data("iostat_info", S_IRUGO, sbi->s_proc,
&f2fs_seq_iostat_info_fops, sb);
+ proc_create_data("victim_bits", S_IRUGO, sbi->s_proc,
+ &f2fs_seq_victim_bits_fops, sb);
}
return 0;
}
@@ -651,6 +719,7 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
remove_proc_entry("iostat_info", sbi->s_proc);
remove_proc_entry("segment_info", sbi->s_proc);
remove_proc_entry("segment_bits", sbi->s_proc);
+ remove_proc_entry("victim_bits", sbi->s_proc);
remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
}
kobject_del(&sbi->s_kobj);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 116be979b897..152078bb4829 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -38,9 +38,6 @@ static size_t f2fs_xattr_generic_list(const struct xattr_handler *handler,
return -EOPNOTSUPP;
break;
case F2FS_XATTR_INDEX_TRUSTED:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
case F2FS_XATTR_INDEX_SECURITY:
break;
default:
@@ -69,9 +66,6 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
return -EOPNOTSUPP;
break;
case F2FS_XATTR_INDEX_TRUSTED:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
case F2FS_XATTR_INDEX_SECURITY:
break;
default:
@@ -142,6 +136,8 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
size_t size, int flags)
{
struct inode *inode = d_inode(dentry);
+ unsigned char old_advise = F2FS_I(inode)->i_advise;
+ unsigned char new_advise;
if (strcmp(name, "") != 0)
return -EINVAL;
@@ -150,7 +146,14 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
if (value == NULL)
return -EINVAL;
- F2FS_I(inode)->i_advise |= *(char *)value;
+ new_advise = *(char *)value;
+ if (new_advise & ~FADVISE_MODIFIABLE_BITS)
+ return -EINVAL;
+
+ new_advise = new_advise & FADVISE_MODIFIABLE_BITS;
+ new_advise |= old_advise & ~FADVISE_MODIFIABLE_BITS;
+
+ F2FS_I(inode)->i_advise = new_advise;
f2fs_mark_inode_dirty_sync(inode, true);
return 0;
}
@@ -299,7 +302,7 @@ static int read_inline_xattr(struct inode *inode, struct page *ipage,
if (ipage) {
inline_addr = inline_xattr_addr(inode, ipage);
} else {
- page = get_node_page(sbi, inode->i_ino);
+ page = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(page))
return PTR_ERR(page);
@@ -320,7 +323,7 @@ static int read_xattr_block(struct inode *inode, void *txattr_addr)
void *xattr_addr;
/* The inode already has an extended attribute block. */
- xpage = get_node_page(sbi, xnid);
+ xpage = f2fs_get_node_page(sbi, xnid);
if (IS_ERR(xpage))
return PTR_ERR(xpage);
@@ -444,7 +447,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
int err = 0;
if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid)
- if (!alloc_nid(sbi, &new_nid))
+ if (!f2fs_alloc_nid(sbi, &new_nid))
return -ENOSPC;
/* write to inline xattr */
@@ -452,9 +455,9 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
if (ipage) {
inline_addr = inline_xattr_addr(inode, ipage);
} else {
- in_page = get_node_page(sbi, inode->i_ino);
+ in_page = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(in_page)) {
- alloc_nid_failed(sbi, new_nid);
+ f2fs_alloc_nid_failed(sbi, new_nid);
return PTR_ERR(in_page);
}
inline_addr = inline_xattr_addr(inode, in_page);
@@ -464,8 +467,8 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
NODE, true);
/* no need to use xattr node block */
if (hsize <= inline_size) {
- err = truncate_xattr_node(inode);
- alloc_nid_failed(sbi, new_nid);
+ err = f2fs_truncate_xattr_node(inode);
+ f2fs_alloc_nid_failed(sbi, new_nid);
if (err) {
f2fs_put_page(in_page, 1);
return err;
@@ -478,10 +481,10 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
/* write to xattr node block */
if (F2FS_I(inode)->i_xattr_nid) {
- xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
+ xpage = f2fs_get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
if (IS_ERR(xpage)) {
err = PTR_ERR(xpage);
- alloc_nid_failed(sbi, new_nid);
+ f2fs_alloc_nid_failed(sbi, new_nid);
goto in_page_out;
}
f2fs_bug_on(sbi, new_nid);
@@ -489,13 +492,13 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
} else {
struct dnode_of_data dn;
set_new_dnode(&dn, inode, NULL, NULL, new_nid);
- xpage = new_node_page(&dn, XATTR_NODE_OFFSET);
+ xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET);
if (IS_ERR(xpage)) {
err = PTR_ERR(xpage);
- alloc_nid_failed(sbi, new_nid);
+ f2fs_alloc_nid_failed(sbi, new_nid);
goto in_page_out;
}
- alloc_nid_done(sbi, new_nid);
+ f2fs_alloc_nid_done(sbi, new_nid);
}
xattr_addr = page_address(xpage);
@@ -733,7 +736,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
if (err)
return err;
- /* this case is only from init_inode_metadata */
+ /* this case is only from f2fs_init_inode_metadata */
if (ipage)
return __f2fs_setxattr(inode, index, name, value,
size, ipage, flags);
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 93fc62232ec2..9ae2c4d7e921 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -224,7 +224,8 @@ static inline void cache_init(struct fat_cache_id *cid, int fclus, int dclus)
int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
{
struct super_block *sb = inode->i_sb;
- const int limit = sb->s_maxbytes >> MSDOS_SB(sb)->cluster_bits;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ const int limit = sb->s_maxbytes >> sbi->cluster_bits;
struct fat_entry fatent;
struct fat_cache_id cid;
int nr;
@@ -233,6 +234,12 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
*fclus = 0;
*dclus = MSDOS_I(inode)->i_start;
+ if (!fat_valid_entry(sbi, *dclus)) {
+ fat_fs_error_ratelimit(sb,
+ "%s: invalid start cluster (i_pos %lld, start %08x)",
+ __func__, MSDOS_I(inode)->i_pos, *dclus);
+ return -EIO;
+ }
if (cluster == 0)
return 0;
@@ -249,9 +256,8 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
/* prevent the infinite loop of cluster chain */
if (*fclus > limit) {
fat_fs_error_ratelimit(sb,
- "%s: detected the cluster chain loop"
- " (i_pos %lld)", __func__,
- MSDOS_I(inode)->i_pos);
+ "%s: detected the cluster chain loop (i_pos %lld)",
+ __func__, MSDOS_I(inode)->i_pos);
nr = -EIO;
goto out;
}
@@ -261,9 +267,8 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
goto out;
else if (nr == FAT_ENT_FREE) {
fat_fs_error_ratelimit(sb,
- "%s: invalid cluster chain (i_pos %lld)",
- __func__,
- MSDOS_I(inode)->i_pos);
+ "%s: invalid cluster chain (i_pos %lld)",
+ __func__, MSDOS_I(inode)->i_pos);
nr = -EIO;
goto out;
} else if (nr == FAT_ENT_EOF) {
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index be5e15323bab..1849b1adb6b9 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -344,6 +344,11 @@ static inline void fatent_brelse(struct fat_entry *fatent)
fatent->fat_inode = NULL;
}
+static inline bool fat_valid_entry(struct msdos_sb_info *sbi, int entry)
+{
+ return FAT_START_ENT <= entry && entry < sbi->max_cluster;
+}
+
extern void fat_ent_access_init(struct super_block *sb);
extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent,
int entry);
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 8226557130a2..a70e37c47a78 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -23,7 +23,7 @@ static void fat12_ent_blocknr(struct super_block *sb, int entry,
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
int bytes = entry + (entry >> 1);
- WARN_ON(entry < FAT_START_ENT || sbi->max_cluster <= entry);
+ WARN_ON(!fat_valid_entry(sbi, entry));
*offset = bytes & (sb->s_blocksize - 1);
*blocknr = sbi->fat_start + (bytes >> sb->s_blocksize_bits);
}
@@ -33,7 +33,7 @@ static void fat_ent_blocknr(struct super_block *sb, int entry,
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
int bytes = (entry << sbi->fatent_shift);
- WARN_ON(entry < FAT_START_ENT || sbi->max_cluster <= entry);
+ WARN_ON(!fat_valid_entry(sbi, entry));
*offset = bytes & (sb->s_blocksize - 1);
*blocknr = sbi->fat_start + (bytes >> sb->s_blocksize_bits);
}
@@ -353,7 +353,7 @@ int fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry)
int err, offset;
sector_t blocknr;
- if (entry < FAT_START_ENT || sbi->max_cluster <= entry) {
+ if (!fat_valid_entry(sbi, entry)) {
fatent_brelse(fatent);
fat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", entry);
return -EIO;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 1caee0534587..582ef53f2104 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -249,22 +249,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping,
for(i = 0; i < nr_pages; i++) {
struct page *page = pvec->pages[i];
- /*
- * At this point, the page may be truncated or
- * invalidated (changing page->mapping to NULL), or
- * even swizzled back from swapper_space to tmpfs file
- * mapping. However, page->index will not change
- * because we have a reference on the page.
- */
- if (page->index > end) {
- /*
- * can't be range_cyclic (1st pass) because
- * end == -1 in that case.
- */
- ret = 1;
- break;
- }
-
*done_index = page->index;
lock_page(page);
@@ -382,8 +366,8 @@ retry:
tag_pages_for_writeback(mapping, index, end);
done_index = index;
while (!done && (index <= end)) {
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+ nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
+ tag);
if (nr_pages == 0)
break;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 61296ecbd0e2..09476bb8f6cd 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1476,7 +1476,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
lblock = offset >> shift;
lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
- if (lblock_stop > end_of_file)
+ if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex))
return 1;
size = (lblock_stop - lblock) << shift;
diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c
index 6fc766df0461..2a6f3c67cb3f 100644
--- a/fs/hfs/brec.c
+++ b/fs/hfs/brec.c
@@ -74,9 +74,10 @@ int hfs_brec_insert(struct hfs_find_data *fd, void *entry, int entry_len)
if (!fd->bnode) {
if (!tree->root)
hfs_btree_inc_height(tree);
- fd->bnode = hfs_bnode_find(tree, tree->leaf_head);
- if (IS_ERR(fd->bnode))
- return PTR_ERR(fd->bnode);
+ node = hfs_bnode_find(tree, tree->leaf_head);
+ if (IS_ERR(node))
+ return PTR_ERR(node);
+ fd->bnode = node;
fd->record = -1;
}
new_node = NULL;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index d0f39dcbb58e..2b6e2ad57bf9 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -77,13 +77,13 @@ again:
cpu_to_be32(HFSP_HARDLINK_TYPE) &&
entry.file.user_info.fdCreator ==
cpu_to_be32(HFSP_HFSPLUS_CREATOR) &&
+ HFSPLUS_SB(sb)->hidden_dir &&
(entry.file.create_date ==
HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)->
create_date ||
entry.file.create_date ==
HFSPLUS_I(d_inode(sb->s_root))->
- create_date) &&
- HFSPLUS_SB(sb)->hidden_dir) {
+ create_date)) {
struct qstr str;
char name[32];
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index fa40e756c501..422e00dc5f3b 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -521,8 +521,10 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
goto out_put_root;
if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
hfs_find_exit(&fd);
- if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))
+ if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) {
+ err = -EINVAL;
goto out_put_root;
+ }
inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id));
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index a861bbdfe577..fa8b484d035d 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -162,7 +162,7 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
chunk = div_u64(offset, dev->chunk_size);
div_u64_rem(chunk, dev->nr_children, &chunk_idx);
- if (chunk_idx > dev->nr_children) {
+ if (chunk_idx >= dev->nr_children) {
dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
__func__, chunk_idx, offset, dev->chunk_size);
/* error, should not happen */
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index e2e857affbf2..0647cb1ede56 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -911,16 +911,21 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
if (hdr_arg.minorversion == 0) {
cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident);
- if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp))
+ if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) {
+ if (cps.clp)
+ nfs_put_client(cps.clp);
goto out_invalidcred;
+ }
}
cps.minorversion = hdr_arg.minorversion;
hdr_res.taglen = hdr_arg.taglen;
hdr_res.tag = hdr_arg.tag;
- if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0)
+ if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0) {
+ if (cps.clp)
+ nfs_put_client(cps.clp);
return rpc_system_err;
-
+ }
while (status == 0 && nops != hdr_arg.nops) {
status = process_op(nops, rqstp, &xdr_in,
argp, &xdr_out, resp, &cps);
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 3a3821b00486..9deca59be7e5 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -2147,8 +2147,8 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
pagevec_init(&pvec, 0);
- while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
- PAGEVEC_SIZE)) {
+ while (pagevec_lookup_tag(&pvec, btcache, &index,
+ PAGECACHE_TAG_DIRTY)) {
for (i = 0; i < pagevec_count(&pvec); i++) {
bh = head = page_buffers(pvec.pages[i]);
do {
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 45d650addd56..447999563737 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -262,8 +262,7 @@ int nilfs_copy_dirty_pages(struct address_space *dmap,
pagevec_init(&pvec, 0);
repeat:
- if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
- PAGEVEC_SIZE))
+ if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY))
return 0;
for (i = 0; i < pagevec_count(&pvec); i++) {
@@ -382,8 +381,8 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
pagevec_init(&pvec, 0);
- while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
- PAGEVEC_SIZE)) {
+ while (pagevec_lookup_tag(&pvec, mapping, &index,
+ PAGECACHE_TAG_DIRTY)) {
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 34c22fe4eca0..092c0496aef4 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -705,18 +705,14 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
pagevec_init(&pvec, 0);
repeat:
if (unlikely(index > last) ||
- !pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
- min_t(pgoff_t, last - index,
- PAGEVEC_SIZE - 1) + 1))
+ !pagevec_lookup_range_tag(&pvec, mapping, &index, last,
+ PAGECACHE_TAG_DIRTY))
return ndirties;
for (i = 0; i < pagevec_count(&pvec); i++) {
struct buffer_head *bh, *head;
struct page *page = pvec.pages[i];
- if (unlikely(page->index > last))
- break;
-
lock_page(page);
if (!page_has_buffers(page))
create_empty_buffers(page, i_blocksize(inode), 0);
@@ -753,8 +749,8 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
pagevec_init(&pvec, 0);
- while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
- PAGEVEC_SIZE)) {
+ while (pagevec_lookup_tag(&pvec, mapping, &index,
+ PAGECACHE_TAG_DIRTY)) {
for (i = 0; i < pagevec_count(&pvec); i++) {
bh = head = page_buffers(pvec.pages[i]);
do {
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index fe50ded1b4ce..272269f1c310 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -336,6 +336,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
* for this bh as it's not marked locally
* uptodate. */
status = -EIO;
+ clear_buffer_needs_validate(bh);
put_bh(bh);
bhs[i] = NULL;
continue;
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 63a0d0ba36de..64c5386d0c1b 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -317,7 +317,6 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
struct dentry *upperdir;
struct dentry *upperdentry;
const struct cred *old_cred;
- struct cred *override_cred;
char *link = NULL;
if (WARN_ON(!workdir))
@@ -336,28 +335,7 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
return PTR_ERR(link);
}
- err = -ENOMEM;
- override_cred = prepare_creds();
- if (!override_cred)
- goto out_free_link;
-
- override_cred->fsuid = stat->uid;
- override_cred->fsgid = stat->gid;
- /*
- * CAP_SYS_ADMIN for copying up extended attributes
- * CAP_DAC_OVERRIDE for create
- * CAP_FOWNER for chmod, timestamp update
- * CAP_FSETID for chmod
- * CAP_CHOWN for chown
- * CAP_MKNOD for mknod
- */
- cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
- cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
- cap_raise(override_cred->cap_effective, CAP_FOWNER);
- cap_raise(override_cred->cap_effective, CAP_FSETID);
- cap_raise(override_cred->cap_effective, CAP_CHOWN);
- cap_raise(override_cred->cap_effective, CAP_MKNOD);
- old_cred = override_creds(override_cred);
+ old_cred = ovl_override_creds(dentry->d_sb);
err = -EIO;
if (lock_rename(workdir, upperdir) != NULL) {
@@ -380,9 +358,7 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
out_unlock:
unlock_rename(workdir, upperdir);
revert_creds(old_cred);
- put_cred(override_cred);
-out_free_link:
if (link)
free_page((unsigned long) link);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 327177df03a5..f8aa54272121 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -408,28 +408,13 @@ static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev,
err = ovl_create_upper(dentry, inode, &stat, link, hardlink);
} else {
const struct cred *old_cred;
- struct cred *override_cred;
- err = -ENOMEM;
- override_cred = prepare_creds();
- if (!override_cred)
- goto out_iput;
-
- /*
- * CAP_SYS_ADMIN for setting opaque xattr
- * CAP_DAC_OVERRIDE for create in workdir, rename
- * CAP_FOWNER for removing whiteout from sticky dir
- */
- cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
- cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
- cap_raise(override_cred->cap_effective, CAP_FOWNER);
- old_cred = override_creds(override_cred);
+ old_cred = ovl_override_creds(dentry->d_sb);
err = ovl_create_over_whiteout(dentry, inode, &stat, link,
hardlink);
revert_creds(old_cred);
- put_cred(override_cred);
}
if (!err)
@@ -659,32 +644,11 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
if (OVL_TYPE_PURE_UPPER(type)) {
err = ovl_remove_upper(dentry, is_dir);
} else {
- const struct cred *old_cred;
- struct cred *override_cred;
-
- err = -ENOMEM;
- override_cred = prepare_creds();
- if (!override_cred)
- goto out_drop_write;
-
- /*
- * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
- * CAP_DAC_OVERRIDE for create in workdir, rename
- * CAP_FOWNER for removing whiteout from sticky dir
- * CAP_FSETID for chmod of opaque dir
- * CAP_CHOWN for chown of opaque dir
- */
- cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
- cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
- cap_raise(override_cred->cap_effective, CAP_FOWNER);
- cap_raise(override_cred->cap_effective, CAP_FSETID);
- cap_raise(override_cred->cap_effective, CAP_CHOWN);
- old_cred = override_creds(override_cred);
+ const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
err = ovl_remove_and_whiteout(dentry, is_dir);
revert_creds(old_cred);
- put_cred(override_cred);
}
out_drop_write:
ovl_drop_write(dentry);
@@ -723,7 +687,6 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old,
bool new_is_dir = false;
struct dentry *opaquedir = NULL;
const struct cred *old_cred = NULL;
- struct cred *override_cred = NULL;
err = -EINVAL;
if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
@@ -792,26 +755,8 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old,
old_opaque = !OVL_TYPE_PURE_UPPER(old_type);
new_opaque = !OVL_TYPE_PURE_UPPER(new_type);
- if (old_opaque || new_opaque) {
- err = -ENOMEM;
- override_cred = prepare_creds();
- if (!override_cred)
- goto out_drop_write;
-
- /*
- * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
- * CAP_DAC_OVERRIDE for create in workdir
- * CAP_FOWNER for removing whiteout from sticky dir
- * CAP_FSETID for chmod of opaque dir
- * CAP_CHOWN for chown of opaque dir
- */
- cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
- cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
- cap_raise(override_cred->cap_effective, CAP_FOWNER);
- cap_raise(override_cred->cap_effective, CAP_FSETID);
- cap_raise(override_cred->cap_effective, CAP_CHOWN);
- old_cred = override_creds(override_cred);
- }
+ if (old_opaque || new_opaque)
+ old_cred = ovl_override_creds(old->d_sb);
if (overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir) {
opaquedir = ovl_check_empty_and_clear(new);
@@ -942,10 +887,8 @@ out_dput_old:
out_unlock:
unlock_rename(new_upperdir, old_upperdir);
out_revert_creds:
- if (old_opaque || new_opaque) {
+ if (old_opaque || new_opaque)
revert_creds(old_cred);
- put_cred(override_cred);
- }
out_drop_write:
ovl_drop_write(old);
out:
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 28316b292b8a..27a42975d7cd 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -150,6 +150,7 @@ void ovl_drop_write(struct dentry *dentry);
bool ovl_dentry_is_opaque(struct dentry *dentry);
void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque);
bool ovl_is_whiteout(struct dentry *dentry);
+const struct cred *ovl_override_creds(struct super_block *sb);
void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags);
@@ -164,6 +165,8 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
void ovl_cache_free(struct list_head *list);
int ovl_check_d_type_supported(struct path *realpath);
+void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
+ struct dentry *dentry, int level);
/* inode.c */
int ovl_setattr(struct dentry *dentry, struct iattr *attr);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 0c59955c4653..da999e73c97a 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -36,7 +36,8 @@ struct ovl_dir_cache {
struct ovl_readdir_data {
struct dir_context ctx;
- bool is_merge;
+ struct dentry *dentry;
+ bool is_lowest;
struct rb_root root;
struct list_head *list;
struct list_head middle;
@@ -140,9 +141,9 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
return 0;
}
-static int ovl_fill_lower(struct ovl_readdir_data *rdd,
- const char *name, int namelen,
- loff_t offset, u64 ino, unsigned int d_type)
+static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
+ const char *name, int namelen,
+ loff_t offset, u64 ino, unsigned int d_type)
{
struct ovl_cache_entry *p;
@@ -194,10 +195,10 @@ static int ovl_fill_merge(struct dir_context *ctx, const char *name,
container_of(ctx, struct ovl_readdir_data, ctx);
rdd->count++;
- if (!rdd->is_merge)
+ if (!rdd->is_lowest)
return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
else
- return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type);
+ return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
}
static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
@@ -206,17 +207,8 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
struct ovl_cache_entry *p;
struct dentry *dentry;
const struct cred *old_cred;
- struct cred *override_cred;
-
- override_cred = prepare_creds();
- if (!override_cred)
- return -ENOMEM;
- /*
- * CAP_DAC_OVERRIDE for lookup
- */
- cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
- old_cred = override_creds(override_cred);
+ old_cred = ovl_override_creds(rdd->dentry->d_sb);
err = mutex_lock_killable(&dir->d_inode->i_mutex);
if (!err) {
@@ -232,7 +224,6 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
mutex_unlock(&dir->d_inode->i_mutex);
}
revert_creds(old_cred);
- put_cred(override_cred);
return err;
}
@@ -257,7 +248,7 @@ static inline int ovl_dir_read(struct path *realpath,
err = rdd->err;
} while (!err && rdd->count);
- if (!err && rdd->first_maybe_whiteout)
+ if (!err && rdd->first_maybe_whiteout && rdd->dentry)
err = ovl_check_whiteouts(realpath->dentry, rdd);
fput(realfile);
@@ -288,9 +279,10 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
struct path realpath;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_fill_merge,
+ .dentry = dentry,
.list = list,
.root = RB_ROOT,
- .is_merge = false,
+ .is_lowest = false,
};
int idx, next;
@@ -307,7 +299,7 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
* allows offsets to be reasonably constant
*/
list_add(&rdd.middle, rdd.list);
- rdd.is_merge = true;
+ rdd.is_lowest = true;
err = ovl_dir_read(&realpath, &rdd);
list_del(&rdd.middle);
}
@@ -618,3 +610,64 @@ int ovl_check_d_type_supported(struct path *realpath)
return rdd.d_type_supported;
}
+
+static void ovl_workdir_cleanup_recurse(struct path *path, int level)
+{
+ int err;
+ struct inode *dir = path->dentry->d_inode;
+ LIST_HEAD(list);
+ struct ovl_cache_entry *p;
+ struct ovl_readdir_data rdd = {
+ .ctx.actor = ovl_fill_merge,
+ .dentry = NULL,
+ .list = &list,
+ .root = RB_ROOT,
+ .is_lowest = false,
+ };
+
+ err = ovl_dir_read(path, &rdd);
+ if (err)
+ goto out;
+
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+ list_for_each_entry(p, &list, l_node) {
+ struct dentry *dentry;
+
+ if (p->name[0] == '.') {
+ if (p->len == 1)
+ continue;
+ if (p->len == 2 && p->name[1] == '.')
+ continue;
+ }
+ dentry = lookup_one_len(p->name, path->dentry, p->len);
+ if (IS_ERR(dentry))
+ continue;
+ if (dentry->d_inode)
+ ovl_workdir_cleanup(dir, path->mnt, dentry, level);
+ dput(dentry);
+ }
+ inode_unlock(dir);
+out:
+ ovl_cache_free(&list);
+}
+
+void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
+ struct dentry *dentry, int level)
+{
+ int err;
+
+ if (!d_is_dir(dentry) || level > 1) {
+ ovl_cleanup(dir, dentry);
+ return;
+ }
+
+ err = ovl_do_rmdir(dir, dentry);
+ if (err) {
+ struct path path = { .mnt = mnt, .dentry = dentry };
+
+ inode_unlock(dir);
+ ovl_workdir_cleanup_recurse(&path, level + 1);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+ ovl_cleanup(dir, dentry);
+ }
+}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 0035cb80ecd1..fa20c95bd456 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -42,6 +42,8 @@ struct ovl_fs {
long lower_namelen;
/* pathnames of lower and upper dirs, for show_options */
struct ovl_config config;
+ /* creds of process who forced instantiation of super block */
+ const struct cred *creator_cred;
};
struct ovl_dir_cache;
@@ -246,6 +248,13 @@ bool ovl_is_whiteout(struct dentry *dentry)
return inode && IS_WHITEOUT(inode);
}
+/*
+ * Install the credentials saved in the overlay superblock (creator_cred)
+ * through override_creds() and return whatever that call returns, so the
+ * caller can restore the previous state later.
+ */
+const struct cred *ovl_override_creds(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	const struct cred *creator = ofs->creator_cred;
+
+	return override_creds(creator);
+}
+
static bool ovl_is_opaquedir(struct dentry *dentry)
{
int res;
@@ -587,6 +596,7 @@ static void ovl_put_super(struct super_block *sb)
kfree(ufs->config.lowerdir);
kfree(ufs->config.upperdir);
kfree(ufs->config.workdir);
+ put_cred(ufs->creator_cred);
kfree(ufs);
}
@@ -774,7 +784,7 @@ retry:
goto out_dput;
retried = true;
- ovl_cleanup(dir, work);
+ ovl_workdir_cleanup(dir, mnt, work, 0);
dput(work);
goto retry;
}
@@ -1107,10 +1117,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
else
sb->s_d_op = &ovl_dentry_operations;
+ ufs->creator_cred = prepare_creds();
+ if (!ufs->creator_cred)
+ goto out_put_lower_mnt;
+
err = -ENOMEM;
oe = ovl_alloc_entry(numlower);
if (!oe)
- goto out_put_lower_mnt;
+ goto out_put_cred;
root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, oe));
if (!root_dentry)
@@ -1143,6 +1157,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
out_free_oe:
kfree(oe);
+out_put_cred:
+ put_cred(ufs->creator_cred);
out_put_lower_mnt:
for (i = 0; i < ufs->numlower; i++)
mntput(ufs->lower_mnt[i]);
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index e11672aa4575..ecdb3baa1283 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -421,7 +421,12 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size,
vaddr = vmap(pages, page_count, VM_MAP, prot);
kfree(pages);
- return vaddr;
+ /*
+ * Since vmap() uses page granularity, we must add the offset
+ * into the page here, to get the byte granularity address
+ * into the mapping to represent the actual "start" location.
+ */
+ return vaddr + offset_in_page(start);
}
static void *persistent_ram_iomap(phys_addr_t start, size_t size,
@@ -440,6 +445,11 @@ static void *persistent_ram_iomap(phys_addr_t start, size_t size,
else
va = ioremap_wc(start, size);
+ /*
+ * Since request_mem_region() and ioremap() are byte-granularity,
+ * there is no need to handle anything special like we do in the
+ * vmap() case in persistent_ram_vmap() above.
+ */
return va;
}
@@ -460,7 +470,7 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size,
return -ENOMEM;
}
- prz->buffer = prz->vaddr + offset_in_page(start);
+ prz->buffer = prz->vaddr;
prz->buffer_size = size - sizeof(struct persistent_ram_buffer);
return 0;
@@ -507,7 +517,8 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
if (prz->vaddr) {
if (pfn_valid(prz->paddr >> PAGE_SHIFT)) {
- vunmap(prz->vaddr);
+ /* We must vunmap() at page-granularity. */
+ vunmap(prz->vaddr - offset_in_page(prz->paddr));
} else {
iounmap(prz->vaddr);
release_mem_region(prz->paddr, prz->size);
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 3746367098fd..bb0d643481c8 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -17,6 +17,7 @@
#include <linux/quotaops.h>
#include <linux/types.h>
#include <linux/writeback.h>
+#include <linux/nospec.h>
static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
qid_t id)
@@ -644,6 +645,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
if (type >= (XQM_COMMAND(cmd) ? XQM_MAXQUOTAS : MAXQUOTAS))
return -EINVAL;
+ type = array_index_nospec(type, MAXQUOTAS);
/*
* Quota not supported on this fs? Check this before s_quota_types
* since they needn't be set if quota is not supported at all.
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index 6ca00471afbf..d920a646b578 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -270,7 +270,7 @@ struct reiserfs_journal_list {
struct mutex j_commit_mutex;
unsigned int j_trans_id;
- time_t j_timestamp;
+ time64_t j_timestamp; /* write-only but useful for crash dump analysis */
struct reiserfs_list_bitmap *j_list_bitmap;
struct buffer_head *j_commit_bh; /* commit buffer head */
struct reiserfs_journal_cnode *j_realblock;
diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c
index 1461254f301d..271c4c4cb760 100644
--- a/fs/sdcardfs/file.c
+++ b/fs/sdcardfs/file.c
@@ -118,7 +118,11 @@ static long sdcardfs_unlocked_ioctl(struct file *file, unsigned int cmd,
goto out;
/* save current_cred and override it */
- OVERRIDE_CRED(sbi, saved_cred, SDCARDFS_I(file_inode(file)));
+ saved_cred = override_fsids(sbi, SDCARDFS_I(file_inode(file))->data);
+ if (!saved_cred) {
+ err = -ENOMEM;
+ goto out;
+ }
if (lower_file->f_op->unlocked_ioctl)
err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
@@ -127,7 +131,7 @@ static long sdcardfs_unlocked_ioctl(struct file *file, unsigned int cmd,
if (!err)
sdcardfs_copy_and_fix_attrs(file_inode(file),
file_inode(lower_file));
- REVERT_CRED(saved_cred);
+ revert_fsids(saved_cred);
out:
return err;
}
@@ -149,12 +153,16 @@ static long sdcardfs_compat_ioctl(struct file *file, unsigned int cmd,
goto out;
/* save current_cred and override it */
- OVERRIDE_CRED(sbi, saved_cred, SDCARDFS_I(file_inode(file)));
+ saved_cred = override_fsids(sbi, SDCARDFS_I(file_inode(file))->data);
+ if (!saved_cred) {
+ err = -ENOMEM;
+ goto out;
+ }
if (lower_file->f_op->compat_ioctl)
err = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
- REVERT_CRED(saved_cred);
+ revert_fsids(saved_cred);
out:
return err;
}
@@ -241,7 +249,11 @@ static int sdcardfs_open(struct inode *inode, struct file *file)
}
/* save current_cred and override it */
- OVERRIDE_CRED(sbi, saved_cred, SDCARDFS_I(inode));
+ saved_cred = override_fsids(sbi, SDCARDFS_I(inode)->data);
+ if (!saved_cred) {
+ err = -ENOMEM;
+ goto out_err;
+ }
file->private_data =
kzalloc(sizeof(struct sdcardfs_file_info), GFP_KERNEL);
@@ -271,7 +283,7 @@ static int sdcardfs_open(struct inode *inode, struct file *file)
sdcardfs_copy_and_fix_attrs(inode, sdcardfs_lower_inode(inode));
out_revert_cred:
- REVERT_CRED(saved_cred);
+ revert_fsids(saved_cred);
out_err:
dput(parent);
return err;
diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c
index 75a8ab2ce5a8..6c0039284ae0 100644
--- a/fs/sdcardfs/inode.c
+++ b/fs/sdcardfs/inode.c
@@ -22,7 +22,6 @@
#include <linux/fs_struct.h>
#include <linux/ratelimit.h>
-/* Do not directly use this function. Use OVERRIDE_CRED() instead. */
const struct cred *override_fsids(struct sdcardfs_sb_info *sbi,
struct sdcardfs_inode_data *data)
{
@@ -50,7 +49,6 @@ const struct cred *override_fsids(struct sdcardfs_sb_info *sbi,
return old_cred;
}
-/* Do not directly use this function, use REVERT_CRED() instead. */
void revert_fsids(const struct cred *old_cred)
{
const struct cred *cur_cred;
@@ -78,7 +76,10 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry,
}
/* save current_cred and override it */
- OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir));
+ saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb),
+ SDCARDFS_I(dir)->data);
+ if (!saved_cred)
+ return -ENOMEM;
sdcardfs_get_lower_path(dentry, &lower_path);
lower_dentry = lower_path.dentry;
@@ -95,8 +96,11 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry,
err = -ENOMEM;
goto out_unlock;
}
+ copied_fs->umask = 0;
+ task_lock(current);
current->fs = copied_fs;
- current->fs->umask = 0;
+ task_unlock(current);
+
err = vfs_create2(lower_dentry_mnt, d_inode(lower_parent_dentry), lower_dentry, mode, want_excl);
if (err)
goto out;
@@ -110,58 +114,18 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry,
fixup_lower_ownership(dentry, dentry->d_name.name);
out:
+ task_lock(current);
current->fs = saved_fs;
+ task_unlock(current);
free_fs_struct(copied_fs);
out_unlock:
unlock_dir(lower_parent_dentry);
sdcardfs_put_lower_path(dentry, &lower_path);
- REVERT_CRED(saved_cred);
+ revert_fsids(saved_cred);
out_eacces:
return err;
}
-#if 0
-static int sdcardfs_link(struct dentry *old_dentry, struct inode *dir,
- struct dentry *new_dentry)
-{
- struct dentry *lower_old_dentry;
- struct dentry *lower_new_dentry;
- struct dentry *lower_dir_dentry;
- u64 file_size_save;
- int err;
- struct path lower_old_path, lower_new_path;
-
- OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb));
-
- file_size_save = i_size_read(d_inode(old_dentry));
- sdcardfs_get_lower_path(old_dentry, &lower_old_path);
- sdcardfs_get_lower_path(new_dentry, &lower_new_path);
- lower_old_dentry = lower_old_path.dentry;
- lower_new_dentry = lower_new_path.dentry;
- lower_dir_dentry = lock_parent(lower_new_dentry);
-
- err = vfs_link(lower_old_dentry, d_inode(lower_dir_dentry),
- lower_new_dentry, NULL);
- if (err || !d_inode(lower_new_dentry))
- goto out;
-
- err = sdcardfs_interpose(new_dentry, dir->i_sb, &lower_new_path);
- if (err)
- goto out;
- fsstack_copy_attr_times(dir, d_inode(lower_new_dentry));
- fsstack_copy_inode_size(dir, d_inode(lower_new_dentry));
- set_nlink(d_inode(old_dentry),
- sdcardfs_lower_inode(d_inode(old_dentry))->i_nlink);
- i_size_write(d_inode(new_dentry), file_size_save);
-out:
- unlock_dir(lower_dir_dentry);
- sdcardfs_put_lower_path(old_dentry, &lower_old_path);
- sdcardfs_put_lower_path(new_dentry, &lower_new_path);
- REVERT_CRED();
- return err;
-}
-#endif
-
static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry)
{
int err;
@@ -178,7 +142,10 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry)
}
/* save current_cred and override it */
- OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir));
+ saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb),
+ SDCARDFS_I(dir)->data);
+ if (!saved_cred)
+ return -ENOMEM;
sdcardfs_get_lower_path(dentry, &lower_path);
lower_dentry = lower_path.dentry;
@@ -209,43 +176,11 @@ out:
unlock_dir(lower_dir_dentry);
dput(lower_dentry);
sdcardfs_put_lower_path(dentry, &lower_path);
- REVERT_CRED(saved_cred);
+ revert_fsids(saved_cred);
out_eacces:
return err;
}
-#if 0
-static int sdcardfs_symlink(struct inode *dir, struct dentry *dentry,
- const char *symname)
-{
- int err;
- struct dentry *lower_dentry;
- struct dentry *lower_parent_dentry = NULL;
- struct path lower_path;
-
- OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb));
-
- sdcardfs_get_lower_path(dentry, &lower_path);
- lower_dentry = lower_path.dentry;
- lower_parent_dentry = lock_parent(lower_dentry);
-
- err = vfs_symlink(d_inode(lower_parent_dentry), lower_dentry, symname);
- if (err)
- goto out;
- err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path);
- if (err)
- goto out;
- fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
- fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry));
-
-out:
- unlock_dir(lower_parent_dentry);
- sdcardfs_put_lower_path(dentry, &lower_path);
- REVERT_CRED();
- return err;
-}
-#endif
-
static int touch(char *abs_path, mode_t mode)
{
struct file *filp = filp_open(abs_path, O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW, mode);
@@ -287,7 +222,10 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
}
/* save current_cred and override it */
- OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir));
+ saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb),
+ SDCARDFS_I(dir)->data);
+ if (!saved_cred)
+ return -ENOMEM;
/* check disk space */
parent_dentry = dget_parent(dentry);
@@ -316,8 +254,11 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
unlock_dir(lower_parent_dentry);
goto out_unlock;
}
+ copied_fs->umask = 0;
+ task_lock(current);
current->fs = copied_fs;
- current->fs->umask = 0;
+ task_unlock(current);
+
err = vfs_mkdir2(lower_mnt, d_inode(lower_parent_dentry), lower_dentry, mode);
if (err) {
@@ -366,23 +307,34 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
if (make_nomedia_in_obb ||
((pd->perm == PERM_ANDROID)
&& (qstr_case_eq(&dentry->d_name, &q_data)))) {
- REVERT_CRED(saved_cred);
- OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(d_inode(dentry)));
+ revert_fsids(saved_cred);
+ saved_cred = override_fsids(sbi,
+ SDCARDFS_I(d_inode(dentry))->data);
+ if (!saved_cred) {
+ pr_err("sdcardfs: failed to set up .nomedia in %s: %d\n",
+ lower_path.dentry->d_name.name,
+ -ENOMEM);
+ goto out;
+ }
set_fs_pwd(current->fs, &lower_path);
touch_err = touch(".nomedia", 0664);
if (touch_err) {
pr_err("sdcardfs: failed to create .nomedia in %s: %d\n",
- lower_path.dentry->d_name.name, touch_err);
+ lower_path.dentry->d_name.name,
+ touch_err);
goto out;
}
}
out:
+ task_lock(current);
current->fs = saved_fs;
+ task_unlock(current);
+
free_fs_struct(copied_fs);
out_unlock:
sdcardfs_put_lower_path(dentry, &lower_path);
out_revert:
- REVERT_CRED(saved_cred);
+ revert_fsids(saved_cred);
out_eacces:
return err;
}
@@ -402,7 +354,10 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry)
}
/* save current_cred and override it */
- OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir));
+ saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb),
+ SDCARDFS_I(dir)->data);
+ if (!saved_cred)
+ return -ENOMEM;
/* sdcardfs_get_real_lower(): in case of remove an user's obb dentry
* the dentry on the original path should be deleted.
@@ -427,44 +382,11 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry)
out:
unlock_dir(lower_dir_dentry);
sdcardfs_put_real_lower(dentry, &lower_path);
- REVERT_CRED(saved_cred);
+ revert_fsids(saved_cred);
out_eacces:
return err;
}
-#if 0
-static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
- dev_t dev)
-{
- int err;
- struct dentry *lower_dentry;
- struct dentry *lower_parent_dentry = NULL;
- struct path lower_path;
-
- OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb));
-
- sdcardfs_get_lower_path(dentry, &lower_path);
- lower_dentry = lower_path.dentry;
- lower_parent_dentry = lock_parent(lower_dentry);
-
- err = vfs_mknod(d_inode(lower_parent_dentry), lower_dentry, mode, dev);
- if (err)
- goto out;
-
- err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path);
- if (err)
- goto out;
- fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
- fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry));
-
-out:
- unlock_dir(lower_parent_dentry);
- sdcardfs_put_lower_path(dentry, &lower_path);
- REVERT_CRED();
- return err;
-}
-#endif
-
/*
* The locking rules in sdcardfs_rename are complex. We could use a simpler
* superblock-level name-space lock for renames and copy-ups.
@@ -489,7 +411,10 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
/* save current_cred and override it */
- OVERRIDE_CRED(SDCARDFS_SB(old_dir->i_sb), saved_cred, SDCARDFS_I(new_dir));
+ saved_cred = override_fsids(SDCARDFS_SB(old_dir->i_sb),
+ SDCARDFS_I(new_dir)->data);
+ if (!saved_cred)
+ return -ENOMEM;
sdcardfs_get_real_lower(old_dentry, &lower_old_path);
sdcardfs_get_lower_path(new_dentry, &lower_new_path);
@@ -536,7 +461,7 @@ out:
dput(lower_new_dir_dentry);
sdcardfs_put_real_lower(old_dentry, &lower_old_path);
sdcardfs_put_lower_path(new_dentry, &lower_new_path);
- REVERT_CRED(saved_cred);
+ revert_fsids(saved_cred);
out_eacces:
return err;
}
@@ -655,33 +580,7 @@ static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int ma
if (IS_POSIXACL(inode))
pr_warn("%s: This may be undefined behavior...\n", __func__);
err = generic_permission(&tmp, mask);
- /* XXX
- * Original sdcardfs code calls inode_permission(lower_inode,.. )
- * for checking inode permission. But doing such things here seems
- * duplicated work, because the functions called after this func,
- * such as vfs_create, vfs_unlink, vfs_rename, and etc,
- * does exactly same thing, i.e., they calls inode_permission().
- * So we just let they do the things.
- * If there are any security hole, just uncomment following if block.
- */
-#if 0
- if (!err) {
- /*
- * Permission check on lower_inode(=EXT4).
- * we check it with AID_MEDIA_RW permission
- */
- struct inode *lower_inode;
-
- OVERRIDE_CRED(SDCARDFS_SB(inode->sb));
-
- lower_inode = sdcardfs_lower_inode(inode);
- err = inode_permission(lower_inode, mask);
-
- REVERT_CRED();
- }
-#endif
return err;
-
}
static int sdcardfs_setattr_wrn(struct dentry *dentry, struct iattr *ia)
@@ -756,7 +655,10 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct
goto out_err;
/* save current_cred and override it */
- OVERRIDE_CRED(SDCARDFS_SB(dentry->d_sb), saved_cred, SDCARDFS_I(inode));
+ saved_cred = override_fsids(SDCARDFS_SB(dentry->d_sb),
+ SDCARDFS_I(inode)->data);
+ if (!saved_cred)
+ return -ENOMEM;
sdcardfs_get_lower_path(dentry, &lower_path);
lower_dentry = lower_path.dentry;
@@ -815,7 +717,7 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct
out:
sdcardfs_put_lower_path(dentry, &lower_path);
- REVERT_CRED(saved_cred);
+ revert_fsids(saved_cred);
out_err:
return err;
}
@@ -898,13 +800,6 @@ const struct inode_operations sdcardfs_dir_iops = {
.setattr = sdcardfs_setattr_wrn,
.setattr2 = sdcardfs_setattr,
.getattr = sdcardfs_getattr,
- /* XXX Following operations are implemented,
- * but FUSE(sdcard) or FAT does not support them
- * These methods are *NOT* perfectly tested.
- .symlink = sdcardfs_symlink,
- .link = sdcardfs_link,
- .mknod = sdcardfs_mknod,
- */
};
const struct inode_operations sdcardfs_main_iops = {
diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c
index 206f8cbc7d7d..a671ae2338ea 100644
--- a/fs/sdcardfs/lookup.c
+++ b/fs/sdcardfs/lookup.c
@@ -426,7 +426,12 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
}
/* save current_cred and override it */
- OVERRIDE_CRED_PTR(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir));
+ saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb),
+ SDCARDFS_I(dir)->data);
+ if (!saved_cred) {
+ ret = ERR_PTR(-ENOMEM);
+ goto out_err;
+ }
sdcardfs_get_lower_path(parent, &lower_parent_path);
@@ -457,7 +462,7 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
out:
sdcardfs_put_lower_path(parent, &lower_parent_path);
- REVERT_CRED(saved_cred);
+ revert_fsids(saved_cred);
out_err:
dput(parent);
return ret;
diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c
index 30e0c431a1ea..27ec726e7a46 100644
--- a/fs/sdcardfs/main.c
+++ b/fs/sdcardfs/main.c
@@ -295,6 +295,13 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb,
atomic_inc(&lower_sb->s_active);
sdcardfs_set_lower_super(sb, lower_sb);
+ sb->s_stack_depth = lower_sb->s_stack_depth + 1;
+ if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
+ pr_err("sdcardfs: maximum fs stacking depth exceeded\n");
+ err = -EINVAL;
+ goto out_sput;
+ }
+
/* inherit maxbytes from lower file system */
sb->s_maxbytes = lower_sb->s_maxbytes;
diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h
index 055e413509e4..99227a07a8d6 100644
--- a/fs/sdcardfs/sdcardfs.h
+++ b/fs/sdcardfs/sdcardfs.h
@@ -88,31 +88,6 @@
(x)->i_mode = ((x)->i_mode & S_IFMT) | 0775;\
} while (0)
-/* OVERRIDE_CRED() and REVERT_CRED()
- * OVERRIDE_CRED()
- * backup original task->cred
- * and modifies task->cred->fsuid/fsgid to specified value.
- * REVERT_CRED()
- * restore original task->cred->fsuid/fsgid.
- * These two macro should be used in pair, and OVERRIDE_CRED() should be
- * placed at the beginning of a function, right after variable declaration.
- */
-#define OVERRIDE_CRED(sdcardfs_sbi, saved_cred, info) \
- do { \
- saved_cred = override_fsids(sdcardfs_sbi, info->data); \
- if (!saved_cred) \
- return -ENOMEM; \
- } while (0)
-
-#define OVERRIDE_CRED_PTR(sdcardfs_sbi, saved_cred, info) \
- do { \
- saved_cred = override_fsids(sdcardfs_sbi, info->data); \
- if (!saved_cred) \
- return ERR_PTR(-ENOMEM); \
- } while (0)
-
-#define REVERT_CRED(saved_cred) revert_fsids(saved_cred)
-
/* Android 5.0 support */
/* Permission mode for a specific node. Controls how file permissions
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 22dba8837a86..539fa934ed93 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -661,6 +661,11 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
spin_lock(&ui->ui_lock);
ui->synced_i_size = ui->ui_size;
spin_unlock(&ui->ui_lock);
+ if (xent) {
+ spin_lock(&host_ui->ui_lock);
+ host_ui->synced_i_size = host_ui->ui_size;
+ spin_unlock(&host_ui->ui_lock);
+ }
mark_inode_clean(c, ui);
mark_inode_clean(c, host_ui);
return 0;
@@ -1107,7 +1112,7 @@ static int recomp_data_node(const struct ubifs_info *c,
int err, len, compr_type, out_len;
out_len = le32_to_cpu(dn->size);
- buf = kmalloc_array(out_len, WORST_COMPR_FACTOR, GFP_NOFS);
+ buf = kmalloc(out_len * WORST_COMPR_FACTOR, GFP_NOFS);
if (!buf)
return -ENOMEM;
@@ -1186,7 +1191,16 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
else if (err)
goto out_free;
else {
- if (le32_to_cpu(dn->size) <= dlen)
+ int dn_len = le32_to_cpu(dn->size);
+
+ if (dn_len <= 0 || dn_len > UBIFS_BLOCK_SIZE) {
+ ubifs_err(c, "bad data node (block %u, inode %lu)",
+ blk, inode->i_ino);
+ ubifs_dump_node(c, dn);
+ goto out_free;
+ }
+
+ if (dn_len <= dlen)
dlen = 0; /* Nothing to do */
else {
int compr_type = le16_to_cpu(dn->compr_type);
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index a0011aa3a779..f43f162e36f4 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -1091,10 +1091,6 @@ static int scan_check_cb(struct ubifs_info *c,
}
}
- buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
- if (!buf)
- return -ENOMEM;
-
/*
* After an unclean unmount, empty and freeable LEBs
* may contain garbage - do not scan them.
@@ -1113,6 +1109,10 @@ static int scan_check_cb(struct ubifs_info *c,
return LPT_SCAN_CONTINUE;
}
+ buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
sleb = ubifs_scan(c, lnum, 0, buf, 0);
if (IS_ERR(sleb)) {
ret = PTR_ERR(sleb);
diff --git a/fs/xattr.c b/fs/xattr.c
index a40f49cc04c3..7444fb1b3484 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -453,7 +453,7 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
if (error > 0) {
if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
(strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
- posix_acl_fix_xattr_to_user(kvalue, size);
+ posix_acl_fix_xattr_to_user(kvalue, error);
if (size && copy_to_user(value, kvalue, error))
error = -EFAULT;
} else if (error == -ERANGE && size >= XATTR_SIZE_MAX) {
diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h
index 2cf7a61ece59..ce6619c339fe 100644
--- a/include/crypto/internal/skcipher.h
+++ b/include/crypto/internal/skcipher.h
@@ -19,12 +19,46 @@
struct rtattr;
+/*
+ * Wrapper that ties an skcipher algorithm to a crypto_instance.
+ *
+ * The anonymous union gives two views of the same storage: @alg, the full
+ * skcipher_alg, and @s, whose @head padding is sized to
+ * offsetof(struct skcipher_alg, base) so that @s.base is meant to overlay
+ * @alg.base.
+ */
+struct skcipher_instance {
+ void (*free)(struct skcipher_instance *inst); /* presumably releases the instance - confirm against users */
+ union {
+ struct {
+ char head[offsetof(struct skcipher_alg, base)];
+ struct crypto_instance base;
+ } s;
+ struct skcipher_alg alg;
+ };
+};
+
struct crypto_skcipher_spawn {
struct crypto_spawn base;
};
extern const struct crypto_type crypto_givcipher_type;
+/* Return the crypto_instance embedded in @inst (the s.base view). */
+static inline struct crypto_instance *skcipher_crypto_instance(
+	struct skcipher_instance *inst)
+{
+	struct crypto_instance *embedded = &inst->s.base;
+
+	return embedded;
+}
+
+/*
+ * Map a transform back to the skcipher_instance that contains its
+ * skcipher_alg (via the instance's alg member).
+ */
+static inline struct skcipher_instance *skcipher_alg_instance(
+	struct crypto_skcipher *skcipher)
+{
+	struct skcipher_alg *salg = crypto_skcipher_alg(skcipher);
+
+	return container_of(salg, struct skcipher_instance, alg);
+}
+
+/* Return crypto_instance_ctx() of the crypto_instance embedded in @inst. */
+static inline void *skcipher_instance_ctx(struct skcipher_instance *inst)
+{
+	struct crypto_instance *embedded = skcipher_crypto_instance(inst);
+
+	return crypto_instance_ctx(embedded);
+}
+
+/* Invoke the completion callback stored in @req->base, passing it @err. */
+static inline void skcipher_request_complete(struct skcipher_request *req, int err)
+{
+ req->base.complete(&req->base, err);
+}
+
static inline void crypto_set_skcipher_spawn(
struct crypto_skcipher_spawn *spawn, struct crypto_instance *inst)
{
@@ -33,6 +67,8 @@ static inline void crypto_set_skcipher_spawn(
int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn, const char *name,
u32 type, u32 mask);
+int crypto_grab_skcipher2(struct crypto_skcipher_spawn *spawn,
+ const char *name, u32 type, u32 mask);
struct crypto_alg *crypto_lookup_skcipher(const char *name, u32 type, u32 mask);
@@ -47,6 +83,12 @@ static inline struct crypto_alg *crypto_skcipher_spawn_alg(
return spawn->base.alg;
}
+/*
+ * Return the skcipher_alg that embeds the crypto_alg referenced by
+ * @spawn.
+ */
+static inline struct skcipher_alg *crypto_spawn_skcipher_alg(
+	struct crypto_skcipher_spawn *spawn)
+{
+	struct crypto_alg *calg = spawn->base.alg;
+
+	return container_of(calg, struct skcipher_alg, base);
+}
+
static inline struct crypto_ablkcipher *crypto_spawn_skcipher(
struct crypto_skcipher_spawn *spawn)
{
@@ -55,6 +97,25 @@ static inline struct crypto_ablkcipher *crypto_spawn_skcipher(
crypto_skcipher_mask(0)));
}
+/* Obtain a crypto_skcipher for @spawn through crypto_spawn_tfm2(). */
+static inline struct crypto_skcipher *crypto_spawn_skcipher2(
+	struct crypto_skcipher_spawn *spawn)
+{
+	struct crypto_spawn *generic = &spawn->base;
+
+	return crypto_spawn_tfm2(generic);
+}
+
+/* Store @reqsize in the transform's reqsize field. */
+static inline void crypto_skcipher_set_reqsize(
+ struct crypto_skcipher *skcipher, unsigned int reqsize)
+{
+ skcipher->reqsize = reqsize;
+}
+
+int crypto_register_skcipher(struct skcipher_alg *alg);
+void crypto_unregister_skcipher(struct skcipher_alg *alg);
+int crypto_register_skciphers(struct skcipher_alg *algs, int count);
+void crypto_unregister_skciphers(struct skcipher_alg *algs, int count);
+int skcipher_register_instance(struct crypto_template *tmpl,
+ struct skcipher_instance *inst);
+
int skcipher_null_givencrypt(struct skcipher_givcrypt_request *req);
int skcipher_null_givdecrypt(struct skcipher_givcrypt_request *req);
const char *crypto_default_geniv(const struct crypto_alg *alg);
@@ -122,5 +183,31 @@ static inline u32 skcipher_request_flags(struct skcipher_request *req)
return req->base.flags;
}
+/*
+ * Minimum key size of @alg.  The value is taken from cra_blkcipher when
+ * the type flag is CRYPTO_ALG_TYPE_BLKCIPHER, from cra_ablkcipher when
+ * its encrypt hook is set, and from the skcipher_alg itself otherwise.
+ */
+static inline unsigned int crypto_skcipher_alg_min_keysize(
+	struct skcipher_alg *alg)
+{
+	const bool is_blkcipher =
+		(alg->base.cra_flags & CRYPTO_ALG_TYPE_MASK) ==
+		CRYPTO_ALG_TYPE_BLKCIPHER;
+
+	if (is_blkcipher)
+		return alg->base.cra_blkcipher.min_keysize;
+
+	return alg->base.cra_ablkcipher.encrypt ?
+		alg->base.cra_ablkcipher.min_keysize : alg->min_keysize;
+}
+
+/*
+ * Maximum key size of @alg.  The value is taken from cra_blkcipher when
+ * the type flag is CRYPTO_ALG_TYPE_BLKCIPHER, from cra_ablkcipher when
+ * its encrypt hook is set, and from the skcipher_alg itself otherwise.
+ */
+static inline unsigned int crypto_skcipher_alg_max_keysize(
+	struct skcipher_alg *alg)
+{
+	const bool is_blkcipher =
+		(alg->base.cra_flags & CRYPTO_ALG_TYPE_MASK) ==
+		CRYPTO_ALG_TYPE_BLKCIPHER;
+
+	if (is_blkcipher)
+		return alg->base.cra_blkcipher.max_keysize;
+
+	return alg->base.cra_ablkcipher.encrypt ?
+		alg->base.cra_ablkcipher.max_keysize : alg->max_keysize;
+}
+
#endif /* _CRYPTO_INTERNAL_SKCIPHER_H */
diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h
index fd8742a40ff3..5c90d3edf975 100644
--- a/include/crypto/skcipher.h
+++ b/include/crypto/skcipher.h
@@ -60,12 +60,80 @@ struct crypto_skcipher {
unsigned int ivsize;
unsigned int reqsize;
-
- bool has_setkey;
+ unsigned int keysize;
struct crypto_tfm base;
};
+/**
+ * struct skcipher_alg - symmetric key cipher definition
+ * @min_keysize: Minimum key size supported by the transformation. This is the
+ * smallest key length supported by this transformation algorithm.
+ * This must be set to one of the pre-defined values as this is
+ * not hardware specific. Possible values for this field can be
+ * found via git grep "_MIN_KEY_SIZE" include/crypto/
+ * @max_keysize: Maximum key size supported by the transformation. This is the
+ * largest key length supported by this transformation algorithm.
+ * This must be set to one of the pre-defined values as this is
+ * not hardware specific. Possible values for this field can be
+ * found via git grep "_MAX_KEY_SIZE" include/crypto/
+ * @setkey: Set key for the transformation. This function is used to either
+ * program a supplied key into the hardware or store the key in the
+ * transformation context for programming it later. Note that this
+ * function does modify the transformation context. This function can
+ * be called multiple times during the existence of the transformation
+ * object, so one must make sure the key is properly reprogrammed into
+ * the hardware. This function is also responsible for checking the key
+ * length for validity. In case a software fallback was put in place in
+ * the @cra_init call, this function might need to use the fallback if
+ * the algorithm doesn't support all of the key sizes.
+ * @encrypt: Encrypt a scatterlist of blocks. This function is used to encrypt
+ * the supplied scatterlist containing the blocks of data. The crypto
+ * API consumer is responsible for aligning the entries of the
+ * scatterlist properly and making sure the chunks are correctly
+ * sized. In case a software fallback was put in place in the
+ * @cra_init call, this function might need to use the fallback if
+ * the algorithm doesn't support all of the key sizes. In case the
+ * key was stored in transformation context, the key might need to be
+ * re-programmed into the hardware in this function. This function
+ * shall not modify the transformation context, as this function may
+ * be called in parallel with the same transformation object.
+ * @decrypt: Decrypt a single block. This is a reverse counterpart to @encrypt
+ * and the conditions are exactly the same.
+ * @init: Initialize the cryptographic transformation object. This function
+ * is used to initialize the cryptographic transformation object.
+ * This function is called only once at the instantiation time, right
+ * after the transformation context was allocated. In case the
+ * cryptographic hardware has some special requirements which need to
+ * be handled by software, this function shall check for the precise
+ * requirement of the transformation and put any software fallbacks
+ * in place.
+ * @exit: Deinitialize the cryptographic transformation object. This is a
+ * counterpart to @init, used to remove various changes set in
+ * @init.
+ * @ivsize: IV size applicable for transformation. The consumer must provide an
+ * IV of exactly that size to perform the encrypt or decrypt operation.
+ * @chunksize: Equal to the block size except for stream ciphers such as
+ * CTR where it is set to the underlying block size.
+ *
+ * All fields except @ivsize are mandatory and must be filled.
+ */
+struct skcipher_alg {
+ /* Operation hooks; see the kerneldoc above for each one's contract. */
+ int (*setkey)(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen);
+ int (*encrypt)(struct skcipher_request *req);
+ int (*decrypt)(struct skcipher_request *req);
+ int (*init)(struct crypto_skcipher *tfm);
+ void (*exit)(struct crypto_skcipher *tfm);
+
+ /* Size parameters. */
+ unsigned int min_keysize;
+ unsigned int max_keysize;
+ unsigned int ivsize;
+ unsigned int chunksize;
+
+ /* Generic algorithm descriptor; crypto_skcipher_alg() recovers this struct from it via container_of(). */
+ struct crypto_alg base;
+};
+
#define SKCIPHER_REQUEST_ON_STACK(name, tfm) \
char __##name##_desc[sizeof(struct skcipher_request) + \
crypto_skcipher_reqsize(tfm)] CRYPTO_MINALIGN_ATTR; \
@@ -233,6 +301,43 @@ static inline int crypto_has_skcipher(const char *alg_name, u32 type,
}
/**
+ * crypto_has_skcipher2() - Search for the availability of an skcipher.
+ * @alg_name: is the cra_name / name or cra_driver_name / driver name of the
+ * skcipher
+ * @type: specifies the type of the skcipher
+ * @mask: specifies the mask for the skcipher
+ *
+ * Return: true when the skcipher is known to the kernel crypto API; false
+ * otherwise
+ */
+int crypto_has_skcipher2(const char *alg_name, u32 type, u32 mask);
+
+/**
+ * crypto_skcipher_driver_name() - obtain the driver name of an skcipher
+ * @tfm: cipher handle
+ *
+ * Return: the driver name (cra_driver_name) of the algorithm backing
+ *	   @tfm, as opposed to its generic algorithm name (cra_name).
+ */
+static inline const char *crypto_skcipher_driver_name(
+	struct crypto_skcipher *tfm)
+{
+	/* Must be the driver-name accessor; crypto_tfm_alg_name() yields cra_name. */
+	return crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm));
+}
+
+/*
+ * Return the skcipher_alg that embeds the crypto_alg recorded in the
+ * transform's __crt_alg pointer.
+ */
+static inline struct skcipher_alg *crypto_skcipher_alg(
+	struct crypto_skcipher *tfm)
+{
+	struct crypto_alg *calg = crypto_skcipher_tfm(tfm)->__crt_alg;
+
+	return container_of(calg, struct skcipher_alg, base);
+}
+
+/*
+ * IV size of @alg.  The value is taken from cra_blkcipher when the type
+ * flag is CRYPTO_ALG_TYPE_BLKCIPHER, from cra_ablkcipher when its encrypt
+ * hook is set, and from the skcipher_alg itself otherwise.
+ */
+static inline unsigned int crypto_skcipher_alg_ivsize(struct skcipher_alg *alg)
+{
+	const bool is_blkcipher =
+		(alg->base.cra_flags & CRYPTO_ALG_TYPE_MASK) ==
+		CRYPTO_ALG_TYPE_BLKCIPHER;
+
+	if (is_blkcipher)
+		return alg->base.cra_blkcipher.ivsize;
+
+	return alg->base.cra_ablkcipher.encrypt ?
+		alg->base.cra_ablkcipher.ivsize : alg->ivsize;
+}
+
+/**
* crypto_skcipher_ivsize() - obtain IV size
* @tfm: cipher handle
*
@@ -246,6 +351,36 @@ static inline unsigned int crypto_skcipher_ivsize(struct crypto_skcipher *tfm)
return tfm->ivsize;
}
+static inline unsigned int crypto_skcipher_alg_chunksize(
+	struct skcipher_alg *alg)
+{
+	struct crypto_alg *base = &alg->base;
+	u32 type = base->cra_flags & CRYPTO_ALG_TYPE_MASK;
+
+	/* Both of these cases report cra_blocksize as the chunk size. */
+	if (type == CRYPTO_ALG_TYPE_BLKCIPHER || base->cra_ablkcipher.encrypt)
+		return base->cra_blocksize;
+
+	return alg->chunksize;
+}
+
+/**
+ * crypto_skcipher_chunksize() - obtain chunk size
+ * @tfm: cipher handle
+ *
+ * The block size is set to one for ciphers such as CTR. However,
+ * you still need to provide incremental updates in multiples of
+ * the underlying block size as the IV does not have sub-block
+ * granularity. This is known in this API as the chunk size.
+ *
+ * Return: chunk size in bytes
+ */
+static inline unsigned int crypto_skcipher_chunksize(
+ struct crypto_skcipher *tfm)
+{
+ return crypto_skcipher_alg_chunksize(crypto_skcipher_alg(tfm));
+}
+
/**
* crypto_skcipher_blocksize() - obtain block size of cipher
* @tfm: cipher handle
@@ -309,7 +444,13 @@ static inline int crypto_skcipher_setkey(struct crypto_skcipher *tfm,
static inline bool crypto_skcipher_has_setkey(struct crypto_skcipher *tfm)
{
- return tfm->has_setkey;
+ return tfm->keysize;
+}
+
+static inline unsigned int crypto_skcipher_default_keysize(
+ struct crypto_skcipher *tfm)
+{
+ return tfm->keysize;
}
/**
diff --git a/include/linux/bug.h b/include/linux/bug.h
index 7f4818673c41..833746d361cf 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -109,4 +109,23 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr,
}
#endif /* CONFIG_GENERIC_BUG */
+
+/*
+ * Detected data corruption should stop operation on the affected
+ * structures, so the return value must be checked and acted on by the caller.
+ */
+static inline __must_check bool check_data_corruption(bool v) { return v; }
+#define CHECK_DATA_CORRUPTION(condition, fmt, ...) \
+ check_data_corruption(({ \
+ bool corruption = unlikely(condition); \
+ if (corruption) { \
+ if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \
+ pr_err(fmt, ##__VA_ARGS__); \
+ BUG(); \
+ } else \
+ WARN(1, fmt, ##__VA_ARGS__); \
+ } \
+ corruption; \
+ }))
+
#endif /* _LINUX_BUG_H */
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index b7c1e1a7ebac..d7c8b37b2e95 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -48,6 +48,7 @@
#define CRYPTO_ALG_TYPE_AEAD 0x00000003
#define CRYPTO_ALG_TYPE_BLKCIPHER 0x00000004
#define CRYPTO_ALG_TYPE_ABLKCIPHER 0x00000005
+#define CRYPTO_ALG_TYPE_SKCIPHER 0x00000005
#define CRYPTO_ALG_TYPE_GIVCIPHER 0x00000006
#define CRYPTO_ALG_TYPE_DIGEST 0x00000008
#define CRYPTO_ALG_TYPE_HASH 0x00000008
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 2ebfa01b7091..8e6a18582566 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -304,11 +304,6 @@ struct f2fs_node {
* For NAT entries
*/
#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
-#define NAT_ENTRY_BITMAP_SIZE ((NAT_ENTRY_PER_BLOCK + 7) / 8)
-#define NAT_ENTRY_BITMAP_SIZE_ALIGNED \
- ((NAT_ENTRY_BITMAP_SIZE + BITS_PER_LONG - 1) / \
- BITS_PER_LONG * BITS_PER_LONG)
-
struct f2fs_nat_entry {
__u8 version; /* latest version of cached nat entry */
diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h
index 44bd4fbd3ec5..e2729c6d9829 100644
--- a/include/linux/fscrypt_notsupp.h
+++ b/include/linux/fscrypt_notsupp.h
@@ -67,16 +67,6 @@ static inline void fscrypt_restore_control_page(struct page *page)
return;
}
-static inline void fscrypt_set_d_op(struct dentry *dentry)
-{
- return;
-}
-
-static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry)
-{
- return;
-}
-
/* policy.c */
static inline int fscrypt_ioctl_set_policy(struct file *filp,
const void __user *arg)
diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h
index 9d1857302b73..46b62d82b6d6 100644
--- a/include/linux/fscrypt_supp.h
+++ b/include/linux/fscrypt_supp.h
@@ -28,7 +28,7 @@ struct fscrypt_operations {
int (*set_context)(struct inode *, const void *, size_t, void *);
bool (*dummy_context)(struct inode *);
bool (*empty_dir)(struct inode *);
- unsigned (*max_namelen)(struct inode *);
+ unsigned int max_namelen;
};
struct fscrypt_ctx {
@@ -74,20 +74,6 @@ static inline struct page *fscrypt_control_page(struct page *page)
extern void fscrypt_restore_control_page(struct page *);
-extern const struct dentry_operations fscrypt_d_ops;
-
-static inline void fscrypt_set_d_op(struct dentry *dentry)
-{
- d_set_d_op(dentry, &fscrypt_d_ops);
-}
-
-static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry)
-{
- spin_lock(&dentry->d_lock);
- dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY;
- spin_unlock(&dentry->d_lock);
-}
-
/* policy.c */
extern int fscrypt_ioctl_set_policy(struct file *, const void __user *);
extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
new file mode 100644
index 000000000000..e6e53a36104b
--- /dev/null
+++ b/include/linux/fscrypto.h
@@ -0,0 +1,411 @@
+/*
+ * General per-file encryption definition
+ *
+ * Copyright (C) 2015, Google, Inc.
+ *
+ * Written by Michael Halcrow, 2015.
+ * Modified by Jaegeuk Kim, 2015.
+ */
+
+#ifndef _LINUX_FSCRYPTO_H
+#define _LINUX_FSCRYPTO_H
+
+#include <linux/key.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/bio.h>
+#include <linux/dcache.h>
+#include <crypto/skcipher.h>
+#include <uapi/linux/fs.h>
+
+#define FS_KEY_DERIVATION_NONCE_SIZE 16
+#define FS_ENCRYPTION_CONTEXT_FORMAT_V1 1
+
+#define FS_POLICY_FLAGS_PAD_4 0x00
+#define FS_POLICY_FLAGS_PAD_8 0x01
+#define FS_POLICY_FLAGS_PAD_16 0x02
+#define FS_POLICY_FLAGS_PAD_32 0x03
+#define FS_POLICY_FLAGS_PAD_MASK 0x03
+#define FS_POLICY_FLAGS_VALID 0x03
+
+/* Encryption algorithms */
+#define FS_ENCRYPTION_MODE_INVALID 0
+#define FS_ENCRYPTION_MODE_AES_256_XTS 1
+#define FS_ENCRYPTION_MODE_AES_256_GCM 2
+#define FS_ENCRYPTION_MODE_AES_256_CBC 3
+#define FS_ENCRYPTION_MODE_AES_256_CTS 4
+
+/**
+ * Encryption context for inode
+ *
+ * Protector format:
+ * 1 byte: Protector format (1 = this version)
+ * 1 byte: File contents encryption mode
+ * 1 byte: File names encryption mode
+ * 1 byte: Flags
+ * 8 bytes: Master Key descriptor
+ * 16 bytes: Encryption Key derivation nonce
+ */
+struct fscrypt_context {
+ u8 format;
+ u8 contents_encryption_mode;
+ u8 filenames_encryption_mode;
+ u8 flags;
+ u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+ u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
+} __packed;
+
+/* Encryption parameters */
+#define FS_XTS_TWEAK_SIZE 16
+#define FS_AES_128_ECB_KEY_SIZE 16
+#define FS_AES_256_GCM_KEY_SIZE 32
+#define FS_AES_256_CBC_KEY_SIZE 32
+#define FS_AES_256_CTS_KEY_SIZE 32
+#define FS_AES_256_XTS_KEY_SIZE 64
+#define FS_MAX_KEY_SIZE 64
+
+#define FS_KEY_DESC_PREFIX "fscrypt:"
+#define FS_KEY_DESC_PREFIX_SIZE 8
+
+/* This is passed in from userspace into the kernel keyring */
+struct fscrypt_key {
+ u32 mode;
+ u8 raw[FS_MAX_KEY_SIZE];
+ u32 size;
+} __packed;
+
+struct fscrypt_info {
+ u8 ci_data_mode;
+ u8 ci_filename_mode;
+ u8 ci_flags;
+ struct crypto_skcipher *ci_ctfm;
+ struct key *ci_keyring_key;
+ u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE];
+};
+
+#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
+#define FS_WRITE_PATH_FL 0x00000002
+
+struct fscrypt_ctx {
+ union {
+ struct {
+ struct page *bounce_page; /* Ciphertext page */
+ struct page *control_page; /* Original page */
+ } w;
+ struct {
+ struct bio *bio;
+ struct work_struct work;
+ } r;
+ struct list_head free_list; /* Free list */
+ };
+ u8 flags; /* Flags */
+ u8 mode; /* Encryption mode for tfm */
+};
+
+struct fscrypt_completion_result {
+ struct completion completion;
+ int res;
+};
+
+#define DECLARE_FS_COMPLETION_RESULT(ecr) \
+ struct fscrypt_completion_result ecr = { \
+ COMPLETION_INITIALIZER((ecr).completion), 0 }
+
+#define FS_FNAME_NUM_SCATTER_ENTRIES 4
+#define FS_CRYPTO_BLOCK_SIZE 16
+#define FS_FNAME_CRYPTO_DIGEST_SIZE 32
+
+/**
+ * For encrypted symlinks, the ciphertext length is stored at the beginning
+ * of the string in little-endian format.
+ */
+struct fscrypt_symlink_data {
+ __le16 len;
+ char encrypted_path[1];
+} __packed;
+
+/*
+ * Bytes needed on disk for an encrypted symlink whose name is @l bytes long.
+ * Lengths below FS_CRYPTO_BLOCK_SIZE are raised to it; the "- 1" removes the
+ * one-byte encrypted_path[] placeholder that sizeof() already counts.
+ */
+static inline u32 fscrypt_symlink_data_len(u32 l)
+{
+	u32 padded = (l < FS_CRYPTO_BLOCK_SIZE) ? FS_CRYPTO_BLOCK_SIZE : l;
+	return padded + sizeof(struct fscrypt_symlink_data) - 1;
+}
+
+struct fscrypt_str {
+ unsigned char *name;
+ u32 len;
+};
+
+struct fscrypt_name {
+ const struct qstr *usr_fname;
+ struct fscrypt_str disk_name;
+ u32 hash;
+ u32 minor_hash;
+ struct fscrypt_str crypto_buf;
+};
+
+#define FSTR_INIT(n, l) { .name = n, .len = l }
+#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len)
+#define fname_name(p) ((p)->disk_name.name)
+#define fname_len(p) ((p)->disk_name.len)
+
+/*
+ * crypto operations for filesystems
+ */
+struct fscrypt_operations {
+ int (*get_context)(struct inode *, void *, size_t);
+ int (*key_prefix)(struct inode *, u8 **);
+ int (*prepare_context)(struct inode *);
+ int (*set_context)(struct inode *, const void *, size_t, void *);
+ int (*dummy_context)(struct inode *);
+ bool (*is_encrypted)(struct inode *);
+ bool (*empty_dir)(struct inode *);
+ unsigned (*max_namelen)(struct inode *);
+};
+
+static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
+{
+	const struct fscrypt_operations *ops = inode->i_sb->s_cop;
+
+	/* The hook is optional; a missing one means "not enabled". */
+	return ops->dummy_context && ops->dummy_context(inode);
+}
+
+static inline bool fscrypt_valid_contents_enc_mode(u32 mode)
+{
+ return (mode == FS_ENCRYPTION_MODE_AES_256_XTS);
+}
+
+static inline bool fscrypt_valid_filenames_enc_mode(u32 mode)
+{
+ return (mode == FS_ENCRYPTION_MODE_AES_256_CTS);
+}
+
+static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
+{
+	const unsigned char *s = str->name;
+	u32 len = str->len;
+
+	/* True only for "." and ".."; the length is checked before any byte. */
+	if (len < 1 || len > 2 || s[0] != '.')
+		return false;
+	return len == 1 || s[1] == '.';
+}
+
+static inline struct page *fscrypt_control_page(struct page *page)
+{
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+ return ((struct fscrypt_ctx *)page_private(page))->w.control_page;
+#else
+ WARN_ON_ONCE(1);
+ return ERR_PTR(-EINVAL);
+#endif
+}
+
+static inline int fscrypt_has_encryption_key(struct inode *inode)
+{
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+ return (inode->i_crypt_info != NULL);
+#else
+ return 0;
+#endif
+}
+
+static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry)
+{
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY;
+ spin_unlock(&dentry->d_lock);
+#endif
+}
+
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+extern const struct dentry_operations fscrypt_d_ops;
+#endif
+
+static inline void fscrypt_set_d_op(struct dentry *dentry)
+{
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+ d_set_d_op(dentry, &fscrypt_d_ops);
+#endif
+}
+
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+/* crypto.c */
+extern struct kmem_cache *fscrypt_info_cachep;
+int fscrypt_initialize(void);
+
+extern struct fscrypt_ctx *fscrypt_get_ctx(struct inode *, gfp_t);
+extern void fscrypt_release_ctx(struct fscrypt_ctx *);
+extern struct page *fscrypt_encrypt_page(struct inode *, struct page *, gfp_t);
+extern int fscrypt_decrypt_page(struct page *);
+extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *);
+extern void fscrypt_pullback_bio_page(struct page **, bool);
+extern void fscrypt_restore_control_page(struct page *);
+extern int fscrypt_zeroout_range(struct inode *, pgoff_t, sector_t,
+ unsigned int);
+/* policy.c */
+extern int fscrypt_ioctl_set_policy(struct file *, const void __user *);
+extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
+extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
+extern int fscrypt_inherit_context(struct inode *, struct inode *,
+ void *, bool);
+/* keyinfo.c */
+extern int get_crypt_info(struct inode *);
+extern int fscrypt_get_encryption_info(struct inode *);
+extern void fscrypt_put_encryption_info(struct inode *, struct fscrypt_info *);
+
+/* fname.c */
+extern int fscrypt_setup_filename(struct inode *, const struct qstr *,
+ int lookup, struct fscrypt_name *);
+extern void fscrypt_free_filename(struct fscrypt_name *);
+extern u32 fscrypt_fname_encrypted_size(struct inode *, u32);
+extern int fscrypt_fname_alloc_buffer(struct inode *, u32,
+ struct fscrypt_str *);
+extern void fscrypt_fname_free_buffer(struct fscrypt_str *);
+extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32,
+ const struct fscrypt_str *, struct fscrypt_str *);
+extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *,
+ struct fscrypt_str *);
+#endif
+
+/* crypto.c */
+static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(struct inode *i,
+ gfp_t f)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void fscrypt_notsupp_release_ctx(struct fscrypt_ctx *c)
+{
+ return;
+}
+
+static inline struct page *fscrypt_notsupp_encrypt_page(struct inode *i,
+ struct page *p, gfp_t f)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline int fscrypt_notsupp_decrypt_page(struct page *p)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void fscrypt_notsupp_decrypt_bio_pages(struct fscrypt_ctx *c,
+ struct bio *b)
+{
+ return;
+}
+
+static inline void fscrypt_notsupp_pullback_bio_page(struct page **p, bool b)
+{
+ return;
+}
+
+static inline void fscrypt_notsupp_restore_control_page(struct page *p)
+{
+ return;
+}
+
+static inline int fscrypt_notsupp_zeroout_range(struct inode *i, pgoff_t p,
+ sector_t s, unsigned int f)
+{
+ return -EOPNOTSUPP;
+}
+
+/* policy.c */
+static inline int fscrypt_notsupp_ioctl_set_policy(struct file *f,
+ const void __user *arg)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int fscrypt_notsupp_ioctl_get_policy(struct file *f,
+ void __user *arg)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int fscrypt_notsupp_has_permitted_context(struct inode *p,
+ struct inode *i)
+{
+ return 0;
+}
+
+static inline int fscrypt_notsupp_inherit_context(struct inode *p,
+ struct inode *i, void *v, bool b)
+{
+ return -EOPNOTSUPP;
+}
+
+/* keyinfo.c */
+static inline int fscrypt_notsupp_get_encryption_info(struct inode *i)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void fscrypt_notsupp_put_encryption_info(struct inode *i,
+ struct fscrypt_info *f)
+{
+ return;
+}
+
+ /* fname.c */
+static inline int fscrypt_notsupp_setup_filename(struct inode *dir,
+ const struct qstr *iname,
+ int lookup, struct fscrypt_name *fname)
+{
+ if (dir->i_sb->s_cop->is_encrypted(dir))
+ return -EOPNOTSUPP;
+
+ memset(fname, 0, sizeof(struct fscrypt_name));
+ fname->usr_fname = iname;
+ fname->disk_name.name = (unsigned char *)iname->name;
+ fname->disk_name.len = iname->len;
+ return 0;
+}
+
+static inline void fscrypt_notsupp_free_filename(struct fscrypt_name *fname)
+{
+ return;
+}
+
+static inline u32 fscrypt_notsupp_fname_encrypted_size(struct inode *i, u32 s)
+{
+ /* never happens */
+ WARN_ON(1);
+ return 0;
+}
+
+static inline int fscrypt_notsupp_fname_alloc_buffer(struct inode *inode,
+ u32 ilen, struct fscrypt_str *crypto_str)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void fscrypt_notsupp_fname_free_buffer(struct fscrypt_str *c)
+{
+ return;
+}
+
+static inline int fscrypt_notsupp_fname_disk_to_usr(struct inode *inode,
+ u32 hash, u32 minor_hash,
+ const struct fscrypt_str *iname,
+ struct fscrypt_str *oname)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int fscrypt_notsupp_fname_usr_to_disk(struct inode *inode,
+ const struct qstr *iname,
+ struct fscrypt_str *oname)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* _LINUX_FSCRYPTO_H */
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 23e129ef6726..e353f6600b0b 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -125,6 +125,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
* Extended Capability Register
*/
+#define ecap_dit(e) ((e >> 41) & 0x1)
#define ecap_pasid(e) ((e >> 40) & 0x1)
#define ecap_pss(e) ((e >> 35) & 0x1f)
#define ecap_eafs(e) ((e >> 34) & 0x1)
@@ -294,6 +295,7 @@ enum {
#define QI_DEV_IOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32)
#define QI_DEV_IOTLB_QDEP(qdep) (((qdep) & 0x1f) << 16)
#define QI_DEV_IOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK)
+#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)((pfsid) & 0xf) << 12) | ((u64)(((pfsid) >> 4) & 0xfff) << 52)) /* PFSID[3:0] -> bits 15:12, PFSID[15:4] -> bits 63:52 */
#define QI_DEV_IOTLB_SIZE 1
#define QI_DEV_IOTLB_MAX_INVS 32
@@ -318,6 +320,7 @@ enum {
#define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32)
#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16)
#define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4)
+#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)((pfsid) & 0xf) << 12) | ((u64)(((pfsid) >> 4) & 0xfff) << 52)) /* PFSID[3:0] -> bits 15:12, PFSID[15:4] -> bits 63:52 */
#define QI_DEV_EIOTLB_MAX_INVS 32
#define QI_PGRP_IDX(idx) (((u64)(idx)) << 55)
@@ -463,9 +466,8 @@ extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid,
u8 fm, u64 type);
extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
unsigned int size_order, u64 type);
-extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
- u64 addr, unsigned mask);
-
+extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+ u16 qdep, u64 addr, unsigned mask);
extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
extern int dmar_ir_support(void);
diff --git a/include/linux/io.h b/include/linux/io.h
index de64c1e53612..8ab45611fc35 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -154,4 +154,26 @@ enum {
void *memremap(resource_size_t offset, size_t size, unsigned long flags);
void memunmap(void *addr);
+/*
+ * On x86 PAT systems we have memory tracking that keeps track of
+ * the allowed mappings on memory ranges. This tracking works for
+ * all the in-kernel mapping APIs (ioremap*), but where the user
+ * wishes to map a range from a physical device into user memory
+ * the tracking won't be updated. This API is to be used by
+ * drivers which remap physical device pages into userspace,
+ * and wants to make sure they are mapped WC and not UC.
+ */
+#ifndef arch_io_reserve_memtype_wc
+static inline int arch_io_reserve_memtype_wc(resource_size_t base,
+ resource_size_t size)
+{
+ return 0;
+}
+
+static inline void arch_io_free_memtype_wc(resource_size_t base,
+ resource_size_t size)
+{
+}
+#endif
+
#endif /* _LINUX_IO_H */
diff --git a/include/linux/list.h b/include/linux/list.h
index 993395a2e55c..d5750f2f1c36 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -28,27 +28,42 @@ static inline void INIT_LIST_HEAD(struct list_head *list)
list->prev = list;
}
+#ifdef CONFIG_DEBUG_LIST
+extern bool __list_add_valid(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next);
+extern bool __list_del_entry_valid(struct list_head *entry);
+#else
+static inline bool __list_add_valid(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next)
+{
+ return true;
+}
+static inline bool __list_del_entry_valid(struct list_head *entry)
+{
+ return true;
+}
+#endif
+
/*
* Insert a new entry between two known consecutive entries.
*
* This is only for internal list manipulation where we know
* the prev/next entries already!
*/
-#ifndef CONFIG_DEBUG_LIST
static inline void __list_add(struct list_head *new,
struct list_head *prev,
struct list_head *next)
{
+ if (!__list_add_valid(new, prev, next))
+ return;
+
next->prev = new;
new->next = next;
new->prev = prev;
prev->next = new;
}
-#else
-extern void __list_add(struct list_head *new,
- struct list_head *prev,
- struct list_head *next);
-#endif
/**
* list_add - add a new entry
@@ -96,22 +111,20 @@ static inline void __list_del(struct list_head * prev, struct list_head * next)
* Note: list_empty() on entry does not return true after this, the entry is
* in an undefined state.
*/
-#ifndef CONFIG_DEBUG_LIST
static inline void __list_del_entry(struct list_head *entry)
{
+ if (!__list_del_entry_valid(entry))
+ return;
+
__list_del(entry->prev, entry->next);
}
static inline void list_del(struct list_head *entry)
{
- __list_del(entry->prev, entry->next);
+ __list_del_entry(entry);
entry->next = LIST_POISON1;
entry->prev = LIST_POISON2;
}
-#else
-extern void __list_del_entry(struct list_head *entry);
-extern void list_del(struct list_head *entry);
-#endif
/**
* list_replace - replace old entry by new one
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 907f94428ccc..e368ab06d63d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -400,7 +400,7 @@ struct kioctx_table;
struct mm_struct {
struct vm_area_struct *mmap; /* list of VMAs */
struct rb_root mm_rb;
- u32 vmacache_seqnum; /* per-thread vmacache */
+ u64 vmacache_seqnum; /* per-thread vmacache */
#ifdef CONFIG_MMU
unsigned long (*get_unmapped_area) (struct file *filp,
unsigned long addr, unsigned long len,
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
new file mode 100644
index 000000000000..8712ff70995f
--- /dev/null
+++ b/include/linux/overflow.h
@@ -0,0 +1,278 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+#ifndef __LINUX_OVERFLOW_H
+#define __LINUX_OVERFLOW_H
+
+#include <linux/compiler.h>
+
+/*
+ * In the fallback code below, we need to compute the minimum and
+ * maximum values representable in a given type. These macros may also
+ * be useful elsewhere, so we provide them outside the
+ * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block.
+ *
+ * It would seem more obvious to do something like
+ *
+ * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0)
+ * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0)
+ *
+ * Unfortunately, the middle expressions, strictly speaking, have
+ * undefined behaviour, and at least some versions of gcc warn about
+ * the type_max expression (but not if -fsanitize=undefined is in
+ * effect; in that case, the warning is deferred to runtime...).
+ *
+ * The slightly excessive casting in type_min is to make sure the
+ * macros also produce sensible values for the exotic type _Bool. [The
+ * overflow checkers only almost work for _Bool, but that's
+ * a-feature-not-a-bug, since people shouldn't be doing arithmetic on
+ * _Bools. Besides, the gcc builtins don't allow _Bool* as third
+ * argument.]
+ *
+ * Idea stolen from
+ * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html -
+ * credit to Christian Biere.
+ */
+#define is_signed_type(type) (((type)(-1)) < (type)1)
+#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type)))
+#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
+#define type_min(T) ((T)((T)-type_max(T)-(T)1))
+
+
+#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
+/*
+ * For simplicity and code hygiene, the fallback code below insists on
+ * a, b and *d having the same type (similar to the min() and max()
+ * macros), whereas gcc's type-generic overflow checkers accept
+ * different types. Hence we don't just make check_add_overflow an
+ * alias for __builtin_add_overflow, but add type checks similar to
+ * below.
+ */
+#define check_add_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ __builtin_add_overflow(__a, __b, __d); \
+})
+
+#define check_sub_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ __builtin_sub_overflow(__a, __b, __d); \
+})
+
+#define check_mul_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ __builtin_mul_overflow(__a, __b, __d); \
+})
+
+#else
+
+
+/* Checking for unsigned overflow is relatively easy without causing UB. */
+#define __unsigned_add_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = __a + __b; \
+ *__d < __a; \
+})
+#define __unsigned_sub_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = __a - __b; \
+ __a < __b; \
+})
+/*
+ * If one of a or b is a compile-time constant, this avoids a division.
+ */
+#define __unsigned_mul_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = __a * __b; \
+ __builtin_constant_p(__b) ? \
+ __b > 0 && __a > type_max(typeof(__a)) / __b : \
+ __a > 0 && __b > type_max(typeof(__b)) / __a; \
+})
+
+/*
+ * For signed types, detecting overflow is much harder, especially if
+ * we want to avoid UB. But the interface of these macros is such that
+ * we must provide a result in *d, and in fact we must produce the
+ * result promised by gcc's builtins, which is simply the possibly
+ * wrapped-around value. Fortunately, we can just formally do the
+ * operations in the widest relevant unsigned type (u64) and then
+ * truncate the result - gcc is smart enough to generate the same code
+ * with and without the (u64) casts.
+ */
+
+/*
+ * Adding two signed integers can overflow only if they have the same
+ * sign, and overflow has happened iff the result has the opposite
+ * sign.
+ */
+#define __signed_add_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = (u64)__a + (u64)__b; \
+ (((~(__a ^ __b)) & (*__d ^ __a)) \
+ & type_min(typeof(__a))) != 0; \
+})
+
+/*
+ * Subtraction is similar, except that overflow can now happen only
+ * when the signs are opposite. In this case, overflow has happened if
+ * the result has the opposite sign of a.
+ */
+#define __signed_sub_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = (u64)__a - (u64)__b; \
+ ((((__a ^ __b)) & (*__d ^ __a)) \
+ & type_min(typeof(__a))) != 0; \
+})
+
+/*
+ * Signed multiplication is rather hard. gcc always follows C99, so
+ * division is truncated towards 0. This means that we can write the
+ * overflow check like this:
+ *
+ * (a > 0 && (b > MAX/a || b < MIN/a)) ||
+ * (a < -1 && (b > MIN/a || b < MAX/a)) ||
+ * (a == -1 && b == MIN)
+ *
+ * The redundant casts of -1 are to silence an annoying -Wtype-limits
+ * (included in -Wextra) warning: When the type is u8 or u16, the
+ * __b_c_e in check_mul_overflow obviously selects
+ * __unsigned_mul_overflow, but unfortunately gcc still parses this
+ * code and warns about the limited range of __b.
+ */
+
+#define __signed_mul_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ typeof(a) __tmax = type_max(typeof(a)); \
+ typeof(a) __tmin = type_min(typeof(a)); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = (u64)__a * (u64)__b; \
+ (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \
+ (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \
+ (__b == (typeof(__b))-1 && __a == __tmin); \
+})
+
+
+#define check_add_overflow(a, b, d) \
+ __builtin_choose_expr(is_signed_type(typeof(a)), \
+ __signed_add_overflow(a, b, d), \
+ __unsigned_add_overflow(a, b, d))
+
+#define check_sub_overflow(a, b, d) \
+ __builtin_choose_expr(is_signed_type(typeof(a)), \
+ __signed_sub_overflow(a, b, d), \
+ __unsigned_sub_overflow(a, b, d))
+
+#define check_mul_overflow(a, b, d) \
+ __builtin_choose_expr(is_signed_type(typeof(a)), \
+ __signed_mul_overflow(a, b, d), \
+ __unsigned_mul_overflow(a, b, d))
+
+
+#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
+
+/**
+ * array_size() - Calculate size of 2-dimensional array.
+ *
+ * @a: dimension one
+ * @b: dimension two
+ *
+ * Multiplies @a by @b through check_mul_overflow() so that a wrapped
+ * product is never handed back to the caller.
+ *
+ * Returns: @a * @b, the number of bytes needed to represent the array,
+ * or SIZE_MAX when the multiplication overflows size_t.
+ *
+ * The result is marked __must_check.
+ */
+static inline __must_check size_t array_size(size_t a, size_t b)
+{
+	size_t product;
+
+	return check_mul_overflow(a, b, &product) ? SIZE_MAX : product;
+}
+
+/**
+ * array3_size() - Calculate size of 3-dimensional array.
+ *
+ * @a: dimension one
+ * @b: dimension two
+ * @c: dimension three
+ *
+ * Calculates size of 3-dimensional array: @a * @b * @c.
+ *
+ * Returns: number of bytes needed to represent the array or SIZE_MAX on
+ * overflow.
+ */
+static inline __must_check size_t array3_size(size_t a, size_t b, size_t c)
+{
+ size_t bytes;
+
+ if (check_mul_overflow(a, b, &bytes))
+ return SIZE_MAX;
+ if (check_mul_overflow(bytes, c, &bytes))
+ return SIZE_MAX;
+
+ return bytes;
+}
+
+/* Overflow-checked n * size + c; yields SIZE_MAX if either step overflows. */
+static inline __must_check size_t __ab_c_size(size_t n, size_t size, size_t c)
+{
+	size_t total;
+
+	if (check_mul_overflow(n, size, &total) ||
+	    check_add_overflow(total, c, &total))
+		return SIZE_MAX;
+
+	return total;
+}
+
+/**
+ * struct_size() - Calculate size of structure with trailing array.
+ * @p: Pointer to the structure.
+ * @member: Name of the array member.
+ * @n: Number of elements in the array.
+ *
+ * Calculates size of memory needed for structure @p followed by an
+ * array of @n @member elements.
+ *
+ * Return: number of bytes needed or SIZE_MAX on overflow.
+ */
+#define struct_size(p, member, n) \
+ __ab_c_size(n, \
+ sizeof(*(p)->member) + __must_be_array((p)->member),\
+ sizeof(*(p)))
+
+#endif /* __LINUX_OVERFLOW_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 771774e13f10..443a063e6af8 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -359,8 +359,16 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
unsigned int nr_pages, struct page **pages);
unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
unsigned int nr_pages, struct page **pages);
-unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
- int tag, unsigned int nr_pages, struct page **pages);
+unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
+ pgoff_t end, int tag, unsigned int nr_pages,
+ struct page **pages);
+static inline unsigned find_get_pages_tag(struct address_space *mapping,
+ pgoff_t *index, int tag, unsigned int nr_pages,
+ struct page **pages)
+{
+ return find_get_pages_range_tag(mapping, index, (pgoff_t)-1, tag,
+ nr_pages, pages);
+}
struct page *grab_cache_page_write_begin(struct address_space *mapping,
pgoff_t index, unsigned flags);
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index b45d391b4540..cead4419f933 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -29,9 +29,17 @@ unsigned pagevec_lookup_entries(struct pagevec *pvec,
void pagevec_remove_exceptionals(struct pagevec *pvec);
unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
pgoff_t start, unsigned nr_pages);
-unsigned pagevec_lookup_tag(struct pagevec *pvec,
- struct address_space *mapping, pgoff_t *index, int tag,
- unsigned nr_pages);
+unsigned pagevec_lookup_range_tag(struct pagevec *pvec,
+ struct address_space *mapping, pgoff_t *index, pgoff_t end,
+ int tag);
+unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec,
+ struct address_space *mapping, pgoff_t *index, pgoff_t end,
+ int tag, unsigned max_pages);
+static inline unsigned pagevec_lookup_tag(struct pagevec *pvec,
+ struct address_space *mapping, pgoff_t *index, int tag)
+{
+ return pagevec_lookup_range_tag(pvec, mapping, index, (pgoff_t)-1, tag);
+}
static inline void pagevec_init(struct pagevec *pvec, int cold)
{
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 5ed540986019..0c94d17a4642 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -45,19 +45,17 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
* This is only for internal list manipulation where we know
* the prev/next entries already!
*/
-#ifndef CONFIG_DEBUG_LIST
static inline void __list_add_rcu(struct list_head *new,
struct list_head *prev, struct list_head *next)
{
+ if (!__list_add_valid(new, prev, next))
+ return;
+
new->next = next;
new->prev = prev;
rcu_assign_pointer(list_next_rcu(prev), new);
next->prev = new;
}
-#else
-void __list_add_rcu(struct list_head *new,
- struct list_head *prev, struct list_head *next);
-#endif
/**
* list_add_rcu - add a new entry to rcu-protected list
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5ad2b620e041..b78769af4591 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1621,7 +1621,7 @@ struct task_struct {
struct mm_struct *mm, *active_mm;
/* per-thread vma caching */
- u32 vmacache_seqnum;
+ u64 vmacache_seqnum;
struct vm_area_struct *vmacache[VMACACHE_SIZE];
#if defined(SPLIT_RSS_COUNTING)
struct task_rss_stat rss_stat;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index f1a52c11de0e..a3c65d09e6d9 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -150,8 +150,9 @@ enum {
SWP_FILE = (1 << 7), /* set after swap_activate success */
SWP_AREA_DISCARD = (1 << 8), /* single-time swap area discards */
SWP_PAGE_DISCARD = (1 << 9), /* freed swap page-cluster discards */
+ SWP_STABLE_WRITES = (1 << 10), /* no overwrite PG_writeback pages */
/* add others here before... */
- SWP_SCANNING = (1 << 10), /* refcount in scan_swap_map */
+ SWP_SCANNING = (1 << 11), /* refcount in scan_swap_map */
};
#define SWAP_CLUSTER_MAX 32UL
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c2b66a277e98..5d2779aa4bbe 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -205,6 +205,26 @@ extern struct trace_event_functions exit_syscall_print_funcs;
} \
static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+/*
+ * Called before coming back to user-mode. Returning to user-mode with an
+ * address limit different than USER_DS can allow to overwrite kernel memory.
+ */
+static inline void addr_limit_user_check(void)
+{
+#ifdef TIF_FSCHECK
+ if (!test_thread_flag(TIF_FSCHECK))
+ return;
+#endif
+
+ if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS),
+ "Invalid address limit on user-mode return"))
+ force_sig(SIGKILL, current);
+
+#ifdef TIF_FSCHECK
+ clear_thread_flag(TIF_FSCHECK);
+#endif
+}
+
asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
qid_t id, void __user *addr);
asmlinkage long sys_time(time_t __user *tloc);
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 8ef3a61fdc74..fdac5800872d 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -88,7 +88,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
#ifdef CONFIG_DEBUG_VM_VMACACHE
VMACACHE_FIND_CALLS,
VMACACHE_FIND_HITS,
- VMACACHE_FULL_FLUSHES,
#endif
NR_VM_EVENT_ITEMS
};
diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h
index c3fa0fd43949..4f58ff2dacd6 100644
--- a/include/linux/vmacache.h
+++ b/include/linux/vmacache.h
@@ -15,7 +15,6 @@ static inline void vmacache_flush(struct task_struct *tsk)
memset(tsk->vmacache, 0, sizeof(tsk->vmacache));
}
-extern void vmacache_flush_all(struct mm_struct *mm);
extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma);
extern struct vm_area_struct *vmacache_find(struct mm_struct *mm,
unsigned long addr);
@@ -29,10 +28,6 @@ extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
static inline void vmacache_invalidate(struct mm_struct *mm)
{
mm->vmacache_seqnum++;
-
- /* deal with overflows */
- if (unlikely(mm->vmacache_seqnum == 0))
- vmacache_flush_all(mm);
}
#endif /* __LINUX_VMACACHE_H */
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 34eb16098a33..2219cce81ca4 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -41,12 +41,14 @@ struct zs_pool_stats {
struct zs_pool;
-struct zs_pool *zs_create_pool(const char *name, gfp_t flags);
+struct zs_pool *zs_create_pool(const char *name);
void zs_destroy_pool(struct zs_pool *pool);
-unsigned long zs_malloc(struct zs_pool *pool, size_t size);
+unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags);
void zs_free(struct zs_pool *pool, unsigned long obj);
+size_t zs_huge_class_size(struct zs_pool *pool);
+
void *zs_map_object(struct zs_pool *pool, unsigned long handle,
enum zs_mapmode mm);
void zs_unmap_object(struct zs_pool *pool, unsigned long handle);
diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h
index 316694dafa5b..008f466d1da7 100644
--- a/include/net/nfc/hci.h
+++ b/include/net/nfc/hci.h
@@ -87,7 +87,7 @@ struct nfc_hci_pipe {
* According to specification 102 622 chapter 4.4 Pipes,
* the pipe identifier is 7 bits long.
*/
-#define NFC_HCI_MAX_PIPES 127
+#define NFC_HCI_MAX_PIPES 128
struct nfc_hci_init_data {
u8 gate_count;
struct nfc_hci_gate gates[NFC_HCI_MAX_CUSTOM_GATES];
diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index 5539933b3491..bd0da0e992b8 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -246,6 +246,15 @@ struct binder_node_debug_info {
__u32 has_weak_ref;
};
+struct binder_node_info_for_ref {
+ __u32 handle;
+ __u32 strong_count;
+ __u32 weak_count;
+ __u32 reserved1;
+ __u32 reserved2;
+ __u32 reserved3;
+};
+
#define BINDER_WRITE_READ _IOWR('b', 1, struct binder_write_read)
#define BINDER_SET_IDLE_TIMEOUT _IOW('b', 3, __s64)
#define BINDER_SET_MAX_THREADS _IOW('b', 5, __u32)
@@ -254,6 +263,7 @@ struct binder_node_debug_info {
#define BINDER_THREAD_EXIT _IOW('b', 8, __s32)
#define BINDER_VERSION _IOWR('b', 9, struct binder_version)
#define BINDER_GET_NODE_DEBUG_INFO _IOWR('b', 11, struct binder_node_debug_info)
+#define BINDER_GET_NODE_INFO_FOR_REF _IOWR('b', 12, struct binder_node_info_for_ref)
/*
* NOTE: Two special error codes you should check for when calling
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index cd1629170103..08f47e0e9f8d 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -819,13 +819,13 @@ struct ethtool_rx_flow_spec {
static inline __u64 ethtool_get_flow_spec_ring(__u64 ring_cookie)
{
return ETHTOOL_RX_FLOW_SPEC_RING & ring_cookie;
-};
+}
static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
{
return (ETHTOOL_RX_FLOW_SPEC_RING_VF & ring_cookie) >>
ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF;
-};
+}
/**
* struct ethtool_rxnfc - command to get or set RX flow classification rules
diff --git a/include/video/udlfb.h b/include/video/udlfb.h
index f9466fa54ba4..2ad9a6d37ff4 100644
--- a/include/video/udlfb.h
+++ b/include/video/udlfb.h
@@ -87,7 +87,7 @@ struct dlfb_data {
#define MIN_RAW_PIX_BYTES 2
#define MIN_RAW_CMD_BYTES (RAW_HEADER_BYTES + MIN_RAW_PIX_BYTES)
-#define DL_DEFIO_WRITE_DELAY 5 /* fb_deferred_io.delay in jiffies */
+#define DL_DEFIO_WRITE_DELAY msecs_to_jiffies(HZ <= 300 ? 4 : 10) /* optimal value for 720p video */
#define DL_DEFIO_WRITE_DISABLE (HZ*60) /* "disable" with long delay */
/* remove these once align.h patch is taken into kernel */
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index a162661c9d60..f45a9a5d3e47 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -419,6 +419,13 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
struct path parent_path;
int h, ret = 0;
+ /*
+ * When we will be calling audit_add_to_parent, krule->watch might have
+ * been updated and watch might have been freed.
+ * So we need to keep a reference of watch.
+ */
+ audit_get_watch(watch);
+
mutex_unlock(&audit_filter_mutex);
/* Avoid calling path_lookup under audit_filter_mutex. */
@@ -427,8 +434,10 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
/* caller expects mutex locked */
mutex_lock(&audit_filter_mutex);
- if (ret)
+ if (ret) {
+ audit_put_watch(watch);
return ret;
+ }
/* either find an old parent or attach a new one */
parent = audit_find_parent(d_backing_inode(parent_path.dentry));
@@ -446,6 +455,7 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
*list = &audit_inode_hash[h];
error:
path_put(&parent_path);
+ audit_put_watch(watch);
return ret;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 80445ca0420b..5a90b0f6d668 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1136,7 +1136,9 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
return -ENOMEM;
atomic_set(&sig->count, 1);
+ spin_lock_irq(&current->sighand->siglock);
memcpy(sig->action, current->sighand->action, sizeof(sig->action));
+ spin_unlock_irq(&current->sighand->siglock);
return 0;
}
@@ -1364,6 +1366,18 @@ static struct task_struct *copy_process(unsigned long clone_flags,
cpufreq_task_times_init(p);
+ /*
+ * This _must_ happen before we call free_task(), i.e. before we jump
+ * to any of the bad_fork_* labels. This is to avoid freeing
+ * p->set_child_tid which is (ab)used as a kthread's data pointer for
+ * kernel threads (PF_KTHREAD).
+ */
+ p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
+ /*
+ * Clear TID on mm_release()?
+ */
+ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
+
ftrace_graph_init_task(p);
rt_mutex_init_task(p);
@@ -1525,11 +1539,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
}
}
- p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
- /*
- * Clear TID on mm_release()?
- */
- p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
#ifdef CONFIG_BLOCK
p->plug = NULL;
#endif
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 0dfa25276f39..6a0cd0d585d2 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -373,7 +373,6 @@ void handle_nested_irq(unsigned int irq)
raw_spin_lock_irq(&desc->lock);
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
- kstat_incr_irqs_this_cpu(desc);
action = desc->action;
if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) {
@@ -381,6 +380,7 @@ void handle_nested_irq(unsigned int irq)
goto out_unlock;
}
+ kstat_incr_irqs_this_cpu(desc);
irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
raw_spin_unlock_irq(&desc->lock);
@@ -447,13 +447,13 @@ void handle_simple_irq(struct irq_desc *desc)
goto out_unlock;
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
- kstat_incr_irqs_this_cpu(desc);
if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
desc->istate |= IRQS_PENDING;
goto out_unlock;
}
+ kstat_incr_irqs_this_cpu(desc);
handle_irq_event(desc);
out_unlock:
@@ -497,7 +497,6 @@ void handle_level_irq(struct irq_desc *desc)
goto out_unlock;
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
- kstat_incr_irqs_this_cpu(desc);
/*
* If its disabled or no action available
@@ -508,6 +507,7 @@ void handle_level_irq(struct irq_desc *desc)
goto out_unlock;
}
+ kstat_incr_irqs_this_cpu(desc);
handle_irq_event(desc);
cond_unmask_irq(desc);
@@ -567,7 +567,6 @@ void handle_fasteoi_irq(struct irq_desc *desc)
goto out;
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
- kstat_incr_irqs_this_cpu(desc);
/*
* If its disabled or no action available
@@ -579,6 +578,7 @@ void handle_fasteoi_irq(struct irq_desc *desc)
goto out;
}
+ kstat_incr_irqs_this_cpu(desc);
if (desc->istate & IRQS_ONESHOT)
mask_irq(desc);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index d9b0be5c6a5f..f529aafc1180 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -313,10 +313,16 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
task = create->result;
if (!IS_ERR(task)) {
static const struct sched_param param = { .sched_priority = 0 };
+ char name[TASK_COMM_LEN];
va_list args;
va_start(args, namefmt);
- vsnprintf(task->comm, sizeof(task->comm), namefmt, args);
+ /*
+ * task is already visible to other tasks, so updating
+ * COMM must be protected.
+ */
+ vsnprintf(name, sizeof(name), namefmt, args);
+ set_task_comm(task, name);
va_end(args);
/*
* root may have changed our (kthreadd's) priority or CPU mask.
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index 05a37857ab55..8d7047ecef4e 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -104,6 +104,19 @@ bool osq_lock(struct optimistic_spin_queue *lock)
prev = decode_cpu(old);
node->prev = prev;
+
+ /*
+ * osq_lock() unqueue
+ *
+ * node->prev = prev osq_wait_next()
+ * WMB MB
+ * prev->next = node next->prev = prev // unqueue-C
+ *
+ * Here 'node->prev' and 'next->prev' are the same variable and we need
+ * to ensure these stores happen in-order to avoid corrupting the list.
+ */
+ smp_wmb();
+
WRITE_ONCE(prev->next, node);
/*
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index a4d4de05b2d1..1be33caf157d 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -511,6 +511,33 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
unsigned long flags;
/*
+ * __rwsem_down_write_failed_common(sem)
+ * rwsem_optimistic_spin(sem)
+ * osq_unlock(sem->osq)
+ * ...
+ * atomic_long_add_return(&sem->count)
+ *
+ * - VS -
+ *
+ * __up_write()
+ * if (atomic_long_sub_return_release(&sem->count) < 0)
+ * rwsem_wake(sem)
+ * osq_is_locked(&sem->osq)
+ *
+ * And __up_write() must observe !osq_is_locked() when it observes the
+ * atomic_long_add_return() in order to not miss a wakeup.
+ *
+ * This boils down to:
+ *
+ * [S.rel] X = 1 [RmW] r0 = (Y += 0)
+ * MB RMB
+ * [RmW] Y += 1 [L] r1 = X
+ *
+ * exists (r0=1 /\ r1=0)
+ */
+ smp_rmb();
+
+ /*
* If a spinner is present, it is not necessary to do the wakeup.
* Try to do wakeup only if the trylock succeeds to minimize
* spinlock contention which may introduce too much delay in the
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 2098f7d5fd9e..e746f7d1dc16 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -119,6 +119,7 @@ config PM_SLEEP
def_bool y
depends on SUSPEND || HIBERNATE_CALLBACKS
select PM
+ select SRCU
config PM_SLEEP_SMP
def_bool y
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
index d444fc1a4d58..5e47c29b44f6 100644
--- a/kernel/sched/tune.c
+++ b/kernel/sched/tune.c
@@ -569,7 +569,7 @@ prefer_idle_write(struct cgroup_subsys_state *css, struct cftype *cft,
u64 prefer_idle)
{
struct schedtune *st = css_st(css);
- st->prefer_idle = prefer_idle;
+ st->prefer_idle = !!prefer_idle;
return 0;
}
diff --git a/kernel/sys.c b/kernel/sys.c
index 0df4753d4969..ede0c1f4b860 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1144,18 +1144,19 @@ static int override_release(char __user *release, size_t len)
SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{
- int errno = 0;
+ struct new_utsname tmp;
down_read(&uts_sem);
- if (copy_to_user(name, utsname(), sizeof *name))
- errno = -EFAULT;
+ memcpy(&tmp, utsname(), sizeof(tmp));
up_read(&uts_sem);
+ if (copy_to_user(name, &tmp, sizeof(tmp)))
+ return -EFAULT;
- if (!errno && override_release(name->release, sizeof(name->release)))
- errno = -EFAULT;
- if (!errno && override_architecture(name))
- errno = -EFAULT;
- return errno;
+ if (override_release(name->release, sizeof(name->release)))
+ return -EFAULT;
+ if (override_architecture(name))
+ return -EFAULT;
+ return 0;
}
#ifdef __ARCH_WANT_SYS_OLD_UNAME
@@ -1164,55 +1165,46 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
*/
SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
{
- int error = 0;
+ struct old_utsname tmp;
if (!name)
return -EFAULT;
down_read(&uts_sem);
- if (copy_to_user(name, utsname(), sizeof(*name)))
- error = -EFAULT;
+ memcpy(&tmp, utsname(), sizeof(tmp));
up_read(&uts_sem);
+ if (copy_to_user(name, &tmp, sizeof(tmp)))
+ return -EFAULT;
- if (!error && override_release(name->release, sizeof(name->release)))
- error = -EFAULT;
- if (!error && override_architecture(name))
- error = -EFAULT;
- return error;
+ if (override_release(name->release, sizeof(name->release)))
+ return -EFAULT;
+ if (override_architecture(name))
+ return -EFAULT;
+ return 0;
}
SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
{
- int error;
+ struct oldold_utsname tmp = {};
if (!name)
return -EFAULT;
- if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
- return -EFAULT;
down_read(&uts_sem);
- error = __copy_to_user(&name->sysname, &utsname()->sysname,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
- error |= __copy_to_user(&name->nodename, &utsname()->nodename,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
- error |= __copy_to_user(&name->release, &utsname()->release,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->release + __OLD_UTS_LEN);
- error |= __copy_to_user(&name->version, &utsname()->version,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->version + __OLD_UTS_LEN);
- error |= __copy_to_user(&name->machine, &utsname()->machine,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->machine + __OLD_UTS_LEN);
+ memcpy(&tmp.sysname, &utsname()->sysname, __OLD_UTS_LEN);
+ memcpy(&tmp.nodename, &utsname()->nodename, __OLD_UTS_LEN);
+ memcpy(&tmp.release, &utsname()->release, __OLD_UTS_LEN);
+ memcpy(&tmp.version, &utsname()->version, __OLD_UTS_LEN);
+ memcpy(&tmp.machine, &utsname()->machine, __OLD_UTS_LEN);
up_read(&uts_sem);
+ if (copy_to_user(name, &tmp, sizeof(tmp)))
+ return -EFAULT;
- if (!error && override_architecture(name))
- error = -EFAULT;
- if (!error && override_release(name->release, sizeof(name->release)))
- error = -EFAULT;
- return error ? -EFAULT : 0;
+ if (override_architecture(name))
+ return -EFAULT;
+ if (override_release(name->release, sizeof(name->release)))
+ return -EFAULT;
+ return 0;
}
#endif
@@ -1226,17 +1218,18 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
- down_write(&uts_sem);
errno = -EFAULT;
if (!copy_from_user(tmp, name, len)) {
- struct new_utsname *u = utsname();
+ struct new_utsname *u;
+ down_write(&uts_sem);
+ u = utsname();
memcpy(u->nodename, tmp, len);
memset(u->nodename + len, 0, sizeof(u->nodename) - len);
errno = 0;
uts_proc_notify(UTS_PROC_HOSTNAME);
+ up_write(&uts_sem);
}
- up_write(&uts_sem);
return errno;
}
@@ -1244,8 +1237,9 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
{
- int i, errno;
+ int i;
struct new_utsname *u;
+ char tmp[__NEW_UTS_LEN + 1];
if (len < 0)
return -EINVAL;
@@ -1254,11 +1248,11 @@ SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
i = 1 + strlen(u->nodename);
if (i > len)
i = len;
- errno = 0;
- if (copy_to_user(name, u->nodename, i))
- errno = -EFAULT;
+ memcpy(tmp, u->nodename, i);
up_read(&uts_sem);
- return errno;
+ if (copy_to_user(name, tmp, i))
+ return -EFAULT;
+ return 0;
}
#endif
@@ -1277,17 +1271,18 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
- down_write(&uts_sem);
errno = -EFAULT;
if (!copy_from_user(tmp, name, len)) {
- struct new_utsname *u = utsname();
+ struct new_utsname *u;
+ down_write(&uts_sem);
+ u = utsname();
memcpy(u->domainname, tmp, len);
memset(u->domainname + len, 0, sizeof(u->domainname) - len);
errno = 0;
uts_proc_notify(UTS_PROC_DOMAINNAME);
+ up_write(&uts_sem);
}
- up_write(&uts_sem);
return errno;
}
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 7ab5eafea8b2..210b8e726a97 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1716,6 +1716,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
mutex_lock(&bdev->bd_mutex);
if (attr == &dev_attr_enable) {
+ if (!!value == !!q->blk_trace) {
+ ret = 0;
+ goto out_unlock_bdev;
+ }
if (value)
ret = blk_trace_setup_queue(q, bdev);
else
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index fdaa88f38aec..74b20e3ab8c6 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1513,6 +1513,8 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
tmp_iter_page = first_page;
do {
+ cond_resched();
+
to_remove_page = tmp_iter_page;
rb_inc_page(cpu_buffer, &tmp_iter_page);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index efdddec2af6e..2bee793749d8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1359,11 +1359,11 @@ void tracing_reset_all_online_cpus(void)
#define SAVED_CMDLINES_DEFAULT 128
#define NO_CMDLINE_MAP UINT_MAX
-static unsigned saved_tgids[SAVED_CMDLINES_DEFAULT];
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
struct saved_cmdlines_buffer {
unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
unsigned *map_cmdline_to_pid;
+ unsigned *map_cmdline_to_tgid;
unsigned cmdline_num;
int cmdline_idx;
char *saved_cmdlines;
@@ -1397,12 +1397,23 @@ static int allocate_cmdlines_buffer(unsigned int val,
return -ENOMEM;
}
+ s->map_cmdline_to_tgid = kmalloc_array(val,
+ sizeof(*s->map_cmdline_to_tgid),
+ GFP_KERNEL);
+ if (!s->map_cmdline_to_tgid) {
+ kfree(s->map_cmdline_to_pid);
+ kfree(s->saved_cmdlines);
+ return -ENOMEM;
+ }
+
s->cmdline_idx = 0;
s->cmdline_num = val;
memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
sizeof(s->map_pid_to_cmdline));
memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
val * sizeof(*s->map_cmdline_to_pid));
+ memset(s->map_cmdline_to_tgid, NO_CMDLINE_MAP,
+ val * sizeof(*s->map_cmdline_to_tgid));
return 0;
}
@@ -1568,14 +1579,17 @@ static int trace_save_cmdline(struct task_struct *tsk)
if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
return 0;
+ preempt_disable();
/*
* It's not the end of the world if we don't get
* the lock, but we also don't want to spin
* nor do we want to disable interrupts,
* so if we miss here, then better luck next time.
*/
- if (!arch_spin_trylock(&trace_cmdline_lock))
+ if (!arch_spin_trylock(&trace_cmdline_lock)) {
+ preempt_enable();
return 0;
+ }
idx = savedcmd->map_pid_to_cmdline[tsk->pid];
if (idx == NO_CMDLINE_MAP) {
@@ -1598,8 +1612,9 @@ static int trace_save_cmdline(struct task_struct *tsk)
}
set_cmdline(idx, tsk->comm);
- saved_tgids[idx] = tsk->tgid;
+ savedcmd->map_cmdline_to_tgid[idx] = tsk->tgid;
arch_spin_unlock(&trace_cmdline_lock);
+ preempt_enable();
return 1;
}
@@ -1641,19 +1656,29 @@ void trace_find_cmdline(int pid, char comm[])
preempt_enable();
}
-int trace_find_tgid(int pid)
+static int __find_tgid_locked(int pid)
{
unsigned map;
int tgid;
- preempt_disable();
- arch_spin_lock(&trace_cmdline_lock);
map = savedcmd->map_pid_to_cmdline[pid];
if (map != NO_CMDLINE_MAP)
- tgid = saved_tgids[map];
+ tgid = savedcmd->map_cmdline_to_tgid[map];
else
tgid = -1;
+ return tgid;
+}
+
+int trace_find_tgid(int pid)
+{
+ int tgid;
+
+ preempt_disable();
+ arch_spin_lock(&trace_cmdline_lock);
+
+ tgid = __find_tgid_locked(pid);
+
arch_spin_unlock(&trace_cmdline_lock);
preempt_enable();
@@ -3970,10 +3995,15 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
{
char buf[64];
int r;
+ unsigned int n;
+ preempt_disable();
arch_spin_lock(&trace_cmdline_lock);
- r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
+ n = savedcmd->cmdline_num;
arch_spin_unlock(&trace_cmdline_lock);
+ preempt_enable();
+
+ r = scnprintf(buf, sizeof(buf), "%u\n", n);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
@@ -3982,6 +4012,7 @@ static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
{
kfree(s->saved_cmdlines);
kfree(s->map_cmdline_to_pid);
+ kfree(s->map_cmdline_to_tgid);
kfree(s);
}
@@ -3998,10 +4029,12 @@ static int tracing_resize_saved_cmdlines(unsigned int val)
return -ENOMEM;
}
+ preempt_disable();
arch_spin_lock(&trace_cmdline_lock);
savedcmd_temp = savedcmd;
savedcmd = s;
arch_spin_unlock(&trace_cmdline_lock);
+ preempt_enable();
free_saved_cmdlines_buffer(savedcmd_temp);
return 0;
@@ -4220,33 +4253,61 @@ tracing_saved_tgids_read(struct file *file, char __user *ubuf,
char *file_buf;
char *buf;
int len = 0;
- int pid;
int i;
+ int *pids;
+ int n = 0;
- file_buf = kmalloc(SAVED_CMDLINES_DEFAULT*(16+1+16), GFP_KERNEL);
- if (!file_buf)
- return -ENOMEM;
+ preempt_disable();
+ arch_spin_lock(&trace_cmdline_lock);
- buf = file_buf;
+ pids = kmalloc_array(savedcmd->cmdline_num, 2*sizeof(int), GFP_KERNEL);
+ if (!pids) {
+ arch_spin_unlock(&trace_cmdline_lock);
+ preempt_enable();
+ return -ENOMEM;
+ }
- for (i = 0; i < SAVED_CMDLINES_DEFAULT; i++) {
- int tgid;
- int r;
+ for (i = 0; i < savedcmd->cmdline_num; i++) {
+ int pid;
pid = savedcmd->map_cmdline_to_pid[i];
if (pid == -1 || pid == NO_CMDLINE_MAP)
continue;
- tgid = trace_find_tgid(pid);
- r = sprintf(buf, "%d %d\n", pid, tgid);
+ pids[n] = pid;
+ pids[n+1] = __find_tgid_locked(pid);
+ n += 2;
+ }
+ arch_spin_unlock(&trace_cmdline_lock);
+ preempt_enable();
+
+ if (n == 0) {
+ kfree(pids);
+ return 0;
+ }
+
+ /* enough to hold max pair of pids + space, lr and nul */
+ len = n * 12;
+ file_buf = kmalloc(len, GFP_KERNEL);
+ if (!file_buf) {
+ kfree(pids);
+ return -ENOMEM;
+ }
+
+ buf = file_buf;
+ for (i = 0; i < n && len > 0; i += 2) {
+ int r;
+
+ r = snprintf(buf, len, "%d %d\n", pids[i], pids[i+1]);
buf += r;
- len += r;
+ len -= r;
}
len = simple_read_from_buffer(ubuf, cnt, ppos,
- file_buf, len);
+ file_buf, buf - file_buf);
kfree(file_buf);
+ kfree(pids);
return len;
}
@@ -6587,7 +6648,9 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
if (buffer) {
mutex_lock(&trace_types_lock);
- if (val) {
+ if (!!val == tracer_tracing_is_on(tr)) {
+ val = 0; /* do nothing */
+ } else if (val) {
tracer_tracing_on(tr);
if (tr->current_trace->start)
tr->current_trace->start(tr);
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 68bb89ad9d28..1dc887bab085 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -969,7 +969,7 @@ probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file)
list_del_rcu(&link->list);
/* synchronize with u{,ret}probe_trace_func */
- synchronize_sched();
+ synchronize_rcu();
kfree(link);
if (!list_empty(&tu->tp.files))
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 88fefa68c516..a965df4b54f5 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -602,9 +602,26 @@ static ssize_t map_write(struct file *file, const char __user *buf,
struct uid_gid_map new_map;
unsigned idx;
struct uid_gid_extent *extent = NULL;
- unsigned long page = 0;
+ unsigned long page;
char *kbuf, *pos, *next_line;
- ssize_t ret = -EINVAL;
+ ssize_t ret;
+
+ /* Only allow < page size writes at the beginning of the file */
+ if ((*ppos != 0) || (count >= PAGE_SIZE))
+ return -EINVAL;
+
+ /* Get a buffer */
+ page = __get_free_page(GFP_TEMPORARY);
+ kbuf = (char *) page;
+ if (!page)
+ return -ENOMEM;
+
+ /* Slurp in the user data */
+ if (copy_from_user(kbuf, buf, count)) {
+ free_page(page);
+ return -EFAULT;
+ }
+ kbuf[count] = '\0';
/*
* The userns_state_mutex serializes all writes to any given map.
@@ -638,24 +655,6 @@ static ssize_t map_write(struct file *file, const char __user *buf,
if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
goto out;
- /* Get a buffer */
- ret = -ENOMEM;
- page = __get_free_page(GFP_TEMPORARY);
- kbuf = (char *) page;
- if (!page)
- goto out;
-
- /* Only allow < page size writes at the beginning of the file */
- ret = -EINVAL;
- if ((*ppos != 0) || (count >= PAGE_SIZE))
- goto out;
-
- /* Slurp in the user data */
- ret = -EFAULT;
- if (copy_from_user(kbuf, buf, count))
- goto out;
- kbuf[count] = '\0';
-
/* Parse the user data */
ret = -EINVAL;
pos = kbuf;
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index c8eac43267e9..d2b3b2973456 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -17,7 +17,7 @@
#ifdef CONFIG_PROC_SYSCTL
-static void *get_uts(struct ctl_table *table, int write)
+static void *get_uts(struct ctl_table *table)
{
char *which = table->data;
struct uts_namespace *uts_ns;
@@ -25,21 +25,9 @@ static void *get_uts(struct ctl_table *table, int write)
uts_ns = current->nsproxy->uts_ns;
which = (which - (char *)&init_uts_ns) + (char *)uts_ns;
- if (!write)
- down_read(&uts_sem);
- else
- down_write(&uts_sem);
return which;
}
-static void put_uts(struct ctl_table *table, int write, void *which)
-{
- if (!write)
- up_read(&uts_sem);
- else
- up_write(&uts_sem);
-}
-
/*
* Special case of dostring for the UTS structure. This has locks
* to observe. Should this be in kernel/sys.c ????
@@ -49,13 +37,34 @@ static int proc_do_uts_string(struct ctl_table *table, int write,
{
struct ctl_table uts_table;
int r;
+ char tmp_data[__NEW_UTS_LEN + 1];
+
memcpy(&uts_table, table, sizeof(uts_table));
- uts_table.data = get_uts(table, write);
+ uts_table.data = tmp_data;
+
+ /*
+ * Buffer the value in tmp_data so that proc_dostring() can be called
+ * without holding any locks.
+ * We also need to read the original value in the write==1 case to
+ * support partial writes.
+ */
+ down_read(&uts_sem);
+ memcpy(tmp_data, get_uts(table), sizeof(tmp_data));
+ up_read(&uts_sem);
r = proc_dostring(&uts_table, write, buffer, lenp, ppos);
- put_uts(table, write, uts_table.data);
- if (write)
+ if (write) {
+ /*
+ * Write back the new value.
+ * Note that, since we dropped uts_sem, the result can
+ * theoretically be incorrect if there are two parallel writes
+ * at non-zero offsets to the same sysctl.
+ */
+ down_write(&uts_sem);
+ memcpy(get_uts(table), tmp_data, sizeof(tmp_data));
+ up_write(&uts_sem);
proc_sys_poll_notify(table->poll);
+ }
return r;
}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 17d43dcf38b9..34c170a4a99d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1898,6 +1898,16 @@ config TEST_STATIC_KEYS
If unsure, say N.
+config BUG_ON_DATA_CORRUPTION
+ bool "Trigger a BUG when data corruption is detected"
+ select CONFIG_DEBUG_LIST
+ help
+ Select this option if the kernel should BUG when it encounters
+ data corruption in kernel memory structures when they get checked
+ for validity.
+
+ If unsure, say N.
+
source "samples/Kconfig"
source "lib/Kconfig.kgdb"
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 547f7f923dbc..a26328ec39f1 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -295,9 +295,12 @@ static void debug_object_is_on_stack(void *addr, int onstack)
limit++;
if (is_on_stack)
- pr_warn("object is on stack, but not annotated\n");
+ pr_warn("object %p is on stack %p, but NOT annotated.\n", addr,
+ task_stack_page(current));
else
- pr_warn("object is not on stack, but annotated\n");
+ pr_warn("object %p is NOT on stack %p, but annotated.\n", addr,
+ task_stack_page(current));
+
WARN_ON(1);
}
diff --git a/lib/list_debug.c b/lib/list_debug.c
index c24c2f7e296f..a34db8d27667 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -2,8 +2,7 @@
* Copyright 2006, Red Hat, Inc., Dave Jones
* Released under the General Public License (GPL).
*
- * This file contains the linked list implementations for
- * DEBUG_LIST.
+ * This file contains the linked list validation for DEBUG_LIST.
*/
#include <linux/export.h>
@@ -13,88 +12,51 @@
#include <linux/rculist.h>
/*
- * Insert a new entry between two known consecutive entries.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
+ * Check that the data structures for the list manipulations are reasonably
+ * valid. Failures here indicate memory corruption (and possibly an exploit
+ * attempt).
*/
-void __list_add(struct list_head *new,
- struct list_head *prev,
- struct list_head *next)
+bool __list_add_valid(struct list_head *new, struct list_head *prev,
+ struct list_head *next)
{
- WARN(next->prev != prev,
- "list_add corruption. next->prev should be "
- "prev (%p), but was %p. (next=%p).\n",
- prev, next->prev, next);
- WARN(prev->next != next,
- "list_add corruption. prev->next should be "
- "next (%p), but was %p. (prev=%p).\n",
- next, prev->next, prev);
- WARN(new == prev || new == next,
- "list_add double add: new=%p, prev=%p, next=%p.\n",
- new, prev, next);
- next->prev = new;
- new->next = next;
- new->prev = prev;
- prev->next = new;
+ if (CHECK_DATA_CORRUPTION(next->prev != prev,
+ "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
+ prev, next->prev, next) ||
+ CHECK_DATA_CORRUPTION(prev->next != next,
+ "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
+ next, prev->next, prev) ||
+ CHECK_DATA_CORRUPTION(new == prev || new == next,
+ "list_add double add: new=%p, prev=%p, next=%p.\n",
+ new, prev, next))
+ return false;
+
+ return true;
}
-EXPORT_SYMBOL(__list_add);
+EXPORT_SYMBOL(__list_add_valid);
-void __list_del_entry(struct list_head *entry)
+bool __list_del_entry_valid(struct list_head *entry)
{
struct list_head *prev, *next;
prev = entry->prev;
next = entry->next;
- if (WARN(next == LIST_POISON1,
- "list_del corruption, %p->next is LIST_POISON1 (%p)\n",
- entry, LIST_POISON1) ||
- WARN(prev == LIST_POISON2,
- "list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
- entry, LIST_POISON2) ||
- WARN(prev->next != entry,
- "list_del corruption. prev->next should be %p, "
- "but was %p\n", entry, prev->next) ||
- WARN(next->prev != entry,
- "list_del corruption. next->prev should be %p, "
- "but was %p\n", entry, next->prev))
- return;
-
- __list_del(prev, next);
-}
-EXPORT_SYMBOL(__list_del_entry);
-
-/**
- * list_del - deletes entry from list.
- * @entry: the element to delete from the list.
- * Note: list_empty on entry does not return true after this, the entry is
- * in an undefined state.
- */
-void list_del(struct list_head *entry)
-{
- __list_del_entry(entry);
- entry->next = LIST_POISON1;
- entry->prev = LIST_POISON2;
-}
-EXPORT_SYMBOL(list_del);
+ if (CHECK_DATA_CORRUPTION(next == LIST_POISON1,
+ "list_del corruption, %p->next is LIST_POISON1 (%p)\n",
+ entry, LIST_POISON1) ||
+ CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
+ "list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
+ entry, LIST_POISON2) ||
+ CHECK_DATA_CORRUPTION(prev->next != entry,
+ "list_del corruption. prev->next should be %p, but was %p\n",
+ entry, prev->next) ||
+ CHECK_DATA_CORRUPTION(next->prev != entry,
+ "list_del corruption. next->prev should be %p, but was %p\n",
+ entry, next->prev))
+ return false;
+
+ return true;
-/*
- * RCU variants.
- */
-void __list_add_rcu(struct list_head *new,
- struct list_head *prev, struct list_head *next)
-{
- WARN(next->prev != prev,
- "list_add_rcu corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
- prev, next->prev, next);
- WARN(prev->next != next,
- "list_add_rcu corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
- next, prev->next, prev);
- new->next = next;
- new->prev = prev;
- rcu_assign_pointer(list_next_rcu(prev), new);
- next->prev = new;
}
-EXPORT_SYMBOL(__list_add_rcu);
+EXPORT_SYMBOL(__list_del_entry_valid);
diff --git a/mm/debug.c b/mm/debug.c
index 668aa35191ca..689b6e911cae 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -168,7 +168,7 @@ EXPORT_SYMBOL(dump_vma);
void dump_mm(const struct mm_struct *mm)
{
- pr_emerg("mm %p mmap %p seqnum %d task_size %lu\n"
+ pr_emerg("mm %p mmap %p seqnum %llu task_size %lu\n"
#ifdef CONFIG_MMU
"get_unmapped_area %p\n"
#endif
@@ -198,7 +198,7 @@ void dump_mm(const struct mm_struct *mm)
#endif
"%s", /* This is here to hold the comma */
- mm, mm->mmap, mm->vmacache_seqnum, mm->task_size,
+ mm, mm->mmap, (long long) mm->vmacache_seqnum, mm->task_size,
#ifdef CONFIG_MMU
mm->get_unmapped_area,
#endif
diff --git a/mm/fadvise.c b/mm/fadvise.c
index b8a5bc66b0c0..001877e32f0c 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -68,8 +68,12 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
goto out;
}
- /* Careful about overflows. Len == 0 means "as much as possible" */
- endbyte = offset + len;
+ /*
+ * Careful about overflows. Len == 0 means "as much as possible". Use
+ * unsigned math because signed overflows are undefined and UBSan
+ * complains.
+ */
+ endbyte = (u64)offset + (u64)len;
if (!len || endbyte < len)
endbyte = -1;
else
diff --git a/mm/filemap.c b/mm/filemap.c
index 21e750b6e810..38cacca05d77 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -394,19 +394,17 @@ static int __filemap_fdatawait_range(struct address_space *mapping,
goto out;
pagevec_init(&pvec, 0);
- while ((index <= end) &&
- (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_WRITEBACK,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
+ while (index <= end) {
unsigned i;
+ nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
+ end, PAGECACHE_TAG_WRITEBACK);
+ if (!nr_pages)
+ break;
+
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
- /* until radix tree lookup accepts end_index */
- if (page->index > end)
- continue;
-
wait_on_page_writeback(page);
if (TestClearPageError(page))
ret = -EIO;
@@ -1424,9 +1422,10 @@ repeat:
EXPORT_SYMBOL(find_get_pages_contig);
/**
- * find_get_pages_tag - find and return pages that match @tag
+ * find_get_pages_range_tag - find and return pages in given range matching @tag
* @mapping: the address_space to search
* @index: the starting page index
+ * @end: The final page index (inclusive)
* @tag: the tag index
* @nr_pages: the maximum number of pages
* @pages: where the resulting pages are placed
@@ -1434,8 +1433,9 @@ EXPORT_SYMBOL(find_get_pages_contig);
* Like find_get_pages, except we only return pages which are tagged with
* @tag. We update @index to index the next page for the traversal.
*/
-unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
- int tag, unsigned int nr_pages, struct page **pages)
+unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
+ pgoff_t end, int tag, unsigned int nr_pages,
+ struct page **pages)
{
struct radix_tree_iter iter;
void **slot;
@@ -1449,6 +1449,9 @@ restart:
radix_tree_for_each_tagged(slot, &mapping->page_tree,
&iter, *index, tag) {
struct page *page;
+
+ if (iter.index > end)
+ break;
repeat:
page = radix_tree_deref_slot(slot);
if (unlikely(!page))
@@ -1487,18 +1490,28 @@ repeat:
}
pages[ret] = page;
- if (++ret == nr_pages)
- break;
+ if (++ret == nr_pages) {
+ *index = pages[ret - 1]->index + 1;
+ goto out;
+ }
}
+ /*
+ * We come here when we got at @end. We take care to not overflow the
+ * index @index as it confuses some of the callers. This breaks the
+ * iteration when there is page at index -1 but that is already broken
+ * anyway.
+ */
+ if (end == (pgoff_t)-1)
+ *index = (pgoff_t)-1;
+ else
+ *index = end + 1;
+out:
rcu_read_unlock();
- if (ret)
- *index = pages[ret - 1]->index + 1;
-
return ret;
}
-EXPORT_SYMBOL(find_get_pages_tag);
+EXPORT_SYMBOL(find_get_pages_range_tag);
/*
* CD/DVDs are error prone. When a medium error occurs, the driver may fail
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d64d48ca789c..67f5a8ca0af1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1392,12 +1392,12 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
/* Migration could have started since the pmd_trans_migrating check */
if (!page_locked) {
+ page_nid = -1;
if (!get_page_unless_zero(page))
goto out_unlock;
spin_unlock(ptl);
wait_on_page_locked(page);
put_page(page);
- page_nid = -1;
goto out;
}
diff --git a/mm/memory.c b/mm/memory.c
index 42db644f5ec4..5aee9ec8b8c6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -361,15 +361,6 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
struct mmu_table_batch **batch = &tlb->batch;
- /*
- * When there's less then two users of this mm there cannot be a
- * concurrent page-table walk.
- */
- if (atomic_read(&tlb->mm->mm_users) < 2) {
- __tlb_remove_table(table);
- return;
- }
-
if (*batch == NULL) {
*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
if (*batch == NULL) {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 8e80ea58c7e7..f6cf7379c3f9 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2198,30 +2198,14 @@ retry:
while (!done && (index <= end)) {
int i;
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+ nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
+ tag);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
- /*
- * At this point, the page may be truncated or
- * invalidated (changing page->mapping to NULL), or
- * even swizzled back from swapper_space to tmpfs file
- * mapping. However, page->index will not change
- * because we have a reference on the page.
- */
- if (page->index > end) {
- /*
- * can't be range_cyclic (1st pass) because
- * end == -1 in that case.
- */
- done = 1;
- break;
- }
-
done_index = page->index;
lock_page(page);
diff --git a/mm/shmem.c b/mm/shmem.c
index 79997e8cf807..afdd2b957f1b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1464,6 +1464,8 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
mpol_shared_policy_init(&info->policy, NULL);
break;
}
+
+ lockdep_annotate_inode_mutex_key(inode);
} else
shmem_free_inode(sb);
return inode;
diff --git a/mm/swap.c b/mm/swap.c
index 39395fb549c0..8e6bcb688779 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -1130,15 +1130,25 @@ unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
}
EXPORT_SYMBOL(pagevec_lookup);
-unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
- pgoff_t *index, int tag, unsigned nr_pages)
+unsigned pagevec_lookup_range_tag(struct pagevec *pvec,
+ struct address_space *mapping, pgoff_t *index, pgoff_t end,
+ int tag)
{
- pvec->nr = find_get_pages_tag(mapping, index, tag,
- nr_pages, pvec->pages);
+ pvec->nr = find_get_pages_range_tag(mapping, index, end, tag,
+ PAGEVEC_SIZE, pvec->pages);
return pagevec_count(pvec);
}
-EXPORT_SYMBOL(pagevec_lookup_tag);
+EXPORT_SYMBOL(pagevec_lookup_range_tag);
+unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec,
+ struct address_space *mapping, pgoff_t *index, pgoff_t end,
+ int tag, unsigned max_pages)
+{
+ pvec->nr = find_get_pages_range_tag(mapping, index, end, tag,
+ min_t(unsigned int, max_pages, PAGEVEC_SIZE), pvec->pages);
+ return pagevec_count(pvec);
+}
+EXPORT_SYMBOL(pagevec_lookup_range_nr_tag);
/*
* Perform any setup for the swap system
*/
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 623c77c1327b..9f7bd5e8e68a 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -932,11 +932,25 @@ int reuse_swap_page(struct page *page)
count = page_mapcount(page);
if (count <= 1 && PageSwapCache(page)) {
count += page_swapcount(page);
- if (count == 1 && !PageWriteback(page)) {
+ if (count != 1)
+ goto out;
+ if (!PageWriteback(page)) {
delete_from_swap_cache(page);
SetPageDirty(page);
+ } else {
+ swp_entry_t entry;
+ struct swap_info_struct *p;
+
+ entry.val = page_private(page);
+ p = swap_info_get(entry);
+ if (p->flags & SWP_STABLE_WRITES) {
+ spin_unlock(&p->lock);
+ return false;
+ }
+ spin_unlock(&p->lock);
}
}
+out:
return count <= 1;
}
@@ -2481,6 +2495,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = -ENOMEM;
goto bad_swap;
}
+
+ if (bdi_cap_stable_pages_required(inode_to_bdi(inode)))
+ p->flags |= SWP_STABLE_WRITES;
+
if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
int cpu;
diff --git a/mm/vmacache.c b/mm/vmacache.c
index fd09dc9c6812..9c8ff3d4eda9 100644
--- a/mm/vmacache.c
+++ b/mm/vmacache.c
@@ -6,44 +6,6 @@
#include <linux/vmacache.h>
/*
- * Flush vma caches for threads that share a given mm.
- *
- * The operation is safe because the caller holds the mmap_sem
- * exclusively and other threads accessing the vma cache will
- * have mmap_sem held at least for read, so no extra locking
- * is required to maintain the vma cache.
- */
-void vmacache_flush_all(struct mm_struct *mm)
-{
- struct task_struct *g, *p;
-
- count_vm_vmacache_event(VMACACHE_FULL_FLUSHES);
-
- /*
- * Single threaded tasks need not iterate the entire
- * list of process. We can avoid the flushing as well
- * since the mm's seqnum was increased and don't have
- * to worry about other threads' seqnum. Current's
- * flush will occur upon the next lookup.
- */
- if (atomic_read(&mm->mm_users) == 1)
- return;
-
- rcu_read_lock();
- for_each_process_thread(g, p) {
- /*
- * Only flush the vmacache pointers as the
- * mm seqnum is already set and curr's will
- * be set upon invalidation when the next
- * lookup is done.
- */
- if (mm == p->mm)
- vmacache_flush(p);
- }
- rcu_read_unlock();
-}
-
-/*
* This task may be accessing a foreign mm via (for example)
* get_user_pages()->find_vma(). The vmacache is task-local and this
* task's vmacache pertains to a different mm (ie, its own). There is
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index c1ea19478119..290e8210c13e 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -202,6 +202,7 @@ static int zs_size_classes;
* (see: fix_fullness_group())
*/
static const int fullness_threshold_frac = 4;
+static size_t huge_class_size;
struct size_class {
spinlock_t lock;
@@ -247,7 +248,6 @@ struct zs_pool {
struct size_class **size_class;
struct kmem_cache *handle_cachep;
- gfp_t flags; /* allocation flags used when growing pool */
atomic_long_t pages_allocated;
struct zs_pool_stats stats;
@@ -296,10 +296,10 @@ static void destroy_handle_cache(struct zs_pool *pool)
kmem_cache_destroy(pool->handle_cachep);
}
-static unsigned long alloc_handle(struct zs_pool *pool)
+static unsigned long alloc_handle(struct zs_pool *pool, gfp_t gfp)
{
return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
- pool->flags & ~__GFP_HIGHMEM);
+ gfp & ~__GFP_HIGHMEM);
}
static void free_handle(struct zs_pool *pool, unsigned long handle)
@@ -325,7 +325,12 @@ static void *zs_zpool_create(const char *name, gfp_t gfp,
const struct zpool_ops *zpool_ops,
struct zpool *zpool)
{
- return zs_create_pool(name, gfp);
+ /*
+ * Ignore global gfp flags: zs_malloc() may be invoked from
+ * different contexts and its caller must provide a valid
+ * gfp mask.
+ */
+ return zs_create_pool(name);
}
static void zs_zpool_destroy(void *pool)
@@ -336,7 +341,7 @@ static void zs_zpool_destroy(void *pool)
static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp,
unsigned long *handle)
{
- *handle = zs_malloc(pool, size);
+ *handle = zs_malloc(pool, size, gfp);
return *handle ? 0 : -1;
}
static void zs_zpool_free(void *pool, unsigned long handle)
@@ -1347,6 +1352,25 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
}
EXPORT_SYMBOL_GPL(zs_unmap_object);
+/**
+ * zs_huge_class_size() - Returns the size (in bytes) of the first huge
+ * zsmalloc &size_class.
+ * @pool: zsmalloc pool to use
+ *
+ * The function returns the size of the first huge class - any object of equal
+ * or bigger size will be stored in zspage consisting of a single physical
+ * page.
+ *
+ * Context: Any context.
+ *
+ * Return: the size (in bytes) of the first huge zsmalloc &size_class.
+ */
+size_t zs_huge_class_size(struct zs_pool *pool)
+{
+ return huge_class_size;
+}
+EXPORT_SYMBOL_GPL(zs_huge_class_size);
+
static unsigned long obj_malloc(struct page *first_page,
struct size_class *class, unsigned long handle)
{
@@ -1388,7 +1412,7 @@ static unsigned long obj_malloc(struct page *first_page,
* otherwise 0.
* Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail.
*/
-unsigned long zs_malloc(struct zs_pool *pool, size_t size)
+unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
{
unsigned long handle, obj;
struct size_class *class;
@@ -1397,7 +1421,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
return 0;
- handle = alloc_handle(pool);
+ handle = alloc_handle(pool, gfp);
if (!handle)
return 0;
@@ -1410,7 +1434,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
if (!first_page) {
spin_unlock(&class->lock);
- first_page = alloc_zspage(class, pool->flags);
+ first_page = alloc_zspage(class, gfp);
if (unlikely(!first_page)) {
free_handle(pool, handle);
return 0;
@@ -1884,7 +1908,7 @@ static int zs_register_shrinker(struct zs_pool *pool)
* On success, a pointer to the newly created pool is returned,
* otherwise NULL.
*/
-struct zs_pool *zs_create_pool(const char *name, gfp_t flags)
+struct zs_pool *zs_create_pool(const char *name)
{
int i;
struct zs_pool *pool;
@@ -1915,12 +1939,35 @@ struct zs_pool *zs_create_pool(const char *name, gfp_t flags)
for (i = zs_size_classes - 1; i >= 0; i--) {
int size;
int pages_per_zspage;
+ int objs_per_zspage;
struct size_class *class;
size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;
if (size > ZS_MAX_ALLOC_SIZE)
size = ZS_MAX_ALLOC_SIZE;
pages_per_zspage = get_pages_per_zspage(size);
+ objs_per_zspage = pages_per_zspage * PAGE_SIZE / size;
+
+ /*
+ * We iterate from biggest down to smallest classes,
+ * so huge_class_size holds the size of the first huge
+ * class. Any object bigger than or equal to that will
+ * endup in the huge class.
+ */
+ if (pages_per_zspage != 1 && objs_per_zspage != 1 &&
+ !huge_class_size) {
+ huge_class_size = size;
+ /*
+ * The object uses ZS_HANDLE_SIZE bytes to store the
+ * handle. We need to subtract it, because zs_malloc()
+ * unconditionally adds handle size before it performs
+ * size class search - so object may be smaller than
+ * huge class size, yet it still can end up in the huge
+ * class because it grows by ZS_HANDLE_SIZE extra bytes
+ * right before class lookup.
+ */
+ huge_class_size -= (ZS_HANDLE_SIZE - 1);
+ }
/*
* size_class is used for normal zsmalloc operation such
@@ -1954,8 +2001,6 @@ struct zs_pool *zs_create_pool(const char *name, gfp_t flags)
prev_class = class;
}
- pool->flags = flags;
-
if (zs_pool_stat_create(name, pool))
goto err;
diff --git a/net/9p/client.c b/net/9p/client.c
index 3ff26eb1ea20..ed8738c4dc09 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -931,7 +931,7 @@ static int p9_client_version(struct p9_client *c)
{
int err = 0;
struct p9_req_t *req;
- char *version;
+ char *version = NULL;
int msize;
p9_debug(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n",
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index bced8c074c12..2f68ffda3715 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -185,6 +185,8 @@ static void p9_mux_poll_stop(struct p9_conn *m)
spin_lock_irqsave(&p9_poll_lock, flags);
list_del_init(&m->poll_pending_link);
spin_unlock_irqrestore(&p9_poll_lock, flags);
+
+ flush_work(&p9_poll_work);
}
/**
@@ -933,7 +935,7 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
if (err < 0)
return err;
- if (valid_ipaddr4(addr) < 0)
+ if (addr == NULL || valid_ipaddr4(addr) < 0)
return -EINVAL;
csocket = NULL;
@@ -981,6 +983,9 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
csocket = NULL;
+ if (addr == NULL)
+ return -EINVAL;
+
if (strlen(addr) >= UNIX_PATH_MAX) {
pr_err("%s (%d): address too long: %s\n",
__func__, task_pid_nr(current), addr);
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 52b4a2f993f2..f42550dd3560 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -644,6 +644,9 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
struct ib_qp_init_attr qp_attr;
struct ib_cq_init_attr cq_attr = {};
+ if (addr == NULL)
+ return -EINVAL;
+
/* Parse the transport specific mount options */
err = parse_opts(args, &opts);
if (err < 0)
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 2ddeecca5b12..2a15b6aa9cdd 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -192,7 +192,7 @@ static int pack_sg_list(struct scatterlist *sg, int start,
s = rest_of_page(data);
if (s > count)
s = count;
- BUG_ON(index > limit);
+ BUG_ON(index >= limit);
/* Make sure we don't terminate early. */
sg_unmark_end(&sg[index]);
sg_set_buf(&sg[index++], data, s);
@@ -237,6 +237,7 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit,
s = PAGE_SIZE - data_off;
if (s > count)
s = count;
+ BUG_ON(index >= limit);
/* Make sure we don't terminate early. */
sg_unmark_end(&sg[index]);
sg_set_page(&sg[index++], pdata[i++], s, data_off);
@@ -409,6 +410,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
p9_debug(P9_DEBUG_TRANS, "virtio request\n");
if (uodata) {
+ __le32 sz;
int n = p9_get_mapped_pages(chan, &out_pages, uodata,
outlen, &offs, &need_drop);
if (n < 0)
@@ -419,6 +421,12 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
outlen = n;
}
+ /* The size field of the message must include the length of the
+ * header and the length of the data. We didn't actually know
+ * the length of the data until this point so add it in now.
+ */
+ sz = cpu_to_le32(req->tc->size + outlen);
+ memcpy(&req->tc->sdata[0], &sz, sizeof(sz));
} else if (uidata) {
int n = p9_get_mapped_pages(chan, &in_pages, uidata,
inlen, &offs, &need_drop);
@@ -566,7 +574,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)
chan->vq = virtio_find_single_vq(vdev, req_done, "requests");
if (IS_ERR(chan->vq)) {
err = PTR_ERR(chan->vq);
- goto out_free_vq;
+ goto out_free_chan;
}
chan->vq->vdev->priv = chan;
spin_lock_init(&chan->lock);
@@ -619,6 +627,7 @@ out_free_tag:
kfree(tag);
out_free_vq:
vdev->config->del_vqs(vdev);
+out_free_chan:
kfree(chan);
fail:
return err;
@@ -646,6 +655,9 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)
int ret = -ENOENT;
int found = 0;
+ if (devname == NULL)
+ return -EINVAL;
+
mutex_lock(&virtio_9p_lock);
list_for_each_entry(chan, &virtio_chan_list, chan_list) {
if (!strncmp(devname, chan->tag, chan->tag_len) &&
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 1811f8e7ddf4..552e00b07196 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -774,7 +774,7 @@ static int hidp_setup_hid(struct hidp_session *session,
hid->version = req->version;
hid->country = req->country;
- strncpy(hid->name, req->name, sizeof(req->name) - 1);
+ strncpy(hid->name, req->name, sizeof(hid->name));
snprintf(hid->phys, sizeof(hid->phys), "%pMR",
&l2cap_pi(session->ctrl_sock->sk)->chan->src);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f60b93627876..78dc184072e8 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1140,6 +1140,12 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
lladdr = neigh->ha;
}
+ /* Update confirmed timestamp for neighbour entry after we
+ * received ARP packet even if it doesn't change IP to MAC binding.
+ */
+ if (new & NUD_CONNECTED)
+ neigh->confirmed = jiffies;
+
/* If entry was valid and address is not changed,
do not change entry state, if new one is STALE.
*/
@@ -1163,15 +1169,12 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
}
}
- /* Update timestamps only once we know we will make a change to the
+ /* Update timestamp only once we know we will make a change to the
* neighbour entry. Otherwise we risk to move the locktime window with
* noop updates and ignore relevant ARP updates.
*/
- if (new != old || lladdr != neigh->ha) {
- if (new & NUD_CONNECTED)
- neigh->confirmed = jiffies;
+ if (new != old || lladdr != neigh->ha)
neigh->updated = jiffies;
- }
if (new != old) {
neigh_del_timer(neigh);
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 4f6c1862dfd2..6fe2b615518c 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1763,7 +1763,7 @@ static struct dcb_app_type *dcb_app_lookup(const struct dcb_app *app,
if (itr->app.selector == app->selector &&
itr->app.protocol == app->protocol &&
itr->ifindex == ifindex &&
- (!prio || itr->app.priority == prio))
+ ((prio == -1) || itr->app.priority == prio))
return itr;
}
@@ -1798,7 +1798,8 @@ u8 dcb_getapp(struct net_device *dev, struct dcb_app *app)
u8 prio = 0;
spin_lock_bh(&dcb_lock);
- if ((itr = dcb_app_lookup(app, dev->ifindex, 0)))
+ itr = dcb_app_lookup(app, dev->ifindex, -1);
+ if (itr)
prio = itr->app.priority;
spin_unlock_bh(&dcb_lock);
@@ -1826,7 +1827,8 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new)
spin_lock_bh(&dcb_lock);
/* Search for existing match and replace */
- if ((itr = dcb_app_lookup(new, dev->ifindex, 0))) {
+ itr = dcb_app_lookup(new, dev->ifindex, -1);
+ if (itr) {
if (new->priority)
itr->app.priority = new->priority;
else {
@@ -1859,7 +1861,8 @@ u8 dcb_ieee_getapp_mask(struct net_device *dev, struct dcb_app *app)
u8 prio = 0;
spin_lock_bh(&dcb_lock);
- if ((itr = dcb_app_lookup(app, dev->ifindex, 0)))
+ itr = dcb_app_lookup(app, dev->ifindex, -1);
+ if (itr)
prio |= 1 << itr->app.priority;
spin_unlock_bh(&dcb_lock);
diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index d4353faced35..a10db45b2e1e 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -265,9 +265,24 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev)
/* We must take a copy of the skb before we modify/replace the ipv6
* header as the header could be used elsewhere
*/
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (!skb)
- return NET_XMIT_DROP;
+ if (unlikely(skb_headroom(skb) < ldev->needed_headroom ||
+ skb_tailroom(skb) < ldev->needed_tailroom)) {
+ struct sk_buff *nskb;
+
+ nskb = skb_copy_expand(skb, ldev->needed_headroom,
+ ldev->needed_tailroom, GFP_ATOMIC);
+ if (likely(nskb)) {
+ consume_skb(skb);
+ skb = nskb;
+ } else {
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+ }
+ } else {
+ skb = skb_unshare(skb, GFP_ATOMIC);
+ if (!skb)
+ return NET_XMIT_DROP;
+ }
ret = lowpan_header(skb, ldev, &dgram_size, &dgram_offset);
if (ret < 0) {
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c31e5fa8f1a8..9a2c36fd3d2f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1299,6 +1299,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
if (encap)
skb_reset_inner_headers(skb);
skb->network_header = (u8 *)iph - skb->head;
+ skb_reset_mac_len(skb);
} while ((skb = skb->next));
out:
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4c1c94fa8f08..d270870bf492 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -200,8 +200,9 @@ kill:
inet_twsk_deschedule_put(tw);
return TCP_TW_SUCCESS;
}
+ } else {
+ inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
}
- inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
if (tmp_opt.saw_tstamp) {
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 9e2ea4ae840d..244b9fec9d4d 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -118,6 +118,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h));
skb->network_header = (u8 *)ipv6h - skb->head;
+ skb_reset_mac_len(skb);
if (udpfrag) {
int err = ip6_find_1stfragopt(skb, &prevhdr);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index efe1098f452f..ba923a5504b9 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -194,12 +194,10 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
kfree_skb(skb);
return -ENOBUFS;
}
+ if (skb->sk)
+ skb_set_owner_w(skb2, skb->sk);
consume_skb(skb);
skb = skb2;
- /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
- * it is safe to call in our context (socket lock not held)
- */
- skb_set_owner_w(skb, (struct sock *)sk);
}
if (opt->opt_flen)
ipv6_push_frag_opts(skb, opt, &proto);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 060862a6f2f2..ca697f16d2ea 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -470,7 +470,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
}
mtu = dst_mtu(dst);
- if (!skb->ignore_df && skb->len > mtu) {
+ if (skb->len > mtu) {
skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu);
if (skb->protocol == htons(ETH_P_IPV6)) {
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 4a116d766c15..7cc9db38e1b6 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -774,6 +774,13 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
return -EINVAL;
lock_sock(sk);
+
+ /* Ensure that the socket is not already bound */
+ if (self->ias_obj) {
+ err = -EINVAL;
+ goto out;
+ }
+
#ifdef CONFIG_IRDA_ULTRA
/* Special care for Ultra sockets */
if ((sk->sk_type == SOCK_DGRAM) &&
@@ -2020,7 +2027,11 @@ static int irda_setsockopt(struct socket *sock, int level, int optname,
err = -EINVAL;
goto out;
}
- irias_insert_object(ias_obj);
+
+ /* Only insert newly allocated objects */
+ if (free_ias)
+ irias_insert_object(ias_obj);
+
kfree(ias_opt);
break;
case IRLMP_IAS_DEL:
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 00a8cc572a22..1f930032253a 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -286,7 +286,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
goto out_unlock;
}
- ieee80211_key_free(key, true);
+ ieee80211_key_free(key, sdata->vif.type == NL80211_IFTYPE_STATION);
ret = 0;
out_unlock:
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 4a72c0d1e56f..91a4e606edcd 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -647,11 +647,15 @@ int ieee80211_key_link(struct ieee80211_key *key,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_key *old_key;
- int idx, ret;
- bool pairwise;
-
- pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
- idx = key->conf.keyidx;
+ int idx = key->conf.keyidx;
+ bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
+ /*
+ * We want to delay tailroom updates only for station - in that
+ * case it helps roaming speed, but in other cases it hurts and
+ * can cause warnings to appear.
+ */
+ bool delay_tailroom = sdata->vif.type == NL80211_IFTYPE_STATION;
+ int ret;
mutex_lock(&sdata->local->key_mtx);
@@ -679,14 +683,14 @@ int ieee80211_key_link(struct ieee80211_key *key,
increment_tailroom_need_count(sdata);
ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
- ieee80211_key_destroy(old_key, true);
+ ieee80211_key_destroy(old_key, delay_tailroom);
ieee80211_debugfs_key_add(key);
if (!local->wowlan) {
ret = ieee80211_key_enable_hw_accel(key);
if (ret)
- ieee80211_key_free(key, true);
+ ieee80211_key_free(key, delay_tailroom);
} else {
ret = 0;
}
@@ -874,7 +878,8 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local,
ieee80211_key_replace(key->sdata, key->sta,
key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
key, NULL);
- __ieee80211_key_destroy(key, true);
+ __ieee80211_key_destroy(key, key->sdata->vif.type ==
+ NL80211_IFTYPE_STATION);
}
for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
@@ -884,7 +889,8 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local,
ieee80211_key_replace(key->sdata, key->sta,
key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
key, NULL);
- __ieee80211_key_destroy(key, true);
+ __ieee80211_key_destroy(key, key->sdata->vif.type ==
+ NL80211_IFTYPE_STATION);
}
mutex_unlock(&local->key_mtx);
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index 3827f359b336..9e1ff9d4cf2d 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -72,8 +72,21 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb)
int ret;
if (!(local->hw.flags & IEEE802154_HW_TX_OMIT_CKSUM)) {
- u16 crc = crc_ccitt(0, skb->data, skb->len);
+ struct sk_buff *nskb;
+ u16 crc;
+
+ if (unlikely(skb_tailroom(skb) < IEEE802154_FCS_LEN)) {
+ nskb = skb_copy_expand(skb, 0, IEEE802154_FCS_LEN,
+ GFP_ATOMIC);
+ if (likely(nskb)) {
+ consume_skb(skb);
+ skb = nskb;
+ } else {
+ goto err_tx;
+ }
+ }
+ crc = crc_ccitt(0, skb->data, skb->len);
put_unaligned_le16(crc, skb_put(skb, 2));
}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index dd1649caa2b2..ac212542a217 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1809,13 +1809,20 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */
- if (sysctl_expire_nodest_conn(ipvs)) {
+ __u32 flags = cp->flags;
+
+ /* When the timer is already started, silently drop the packet. */
+ if (timer_pending(&cp->timer))
+ __ip_vs_conn_put(cp);
+ else
+ ip_vs_conn_put(cp);
+
+ if (sysctl_expire_nodest_conn(ipvs) &&
+ !(flags & IP_VS_CONN_F_ONE_PACKET)) {
/* try to expire the connection immediately */
ip_vs_conn_expire_now(cp);
}
- /* don't restart its timer, and silently
- drop the packet. */
- __ip_vs_conn_put(cp);
+
return NF_DROP;
}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 1f3c305df45d..b6e72af15237 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -876,7 +876,7 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0)
return ERR_PTR(-EFAULT);
- strlcpy(info->name, compat_tmp.name, sizeof(info->name));
+ memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1);
info->num_counters = compat_tmp.num_counters;
user += sizeof(compat_tmp);
} else
@@ -889,9 +889,9 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
if (copy_from_user(info, user, sizeof(*info)) != 0)
return ERR_PTR(-EFAULT);
- info->name[sizeof(info->name) - 1] = '\0';
user += sizeof(*info);
}
+ info->name[sizeof(info->name) - 1] = '\0';
size = sizeof(struct xt_counters);
size *= info->num_counters;
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 45d4b2f22f62..aff2a1b46f7f 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -501,6 +501,9 @@ static void hhf_destroy(struct Qdisc *sch)
hhf_free(q->hhf_valid_bits[i]);
}
+ if (!q->hh_flows)
+ return;
+
for (i = 0; i < HH_FLOWS_CNT; i++) {
struct hh_flow_state *flow, *next;
struct list_head *head = &q->hh_flows[i];
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 87b02ed3d5f2..daa01d5604c2 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1025,6 +1025,9 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
int err;
int i;
+ qdisc_watchdog_init(&q->watchdog, sch);
+ INIT_WORK(&q->work, htb_work_func);
+
if (!opt)
return -EINVAL;
@@ -1045,8 +1048,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
for (i = 0; i < TC_HTB_NUMPRIO; i++)
INIT_LIST_HEAD(q->drops + i);
- qdisc_watchdog_init(&q->watchdog, sch);
- INIT_WORK(&q->work, htb_work_func);
__skb_queue_head_init(&q->direct_queue);
if (tb[TCA_HTB_DIRECT_QLEN])
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index bcdd54bb101c..cef36ad691dd 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -254,7 +254,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
{
struct multiq_sched_data *q = qdisc_priv(sch);
- int i, err;
+ int i;
q->queues = NULL;
@@ -269,12 +269,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
for (i = 0; i < q->max_bands; i++)
q->queues[i] = &noop_qdisc;
- err = multiq_tune(sch, opt);
-
- if (err)
- kfree(q->queues);
-
- return err;
+ return multiq_tune(sch, opt);
}
static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index b7c29d5b6f04..743ff23885da 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -943,11 +943,11 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
struct netem_sched_data *q = qdisc_priv(sch);
int ret;
+ qdisc_watchdog_init(&q->watchdog, sch);
+
if (!opt)
return -EINVAL;
- qdisc_watchdog_init(&q->watchdog, sch);
-
q->loss_model = CLG_RANDOM;
ret = netem_change(sch, opt);
if (ret)
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index c2fbde742f37..a06c9d6bfc9c 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -432,12 +432,13 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
{
struct tbf_sched_data *q = qdisc_priv(sch);
+ qdisc_watchdog_init(&q->watchdog, sch);
+ q->qdisc = &noop_qdisc;
+
if (opt == NULL)
return -EINVAL;
q->t_c = ktime_get_ns();
- qdisc_watchdog_init(&q->watchdog, sch);
- q->qdisc = &noop_qdisc;
return tbf_change(sch, opt);
}
diff --git a/net/socket.c b/net/socket.c
index b75a537807b5..b36c981d04dc 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -528,7 +528,10 @@ static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
if (!err && (iattr->ia_valid & ATTR_UID)) {
struct socket *sock = SOCKET_I(d_inode(dentry));
- sock->sk->sk_uid = iattr->ia_uid;
+ if (sock->sk)
+ sock->sk->sk_uid = iattr->ia_uid;
+ else
+ err = -ENOENT;
}
return err;
@@ -579,12 +582,16 @@ static struct socket *sock_alloc(void)
* an inode not a file.
*/
-void sock_release(struct socket *sock)
+static void __sock_release(struct socket *sock, struct inode *inode)
{
if (sock->ops) {
struct module *owner = sock->ops->owner;
+ if (inode)
+ inode_lock(inode);
sock->ops->release(sock);
+ if (inode)
+ inode_unlock(inode);
sock->ops = NULL;
module_put(owner);
}
@@ -599,6 +606,11 @@ void sock_release(struct socket *sock)
}
sock->file = NULL;
}
+
+void sock_release(struct socket *sock)
+{
+ __sock_release(sock, NULL);
+}
EXPORT_SYMBOL(sock_release);
void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
@@ -1035,7 +1047,7 @@ static int sock_mmap(struct file *file, struct vm_area_struct *vma)
static int sock_close(struct inode *inode, struct file *filp)
{
- sock_release(SOCKET_I(inode));
+ __sock_release(SOCKET_I(inode), inode);
return 0;
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 4615138b104f..e098ca928538 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1846,7 +1846,10 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
/* Try to instantiate a bundle */
err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
if (err <= 0) {
- if (err != 0 && err != -EAGAIN)
+ if (err == 0)
+ return NULL;
+
+ if (err != -EAGAIN)
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
return ERR_PTR(err);
}
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 0924291ef059..52aa80135426 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -373,3 +373,6 @@ endif
endef
#
###############################################################################
+
+# delete partially updated (i.e. corrupted) files on error
+.DELETE_ON_ERROR:
diff --git a/scripts/depmod.sh b/scripts/depmod.sh
index ea1e96921e3b..baedaef53ca0 100755
--- a/scripts/depmod.sh
+++ b/scripts/depmod.sh
@@ -15,9 +15,9 @@ if ! test -r System.map ; then
fi
if [ -z $(command -v $DEPMOD) ]; then
- echo "'make modules_install' requires $DEPMOD. Please install it." >&2
+ echo "Warning: 'make modules_install' requires $DEPMOD. Please install it." >&2
echo "This is probably in the kmod package." >&2
- exit 1
+ exit 0
fi
# older versions of depmod don't support -P <symbol-prefix>
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index bd5151915e5a..064fbfbbb22c 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -649,7 +649,7 @@ static void handle_modversions(struct module *mod, struct elf_info *info,
if (ELF_ST_TYPE(sym->st_info) == STT_SPARC_REGISTER)
break;
if (symname[0] == '.') {
- char *munged = strdup(symname);
+ char *munged = NOFAIL(strdup(symname));
munged[0] = '_';
munged[1] = toupper(munged[1]);
symname = munged;
@@ -1311,7 +1311,7 @@ static Elf_Sym *find_elf_symbol2(struct elf_info *elf, Elf_Addr addr,
static char *sec2annotation(const char *s)
{
if (match(s, init_exit_sections)) {
- char *p = malloc(20);
+ char *p = NOFAIL(malloc(20));
char *r = p;
*p++ = '_';
@@ -1331,7 +1331,7 @@ static char *sec2annotation(const char *s)
strcat(p, " ");
return r;
} else {
- return strdup("");
+ return NOFAIL(strdup(""));
}
}
@@ -2032,7 +2032,7 @@ void buf_write(struct buffer *buf, const char *s, int len)
{
if (buf->size - buf->pos < len) {
buf->size += len + SZ;
- buf->p = realloc(buf->p, buf->size);
+ buf->p = NOFAIL(realloc(buf->p, buf->size));
}
strncpy(buf->p + buf->pos, s, len);
buf->pos += len;
diff --git a/scripts/tags.sh b/scripts/tags.sh
index 45e246595d10..2a61db329adf 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# Generate tags or cscope files
# Usage tags.sh <mode>
#
@@ -135,11 +135,6 @@ all_kconfigs()
find_other_sources 'Kconfig*'
}
-all_defconfigs()
-{
- find_sources $ALLSOURCE_ARCHS "defconfig"
-}
-
docscope()
{
(echo \-k; echo \-q; all_target_sources) > cscope.files
@@ -151,8 +146,111 @@ dogtags()
all_target_sources | gtags -i -f -
}
+# Basic regular expressions with an optional /kind-spec/ for ctags and
+# the following limitations:
+# - No regex modifiers
+# - Use \{0,1\} instead of \?, because etags expects an unescaped ?
+# - \s is not working with etags, use a space or [ \t]
+# - \w works, but does not match underscores in etags
+# - etags regular expressions have to match at the start of a line;
+# a ^[^#]* is prepended by setup_regex unless an anchor is already present
+regex_asm=(
+ '/^\(ENTRY\|_GLOBAL\)(\([[:alnum:]_\\]*\)).*/\2/'
+)
+regex_c=(
+ '/^SYSCALL_DEFINE[0-9](\([[:alnum:]_]*\).*/sys_\1/'
+ '/^COMPAT_SYSCALL_DEFINE[0-9](\([[:alnum:]_]*\).*/compat_sys_\1/'
+ '/^TRACE_EVENT(\([[:alnum:]_]*\).*/trace_\1/'
+ '/^TRACE_EVENT(\([[:alnum:]_]*\).*/trace_\1_rcuidle/'
+ '/^DEFINE_EVENT([^,)]*, *\([[:alnum:]_]*\).*/trace_\1/'
+ '/^DEFINE_EVENT([^,)]*, *\([[:alnum:]_]*\).*/trace_\1_rcuidle/'
+ '/^DEFINE_INSN_CACHE_OPS(\([[:alnum:]_]*\).*/get_\1_slot/'
+ '/^DEFINE_INSN_CACHE_OPS(\([[:alnum:]_]*\).*/free_\1_slot/'
+ '/^PAGEFLAG(\([[:alnum:]_]*\).*/Page\1/'
+ '/^PAGEFLAG(\([[:alnum:]_]*\).*/SetPage\1/'
+ '/^PAGEFLAG(\([[:alnum:]_]*\).*/ClearPage\1/'
+ '/^TESTSETFLAG(\([[:alnum:]_]*\).*/TestSetPage\1/'
+ '/^TESTPAGEFLAG(\([[:alnum:]_]*\).*/Page\1/'
+ '/^SETPAGEFLAG(\([[:alnum:]_]*\).*/SetPage\1/'
+ '/\<__SETPAGEFLAG(\([[:alnum:]_]*\).*/__SetPage\1/'
+ '/\<TESTCLEARFLAG(\([[:alnum:]_]*\).*/TestClearPage\1/'
+ '/\<__TESTCLEARFLAG(\([[:alnum:]_]*\).*/TestClearPage\1/'
+ '/\<CLEARPAGEFLAG(\([[:alnum:]_]*\).*/ClearPage\1/'
+ '/\<__CLEARPAGEFLAG(\([[:alnum:]_]*\).*/__ClearPage\1/'
+ '/^__PAGEFLAG(\([[:alnum:]_]*\).*/__SetPage\1/'
+ '/^__PAGEFLAG(\([[:alnum:]_]*\).*/__ClearPage\1/'
+ '/^PAGEFLAG_FALSE(\([[:alnum:]_]*\).*/Page\1/'
+ '/\<TESTSCFLAG(\([[:alnum:]_]*\).*/TestSetPage\1/'
+ '/\<TESTSCFLAG(\([[:alnum:]_]*\).*/TestClearPage\1/'
+ '/\<SETPAGEFLAG_NOOP(\([[:alnum:]_]*\).*/SetPage\1/'
+ '/\<CLEARPAGEFLAG_NOOP(\([[:alnum:]_]*\).*/ClearPage\1/'
+ '/\<__CLEARPAGEFLAG_NOOP(\([[:alnum:]_]*\).*/__ClearPage\1/'
+ '/\<TESTCLEARFLAG_FALSE(\([[:alnum:]_]*\).*/TestClearPage\1/'
+ '/^PAGE_MAPCOUNT_OPS(\([[:alnum:]_]*\).*/Page\1/'
+ '/^PAGE_MAPCOUNT_OPS(\([[:alnum:]_]*\).*/__SetPage\1/'
+ '/^PAGE_MAPCOUNT_OPS(\([[:alnum:]_]*\).*/__ClearPage\1/'
+ '/^TASK_PFA_TEST([^,]*, *\([[:alnum:]_]*\))/task_\1/'
+ '/^TASK_PFA_SET([^,]*, *\([[:alnum:]_]*\))/task_set_\1/'
+ '/^TASK_PFA_CLEAR([^,]*, *\([[:alnum:]_]*\))/task_clear_\1/'
+ '/^DEF_MMIO_\(IN\|OUT\)_[XD](\([[:alnum:]_]*\),[^)]*)/\2/'
+ '/^DEBUGGER_BOILERPLATE(\([[:alnum:]_]*\))/\1/'
+ '/^DEF_PCI_AC_\(\|NO\)RET(\([[:alnum:]_]*\).*/\2/'
+ '/^PCI_OP_READ(\(\w*\).*[1-4])/pci_bus_read_config_\1/'
+ '/^PCI_OP_WRITE(\(\w*\).*[1-4])/pci_bus_write_config_\1/'
+ '/\<DEFINE_\(MUTEX\|SEMAPHORE\|SPINLOCK\)(\([[:alnum:]_]*\)/\2/v/'
+ '/\<DEFINE_\(RAW_SPINLOCK\|RWLOCK\|SEQLOCK\)(\([[:alnum:]_]*\)/\2/v/'
+ '/\<DECLARE_\(RWSEM\|COMPLETION\)(\([[:alnum:]_]\+\)/\2/v/'
+ '/\<DECLARE_BITMAP(\([[:alnum:]_]*\)/\1/v/'
+ '/\(^\|\s\)\(\|L\|H\)LIST_HEAD(\([[:alnum:]_]*\)/\3/v/'
+ '/\(^\|\s\)RADIX_TREE(\([[:alnum:]_]*\)/\2/v/'
+ '/\<DEFINE_PER_CPU([^,]*, *\([[:alnum:]_]*\)/\1/v/'
+ '/\<DEFINE_PER_CPU_SHARED_ALIGNED([^,]*, *\([[:alnum:]_]*\)/\1/v/'
+ '/\<DECLARE_WAIT_QUEUE_HEAD(\([[:alnum:]_]*\)/\1/v/'
+ '/\<DECLARE_\(TASKLET\|WORK\|DELAYED_WORK\)(\([[:alnum:]_]*\)/\2/v/'
+ '/\(^\s\)OFFSET(\([[:alnum:]_]*\)/\2/v/'
+ '/\(^\s\)DEFINE(\([[:alnum:]_]*\)/\2/v/'
+ '/\<DEFINE_HASHTABLE(\([[:alnum:]_]*\)/\1/v/'
+)
+regex_kconfig=(
+ '/^[[:blank:]]*\(menu\|\)config[[:blank:]]\+\([[:alnum:]_]\+\)/\2/'
+ '/^[[:blank:]]*\(menu\|\)config[[:blank:]]\+\([[:alnum:]_]\+\)/CONFIG_\2/'
+)
+setup_regex()
+{
+ local mode=$1 lang tmp=() r
+ shift
+
+ regex=()
+ for lang; do
+ case "$lang" in
+ asm) tmp=("${regex_asm[@]}") ;;
+ c) tmp=("${regex_c[@]}") ;;
+ kconfig) tmp=("${regex_kconfig[@]}") ;;
+ esac
+ for r in "${tmp[@]}"; do
+ if test "$mode" = "exuberant"; then
+ regex[${#regex[@]}]="--regex-$lang=${r}b"
+ else
+ # Remove ctags /kind-spec/
+ case "$r" in
+ /*/*/?/)
+ r=${r%?/}
+ esac
+ # Prepend ^[^#]* unless already anchored
+ case "$r" in
+ /^*) ;;
+ *)
+ r="/^[^#]*${r#/}"
+ esac
+ regex[${#regex[@]}]="--regex=$r"
+ fi
+ done
+ done
+}
+
exuberant()
{
+ setup_regex exuberant asm c
all_target_sources | xargs $1 -a \
-I __initdata,__exitdata,__initconst, \
-I __initdata_memblock \
@@ -166,118 +264,22 @@ exuberant()
-I EXPORT_SYMBOL,EXPORT_SYMBOL_GPL,ACPI_EXPORT_SYMBOL \
-I DEFINE_TRACE,EXPORT_TRACEPOINT_SYMBOL,EXPORT_TRACEPOINT_SYMBOL_GPL \
-I static,const \
- --extra=+f --c-kinds=+px \
- --regex-asm='/^(ENTRY|_GLOBAL)\(([^)]*)\).*/\2/' \
- --regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/' \
- --regex-c='/^COMPAT_SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/compat_sys_\1/' \
- --regex-c++='/^TRACE_EVENT\(([^,)]*).*/trace_\1/' \
- --regex-c++='/^TRACE_EVENT\(([^,)]*).*/trace_\1_rcuidle/' \
- --regex-c++='/^DEFINE_EVENT\([^,)]*, *([^,)]*).*/trace_\1/' \
- --regex-c++='/^DEFINE_EVENT\([^,)]*, *([^,)]*).*/trace_\1_rcuidle/' \
- --regex-c++='/PAGEFLAG\(([^,)]*).*/Page\1/' \
- --regex-c++='/PAGEFLAG\(([^,)]*).*/SetPage\1/' \
- --regex-c++='/PAGEFLAG\(([^,)]*).*/ClearPage\1/' \
- --regex-c++='/TESTSETFLAG\(([^,)]*).*/TestSetPage\1/' \
- --regex-c++='/TESTPAGEFLAG\(([^,)]*).*/Page\1/' \
- --regex-c++='/SETPAGEFLAG\(([^,)]*).*/SetPage\1/' \
- --regex-c++='/__SETPAGEFLAG\(([^,)]*).*/__SetPage\1/' \
- --regex-c++='/TESTCLEARFLAG\(([^,)]*).*/TestClearPage\1/' \
- --regex-c++='/__TESTCLEARFLAG\(([^,)]*).*/TestClearPage\1/' \
- --regex-c++='/CLEARPAGEFLAG\(([^,)]*).*/ClearPage\1/' \
- --regex-c++='/__CLEARPAGEFLAG\(([^,)]*).*/__ClearPage\1/' \
- --regex-c++='/__PAGEFLAG\(([^,)]*).*/__SetPage\1/' \
- --regex-c++='/__PAGEFLAG\(([^,)]*).*/__ClearPage\1/' \
- --regex-c++='/PAGEFLAG_FALSE\(([^,)]*).*/Page\1/' \
- --regex-c++='/TESTSCFLAG\(([^,)]*).*/TestSetPage\1/' \
- --regex-c++='/TESTSCFLAG\(([^,)]*).*/TestClearPage\1/' \
- --regex-c++='/SETPAGEFLAG_NOOP\(([^,)]*).*/SetPage\1/' \
- --regex-c++='/CLEARPAGEFLAG_NOOP\(([^,)]*).*/ClearPage\1/' \
- --regex-c++='/__CLEARPAGEFLAG_NOOP\(([^,)]*).*/__ClearPage\1/' \
- --regex-c++='/TESTCLEARFLAG_FALSE\(([^,)]*).*/TestClearPage\1/' \
- --regex-c++='/__TESTCLEARFLAG_FALSE\(([^,)]*).*/__TestClearPage\1/' \
- --regex-c++='/_PE\(([^,)]*).*/PEVENT_ERRNO__\1/' \
- --regex-c++='/TASK_PFA_TEST\([^,]*,\s*([^)]*)\)/task_\1/' \
- --regex-c++='/TASK_PFA_SET\([^,]*,\s*([^)]*)\)/task_set_\1/' \
- --regex-c++='/TASK_PFA_CLEAR\([^,]*,\s*([^)]*)\)/task_clear_\1/'\
- --regex-c++='/DEF_MMIO_(IN|OUT)_(X|D)\(([^,]*),\s*[^)]*\)/\3/' \
- --regex-c++='/DEBUGGER_BOILERPLATE\(([^,]*)\)/\1/' \
- --regex-c='/PCI_OP_READ\((\w*).*[1-4]\)/pci_bus_read_config_\1/' \
- --regex-c='/PCI_OP_WRITE\((\w*).*[1-4]\)/pci_bus_write_config_\1/' \
- --regex-c='/DEFINE_(MUTEX|SEMAPHORE|SPINLOCK)\((\w*)/\2/v/' \
- --regex-c='/DEFINE_(RAW_SPINLOCK|RWLOCK|SEQLOCK)\((\w*)/\2/v/' \
- --regex-c='/DECLARE_(RWSEM|COMPLETION)\((\w*)/\2/v/' \
- --regex-c='/DECLARE_BITMAP\((\w*)/\1/v/' \
- --regex-c='/(^|\s)(|L|H)LIST_HEAD\((\w*)/\3/v/' \
- --regex-c='/(^|\s)RADIX_TREE\((\w*)/\2/v/' \
- --regex-c='/DEFINE_PER_CPU\(([^,]*,\s*)(\w*).*\)/\2/v/' \
- --regex-c='/DEFINE_PER_CPU_SHARED_ALIGNED\(([^,]*,\s*)(\w*).*\)/\2/v/' \
- --regex-c='/DECLARE_WAIT_QUEUE_HEAD\((\w*)/\1/v/' \
- --regex-c='/DECLARE_(TASKLET|WORK|DELAYED_WORK)\((\w*)/\2/v/' \
- --regex-c='/DEFINE_PCI_DEVICE_TABLE\((\w*)/\1/v/' \
- --regex-c='/(^\s)OFFSET\((\w*)/\2/v/' \
- --regex-c='/(^\s)DEFINE\((\w*)/\2/v/' \
- --regex-c='/DEFINE_HASHTABLE\((\w*)/\1/v/'
+ --extra=+fq --c-kinds=+px --fields=+iaS --langmap=c:+.h \
+ "${regex[@]}"
+ setup_regex exuberant kconfig
all_kconfigs | xargs $1 -a \
- --langdef=kconfig --language-force=kconfig \
- --regex-kconfig='/^[[:blank:]]*(menu|)config[[:blank:]]+([[:alnum:]_]+)/\2/'
+ --langdef=kconfig --language-force=kconfig "${regex[@]}"
- all_kconfigs | xargs $1 -a \
- --langdef=kconfig --language-force=kconfig \
- --regex-kconfig='/^[[:blank:]]*(menu|)config[[:blank:]]+([[:alnum:]_]+)/CONFIG_\2/'
-
- all_defconfigs | xargs -r $1 -a \
- --langdef=dotconfig --language-force=dotconfig \
- --regex-dotconfig='/^#?[[:blank:]]*(CONFIG_[[:alnum:]_]+)/\1/'
}
emacs()
{
- all_target_sources | xargs $1 -a \
- --regex='/^\(ENTRY\|_GLOBAL\)(\([^)]*\)).*/\2/' \
- --regex='/^SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/sys_\1/' \
- --regex='/^COMPAT_SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/compat_sys_\1/' \
- --regex='/^TRACE_EVENT(\([^,)]*\).*/trace_\1/' \
- --regex='/^TRACE_EVENT(\([^,)]*\).*/trace_\1_rcuidle/' \
- --regex='/^DEFINE_EVENT([^,)]*, *\([^,)]*\).*/trace_\1/' \
- --regex='/^DEFINE_EVENT([^,)]*, *\([^,)]*\).*/trace_\1_rcuidle/' \
- --regex='/PAGEFLAG(\([^,)]*\).*/Page\1/' \
- --regex='/PAGEFLAG(\([^,)]*\).*/SetPage\1/' \
- --regex='/PAGEFLAG(\([^,)]*\).*/ClearPage\1/' \
- --regex='/TESTSETFLAG(\([^,)]*\).*/TestSetPage\1/' \
- --regex='/TESTPAGEFLAG(\([^,)]*\).*/Page\1/' \
- --regex='/SETPAGEFLAG(\([^,)]*\).*/SetPage\1/' \
- --regex='/__SETPAGEFLAG(\([^,)]*\).*/__SetPage\1/' \
- --regex='/TESTCLEARFLAG(\([^,)]*\).*/TestClearPage\1/' \
- --regex='/__TESTCLEARFLAG(\([^,)]*\).*/TestClearPage\1/' \
- --regex='/CLEARPAGEFLAG(\([^,)]*\).*/ClearPage\1/' \
- --regex='/__CLEARPAGEFLAG(\([^,)]*\).*/__ClearPage\1/' \
- --regex='/__PAGEFLAG(\([^,)]*\).*/__SetPage\1/' \
- --regex='/__PAGEFLAG(\([^,)]*\).*/__ClearPage\1/' \
- --regex='/PAGEFLAG_FALSE(\([^,)]*\).*/Page\1/' \
- --regex='/TESTSCFLAG(\([^,)]*\).*/TestSetPage\1/' \
- --regex='/TESTSCFLAG(\([^,)]*\).*/TestClearPage\1/' \
- --regex='/SETPAGEFLAG_NOOP(\([^,)]*\).*/SetPage\1/' \
- --regex='/CLEARPAGEFLAG_NOOP(\([^,)]*\).*/ClearPage\1/' \
- --regex='/__CLEARPAGEFLAG_NOOP(\([^,)]*\).*/__ClearPage\1/' \
- --regex='/TESTCLEARFLAG_FALSE(\([^,)]*\).*/TestClearPage\1/' \
- --regex='/__TESTCLEARFLAG_FALSE(\([^,)]*\).*/__TestClearPage\1/' \
- --regex='/TASK_PFA_TEST\([^,]*,\s*([^)]*)\)/task_\1/' \
- --regex='/TASK_PFA_SET\([^,]*,\s*([^)]*)\)/task_set_\1/' \
- --regex='/TASK_PFA_CLEAR\([^,]*,\s*([^)]*)\)/task_clear_\1/' \
- --regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/' \
- --regex='/PCI_OP_READ(\([a-z]*[a-z]\).*[1-4])/pci_bus_read_config_\1/' \
- --regex='/PCI_OP_WRITE(\([a-z]*[a-z]\).*[1-4])/pci_bus_write_config_\1/'\
- --regex='/[^#]*DEFINE_HASHTABLE(\([^,)]*\)/\1/'
-
- all_kconfigs | xargs $1 -a \
- --regex='/^[ \t]*\(\(menu\)*config\)[ \t]+\([a-zA-Z0-9_]+\)/\3/'
-
- all_kconfigs | xargs $1 -a \
- --regex='/^[ \t]*\(\(menu\)*config\)[ \t]+\([a-zA-Z0-9_]+\)/CONFIG_\3/'
+ setup_regex emacs asm c
+ all_target_sources | xargs $1 -a "${regex[@]}"
- all_defconfigs | xargs -r $1 -a \
- --regex='/^#?[ \t]?\(CONFIG_[a-zA-Z0-9_]+\)/\1/'
+ setup_regex emacs kconfig
+ all_kconfigs | xargs $1 -a "${regex[@]}"
}
xtags()
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index e60c79de13e1..52f3c550abcc 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -348,27 +348,26 @@ static struct avc_xperms_decision_node
struct avc_xperms_decision_node *xpd_node;
struct extended_perms_decision *xpd;
- xpd_node = kmem_cache_zalloc(avc_xperms_decision_cachep,
- GFP_ATOMIC | __GFP_NOMEMALLOC);
+ xpd_node = kmem_cache_zalloc(avc_xperms_decision_cachep, GFP_NOWAIT);
if (!xpd_node)
return NULL;
xpd = &xpd_node->xpd;
if (which & XPERMS_ALLOWED) {
xpd->allowed = kmem_cache_zalloc(avc_xperms_data_cachep,
- GFP_ATOMIC | __GFP_NOMEMALLOC);
+ GFP_NOWAIT);
if (!xpd->allowed)
goto error;
}
if (which & XPERMS_AUDITALLOW) {
xpd->auditallow = kmem_cache_zalloc(avc_xperms_data_cachep,
- GFP_ATOMIC | __GFP_NOMEMALLOC);
+ GFP_NOWAIT);
if (!xpd->auditallow)
goto error;
}
if (which & XPERMS_DONTAUDIT) {
xpd->dontaudit = kmem_cache_zalloc(avc_xperms_data_cachep,
- GFP_ATOMIC | __GFP_NOMEMALLOC);
+ GFP_NOWAIT);
if (!xpd->dontaudit)
goto error;
}
@@ -396,8 +395,7 @@ static struct avc_xperms_node *avc_xperms_alloc(void)
{
struct avc_xperms_node *xp_node;
- xp_node = kmem_cache_zalloc(avc_xperms_cachep,
- GFP_ATOMIC|__GFP_NOMEMALLOC);
+ xp_node = kmem_cache_zalloc(avc_xperms_cachep, GFP_NOWAIT);
if (!xp_node)
return xp_node;
INIT_LIST_HEAD(&xp_node->xpd_head);
@@ -550,7 +548,7 @@ static struct avc_node *avc_alloc_node(void)
{
struct avc_node *node;
- node = kmem_cache_zalloc(avc_node_cachep, GFP_ATOMIC|__GFP_NOMEMALLOC);
+ node = kmem_cache_zalloc(avc_node_cachep, GFP_NOWAIT);
if (!node)
goto out;
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index 4c145d6bccd4..5bc7ddf8fc70 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -648,27 +648,33 @@ EXPORT_SYMBOL(snd_interval_refine);
static int snd_interval_refine_first(struct snd_interval *i)
{
+ const unsigned int last_max = i->max;
+
if (snd_BUG_ON(snd_interval_empty(i)))
return -EINVAL;
if (snd_interval_single(i))
return 0;
i->max = i->min;
- i->openmax = i->openmin;
- if (i->openmax)
+ if (i->openmin)
i->max++;
+ /* only exclude max value if also excluded before refine */
+ i->openmax = (i->openmax && i->max >= last_max);
return 1;
}
static int snd_interval_refine_last(struct snd_interval *i)
{
+ const unsigned int last_min = i->min;
+
if (snd_BUG_ON(snd_interval_empty(i)))
return -EINVAL;
if (snd_interval_single(i))
return 0;
i->min = i->max;
- i->openmin = i->openmax;
- if (i->openmin)
+ if (i->openmax)
i->min--;
+ /* only exclude min value if also excluded before refine */
+ i->openmin = (i->openmin && i->min <= last_min);
return 1;
}
diff --git a/sound/firewire/bebob/bebob_maudio.c b/sound/firewire/bebob/bebob_maudio.c
index 07e5abdbceb5..0a576ccca3dc 100644
--- a/sound/firewire/bebob/bebob_maudio.c
+++ b/sound/firewire/bebob/bebob_maudio.c
@@ -96,17 +96,13 @@ int snd_bebob_maudio_load_firmware(struct fw_unit *unit)
struct fw_device *device = fw_parent_device(unit);
int err, rcode;
u64 date;
- __le32 cues[3] = {
- cpu_to_le32(MAUDIO_BOOTLOADER_CUE1),
- cpu_to_le32(MAUDIO_BOOTLOADER_CUE2),
- cpu_to_le32(MAUDIO_BOOTLOADER_CUE3)
- };
+ __le32 *cues;
/* check date of software used to build */
err = snd_bebob_read_block(unit, INFO_OFFSET_SW_DATE,
&date, sizeof(u64));
if (err < 0)
- goto end;
+ return err;
/*
* firmware version 5058 or later has date later than "20070401", but
* 'date' is not null-terminated.
@@ -114,20 +110,28 @@ int snd_bebob_maudio_load_firmware(struct fw_unit *unit)
if (date < 0x3230303730343031LL) {
dev_err(&unit->device,
"Use firmware version 5058 or later\n");
- err = -ENOSYS;
- goto end;
+ return -ENXIO;
}
+ cues = kmalloc_array(3, sizeof(*cues), GFP_KERNEL);
+ if (!cues)
+ return -ENOMEM;
+
+ cues[0] = cpu_to_le32(MAUDIO_BOOTLOADER_CUE1);
+ cues[1] = cpu_to_le32(MAUDIO_BOOTLOADER_CUE2);
+ cues[2] = cpu_to_le32(MAUDIO_BOOTLOADER_CUE3);
+
rcode = fw_run_transaction(device->card, TCODE_WRITE_BLOCK_REQUEST,
device->node_id, device->generation,
device->max_speed, BEBOB_ADDR_REG_REQ,
- cues, sizeof(cues));
+ cues, 3 * sizeof(*cues));
+ kfree(cues);
if (rcode != RCODE_COMPLETE) {
dev_err(&unit->device,
"Failed to send a cue to load firmware\n");
err = -EIO;
}
-end:
+
return err;
}
diff --git a/sound/isa/msnd/msnd_pinnacle.c b/sound/isa/msnd/msnd_pinnacle.c
index a31ea6c22d19..2d7379dec1f0 100644
--- a/sound/isa/msnd/msnd_pinnacle.c
+++ b/sound/isa/msnd/msnd_pinnacle.c
@@ -82,10 +82,10 @@
static void set_default_audio_parameters(struct snd_msnd *chip)
{
- chip->play_sample_size = DEFSAMPLESIZE;
+ chip->play_sample_size = snd_pcm_format_width(DEFSAMPLESIZE);
chip->play_sample_rate = DEFSAMPLERATE;
chip->play_channels = DEFCHANNELS;
- chip->capture_sample_size = DEFSAMPLESIZE;
+ chip->capture_sample_size = snd_pcm_format_width(DEFSAMPLESIZE);
chip->capture_sample_rate = DEFSAMPLERATE;
chip->capture_channels = DEFCHANNELS;
}
diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c
index 56fc47bd6dba..50b216fc369f 100644
--- a/sound/pci/emu10k1/emufx.c
+++ b/sound/pci/emu10k1/emufx.c
@@ -2520,7 +2520,7 @@ static int snd_emu10k1_fx8010_ioctl(struct snd_hwdep * hw, struct file *file, un
emu->support_tlv = 1;
return put_user(SNDRV_EMU10K1_VERSION, (int __user *)argp);
case SNDRV_EMU10K1_IOCTL_INFO:
- info = kmalloc(sizeof(*info), GFP_KERNEL);
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
snd_emu10k1_fx8010_info(emu, info);
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 3324f98c35f6..f6d4a1046e54 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -4019,7 +4019,8 @@ void snd_hda_bus_reset_codecs(struct hda_bus *bus)
list_for_each_codec(codec, bus) {
/* FIXME: maybe a better way needed for forced reset */
- cancel_delayed_work_sync(&codec->jackpoll_work);
+ if (current_work() != &codec->jackpoll_work.work)
+ cancel_delayed_work_sync(&codec->jackpoll_work);
#ifdef CONFIG_PM
if (hda_codec_is_power_on(codec)) {
hda_call_codec_suspend(codec);
diff --git a/sound/soc/codecs/cs4265.c b/sound/soc/codecs/cs4265.c
index 55db19ddc5ff..93b02be3a90e 100644
--- a/sound/soc/codecs/cs4265.c
+++ b/sound/soc/codecs/cs4265.c
@@ -157,8 +157,8 @@ static const struct snd_kcontrol_new cs4265_snd_controls[] = {
SOC_SINGLE("Validity Bit Control Switch", CS4265_SPDIF_CTL2,
3, 1, 0),
SOC_ENUM("SPDIF Mono/Stereo", spdif_mono_stereo_enum),
- SOC_SINGLE("MMTLR Data Switch", 0,
- 1, 1, 0),
+ SOC_SINGLE("MMTLR Data Switch", CS4265_SPDIF_CTL2,
+ 0, 1, 0),
SOC_ENUM("Mono Channel Select", spdif_mono_select_enum),
SND_SOC_BYTES("C Data Buffer", CS4265_C_DATA_BUFF, 24),
};
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index a18aecb49935..2b770d3f05d4 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -2431,6 +2431,7 @@ static int wm8994_set_dai_sysclk(struct snd_soc_dai *dai,
snd_soc_update_bits(codec, WM8994_POWER_MANAGEMENT_2,
WM8994_OPCLK_ENA, 0);
}
+ break;
default:
return -EINVAL;
diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index 69bf5cf1e91e..15cbe2565703 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -2875,7 +2875,8 @@ YAMAHA_DEVICE(0x7010, "UB99"),
*/
#define AU0828_DEVICE(vid, pid, vname, pname) { \
- USB_DEVICE_VENDOR_SPEC(vid, pid), \
+ .idVendor = vid, \
+ .idProduct = pid, \
.match_flags = USB_DEVICE_ID_MATCH_DEVICE | \
USB_DEVICE_ID_MATCH_INT_CLASS | \
USB_DEVICE_ID_MATCH_INT_SUBCLASS, \
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 60a94b3e532e..177480066816 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -286,7 +286,7 @@ static int kvp_key_delete(int pool, const __u8 *key, int key_size)
* Found a match; just move the remaining
* entries up.
*/
- if (i == num_records) {
+ if (i == (num_records - 1)) {
kvp_file_info[pool].num_records--;
kvp_update_file(pool);
return 0;
diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
index bd630c222e65..9a53f6e9ef43 100644
--- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c
+++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
@@ -58,9 +58,13 @@ static int check_return_reg(int ra_regno, Dwarf_Frame *frame)
}
/*
- * Check if return address is on the stack.
+ * Check if return address is on the stack. If return address
+ * is in a register (typically R0), it is yet to be saved on
+ * the stack.
*/
- if (nops != 0 || ops != NULL)
+ if ((nops != 0 || ops != NULL) &&
+ !(nops == 1 && ops[0].atom == DW_OP_regx &&
+ ops[0].number2 == 0 && ops[0].offset == 0))
return 0;
/*
@@ -246,7 +250,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
if (!chain || chain->nr < 3)
return skip_slot;
- ip = chain->ips[2];
+ ip = chain->ips[1];
thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
MAP__FUNCTION, ip, &al);
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 90129accffbe..4341ed267d4e 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -29,7 +29,9 @@ static inline unsigned long long rdclock(void)
return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}
+#ifndef MAX_NR_CPUS
#define MAX_NR_CPUS 1024
+#endif
extern const char *input_name;
extern bool perf_host, perf_guest;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 67551225764e..5053fac29f05 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -187,6 +187,9 @@ static int auxtrace_queues__grow(struct auxtrace_queues *queues,
for (i = 0; i < queues->nr_queues; i++) {
list_splice_tail(&queues->queue_array[i].head,
&queue_array[i].head);
+ queue_array[i].tid = queues->queue_array[i].tid;
+ queue_array[i].cpu = queues->queue_array[i].cpu;
+ queue_array[i].set = queues->queue_array[i].set;
queue_array[i].priv = queues->queue_array[i].priv;
}
diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c
index f45cee80c58b..af2b1e66e35e 100644
--- a/tools/testing/selftests/powerpc/harness.c
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -85,13 +85,13 @@ wait:
return status;
}
-static void alarm_handler(int signum)
+static void sig_handler(int signum)
{
- /* Jut wake us up from waitpid */
+ /* Just wake us up from waitpid */
}
-static struct sigaction alarm_action = {
- .sa_handler = alarm_handler,
+static struct sigaction sig_action = {
+ .sa_handler = sig_handler,
};
int test_harness(int (test_function)(void), char *name)
@@ -101,8 +101,14 @@ int test_harness(int (test_function)(void), char *name)
test_start(name);
test_set_git_version(GIT_VERSION);
- if (sigaction(SIGALRM, &alarm_action, NULL)) {
- perror("sigaction");
+ if (sigaction(SIGINT, &sig_action, NULL)) {
+ perror("sigaction (sigint)");
+ test_error(name);
+ return 1;
+ }
+
+ if (sigaction(SIGALRM, &sig_action, NULL)) {
+ perror("sigaction (sigalrm)");
test_error(name);
return 1;
}
diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c
index 30906bfd9c1b..0ab937a17ebb 100644
--- a/tools/testing/selftests/timers/raw_skew.c
+++ b/tools/testing/selftests/timers/raw_skew.c
@@ -146,6 +146,11 @@ int main(int argv, char **argc)
printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000)));
if (llabs(eppm - ppm) > 1000) {
+ if (tx1.offset || tx2.offset ||
+ tx1.freq != tx2.freq || tx1.tick != tx2.tick) {
+ printf(" [SKIP]\n");
+ return ksft_exit_skip("The clock was adjusted externally. Shutdown NTPd or other time sync daemons\n");
+ }
printf(" [FAILED]\n");
return ksft_exit_fail();
}
diff --git a/verity_dev_keys.x509 b/verity_dev_keys.x509
new file mode 100644
index 000000000000..86399c3c1dd7
--- /dev/null
+++ b/verity_dev_keys.x509
@@ -0,0 +1,24 @@
+-----BEGIN CERTIFICATE-----
+MIID/TCCAuWgAwIBAgIJAJcPmDkJqolJMA0GCSqGSIb3DQEBBQUAMIGUMQswCQYD
+VQQGEwJVUzETMBEGA1UECAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4g
+VmlldzEQMA4GA1UECgwHQW5kcm9pZDEQMA4GA1UECwwHQW5kcm9pZDEQMA4GA1UE
+AwwHQW5kcm9pZDEiMCAGCSqGSIb3DQEJARYTYW5kcm9pZEBhbmRyb2lkLmNvbTAe
+Fw0xNDExMDYxOTA3NDBaFw00MjAzMjQxOTA3NDBaMIGUMQswCQYDVQQGEwJVUzET
+MBEGA1UECAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzEQMA4G
+A1UECgwHQW5kcm9pZDEQMA4GA1UECwwHQW5kcm9pZDEQMA4GA1UEAwwHQW5kcm9p
+ZDEiMCAGCSqGSIb3DQEJARYTYW5kcm9pZEBhbmRyb2lkLmNvbTCCASIwDQYJKoZI
+hvcNAQEBBQADggEPADCCAQoCggEBAOjreE0vTVSRenuzO9vnaWfk0eQzYab0gqpi
+6xAzi6dmD+ugoEKJmbPiuE5Dwf21isZ9uhUUu0dQM46dK4ocKxMRrcnmGxydFn6o
+fs3ODJMXOkv2gKXL/FdbEPdDbxzdu8z3yk+W67udM/fW7WbaQ3DO0knu+izKak/3
+T41c5uoXmQ81UNtAzRGzGchNVXMmWuTGOkg6U+0I2Td7K8yvUMWhAWPPpKLtVH9r
+AL5TzjYNR92izdKcz3AjRsI3CTjtpiVABGeX0TcjRSuZB7K9EK56HV+OFNS6I1NP
+jdD7FIShyGlqqZdUOkAUZYanbpgeT5N7QL6uuqcGpoTOkalu6kkCAwEAAaNQME4w
+HQYDVR0OBBYEFH5DM/m7oArf4O3peeKO0ZIEkrQPMB8GA1UdIwQYMBaAFH5DM/m7
+oArf4O3peeKO0ZIEkrQPMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQADggEB
+AHO3NSvDE5jFvMehGGtS8BnFYdFKRIglDMc4niWSzhzOVYRH4WajxdtBWc5fx0ix
+NF/+hVKVhP6AIOQa+++sk+HIi7RvioPPbhjcsVlZe7cUEGrLSSveGouQyc+j0+m6
+JF84kszIl5GGNMTnx0XRPO+g8t6h5LWfnVydgZfpGRRg+WHewk1U2HlvTjIceb0N
+dcoJ8WKJAFWdcuE7VIm4w+vF/DYX/A2Oyzr2+QRhmYSv1cusgAeC1tvH4ap+J1Lg
+UnOu5Kh/FqPLLSwNVQp4Bu7b9QFfqK8Moj84bj88NqRGZgDyqzuTrFxn6FW7dmyA
+yttuAJAEAymk1mipd9+zp38=
+-----END CERTIFICATE-----