diff options
author | Tao Huang <huangtao@rock-chips.com> | 2017-09-01 18:56:00 +0800 |
---|---|---|
committer | Tao Huang <huangtao@rock-chips.com> | 2017-09-01 18:56:00 +0800 |
commit | dc6b994588a21713677efa88b6c773670d8f7e2f (patch) | |
tree | bf09f3b5b7c2fc0970d4a131508d1f92731ee9a8 | |
parent | 46f287220865232470030aaf447c8eea0a76896c (diff) | |
parent | 0de48f949a2d3ed1e4b745e9dd612f072aaee77e (diff) |
Merge tag 'lsk-v4.4-17.08-android' of git://git.linaro.org/kernel/linux-linaro-stable.git
LSK 17.08 v4.4-android
* tag 'lsk-v4.4-17.08-android': (451 commits)
Linux 4.4.83
pinctrl: samsung: Remove bogus irq_[un]mask from resource management
pinctrl: sunxi: add a missing function of A10/A20 pinctrl driver
pnfs/blocklayout: require 64-bit sector_t
iio: adc: vf610_adc: Fix VALT selection value for REFSEL bits
usb:xhci:Add quirk for Certain failing HP keyboard on reset after resume
usb: quirks: Add no-lpm quirk for Moshi USB to Ethernet Adapter
usb: core: unlink urbs from the tail of the endpoint's urb_list
USB: Check for dropped connection before switching to full speed
uag: Add US_FL_IGNORE_RESIDUE for Initio Corporation INIC-3069
iio: light: tsl2563: use correct event code
iio: accel: bmc150: Always restore device to normal mode after suspend-resume
staging:iio:resolver:ad2s1210 fix negative IIO_ANGL_VEL read
USB: hcd: Mark secondary HCD as dead if the primary one died
usb: musb: fix tx fifo flush handling again
USB: serial: pl2303: add new ATEN device id
USB: serial: cp210x: add support for Qivicon USB ZigBee dongle
USB: serial: option: add D-Link DWM-222 device ID
nfs/flexfiles: fix leak of nfs4_ff_ds_version arrays
fuse: initialize the flock flag in fuse_file on allocation
...
431 files changed, 8013 insertions, 3524 deletions
@@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 77 +SUBLEVEL = 83 EXTRAVERSION = NAME = Blurry Fish Butt @@ -636,6 +636,9 @@ include arch/$(SRCARCH)/Makefile KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,) KBUILD_CFLAGS += $(call cc-disable-warning,maybe-uninitialized,) KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) +KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation) +KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) +KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context) ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index d675e712fe8c..48b2cdbe8d49 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -17,7 +17,6 @@ CONFIG_AUDIT=y CONFIG_BLK_DEV_INITRD=y CONFIG_CGROUPS=y CONFIG_CGROUP_CPUACCT=y -CONFIG_CGROUP_DEBUG=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_SCHED=y CONFIG_DEFAULT_SECURITY_SELINUX=y @@ -140,11 +139,6 @@ CONFIG_PPP_DEFLATE=y CONFIG_PPP_MPPE=y CONFIG_PREEMPT=y CONFIG_PROFILING=y -CONFIG_QFMT_V2=y -CONFIG_QUOTA=y -CONFIG_QUOTACTL=y -CONFIG_QUOTA_NETLINK_INTERFACE=y -CONFIG_QUOTA_TREE=y CONFIG_RANDOMIZE_BASE=y CONFIG_RTC_CLASS=y CONFIG_RT_GROUP_SCHED=y diff --git a/android/configs/android-recommended.cfg b/android/configs/android-recommended.cfg index eecf8d80453a..3d7e5e168940 100644 --- a/android/configs/android-recommended.cfg +++ b/android/configs/android-recommended.cfg @@ -109,6 +109,11 @@ CONFIG_POWER_SUPPLY=y CONFIG_PSTORE=y CONFIG_PSTORE_CONSOLE=y CONFIG_PSTORE_RAM=y +CONFIG_QFMT_V2=y +CONFIG_QUOTA=y +CONFIG_QUOTACTL=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +CONFIG_QUOTA_TREE=y CONFIG_SCHEDSTATS=y CONFIG_SMARTJOYPLUS_FF=y CONFIG_SND=y diff --git a/arch/Kconfig b/arch/Kconfig index 98f64ad1caf1..ed2539c590bf 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -225,8 +225,8 @@ config ARCH_INIT_TASK config ARCH_TASK_STRUCT_ALLOCATOR bool -# Select if arch has its private alloc_thread_info() function -config ARCH_THREAD_INFO_ALLOCATOR +# Select if arch has its private alloc_thread_stack() function +config ARCH_THREAD_STACK_ALLOCATOR bool # Select if arch wants to size task_struct dynamically via arch_task_struct_size: diff --git a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c index 2fd00b7077e4..936bc8f89a67 100644 --- a/arch/alpha/kernel/module.c +++ b/arch/alpha/kernel/module.c @@ -160,7 +160,7 @@ apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, /* The small sections were sorted to the end of the segment. The following should definitely cover them. */ - gp = (u64)me->module_core + me->core_size - 0x8000; + gp = (u64)me->core_layout.base + me->core_layout.size - 0x8000; got = sechdrs[me->arch.gotsecindex].sh_addr; for (i = 0; i < n; i++) { diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 5eb707640e9c..0587bf121d11 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -385,8 +385,8 @@ void *unwind_add_table(struct module *module, const void *table_start, return NULL; init_unwind_table(table, module->name, - module->module_core, module->core_size, - module->module_init, module->init_size, + module->core_layout.base, module->core_layout.size, + module->init_layout.base, module->init_layout.size, table_start, table_size, NULL, 0); diff --git a/arch/arm/boot/dts/armada-388-gp.dts b/arch/arm/boot/dts/armada-388-gp.dts index cd316021d6ce..6c1b45c1af66 100644 --- a/arch/arm/boot/dts/armada-388-gp.dts +++ b/arch/arm/boot/dts/armada-388-gp.dts @@ -89,7 +89,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pca0_pins>; interrupt-parent = <&gpio0>; - interrupts = <18 IRQ_TYPE_EDGE_FALLING>; + interrupts = <18 IRQ_TYPE_LEVEL_LOW>; gpio-controller; #gpio-cells = <2>; interrupt-controller; @@ -101,7 +101,7 @@ compatible = "nxp,pca9555"; pinctrl-names = "default"; interrupt-parent = <&gpio0>; - interrupts = <18 IRQ_TYPE_EDGE_FALLING>; + interrupts = <18 IRQ_TYPE_LEVEL_LOW>; gpio-controller; #gpio-cells = <2>; interrupt-controller; diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 5f5e0f3d5b64..27cd4abfc74d 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -697,6 +697,8 @@ vmmc_aux-supply = <&vsim>; bus-width = <8>; non-removable; + no-sdio; + no-sd; }; &mmc3 { diff --git a/arch/arm/configs/s3c2410_defconfig b/arch/arm/configs/s3c2410_defconfig index f3142369f594..01116ee1284b 100644 --- a/arch/arm/configs/s3c2410_defconfig +++ b/arch/arm/configs/s3c2410_defconfig @@ -87,9 +87,9 @@ CONFIG_IPV6_TUNNEL=m CONFIG_NETFILTER=y CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CT_PROTO_DCCP=m -CONFIG_NF_CT_PROTO_SCTP=m -CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_H323=m diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index d2315ffd8f12..f13ae153fb24 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -112,12 +112,8 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs); #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE 4096 -/* This is the location that an ET_DYN program is loaded if exec'ed. Typical - use of this is to invoke "./ld.so someprog" to test out a new version of - the loader. We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. */ - -#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) +/* This is the base location for PIE (ET_DYN with INTERP) loads. */ +#define ELF_ET_DYN_BASE 0x400000UL /* When the program starts, a1 contains a pointer to a function to be registered with atexit, as per the SVR4 ABI. A value of 0 means we diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index bfe2a2f5a644..22b73112b75f 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -54,6 +54,24 @@ static inline void *return_address(unsigned int level) #define ftrace_return_address(n) return_address(n) +#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME + +static inline bool arch_syscall_match_sym_name(const char *sym, + const char *name) +{ + if (!strcmp(sym, "sys_mmap2")) + sym = "sys_mmap_pgoff"; + else if (!strcmp(sym, "sys_statfs64_wrapper")) + sym = "sys_statfs64"; + else if (!strcmp(sym, "sys_fstatfs64_wrapper")) + sym = "sys_fstatfs64"; + else if (!strcmp(sym, "sys_arm_fadvise64_64")) + sym = "sys_fadvise64_64"; + + /* Ignore case since sym may start with "SyS" instead of "sys" */ + return !strcasecmp(sym, name); +} + #endif /* ifndef __ASSEMBLY__ */ #endif /* _ASM_ARM_FTRACE */ diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c index 097e2e201b9f..0c7efc3446c0 100644 --- a/arch/arm/kernel/module-plts.c +++ b/arch/arm/kernel/module-plts.c @@ -32,7 +32,7 @@ struct plt_entries { static bool in_init(const struct module *mod, u32 addr) { - return addr - (u32)mod->module_init < mod->init_size; + return addr - (u32)mod->init_layout.base < mod->init_layout.size; } u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index e4a774f7aba1..360cea172b06 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1636,12 +1636,16 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { + if (!kvm->arch.pgd) + return 0; trace_kvm_age_hva(start, end); return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) { + if (!kvm->arch.pgd) + return 0; trace_kvm_test_age_hva(hva); return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 049335584e0c..190471794853 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -102,6 +102,7 @@ config ARM64 select SYSCTL_EXCEPTION_TRACE select HAVE_CONTEXT_TRACKING select HAVE_ARM_SMCCC + select THREAD_INFO_IN_TASK help ARM 64-bit (AArch64) Linux support. diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts b/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts index ce5d848251fa..7b34822d61e9 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts +++ b/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts @@ -26,7 +26,7 @@ stdout-path = "serial0:115200n8"; }; - memory { + memory@0 { device_type = "memory"; reg = <0x0 0x0 0x40000000>; }; diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi index 857eda5c7217..172402cc1a0f 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi +++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi @@ -71,7 +71,7 @@ <1 10 0xf01>; }; - amba_apu { + amba_apu: amba_apu@0 { compatible = "simple-bus"; #address-cells = <2>; #size-cells = <1>; @@ -191,7 +191,7 @@ }; i2c0: i2c@ff020000 { - compatible = "cdns,i2c-r1p10"; + compatible = "cdns,i2c-r1p14", "cdns,i2c-r1p10"; status = "disabled"; interrupt-parent = <&gic>; interrupts = <0 17 4>; @@ -202,7 +202,7 @@ }; i2c1: i2c@ff030000 { - compatible = "cdns,i2c-r1p10"; + compatible = "cdns,i2c-r1p14", "cdns,i2c-r1p10"; status = "disabled"; interrupt-parent = <&gic>; interrupts = <0 18 4>; diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index d8855ca6068a..e450bb6d21bd 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -223,14 +223,25 @@ lr .req x30 // link register .endm /* + * @dst: Result of per_cpu(sym, smp_processor_id()) * @sym: The name of the per-cpu variable - * @reg: Result of per_cpu(sym, smp_processor_id()) * @tmp: scratch register */ - .macro this_cpu_ptr, sym, reg, tmp - adr_l \reg, \sym + .macro adr_this_cpu, dst, sym, tmp + adr_l \dst, \sym mrs \tmp, tpidr_el1 - add \reg, \reg, \tmp + add \dst, \dst, \tmp + .endm + + /* + * @dst: Result of READ_ONCE(per_cpu(sym, smp_processor_id())) + * @sym: The name of the per-cpu variable + * @tmp: scratch register + */ + .macro ldr_this_cpu dst, sym, tmp + adr_l \dst, \sym + mrs \tmp, tpidr_el1 + ldr \dst, [\dst, \tmp] .endm /* diff --git a/arch/arm64/include/asm/current.h b/arch/arm64/include/asm/current.h new file mode 100644 index 000000000000..483a6c9d3e10 --- /dev/null +++ b/arch/arm64/include/asm/current.h @@ -0,0 +1,35 @@ +#ifndef __ASM_CURRENT_H +#define __ASM_CURRENT_H + +#include <linux/compiler.h> + +#include <asm/sysreg.h> + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_THREAD_INFO_IN_TASK +struct task_struct; + +/* + * We don't use read_sysreg() as we want the compiler to cache the value where + * possible. + */ +static __always_inline struct task_struct *get_current(void) +{ + unsigned long sp_el0; + + asm ("mrs %0, sp_el0" : "=r" (sp_el0)); + + return (struct task_struct *)sp_el0; +} +#define current get_current() +#else +#include <linux/thread_info.h> +#define get_current() (current_thread_info()->task) +#define current get_current() +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_CURRENT_H */ + diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 7875c886ad24..8fbc5c7faf70 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -113,12 +113,11 @@ #define ELF_EXEC_PAGESIZE PAGE_SIZE /* - * This is the location that an ET_DYN program is loaded if exec'ed. Typical - * use of this is to invoke "./ld.so someprog" to test out a new version of - * the loader. We need to make sure that it is out of the way of the program - * that it will "exec", and that there is sufficient room for the brk. + * This is the base location for PIE (ET_DYN with INTERP) loads. On + * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * space open for things that want to use the area for 32-bit pointers. */ -#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3) +#define ELF_ET_DYN_BASE 0x100000000UL #ifndef __ASSEMBLY__ @@ -169,7 +168,8 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #ifdef CONFIG_COMPAT -#define COMPAT_ELF_ET_DYN_BASE (2 * TASK_SIZE_32 / 3) +/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */ +#define COMPAT_ELF_ET_DYN_BASE 0x000400000UL /* AArch32 registers. */ #define COMPAT_ELF_NGREG 18 diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 06ff7fd9e81f..b6c6fa29fe56 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -22,14 +22,19 @@ #define MODULE_ARCH_VERMAGIC "aarch64" #ifdef CONFIG_ARM64_MODULE_PLTS -struct mod_arch_specific { +struct mod_plt_sec { struct elf64_shdr *plt; int plt_num_entries; int plt_max_entries; }; + +struct mod_arch_specific { + struct mod_plt_sec core; + struct mod_plt_sec init; +}; #endif -u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, +u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, Elf64_Sym *sym); #ifdef CONFIG_RANDOMIZE_BASE diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 8a336852eeba..2ce1a0262a59 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -16,6 +16,8 @@ #ifndef __ASM_PERCPU_H #define __ASM_PERCPU_H +#include <asm/stack_pointer.h> + static inline void set_my_cpu_offset(unsigned long off) { asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory"); diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 7bd3cdb533ea..91b6be092ce2 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -17,6 +17,8 @@ #ifndef __ASM_PERF_EVENT_H #define __ASM_PERF_EVENT_H +#include <asm/stack_pointer.h> + #ifdef CONFIG_PERF_EVENTS struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 1d3ff7e4a6c2..d909746cd421 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -29,11 +29,22 @@ #ifndef __ASSEMBLY__ +#include <asm/percpu.h> + #include <linux/threads.h> #include <linux/cpumask.h> #include <linux/thread_info.h> -#define raw_smp_processor_id() (current_thread_info()->cpu) +DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); + +/* + * We don't use this_cpu_read(cpu_number) as that has implicit writes to + * preempt_count, and associated (compiler) barriers, that we'd like to avoid + * the expense of. If we're preemptible, the value can be stale at use anyway. + * And we can't use this_cpu_ptr() either, as that winds up recursing back + * here under CONFIG_DEBUG_PREEMPT=y. + */ +#define raw_smp_processor_id() (*raw_cpu_ptr(&cpu_number)) struct seq_file; @@ -73,6 +84,9 @@ asmlinkage void secondary_start_kernel(void); */ struct secondary_data { void *stack; +#ifdef CONFIG_THREAD_INFO_IN_TASK + struct task_struct *task; +#endif long status; }; diff --git a/arch/arm64/include/asm/stack_pointer.h b/arch/arm64/include/asm/stack_pointer.h new file mode 100644 index 000000000000..ffcdf742cddf --- /dev/null +++ b/arch/arm64/include/asm/stack_pointer.h @@ -0,0 +1,9 @@ +#ifndef __ASM_STACK_POINTER_H +#define __ASM_STACK_POINTER_H + +/* + * how to get the current stack pointer from C + */ +register unsigned long current_stack_pointer asm ("sp"); + +#endif /* __ASM_STACK_POINTER_H */ diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index 024d623f662e..92d6a628e478 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -1,7 +1,7 @@ #ifndef __ASM_SUSPEND_H #define __ASM_SUSPEND_H -#define NR_CTX_REGS 10 +#define NR_CTX_REGS 12 #define NR_CALLEE_SAVED_REGS 12 /* diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 794d22603f04..67dd228c3f17 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -36,25 +36,36 @@ struct task_struct; +#include <asm/stack_pointer.h> #include <asm/types.h> typedef unsigned long mm_segment_t; /* * low level task data that entry.S needs immediate access to. - * __switch_to() assumes cpu_context follows immediately after cpu_domain. */ struct thread_info { unsigned long flags; /* low level flags */ mm_segment_t addr_limit; /* address limit */ +#ifndef CONFIG_THREAD_INFO_IN_TASK struct task_struct *task; /* main task structure */ +#endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN u64 ttbr0; /* saved TTBR0_EL1 */ #endif int preempt_count; /* 0 => preemptable, <0 => bug */ +#ifndef CONFIG_THREAD_INFO_IN_TASK int cpu; /* cpu */ +#endif }; +#ifdef CONFIG_THREAD_INFO_IN_TASK +#define INIT_THREAD_INFO(tsk) \ +{ \ + .preempt_count = INIT_PREEMPT_COUNT, \ + .addr_limit = KERNEL_DS, \ +} +#else #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ @@ -63,14 +74,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - -/* - * how to get the current stack pointer from C - */ -register unsigned long current_stack_pointer asm ("sp"); - /* * how to get the thread information struct from C */ @@ -88,6 +91,11 @@ static inline struct thread_info *current_thread_info(void) return (struct thread_info *)sp_el0; } +#define init_thread_info (init_thread_union.thread_info) +#endif + +#define init_stack (init_thread_union.stack) + #define thread_saved_pc(tsk) \ ((unsigned long)(tsk->thread.cpu_context.pc)) #define thread_saved_sp(tsk) \ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 7a3f34b15fb0..67ebe708e30c 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -35,11 +35,16 @@ int main(void) { DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); BLANK(); +#ifdef CONFIG_THREAD_INFO_IN_TASK + DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); + DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); + DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit)); + DEFINE(TSK_STACK, offsetof(struct task_struct, stack)); +#else DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); - DEFINE(TI_TASK, offsetof(struct thread_info, task)); - DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); +#endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN DEFINE(TSK_TI_TTBR0, offsetof(struct thread_info, ttbr0)); #endif @@ -126,6 +131,11 @@ int main(void) BLANK(); DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); BLANK(); +#ifdef CONFIG_THREAD_INFO_IN_TASK + DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); + DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); + BLANK(); +#endif #ifdef CONFIG_KVM_ARM_HOST DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 5472cedfe750..b5119f7f5cd6 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -93,9 +93,14 @@ .if \el == 0 mrs x21, sp_el0 +#ifdef CONFIG_THREAD_INFO_IN_TASK + ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear, + ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug +#else mov tsk, sp and tsk, tsk, #~(THREAD_SIZE - 1) // Ensure MDSCR_EL1.SS is clear, ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug +#endif disable_step_tsk x19, x20 // exceptions when scheduling. mov x29, xzr // fp pointed to user-space @@ -103,10 +108,18 @@ add x21, sp, #S_FRAME_SIZE get_thread_info tsk /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */ +#ifdef CONFIG_THREAD_INFO_IN_TASK + ldr x20, [tsk, #TSK_TI_ADDR_LIMIT] +#else ldr x20, [tsk, #TI_ADDR_LIMIT] +#endif str x20, [sp, #S_ORIG_ADDR_LIMIT] mov x20, #TASK_SIZE_64 +#ifdef CONFIG_THREAD_INFO_IN_TASK + str x20, [tsk, #TSK_TI_ADDR_LIMIT] +#else str x20, [tsk, #TI_ADDR_LIMIT] +#endif .endif /* \el == 0 */ mrs x22, elr_el1 mrs x23, spsr_el1 @@ -167,7 +180,11 @@ alternative_else_nop_endif .if \el != 0 /* Restore the task's original addr_limit. */ ldr x20, [sp, #S_ORIG_ADDR_LIMIT] +#ifdef CONFIG_THREAD_INFO_IN_TASK + str x20, [tsk, #TSK_TI_ADDR_LIMIT] +#else str x20, [tsk, #TI_ADDR_LIMIT] +#endif .endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR @@ -255,15 +272,22 @@ alternative_endif mov x19, sp // preserve the original sp /* - * Compare sp with the current thread_info, if the top - * ~(THREAD_SIZE - 1) bits match, we are on a task stack, and - * should switch to the irq stack. + * Compare sp with the base of the task stack. + * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack, + * and should switch to the irq stack. */ +#ifdef CONFIG_THREAD_INFO_IN_TASK + ldr x25, [tsk, TSK_STACK] + eor x25, x25, x19 + and x25, x25, #~(THREAD_SIZE - 1) + cbnz x25, 9998f +#else and x25, x19, #~(THREAD_SIZE - 1) cmp x25, tsk b.ne 9998f +#endif - this_cpu_ptr irq_stack, x25, x26 + adr_this_cpu x25, irq_stack, x26 mov x26, #IRQ_STACK_START_SP add x26, x25, x26 @@ -492,9 +516,17 @@ el1_irq: irq_handler #ifdef CONFIG_PREEMPT +#ifdef CONFIG_THREAD_INFO_IN_TASK + ldr w24, [tsk, #TSK_TI_PREEMPT] // get preempt count +#else ldr w24, [tsk, #TI_PREEMPT] // get preempt count +#endif cbnz w24, 1f // preempt count != 0 +#ifdef CONFIG_THREAD_INFO_IN_TASK + ldr x0, [tsk, #TSK_TI_FLAGS] // get flags +#else ldr x0, [tsk, #TI_FLAGS] // get flags +#endif tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? bl el1_preempt 1: @@ -509,7 +541,11 @@ ENDPROC(el1_irq) el1_preempt: mov x24, lr 1: bl preempt_schedule_irq // irq en/disable is done inside +#ifdef CONFIG_THREAD_INFO_IN_TASK + ldr x0, [tsk, #TSK_TI_FLAGS] // get new tasks TI_FLAGS +#else ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS +#endif tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? ret x24 #endif @@ -729,8 +765,12 @@ ENTRY(cpu_switch_to) ldp x29, x9, [x8], #16 ldr lr, [x8] mov sp, x9 +#ifdef CONFIG_THREAD_INFO_IN_TASK + msr sp_el0, x1 +#else and x9, x9, #~(THREAD_SIZE - 1) msr sp_el0, x9 +#endif ret ENDPROC(cpu_switch_to) @@ -741,7 +781,11 @@ ENDPROC(cpu_switch_to) ret_fast_syscall: disable_irq // disable interrupts str x0, [sp, #S_X0] // returned x0 +#ifdef CONFIG_THREAD_INFO_IN_TASK + ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing +#else ldr x1, [tsk, #TI_FLAGS] // re-check for syscall tracing +#endif and x2, x1, #_TIF_SYSCALL_WORK cbnz x2, ret_fast_syscall_trace and x2, x1, #_TIF_WORK_MASK @@ -773,7 +817,11 @@ work_resched: */ ret_to_user: disable_irq // disable interrupts +#ifdef CONFIG_THREAD_INFO_IN_TASK + ldr x1, [tsk, #TSK_TI_FLAGS] +#else ldr x1, [tsk, #TI_FLAGS] +#endif and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending enable_step_tsk x1, x2 @@ -805,7 +853,11 @@ el0_svc_naked: // compat entry point enable_dbg_and_irq ct_user_exit 1 +#ifdef CONFIG_THREAD_INFO_IN_TASK + ldr x16, [tsk, #TSK_TI_FLAGS] // check for syscall hooks +#else ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks +#endif tst x16, #_TIF_SYSCALL_WORK b.ne __sys_trace cmp scno, sc_nr // check upper syscall limit diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index bf6332d4b6d3..06597fde57ba 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -419,6 +419,7 @@ ENDPROC(__create_page_tables) .set initial_sp, init_thread_union + THREAD_START_SP __primary_switched: mov x28, lr // preserve LR + adr_l x8, vectors // load VBAR_EL1 with virtual msr vbar_el1, x8 // vector table address isb @@ -431,10 +432,18 @@ __primary_switched: bl __pi_memset dsb ishst // Make zero page visible to PTW +#ifdef CONFIG_THREAD_INFO_IN_TASK + adrp x4, init_thread_union + add sp, x4, #THREAD_SIZE + adr_l x5, init_task + msr sp_el0, x5 // Save thread_info +#else adr_l sp, initial_sp, x4 mov x4, sp and x4, x4, #~(THREAD_SIZE - 1) msr sp_el0, x4 // Save thread_info +#endif + str_l x21, __fdt_pointer, x5 // Save FDT pointer ldr_l x4, kimage_vaddr // Save the offset between @@ -649,12 +658,19 @@ __secondary_switched: adr_l x5, vectors msr vbar_el1, x5 isb - +#ifdef CONFIG_THREAD_INFO_IN_TASK + adr_l x0, secondary_data + ldr x1, [x0, #CPU_BOOT_STACK] // get secondary_data.stack + mov sp, x1 + ldr x2, [x0, #CPU_BOOT_TASK] + msr sp_el0, x2 +#else adr_l x0, secondary_data ldr x0, [x0, #CPU_BOOT_STACK] // get secondary_data.stack mov sp, x0 and x0, x0, #~(THREAD_SIZE - 1) msr sp_el0, x0 // save thread_info +#endif mov x29, #0 b secondary_start_kernel ENDPROC(__secondary_switched) diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index 1ce90d8450ae..d05dbe658409 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2016 Linaro Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2014-2017 Linaro Ltd. <ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -26,35 +26,21 @@ struct plt_entry { __le32 br; /* br x16 */ }; -u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, +static bool in_init(const struct module *mod, void *loc) +{ + return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size; +} + +u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, Elf64_Sym *sym) { - struct plt_entry *plt = (struct plt_entry *)mod->arch.plt->sh_addr; - int i = mod->arch.plt_num_entries; + struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core : + &mod->arch.init; + struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr; + int i = pltsec->plt_num_entries; u64 val = sym->st_value + rela->r_addend; /* - * We only emit PLT entries against undefined (SHN_UNDEF) symbols, - * which are listed in the ELF symtab section, but without a type - * or a size. - * So, similar to how the module loader uses the Elf64_Sym::st_value - * field to store the resolved addresses of undefined symbols, let's - * borrow the Elf64_Sym::st_size field (whose value is never used by - * the module loader, even for symbols that are defined) to record - * the address of a symbol's associated PLT entry as we emit it for a - * zero addend relocation (which is the only kind we have to deal with - * in practice). This allows us to find duplicates without having to - * go through the table every time. - */ - if (rela->r_addend == 0 && sym->st_size != 0) { - BUG_ON(sym->st_size < (u64)plt || sym->st_size >= (u64)&plt[i]); - return sym->st_size; - } - - mod->arch.plt_num_entries++; - BUG_ON(mod->arch.plt_num_entries > mod->arch.plt_max_entries); - - /* * MOVK/MOVN/MOVZ opcode: * +--------+------------+--------+-----------+-------------+---------+ * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | @@ -72,8 +58,19 @@ u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, cpu_to_le32(0xd61f0200) }; - if (rela->r_addend == 0) - sym->st_size = (u64)&plt[i]; + /* + * Check if the entry we just created is a duplicate. Given that the + * relocations are sorted, this will be the last entry we allocated. + * (if one exists). + */ + if (i > 0 && + plt[i].mov0 == plt[i - 1].mov0 && + plt[i].mov1 == plt[i - 1].mov1 && + plt[i].mov2 == plt[i - 1].mov2) + return (u64)&plt[i - 1]; + + pltsec->plt_num_entries++; + BUG_ON(pltsec->plt_num_entries > pltsec->plt_max_entries); return (u64)&plt[i]; } @@ -104,7 +101,8 @@ static bool duplicate_rel(const Elf64_Rela *rela, int num) return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0; } -static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num) +static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num, + Elf64_Word dstidx) { unsigned int ret = 0; Elf64_Sym *s; @@ -116,13 +114,17 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num) case R_AARCH64_CALL26: /* * We only have to consider branch targets that resolve - * to undefined symbols. This is not simply a heuristic, - * it is a fundamental limitation, since the PLT itself - * is part of the module, and needs to be within 128 MB - * as well, so modules can never grow beyond that limit. + * to symbols that are defined in a different section. + * This is not simply a heuristic, it is a fundamental + * limitation, since there is no guaranteed way to emit + * PLT entries sufficiently close to the branch if the + * section size exceeds the range of a branch + * instruction. So ignore relocations against defined + * symbols if they live in the same section as the + * relocation target. */ s = syms + ELF64_R_SYM(rela[i].r_info); - if (s->st_shndx != SHN_UNDEF) + if (s->st_shndx == dstidx) break; /* @@ -149,7 +151,8 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num) int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { - unsigned long plt_max_entries = 0; + unsigned long core_plts = 0; + unsigned long init_plts = 0; Elf64_Sym *syms = NULL; int i; @@ -158,14 +161,16 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, * entries. Record the symtab address as well. */ for (i = 0; i < ehdr->e_shnum; i++) { - if (strcmp(".plt", secstrings + sechdrs[i].sh_name) == 0) - mod->arch.plt = sechdrs + i; + if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt")) + mod->arch.core.plt = sechdrs + i; + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt")) + mod->arch.init.plt = sechdrs + i; else if (sechdrs[i].sh_type == SHT_SYMTAB) syms = (Elf64_Sym *)sechdrs[i].sh_addr; } - if (!mod->arch.plt) { - pr_err("%s: module PLT section missing\n", mod->name); + if (!mod->arch.core.plt || !mod->arch.init.plt) { + pr_err("%s: module PLT section(s) missing\n", mod->name); return -ENOEXEC; } if (!syms) { @@ -188,14 +193,27 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, /* sort by type, symbol index and addend */ sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL); - plt_max_entries += count_plts(syms, rels, numrels); + if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0) + core_plts += count_plts(syms, rels, numrels, + sechdrs[i].sh_info); + else + init_plts += count_plts(syms, rels, numrels, + sechdrs[i].sh_info); } - mod->arch.plt->sh_type = SHT_NOBITS; - mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; - mod->arch.plt->sh_addralign = L1_CACHE_BYTES; - mod->arch.plt->sh_size = plt_max_entries * sizeof(struct plt_entry); - mod->arch.plt_num_entries = 0; - mod->arch.plt_max_entries = plt_max_entries; + mod->arch.core.plt->sh_type = SHT_NOBITS; + mod->arch.core.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.core.plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.core.plt->sh_size = (core_plts + 1) * sizeof(struct plt_entry); + mod->arch.core.plt_num_entries = 0; + mod->arch.core.plt_max_entries = core_plts; + + mod->arch.init.plt->sh_type = SHT_NOBITS; + mod->arch.init.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.init.plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.init.plt->sh_size = (init_plts + 1) * sizeof(struct plt_entry); + mod->arch.init.plt_num_entries = 0; + mod->arch.init.plt_max_entries = init_plts; + return 0; } diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 7f316982ce00..c9a2ab446dc6 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -380,7 +380,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && ovf == -ERANGE) { - val = module_emit_plt_entry(me, &rel[i], sym); + val = module_emit_plt_entry(me, loc, &rel[i], sym); ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, AARCH64_INSN_IMM_26); } diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds index 8949f6c6f729..f7c9781a9d48 100644 --- a/arch/arm64/kernel/module.lds +++ b/arch/arm64/kernel/module.lds @@ -1,3 +1,4 @@ SECTIONS { .plt (NOLOAD) : { BYTE(0) } + .init.plt (NOLOAD) : { BYTE(0) } } diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c index f7931d900bca..37e47a9d617e 100644 --- a/arch/arm64/kernel/probes/decode-insn.c +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -157,10 +157,10 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi) mod = __module_address((unsigned long)addr); if (mod && within_module_init((unsigned long)addr, mod) && !within_module_init((unsigned long)scan_end, mod)) - scan_end = (kprobe_opcode_t *)mod->module_init; + scan_end = (kprobe_opcode_t *)mod->init_layout.base; else if (mod && within_module_core((unsigned long)addr, mod) && !within_module_core((unsigned long)scan_end, mod)) - scan_end = (kprobe_opcode_t *)mod->module_core; + scan_end = (kprobe_opcode_t *)mod->core_layout.base; preempt_enable(); } #endif diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 44d632e38786..fc20a5aa9a73 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -45,6 +45,9 @@ #include <linux/personality.h> #include <linux/notifier.h> #include <trace/events/power.h> +#ifdef CONFIG_THREAD_INFO_IN_TASK +#include <linux/percpu.h> +#endif #include <asm/alternative.h> #include <asm/compat.h> @@ -392,6 +395,22 @@ void uao_thread_switch(struct task_struct *next) } } +#ifdef CONFIG_THREAD_INFO_IN_TASK +/* + * We store our current task in sp_el0, which is clobbered by userspace. Keep a + * shadow copy so that we can restore this upon entry from userspace. + * + * This is *only* for exception entry from EL0, and is not valid until we + * __switch_to() a user task. + */ +DEFINE_PER_CPU(struct task_struct *, __entry_task); + +static void entry_task_switch(struct task_struct *next) +{ + __this_cpu_write(__entry_task, next); +} +#endif + /* * Thread switching. */ @@ -404,6 +423,9 @@ struct task_struct *__switch_to(struct task_struct *prev, tls_thread_switch(next); hw_breakpoint_thread_switch(next); contextidr_thread_switch(next); +#ifdef CONFIG_THREAD_INFO_IN_TASK + entry_task_switch(next); +#endif uao_thread_switch(next); /* @@ -421,27 +443,35 @@ struct task_struct *__switch_to(struct task_struct *prev, unsigned long get_wchan(struct task_struct *p) { struct stackframe frame; - unsigned long stack_page; + unsigned long stack_page, ret = 0; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; + stack_page = (unsigned long)try_get_task_stack(p); + if (!stack_page) + return 0; + frame.fp = thread_saved_fp(p); frame.sp = thread_saved_sp(p); frame.pc = thread_saved_pc(p); #ifdef CONFIG_FUNCTION_GRAPH_TRACER frame.graph = p->curr_ret_stack; #endif - stack_page = (unsigned long)task_stack_page(p); do { if (frame.sp < stack_page || frame.sp >= stack_page + THREAD_SIZE || unwind_frame(p, &frame)) - return 0; - if (!in_sched_functions(frame.pc)) - return frame.pc; + goto out; + if (!in_sched_functions(frame.pc)) { + ret = frame.pc; + goto out; + } } while (count ++ < 16); - return 0; + +out: + put_task_stack(p); + return ret; } unsigned long arch_align_stack(unsigned long sp) diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c index 1718706fde83..12a87f2600f2 100644 --- a/arch/arm64/kernel/return_address.c +++ b/arch/arm64/kernel/return_address.c @@ -12,6 +12,7 @@ #include <linux/export.h> #include <linux/ftrace.h> +#include <asm/stack_pointer.h> #include <asm/stacktrace.h> struct return_address_data { diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 19749870c757..f1fae3cbc4ca 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -353,12 +353,16 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* - * Make sure init_thread_info.ttbr0 always generates translation + * Make sure thread_info.ttbr0 always generates translation * faults in case uaccess_enable() is inadvertently called by the init * thread. */ +#ifdef CONFIG_THREAD_INFO_IN_TASK + init_task.thread_info.ttbr0 = virt_to_phys(empty_zero_page); +#else init_thread_info.ttbr0 = virt_to_phys(empty_zero_page); #endif +#endif #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 6f9ba222955c..9696c5239a3a 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -125,9 +125,6 @@ ENTRY(_cpu_resume) /* load sp from context */ ldr x2, [x0, #CPU_CTX_SP] mov sp, x2 - /* save thread_info */ - and x2, x2, #~(THREAD_SIZE - 1) - msr sp_el0, x2 /* * cpu_do_resume expects x0 to contain context address pointer */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index a1d06fc42048..fb3a21e17bff 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -58,6 +58,9 @@ #define CREATE_TRACE_POINTS #include <trace/events/ipi.h> +DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); +EXPORT_PER_CPU_SYMBOL(cpu_number); + /* * as from 2.5, kernels no longer have an init_tasks structure * so we need some other way of telling a new secondary core @@ -110,6 +113,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) * We need to tell the secondary core where to find its stack and the * page tables. */ +#ifdef CONFIG_THREAD_INFO_IN_TASK + secondary_data.task = idle; +#endif secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; update_cpu_boot_status(CPU_MMU_OFF); __flush_dcache_area(&secondary_data, sizeof(secondary_data)); @@ -134,6 +140,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_err("CPU%u: failed to boot: %d\n", cpu, ret); } +#ifdef CONFIG_THREAD_INFO_IN_TASK + secondary_data.task = NULL; +#endif secondary_data.stack = NULL; status = READ_ONCE(secondary_data.status); if (ret && status) { @@ -177,7 +186,10 @@ static void smp_store_cpu_info(unsigned int cpuid) asmlinkage void secondary_start_kernel(void) { struct mm_struct *mm = &init_mm; - unsigned int cpu = smp_processor_id(); + unsigned int cpu; + + cpu = task_cpu(current); + set_my_cpu_offset(per_cpu_offset(cpu)); /* * All kernel threads share the same mm context; grab a @@ -186,8 +198,6 @@ asmlinkage void secondary_start_kernel(void) atomic_inc(&mm->mm_count); current->active_mm = mm; - set_my_cpu_offset(per_cpu_offset(smp_processor_id())); - /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. @@ -676,6 +686,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus) if (max_cpus == 0) break; + per_cpu(cpu_number, cpu) = cpu; + if (cpu == smp_processor_id()) continue; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index a99eff9afc1f..95b915d17afc 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -22,6 +22,7 @@ #include <linux/stacktrace.h> #include <asm/irq.h> +#include <asm/stack_pointer.h> #include <asm/stacktrace.h> /* @@ -128,7 +129,6 @@ void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, break; } } -EXPORT_SYMBOL(walk_stackframe); #ifdef CONFIG_STACKTRACE struct stack_trace_data { @@ -160,6 +160,9 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) struct stack_trace_data data; struct stackframe frame; + if (!try_get_task_stack(tsk)) + return; + data.trace = trace; data.skip = trace->skip; @@ -181,6 +184,8 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) walk_stackframe(tsk, &frame, save_trace, &data); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; + + put_task_stack(tsk); } void save_stack_trace(struct stack_trace *trace) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 0acdb63d19b6..468b939f3471 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -45,12 +45,6 @@ void notrace __cpu_suspend_exit(void) cpu_uninstall_idmap(); /* - * Restore per-cpu offset before any kernel - * subsystem relying on it has a chance to run. - */ - set_my_cpu_offset(per_cpu_offset(smp_processor_id())); - - /* * Restore HW breakpoint registers to sane values * before debug exceptions are possibly reenabled * through local_dbg_restore. diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index a1cfcaa562a2..5b56522bc3da 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -38,6 +38,7 @@ #include <asm/esr.h> #include <asm/insn.h> #include <asm/traps.h> +#include <asm/stack_pointer.h> #include <asm/stacktrace.h> #include <asm/exception.h> #include <asm/system_misc.h> @@ -154,6 +155,14 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) if (!tsk) tsk = current; + pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); + + if (!tsk) + tsk = current; + + if (!try_get_task_stack(tsk)) + return; + /* * Switching between stacks is valid when tracing current and in * non-preemptible context. @@ -219,6 +228,8 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) stack + sizeof(struct pt_regs), false); } } + + put_task_stack(tsk); } void show_stack(struct task_struct *tsk, unsigned long *sp) @@ -234,10 +245,9 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) #endif #define S_SMP " SMP" -static int __die(const char *str, int err, struct thread_info *thread, - struct pt_regs *regs) +static int __die(const char *str, int err, struct pt_regs *regs) { - struct task_struct *tsk = thread->task; + struct task_struct *tsk = current; static int die_counter; int ret; @@ -252,7 +262,8 @@ static int __die(const char *str, int err, struct thread_info *thread, print_modules(); __show_regs(regs); pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n", - TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1); + TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), + end_of_stack(tsk)); if (!user_mode(regs) || in_interrupt()) { dump_mem(KERN_EMERG, "Stack: ", regs->sp, @@ -272,7 +283,6 @@ static DEFINE_RAW_SPINLOCK(die_lock); */ void die(const char *str, struct pt_regs *regs, int err) { - struct thread_info *thread = current_thread_info(); int ret; oops_enter(); @@ -280,9 +290,9 @@ void die(const char *str, struct pt_regs *regs, int err) raw_spin_lock_irq(&die_lock); console_verbose(); bust_spinlocks(1); - ret = __die(str, err, thread, regs); + ret = __die(str, err, regs); - if (regs && kexec_should_crash(thread->task)) + if (regs && kexec_should_crash(current)) crash_kexec(regs); bust_spinlocks(0); diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 826032bc3945..acde4782621a 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -12,3 +12,7 @@ obj-$(CONFIG_KVM_ARM_HOST) += switch.o obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o obj-$(CONFIG_KVM_ARM_HOST) += tlb.o obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o + +GCOV_PROFILE := n +KASAN_SANITIZE := n +UBSAN_SANITIZE := n diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 454d76f6a612..c7809f41d9d1 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -88,21 +88,21 @@ void show_pte(struct mm_struct *mm, unsigned long addr) break; pud = pud_offset(pgd, addr); - printk(", *pud=%016llx", pud_val(*pud)); + pr_cont(", *pud=%016llx", pud_val(*pud)); if (pud_none(*pud) || pud_bad(*pud)) break; pmd = pmd_offset(pud, addr); - printk(", *pmd=%016llx", pmd_val(*pmd)); + pr_cont(", *pmd=%016llx", pmd_val(*pmd)); if (pmd_none(*pmd) || pmd_bad(*pmd)) break; pte = pte_offset_map(pmd, addr); - printk(", *pte=%016llx", pte_val(*pte)); + pr_cont(", *pte=%016llx", pte_val(*pte)); pte_unmap(pte); } while(0); - printk("\n"); + pr_cont("\n"); } #ifdef CONFIG_ARM64_HW_AFDBM diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c2adb0cb952a..32682be978e0 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -70,11 +70,14 @@ ENTRY(cpu_do_suspend) mrs x8, mdscr_el1 mrs x9, oslsr_el1 mrs x10, sctlr_el1 + mrs x11, tpidr_el1 + mrs x12, sp_el0 stp x2, x3, [x0] stp x4, xzr, [x0, #16] stp x5, x6, [x0, #32] stp x7, x8, [x0, #48] stp x9, x10, [x0, #64] + stp x11, x12, [x0, #80] ret ENDPROC(cpu_do_suspend) @@ -90,6 +93,7 @@ ENTRY(cpu_do_resume) ldp x6, x8, [x0, #32] ldp x9, x10, [x0, #48] ldp x11, x12, [x0, #64] + ldp x13, x14, [x0, #80] msr tpidr_el0, x2 msr tpidrro_el0, x3 msr contextidr_el1, x4 @@ -112,6 +116,8 @@ ENTRY(cpu_do_resume) msr mdscr_el1, x10 msr sctlr_el1, x12 + msr tpidr_el1, x13 + msr sp_el0, x14 /* * Restore oslsr_el1 by writing oslar_el1 */ diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c index 164efa009e5b..2b4c54c04cb6 100644 --- a/arch/avr32/kernel/module.c +++ b/arch/avr32/kernel/module.c @@ -118,9 +118,9 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, * Increase core size to make room for GOT and set start * offset for GOT. */ - module->core_size = ALIGN(module->core_size, 4); - module->arch.got_offset = module->core_size; - module->core_size += module->arch.got_size; + module->core_layout.size = ALIGN(module->core_layout.size, 4); + module->arch.got_offset = module->core_layout.size; + module->core_layout.size += module->arch.got_size; return 0; @@ -177,7 +177,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, if (!info->got_initialized) { Elf32_Addr *gotent; - gotent = (module->module_core + gotent = (module->core_layout.base + module->arch.got_offset + info->got_offset); *gotent = relocation; @@ -255,8 +255,8 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, */ pr_debug("GOTPC: PC=0x%x, got_offset=0x%lx, core=0x%p\n", relocation, module->arch.got_offset, - module->module_core); - relocation -= ((unsigned long)module->module_core + module->core_layout.base); + relocation -= ((unsigned long)module->core_layout.base + module->arch.got_offset); *location = relocation; break; diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 2c86a4ef6742..7091a367eeda 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -45,7 +45,7 @@ config IA64 select GENERIC_SMP_IDLE_THREAD select ARCH_INIT_TASK select ARCH_TASK_STRUCT_ALLOCATOR - select ARCH_THREAD_INFO_ALLOCATOR + select ARCH_THREAD_STACK_ALLOCATOR select ARCH_CLOCKSOURCE_DATA select GENERIC_TIME_VSYSCALL_OLD select SYSCTL_ARCH_UNALIGN_NO_WARN diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index aa995b67c3f5..d1212b84fb83 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -48,15 +48,15 @@ struct thread_info { #ifndef ASM_OFFSETS_C /* how to get the thread information struct from C */ #define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE)) -#define alloc_thread_info_node(tsk, node) \ - ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) +#define alloc_thread_stack_node(tsk, node) \ + ((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE)) #define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) #else #define current_thread_info() ((struct thread_info *) 0) -#define alloc_thread_info_node(tsk, node) ((struct thread_info *) 0) +#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) #define task_thread_info(tsk) ((struct thread_info *) 0) #endif -#define free_thread_info(ti) /* nothing */ +#define free_thread_stack(ti) /* nothing */ #define task_stack_page(tsk) ((void *)(tsk)) #define __HAVE_THREAD_FUNCTIONS diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index f9efe9739d3f..0eaa89f3defd 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -26,6 +26,7 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * handled. This is done by having a special ".data..init_task" section... */ #define init_thread_info init_task_mem.s.thread_info +#define init_stack init_task_mem.stack union { struct { diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index b15933c31b2f..6ab0ae7d6535 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -486,13 +486,13 @@ module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, static inline int in_init (const struct module *mod, uint64_t addr) { - return addr - (uint64_t) mod->module_init < mod->init_size; + return addr - (uint64_t) mod->init_layout.base < mod->init_layout.size; } static inline int in_core (const struct module *mod, uint64_t addr) { - return addr - (uint64_t) mod->module_core < mod->core_size; + return addr - (uint64_t) mod->core_layout.base < mod->core_layout.size; } static inline int @@ -675,7 +675,7 @@ do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend, break; case RV_BDREL: - val -= (uint64_t) (in_init(mod, val) ? mod->module_init : mod->module_core); + val -= (uint64_t) (in_init(mod, val) ? mod->init_layout.base : mod->core_layout.base); break; case RV_LTV: @@ -810,15 +810,15 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind * addresses have been selected... */ uint64_t gp; - if (mod->core_size > MAX_LTOFF) + if (mod->core_layout.size > MAX_LTOFF) /* * This takes advantage of fact that SHF_ARCH_SMALL gets allocated * at the end of the module. */ - gp = mod->core_size - MAX_LTOFF / 2; + gp = mod->core_layout.size - MAX_LTOFF / 2; else - gp = mod->core_size / 2; - gp = (uint64_t) mod->module_core + ((gp + 7) & -8); + gp = mod->core_layout.size / 2; + gp = (uint64_t) mod->core_layout.base + ((gp + 7) & -8); mod->arch.gp = gp; DEBUGP("%s: placing gp at 0x%lx\n", __func__, gp); } diff --git a/arch/metag/kernel/module.c b/arch/metag/kernel/module.c index 986331cd0a52..bb8dfba9a763 100644 --- a/arch/metag/kernel/module.c +++ b/arch/metag/kernel/module.c @@ -176,8 +176,8 @@ static uint32_t do_plt_call(void *location, Elf32_Addr val, tramp[1] = 0xac000001 | ((val & 0x0000ffff) << 3); /* Init, or core PLT? */ - if (location >= mod->module_core - && location < mod->module_core + mod->core_size) + if (location >= mod->core_layout.base + && location < mod->core_layout.base + mod->core_layout.size) entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; else entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; diff --git a/arch/mips/include/asm/branch.h b/arch/mips/include/asm/branch.h index de781cf54bc7..da80878f2c0d 100644 --- a/arch/mips/include/asm/branch.h +++ b/arch/mips/include/asm/branch.h @@ -74,10 +74,7 @@ static inline int compute_return_epc(struct pt_regs *regs) return __microMIPS_compute_return_epc(regs); if (cpu_has_mips16) return __MIPS16e_compute_return_epc(regs); - return regs->cp0_epc; - } - - if (!delay_slot(regs)) { + } else if (!delay_slot(regs)) { regs->cp0_epc += 4; return 0; } diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index e9fed8ca9b42..71e8f4c0b8da 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -399,7 +399,7 @@ int __MIPS16e_compute_return_epc(struct pt_regs *regs) * * @regs: Pointer to pt_regs * @insn: branch instruction to decode - * @returns: -EFAULT on error and forces SIGBUS, and on success + * @returns: -EFAULT on error and forces SIGILL, and on success * returns 0 or BRANCH_LIKELY_TAKEN as appropriate after * evaluating the branch. * @@ -431,7 +431,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, /* Fall through */ case jr_op: if (NO_R6EMU && insn.r_format.func == jr_op) - goto sigill_r6; + goto sigill_r2r6; regs->cp0_epc = regs->regs[insn.r_format.rs]; break; } @@ -446,7 +446,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, switch (insn.i_format.rt) { case bltzl_op: if (NO_R6EMU) - goto sigill_r6; + goto sigill_r2r6; case bltz_op: if ((long)regs->regs[insn.i_format.rs] < 0) { epc = epc + 4 + (insn.i_format.simmediate << 2); @@ -459,7 +459,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bgezl_op: if (NO_R6EMU) - goto sigill_r6; + goto sigill_r2r6; case bgez_op: if ((long)regs->regs[insn.i_format.rs] >= 0) { epc = epc + 4 + (insn.i_format.simmediate << 2); @@ -473,10 +473,8 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bltzal_op: case bltzall_op: if (NO_R6EMU && (insn.i_format.rs || - insn.i_format.rt == bltzall_op)) { - ret = -SIGILL; - break; - } + insn.i_format.rt == bltzall_op)) + goto sigill_r2r6; regs->regs[31] = epc + 8; /* * OK we are here either because we hit a NAL @@ -507,10 +505,8 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bgezal_op: case bgezall_op: if (NO_R6EMU && (insn.i_format.rs || - insn.i_format.rt == bgezall_op)) { - ret = -SIGILL; - break; - } + insn.i_format.rt == bgezall_op)) + goto sigill_r2r6; regs->regs[31] = epc + 8; /* * OK we are here either because we hit a BAL @@ -556,6 +552,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, /* * These are unconditional and in j_format. */ + case jalx_op: case jal_op: regs->regs[31] = regs->cp0_epc + 8; case j_op: @@ -573,7 +570,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, */ case beql_op: if (NO_R6EMU) - goto sigill_r6; + goto sigill_r2r6; case beq_op: if (regs->regs[insn.i_format.rs] == regs->regs[insn.i_format.rt]) { @@ -587,7 +584,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bnel_op: if (NO_R6EMU) - goto sigill_r6; + goto sigill_r2r6; case bne_op: if (regs->regs[insn.i_format.rs] != regs->regs[insn.i_format.rt]) { @@ -601,7 +598,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case blezl_op: /* not really i_format */ if (!insn.i_format.rt && NO_R6EMU) - goto sigill_r6; + goto sigill_r2r6; case blez_op: /* * Compact branches for R6 for the @@ -636,7 +633,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bgtzl_op: if (!insn.i_format.rt && NO_R6EMU) - goto sigill_r6; + goto sigill_r2r6; case bgtz_op: /* * Compact branches for R6 for the @@ -843,11 +840,12 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, return ret; sigill_dsp: - printk("%s: DSP branch but not DSP ASE - sending SIGBUS.\n", current->comm); - force_sig(SIGBUS, current); + pr_info("%s: DSP branch but not DSP ASE - sending SIGILL.\n", + current->comm); + force_sig(SIGILL, current); return -EFAULT; -sigill_r6: - pr_info("%s: R2 branch but r2-to-r6 emulator is not preset - sending SIGILL.\n", +sigill_r2r6: + pr_info("%s: R2 branch but r2-to-r6 emulator is not present - sending SIGILL.\n", current->comm); force_sig(SIGILL, current); return -EFAULT; diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c index 298b2b773d12..f1fab6ff53e6 100644 --- a/arch/mips/kernel/proc.c +++ b/arch/mips/kernel/proc.c @@ -83,7 +83,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) } seq_printf(m, "isa\t\t\t:"); - if (cpu_has_mips_r1) + if (cpu_has_mips_1) seq_printf(m, " mips1"); if (cpu_has_mips_2) seq_printf(m, "%s", " mips2"); diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index c95bf18260f8..24c115a0721a 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -927,7 +927,7 @@ asmlinkage void syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit(regs); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_exit(regs, regs->regs[2]); + trace_sys_exit(regs, regs_return_value(regs)); if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, 0); diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 2d23c834ba96..29b0c5f978e4 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -372,7 +372,7 @@ EXPORT(sys_call_table) PTR sys_writev PTR sys_cacheflush PTR sys_cachectl - PTR sys_sysmips + PTR __sys_sysmips PTR sys_ni_syscall /* 4150 */ PTR sys_getsid PTR sys_fdatasync diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index deac63315d0e..a6323a969919 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -312,7 +312,7 @@ EXPORT(sys_call_table) PTR sys_sched_getaffinity PTR sys_cacheflush PTR sys_cachectl - PTR sys_sysmips + PTR __sys_sysmips PTR sys_io_setup /* 5200 */ PTR sys_io_destroy PTR sys_io_getevents diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index ee93d5fe61d7..e0fdca8d3abe 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -298,7 +298,7 @@ EXPORT(sysn32_call_table) PTR compat_sys_sched_getaffinity PTR sys_cacheflush PTR sys_cachectl - PTR sys_sysmips + PTR __sys_sysmips PTR compat_sys_io_setup /* 6200 */ PTR sys_io_destroy PTR compat_sys_io_getevents diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index b77052ec6fb2..87c697181d25 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -367,7 +367,7 @@ EXPORT(sys32_call_table) PTR compat_sys_writev PTR sys_cacheflush PTR sys_cachectl - PTR sys_sysmips + PTR __sys_sysmips PTR sys_ni_syscall /* 4150 */ PTR sys_getsid PTR sys_fdatasync diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 53a7ef9a8f32..4234b2d726c5 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -28,6 +28,7 @@ #include <linux/elf.h> #include <asm/asm.h> +#include <asm/asm-eva.h> #include <asm/branch.h> #include <asm/cachectl.h> #include <asm/cacheflush.h> @@ -138,10 +139,12 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new) __asm__ __volatile__ ( " .set "MIPS_ISA_ARCH_LEVEL" \n" " li %[err], 0 \n" - "1: ll %[old], (%[addr]) \n" + "1: \n" + user_ll("%[old]", "(%[addr])") " move %[tmp], %[new] \n" - "2: sc %[tmp], (%[addr]) \n" - " bnez %[tmp], 4f \n" + "2: \n" + user_sc("%[tmp]", "(%[addr])") + " beqz %[tmp], 4f \n" "3: \n" " .insn \n" " .subsection 2 \n" @@ -199,6 +202,12 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new) unreachable(); } +/* + * mips_atomic_set() normally returns directly via syscall_exit potentially + * clobbering static registers, so be sure to preserve them. + */ +save_static_function(sys_sysmips); + SYSCALL_DEFINE3(sysmips, long, cmd, long, arg1, long, arg2) { switch (cmd) { diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 9067b651c7a2..544ea21bfef9 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -205,11 +205,11 @@ static void layout_sections(struct module *mod, const Elf_Ehdr *hdr, || s->sh_entsize != ~0UL) continue; s->sh_entsize = - get_offset((unsigned long *)&mod->core_size, s); + get_offset((unsigned long *)&mod->core_layout.size, s); } if (m == 0) - mod->core_text_size = mod->core_size; + mod->core_layout.text_size = mod->core_layout.size; } } @@ -641,7 +641,7 @@ static int vpe_elfload(struct vpe *v) layout_sections(&mod, hdr, sechdrs, secstrings); } - v->load_addr = alloc_progmem(mod.core_size); + v->load_addr = alloc_progmem(mod.core_layout.size); if (!v->load_addr) return -ENOMEM; diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 734a2c7665ec..6da2e4a6ba39 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -2496,6 +2496,35 @@ dcopuop: return 0; } +/* + * Emulate FPU instructions. + * + * If we use FPU hardware, then we have been typically called to handle + * an unimplemented operation, such as where an operand is a NaN or + * denormalized. In that case exit the emulation loop after a single + * iteration so as to let hardware execute any subsequent instructions. + * + * If we have no FPU hardware or it has been disabled, then continue + * emulating floating-point instructions until one of these conditions + * has occurred: + * + * - a non-FPU instruction has been encountered, + * + * - an attempt to emulate has ended with a signal, + * + * - the ISA mode has been switched. + * + * We need to terminate the emulation loop if we got switched to the + * MIPS16 mode, whether supported or not, so that we do not attempt + * to emulate a MIPS16 instruction as a regular MIPS FPU instruction. + * Similarly if we got switched to the microMIPS mode and only the + * regular MIPS mode is supported, so that we do not attempt to emulate + * a microMIPS instruction as a regular MIPS FPU instruction. Or if + * we got switched to the regular MIPS mode and only the microMIPS mode + * is supported, so that we do not attempt to emulate a regular MIPS + * instruction that should cause an Address Error exception instead. + * For simplicity we always terminate upon an ISA mode switch. + */ int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, int has_fpu, void *__user *fault_addr) { @@ -2581,6 +2610,15 @@ int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, break; if (sig) break; + /* + * We have to check for the ISA bit explicitly here, + * because `get_isa16_mode' may return 0 if support + * for code compression has been globally disabled, + * or otherwise we may produce the wrong signal or + * even proceed successfully where we must not. + */ + if ((xcp->cp0_epc ^ prevepc) & 0x1) + break; cond_resched(); } while (xcp->cp0_epc > prevepc); diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h index 4861a78c7160..f5f90bbf019d 100644 --- a/arch/mn10300/include/asm/thread_info.h +++ b/arch/mn10300/include/asm/thread_info.h @@ -115,7 +115,7 @@ static inline unsigned long current_stack_pointer(void) } #ifndef CONFIG_KGDB -void arch_release_thread_info(struct thread_info *ti); +void arch_release_thread_stack(unsigned long *stack); #endif #define get_thread_info(ti) get_task_struct((ti)->task) #define put_thread_info(ti) put_task_struct((ti)->task) diff --git a/arch/mn10300/kernel/kgdb.c b/arch/mn10300/kernel/kgdb.c index 99770823451a..2d7986c386fe 100644 --- a/arch/mn10300/kernel/kgdb.c +++ b/arch/mn10300/kernel/kgdb.c @@ -397,8 +397,9 @@ static bool kgdb_arch_undo_singlestep(struct pt_regs *regs) * single-step state is cleared. At this point the breakpoints should have * been removed by __switch_to(). */ -void arch_release_thread_info(struct thread_info *ti) +void arch_release_thread_stack(unsigned long *stack) { + struct thread_info *ti = (void *)stack; if (kgdb_sstep_thread == ti) { kgdb_sstep_thread = NULL; diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S index 2d69a853b742..3a08b55609b6 100644 --- a/arch/openrisc/kernel/vmlinux.lds.S +++ b/arch/openrisc/kernel/vmlinux.lds.S @@ -38,6 +38,8 @@ SECTIONS /* Read-only sections, merged into text segment: */ . = LOAD_BASE ; + _text = .; + /* _s_kernel_ro must be page aligned */ . = ALIGN(PAGE_SIZE); _s_kernel_ro = .; diff --git a/arch/parisc/include/asm/dma-mapping.h b/arch/parisc/include/asm/dma-mapping.h index d8d60a57183f..f53725202955 100644 --- a/arch/parisc/include/asm/dma-mapping.h +++ b/arch/parisc/include/asm/dma-mapping.h @@ -39,6 +39,8 @@ struct hppa_dma_ops { ** flush/purge and allocate "regular" cacheable pages for everything. */ +#define DMA_ERROR_CODE (~(dma_addr_t)0) + #ifdef CONFIG_PA11 extern struct hppa_dma_ops pcxl_dma_ops; extern struct hppa_dma_ops pcx_dma_ops; @@ -209,12 +211,13 @@ parisc_walk_tree(struct device *dev) break; } } - BUG_ON(!dev->platform_data); return dev->platform_data; } - -#define GET_IOC(dev) (HBA_DATA(parisc_walk_tree(dev))->iommu) - + +#define GET_IOC(dev) ({ \ + void *__pdata = parisc_walk_tree(dev); \ + __pdata ? HBA_DATA(__pdata)->iommu : NULL; \ +}) #ifdef CONFIG_IOMMU_CCIO struct parisc_device; diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h index 59be25764433..a81226257878 100644 --- a/arch/parisc/include/asm/mmu_context.h +++ b/arch/parisc/include/asm/mmu_context.h @@ -49,15 +49,26 @@ static inline void load_context(mm_context_t context) mtctl(__space_to_prot(context), 8); } -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) +static inline void switch_mm_irqs_off(struct mm_struct *prev, + struct mm_struct *next, struct task_struct *tsk) { - if (prev != next) { mtctl(__pa(next->pgd), 25); load_context(next->context); } } +static inline void switch_mm(struct mm_struct *prev, + struct mm_struct *next, struct task_struct *tsk) +{ + unsigned long flags; + + local_irq_save(flags); + switch_mm_irqs_off(prev, next, tsk); + local_irq_restore(flags); +} +#define switch_mm_irqs_off switch_mm_irqs_off + #define deactivate_mm(tsk,mm) do { } while (0) static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 3c63a820fcda..b9d75d9fa9ac 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -42,9 +42,9 @@ * We are not doing SEGREL32 handling correctly. According to the ABI, we * should do a value offset, like this: * if (in_init(me, (void *)val)) - * val -= (uint32_t)me->module_init; + * val -= (uint32_t)me->init_layout.base; * else - * val -= (uint32_t)me->module_core; + * val -= (uint32_t)me->core_layout.base; * However, SEGREL32 is used only for PARISC unwind entries, and we want * those entries to have an absolute address, and not just an offset. * @@ -100,14 +100,14 @@ * or init pieces the location is */ static inline int in_init(struct module *me, void *loc) { - return (loc >= me->module_init && - loc <= (me->module_init + me->init_size)); + return (loc >= me->init_layout.base && + loc <= (me->init_layout.base + me->init_layout.size)); } static inline int in_core(struct module *me, void *loc) { - return (loc >= me->module_core && - loc <= (me->module_core + me->core_size)); + return (loc >= me->core_layout.base && + loc <= (me->core_layout.base + me->core_layout.size)); } static inline int in_local(struct module *me, void *loc) @@ -367,13 +367,13 @@ int module_frob_arch_sections(CONST Elf_Ehdr *hdr, } /* align things a bit */ - me->core_size = ALIGN(me->core_size, 16); - me->arch.got_offset = me->core_size; - me->core_size += gots * sizeof(struct got_entry); + me->core_layout.size = ALIGN(me->core_layout.size, 16); + me->arch.got_offset = me->core_layout.size; + me->core_layout.size += gots * sizeof(struct got_entry); - me->core_size = ALIGN(me->core_size, 16); - me->arch.fdesc_offset = me->core_size; - me->core_size += fdescs * sizeof(Elf_Fdesc); + me->core_layout.size = ALIGN(me->core_layout.size, 16); + me->arch.fdesc_offset = me->core_layout.size; + me->core_layout.size += fdescs * sizeof(Elf_Fdesc); me->arch.got_max = gots; me->arch.fdesc_max = fdescs; @@ -391,7 +391,7 @@ static Elf64_Word get_got(struct module *me, unsigned long value, long addend) BUG_ON(value == 0); - got = me->module_core + me->arch.got_offset; + got = me->core_layout.base + me->arch.got_offset; for (i = 0; got[i].addr; i++) if (got[i].addr == value) goto out; @@ -409,7 +409,7 @@ static Elf64_Word get_got(struct module *me, unsigned long value, long addend) #ifdef CONFIG_64BIT static Elf_Addr get_fdesc(struct module *me, unsigned long value) { - Elf_Fdesc *fdesc = me->module_core + me->arch.fdesc_offset; + Elf_Fdesc *fdesc = me->core_layout.base + me->arch.fdesc_offset; if (!value) { printk(KERN_ERR "%s: zero OPD requested!\n", me->name); @@ -427,7 +427,7 @@ static Elf_Addr get_fdesc(struct module *me, unsigned long value) /* Create new one */ fdesc->addr = value; - fdesc->gp = (Elf_Addr)me->module_core + me->arch.got_offset; + fdesc->gp = (Elf_Addr)me->core_layout.base + me->arch.got_offset; return (Elf_Addr)fdesc; } #endif /* CONFIG_64BIT */ @@ -839,7 +839,7 @@ register_unwind_table(struct module *me, table = (unsigned char *)sechdrs[me->arch.unwind_section].sh_addr; end = table + sechdrs[me->arch.unwind_section].sh_size; - gp = (Elf_Addr)me->module_core + me->arch.got_offset; + gp = (Elf_Addr)me->core_layout.base + me->arch.got_offset; DEBUGP("register_unwind_table(), sect = %d at 0x%p - 0x%p (gp=0x%lx)\n", me->arch.unwind_section, table, end, gp); diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index d4ffcfbc9885..041e1f9ec129 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -361,7 +361,7 @@ ENTRY_SAME(ni_syscall) /* 263: reserved for vserver */ ENTRY_SAME(add_key) ENTRY_SAME(request_key) /* 265 */ - ENTRY_SAME(keyctl) + ENTRY_COMP(keyctl) ENTRY_SAME(ioprio_set) ENTRY_SAME(ioprio_get) ENTRY_SAME(inotify_init) diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 16dbe81c97c9..2f33a67bc531 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -298,7 +298,7 @@ bad_area: case 15: /* Data TLB miss fault/Data page fault */ /* send SIGSEGV when outside of vma */ if (!vma || - address < vma->vm_start || address > vma->vm_end) { + address < vma->vm_start || address >= vma->vm_end) { si.si_signo = SIGSEGV; si.si_code = SEGV_MAPERR; break; diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 55f106ed12bf..039c4b910615 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -460,7 +460,7 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) * Atomically increments @v by 1, so long as @v is non-zero. * Returns non-zero if @v was non-zero, and zero otherwise. */ -static __inline__ long atomic64_inc_not_zero(atomic64_t *v) +static __inline__ int atomic64_inc_not_zero(atomic64_t *v) { long t1, t2; @@ -479,7 +479,7 @@ static __inline__ long atomic64_inc_not_zero(atomic64_t *v) : "r" (&v->counter) : "cc", "xer", "memory"); - return t1; + return t1 != 0; } #endif /* __powerpc64__ */ diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index ee46ffef608e..743ad7a400d6 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -23,12 +23,13 @@ #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE PAGE_SIZE -/* This is the location that an ET_DYN program is loaded if exec'ed. Typical - use of this is to invoke "./ld.so someprog" to test out a new version of - the loader. We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. */ - -#define ELF_ET_DYN_BASE 0x20000000 +/* + * This is the base location for PIE (ET_DYN with INTERP) loads. On + * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * space open for things that want to use the area for 32-bit pointers. + */ +#define ELF_ET_DYN_BASE (is_32bit_task() ? 0x000400000UL : \ + 0x100000000UL) #define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 627d129d7fcb..ca372bbc0ffe 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1236,7 +1236,7 @@ static inline unsigned long mfvtb (void) " .llong 0\n" \ ".previous" \ : "=r" (rval) \ - : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL)); \ + : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \ rval;}) #else #define mftb() ({unsigned long rval; \ diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 329771559cbb..8b3b46b7b0f2 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -44,22 +44,8 @@ extern void __init dump_numa_cpu_topology(void); extern int sysfs_add_device_to_node(struct device *dev, int nid); extern void sysfs_remove_device_from_node(struct device *dev, int nid); -static inline int early_cpu_to_node(int cpu) -{ - int nid; - - nid = numa_cpu_lookup_table[cpu]; - - /* - * Fall back to node 0 if nid is unset (it should be, except bugs). - * This allows callers to safely do NODE_DATA(early_cpu_to_node(cpu)). - */ - return (nid < 0) ? 0 : nid; -} #else -static inline int early_cpu_to_node(int cpu) { return 0; } - static inline void dump_numa_cpu_topology(void) {} static inline int sysfs_add_device_to_node(struct device *dev, int nid) diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index c94d2e018d84..2c01665eb410 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -188,8 +188,8 @@ static uint32_t do_plt_call(void *location, pr_debug("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); /* Init, or core PLT? */ - if (location >= mod->module_core - && location < mod->module_core + mod->core_size) + if (location >= mod->core_layout.base + && location < mod->core_layout.base + mod->core_layout.size) entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; else entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; @@ -296,7 +296,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, } #ifdef CONFIG_DYNAMIC_FTRACE module->arch.tramp = - do_plt_call(module->module_core, + do_plt_call(module->core_layout.base, (unsigned long)ftrace_caller, sechdrs, module); #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index fe6e800c1357..a20823210ac0 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -751,7 +751,7 @@ void __init setup_arch(char **cmdline_p) static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) { - return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align, + return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align, __pa(MAX_DMA_ADDRESS)); } @@ -762,7 +762,7 @@ static void __init pcpu_fc_free(void *ptr, size_t size) static int pcpu_cpu_distance(unsigned int from, unsigned int to) { - if (early_cpu_to_node(from) == early_cpu_to_node(to)) + if (cpu_to_node(from) == cpu_to_node(to)) return LOCAL_DISTANCE; else return REMOTE_DISTANCE; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 396dc44e783b..428563b195c3 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2687,6 +2687,10 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) { int r; int srcu_idx; + unsigned long ebb_regs[3] = {}; /* shut up GCC */ + unsigned long user_tar = 0; + unsigned long proc_fscr = 0; + unsigned int user_vrsave; if (!vcpu->arch.sane) { run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -2707,10 +2711,11 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) run->fail_entry.hardware_entry_failure_reason = 0; return -EINVAL; } + /* Enable TM so we can read the TM SPRs */ + mtmsr(mfmsr() | MSR_TM); current->thread.tm_tfhar = mfspr(SPRN_TFHAR); current->thread.tm_tfiar = mfspr(SPRN_TFIAR); current->thread.tm_texasr = mfspr(SPRN_TEXASR); - current->thread.regs->msr &= ~MSR_TM; } #endif @@ -2736,6 +2741,17 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) flush_fp_to_thread(current); flush_altivec_to_thread(current); flush_vsx_to_thread(current); + + /* Save userspace EBB and other register values */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + ebb_regs[0] = mfspr(SPRN_EBBHR); + ebb_regs[1] = mfspr(SPRN_EBBRR); + ebb_regs[2] = mfspr(SPRN_BESCR); + user_tar = mfspr(SPRN_TAR); + proc_fscr = mfspr(SPRN_FSCR); + } + user_vrsave = mfspr(SPRN_VRSAVE); + vcpu->arch.wqp = &vcpu->arch.vcore->wq; vcpu->arch.pgdir = current->mm->pgd; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; @@ -2757,6 +2773,29 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) } } while (is_kvmppc_resume_guest(r)); + /* Restore userspace EBB and other register values */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + mtspr(SPRN_EBBHR, ebb_regs[0]); + mtspr(SPRN_EBBRR, ebb_regs[1]); + mtspr(SPRN_BESCR, ebb_regs[2]); + mtspr(SPRN_TAR, user_tar); + mtspr(SPRN_FSCR, proc_fscr); + } + mtspr(SPRN_VRSAVE, user_vrsave); + + /* + * Since we don't do lazy TM reload, we need to reload + * the TM registers here. + */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs && + (current->thread.regs->msr & MSR_TM)) { + mtspr(SPRN_TFHAR, current->thread.tm_tfhar); + mtspr(SPRN_TFIAR, current->thread.tm_tfiar); + mtspr(SPRN_TEXASR, current->thread.tm_texasr); + } +#endif + out: vcpu->arch.state = KVMPPC_VCPU_NOTREADY; atomic_dec(&vcpu->kvm->arch.vcpus_running); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 1a743f87b37d..ffab9269bfe4 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -36,6 +36,13 @@ #define NAPPING_CEDE 1 #define NAPPING_NOVCPU 2 +/* Stack frame offsets for kvmppc_hv_entry */ +#define SFS 112 +#define STACK_SLOT_TRAP (SFS-4) +#define STACK_SLOT_CIABR (SFS-16) +#define STACK_SLOT_DAWR (SFS-24) +#define STACK_SLOT_DAWRX (SFS-32) + /* * Call kvmppc_hv_entry in real mode. * Must be called with interrupts hard-disabled. @@ -274,10 +281,10 @@ kvm_novcpu_exit: bl kvmhv_accumulate_time #endif 13: mr r3, r12 - stw r12, 112-4(r1) + stw r12, STACK_SLOT_TRAP(r1) bl kvmhv_commence_exit nop - lwz r12, 112-4(r1) + lwz r12, STACK_SLOT_TRAP(r1) b kvmhv_switch_to_host /* @@ -489,7 +496,7 @@ kvmppc_hv_entry: */ mflr r0 std r0, PPC_LR_STKOFF(r1) - stdu r1, -112(r1) + stdu r1, -SFS(r1) /* Save R1 in the PACA */ std r1, HSTATE_HOST_R1(r13) @@ -643,6 +650,16 @@ kvmppc_got_guest: mtspr SPRN_PURR,r7 mtspr SPRN_SPURR,r8 + /* Save host values of some registers */ +BEGIN_FTR_SECTION + mfspr r5, SPRN_CIABR + mfspr r6, SPRN_DAWR + mfspr r7, SPRN_DAWRX + std r5, STACK_SLOT_CIABR(r1) + std r6, STACK_SLOT_DAWR(r1) + std r7, STACK_SLOT_DAWRX(r1) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + BEGIN_FTR_SECTION /* Set partition DABR */ /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ @@ -1266,8 +1283,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) */ li r0, 0 mtspr SPRN_IAMR, r0 - mtspr SPRN_CIABR, r0 - mtspr SPRN_DAWRX, r0 + mtspr SPRN_PSPB, r0 mtspr SPRN_TCSCR, r0 mtspr SPRN_WORT, r0 /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ @@ -1283,6 +1299,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) std r6,VCPU_UAMOR(r9) li r6,0 mtspr SPRN_AMR,r6 + mtspr SPRN_UAMOR, r6 /* Switch DSCR back to host value */ mfspr r8, SPRN_DSCR @@ -1424,6 +1441,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) slbia ptesync + /* Restore host values of some registers */ +BEGIN_FTR_SECTION + ld r5, STACK_SLOT_CIABR(r1) + ld r6, STACK_SLOT_DAWR(r1) + ld r7, STACK_SLOT_DAWRX(r1) + mtspr SPRN_CIABR, r5 + mtspr SPRN_DAWR, r6 + mtspr SPRN_DAWRX, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + /* * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do @@ -1533,8 +1560,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) li r0, KVM_GUEST_MODE_NONE stb r0, HSTATE_IN_GUEST(r13) - ld r0, 112+PPC_LR_STKOFF(r1) - addi r1, r1, 112 + ld r0, SFS+PPC_LR_STKOFF(r1) + addi r1, r1, SFS mtlr r0 blr diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 4014881e9843..e37162d356d8 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -687,8 +687,10 @@ int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, case 19: switch ((instr >> 1) & 0x3ff) { case 0: /* mcrf */ - rd = (instr >> 21) & 0x1c; - ra = (instr >> 16) & 0x1c; + rd = 7 - ((instr >> 23) & 0x7); + ra = 7 - ((instr >> 18) & 0x7); + rd *= 4; + ra *= 4; val = (regs->ccr >> ra) & 0xf; regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd); goto instr_done; @@ -967,6 +969,19 @@ int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, #endif case 19: /* mfcr */ + if ((instr >> 20) & 1) { + imm = 0xf0000000UL; + for (sh = 0; sh < 8; ++sh) { + if (instr & (0x80000 >> sh)) { + regs->gpr[rd] = regs->ccr & imm; + break; + } + imm >>= 4; + } + + goto instr_done; + } + regs->gpr[rd] = regs->ccr; regs->gpr[rd] &= 0xffffffffUL; goto instr_done; diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index 7c7fcc042549..fb695f142563 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -82,7 +82,6 @@ static int pSeries_reconfig_remove_node(struct device_node *np) of_detach_node(np); of_node_put(parent); - of_node_put(np); /* Must decrement the refcount */ return 0; } diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index bab6739a1154..b9eb7b1a49d2 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -154,14 +154,13 @@ extern unsigned int vdso_enabled; #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE 4096 -/* This is the location that an ET_DYN program is loaded if exec'ed. Typical - use of this is to invoke "./ld.so someprog" to test out a new version of - the loader. We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. 64-bit - tasks are aligned to 4GB. */ -#define ELF_ET_DYN_BASE (is_32bit_task() ? \ - (STACK_TOP / 3 * 2) : \ - (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1)) +/* + * This is the base location for PIE (ET_DYN with INTERP) loads. On + * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * space open for things that want to use the area for 32-bit pointers. + */ +#define ELF_ET_DYN_BASE (is_compat_task() ? 0x000400000UL : \ + 0x100000000UL) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. */ diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 6ba0bf928909..6bc941be6921 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -64,6 +64,12 @@ static inline void syscall_get_arguments(struct task_struct *task, { unsigned long mask = -1UL; + /* + * No arguments for this syscall, there's nothing to do. + */ + if (!n) + return; + BUG_ON(i + n > 6); #ifdef CONFIG_COMPAT if (test_tsk_thread_flag(task, TIF_31BIT)) diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 0c1a679314dd..7873e171457c 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -159,11 +159,11 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, /* Increase core size by size of got & plt and set start offsets for got and plt. */ - me->core_size = ALIGN(me->core_size, 4); - me->arch.got_offset = me->core_size; - me->core_size += me->arch.got_size; - me->arch.plt_offset = me->core_size; - me->core_size += me->arch.plt_size; + me->core_layout.size = ALIGN(me->core_layout.size, 4); + me->arch.got_offset = me->core_layout.size; + me->core_layout.size += me->arch.got_size; + me->arch.plt_offset = me->core_layout.size; + me->core_layout.size += me->arch.plt_size; return 0; } @@ -279,7 +279,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, if (info->got_initialized == 0) { Elf_Addr *gotent; - gotent = me->module_core + me->arch.got_offset + + gotent = me->core_layout.base + me->arch.got_offset + info->got_offset; *gotent = val; info->got_initialized = 1; @@ -302,7 +302,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, rc = apply_rela_bits(loc, val, 0, 64, 0); else if (r_type == R_390_GOTENT || r_type == R_390_GOTPLTENT) { - val += (Elf_Addr) me->module_core - loc; + val += (Elf_Addr) me->core_layout.base - loc; rc = apply_rela_bits(loc, val, 1, 32, 1); } break; @@ -315,7 +315,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ if (info->plt_initialized == 0) { unsigned int *ip; - ip = me->module_core + me->arch.plt_offset + + ip = me->core_layout.base + me->arch.plt_offset + info->plt_offset; ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */ ip[1] = 0x100a0004; @@ -334,7 +334,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val - loc + 0xffffUL < 0x1ffffeUL) || (r_type == R_390_PLT32DBL && val - loc + 0xffffffffULL < 0x1fffffffeULL))) - val = (Elf_Addr) me->module_core + + val = (Elf_Addr) me->core_layout.base + me->arch.plt_offset + info->plt_offset; val += rela->r_addend - loc; @@ -356,7 +356,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_GOTOFF32: /* 32 bit offset to GOT. */ case R_390_GOTOFF64: /* 64 bit offset to GOT. */ val = val + rela->r_addend - - ((Elf_Addr) me->module_core + me->arch.got_offset); + ((Elf_Addr) me->core_layout.base + me->arch.got_offset); if (r_type == R_390_GOTOFF16) rc = apply_rela_bits(loc, val, 0, 16, 0); else if (r_type == R_390_GOTOFF32) @@ -366,7 +366,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, break; case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */ case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */ - val = (Elf_Addr) me->module_core + me->arch.got_offset + + val = (Elf_Addr) me->core_layout.base + me->arch.got_offset + rela->r_addend - loc; if (r_type == R_390_GOTPC) rc = apply_rela_bits(loc, val, 1, 32, 0); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 0e2919dd8df3..1395eeb6005f 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1250,7 +1250,8 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) insn_count = bpf_jit_insn(jit, fp, i); if (insn_count < 0) return -1; - jit->addrs[i + 1] = jit->prg; /* Next instruction address */ + /* Next instruction address */ + jit->addrs[i + insn_count] = jit->prg; } bpf_jit_epilogue(jit); diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index 349dd23e2876..0cdeb2b483a0 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -25,9 +25,11 @@ void destroy_context(struct mm_struct *mm); void __tsb_context_switch(unsigned long pgd_pa, struct tsb_config *tsb_base, struct tsb_config *tsb_huge, - unsigned long tsb_descr_pa); + unsigned long tsb_descr_pa, + unsigned long secondary_ctx); -static inline void tsb_context_switch(struct mm_struct *mm) +static inline void tsb_context_switch_ctx(struct mm_struct *mm, + unsigned long ctx) { __tsb_context_switch(__pa(mm->pgd), &mm->context.tsb_block[0], @@ -38,9 +40,12 @@ static inline void tsb_context_switch(struct mm_struct *mm) #else NULL #endif - , __pa(&mm->context.tsb_descr[0])); + , __pa(&mm->context.tsb_descr[0]), + ctx); } +#define tsb_context_switch(X) tsb_context_switch_ctx(X, 0) + void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long mm_rss); @@ -110,8 +115,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str * cpu0 to update it's TSB because at that point the cpu_vm_mask * only had cpu1 set in it. */ - load_secondary_context(mm); - tsb_context_switch(mm); + tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context)); /* Any time a processor runs a context on an address space * for the first time, we must flush that context out of the diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h index ec9c04de3664..ff05992dae7a 100644 --- a/arch/sparc/include/asm/trap_block.h +++ b/arch/sparc/include/asm/trap_block.h @@ -54,6 +54,7 @@ extern struct trap_per_cpu trap_block[NR_CPUS]; void init_cur_cpu_trap(struct thread_info *); void setup_tba(void); extern int ncpus_probed; +extern u64 cpu_mondo_counter[NR_CPUS]; unsigned long real_hard_smp_processor_id(void); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 95a9fa0d2195..4511caa3b7e9 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -617,22 +617,48 @@ retry: } } -/* Multi-cpu list version. */ +#define CPU_MONDO_COUNTER(cpuid) (cpu_mondo_counter[cpuid]) +#define MONDO_USEC_WAIT_MIN 2 +#define MONDO_USEC_WAIT_MAX 100 +#define MONDO_RETRY_LIMIT 500000 + +/* Multi-cpu list version. + * + * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'. + * Sometimes not all cpus receive the mondo, requiring us to re-send + * the mondo until all cpus have received, or cpus are truly stuck + * unable to receive mondo, and we timeout. + * Occasionally a target cpu strand is borrowed briefly by hypervisor to + * perform guest service, such as PCIe error handling. Consider the + * service time, 1 second overall wait is reasonable for 1 cpu. + * Here two in-between mondo check wait time are defined: 2 usec for + * single cpu quick turn around and up to 100usec for large cpu count. + * Deliver mondo to large number of cpus could take longer, we adjusts + * the retry count as long as target cpus are making forward progress. + */ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) { - int retries, this_cpu, prev_sent, i, saw_cpu_error; + int this_cpu, tot_cpus, prev_sent, i, rem; + int usec_wait, retries, tot_retries; + u16 first_cpu = 0xffff; + unsigned long xc_rcvd = 0; unsigned long status; + int ecpuerror_id = 0; + int enocpu_id = 0; u16 *cpu_list; + u16 cpu; this_cpu = smp_processor_id(); - cpu_list = __va(tb->cpu_list_pa); - - saw_cpu_error = 0; - retries = 0; + usec_wait = cnt * MONDO_USEC_WAIT_MIN; + if (usec_wait > MONDO_USEC_WAIT_MAX) + usec_wait = MONDO_USEC_WAIT_MAX; + retries = tot_retries = 0; + tot_cpus = cnt; prev_sent = 0; + do { - int forward_progress, n_sent; + int n_sent, mondo_delivered, target_cpu_busy; status = sun4v_cpu_mondo_send(cnt, tb->cpu_list_pa, @@ -640,94 +666,113 @@ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) /* HV_EOK means all cpus received the xcall, we're done. */ if (likely(status == HV_EOK)) - break; + goto xcall_done; + + /* If not these non-fatal errors, panic */ + if (unlikely((status != HV_EWOULDBLOCK) && + (status != HV_ECPUERROR) && + (status != HV_ENOCPU))) + goto fatal_errors; /* First, see if we made any forward progress. * + * Go through the cpu_list, count the target cpus that have + * received our mondo (n_sent), and those that did not (rem). + * Re-pack cpu_list with the cpus remain to be retried in the + * front - this simplifies tracking the truly stalled cpus. + * * The hypervisor indicates successful sends by setting * cpu list entries to the value 0xffff. + * + * EWOULDBLOCK means some target cpus did not receive the + * mondo and retry usually helps. + * + * ECPUERROR means at least one target cpu is in error state, + * it's usually safe to skip the faulty cpu and retry. + * + * ENOCPU means one of the target cpu doesn't belong to the + * domain, perhaps offlined which is unexpected, but not + * fatal and it's okay to skip the offlined cpu. */ + rem = 0; n_sent = 0; for (i = 0; i < cnt; i++) { - if (likely(cpu_list[i] == 0xffff)) + cpu = cpu_list[i]; + if (likely(cpu == 0xffff)) { n_sent++; + } else if ((status == HV_ECPUERROR) && + (sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) { + ecpuerror_id = cpu + 1; + } else if (status == HV_ENOCPU && !cpu_online(cpu)) { + enocpu_id = cpu + 1; + } else { + cpu_list[rem++] = cpu; + } } - forward_progress = 0; - if (n_sent > prev_sent) - forward_progress = 1; + /* No cpu remained, we're done. */ + if (rem == 0) + break; - prev_sent = n_sent; + /* Otherwise, update the cpu count for retry. */ + cnt = rem; - /* If we get a HV_ECPUERROR, then one or more of the cpus - * in the list are in error state. Use the cpu_state() - * hypervisor call to find out which cpus are in error state. + /* Record the overall number of mondos received by the + * first of the remaining cpus. */ - if (unlikely(status == HV_ECPUERROR)) { - for (i = 0; i < cnt; i++) { - long err; - u16 cpu; + if (first_cpu != cpu_list[0]) { + first_cpu = cpu_list[0]; + xc_rcvd = CPU_MONDO_COUNTER(first_cpu); + } - cpu = cpu_list[i]; - if (cpu == 0xffff) - continue; + /* Was any mondo delivered successfully? */ + mondo_delivered = (n_sent > prev_sent); + prev_sent = n_sent; - err = sun4v_cpu_state(cpu); - if (err == HV_CPU_STATE_ERROR) { - saw_cpu_error = (cpu + 1); - cpu_list[i] = 0xffff; - } - } - } else if (unlikely(status != HV_EWOULDBLOCK)) - goto fatal_mondo_error; + /* or, was any target cpu busy processing other mondos? */ + target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu)); + xc_rcvd = CPU_MONDO_COUNTER(first_cpu); - /* Don't bother rewriting the CPU list, just leave the - * 0xffff and non-0xffff entries in there and the - * hypervisor will do the right thing. - * - * Only advance timeout state if we didn't make any - * forward progress. + /* Retry count is for no progress. If we're making progress, + * reset the retry count. */ - if (unlikely(!forward_progress)) { - if (unlikely(++retries > 10000)) - goto fatal_mondo_timeout; - - /* Delay a little bit to let other cpus catch up - * on their cpu mondo queue work. - */ - udelay(2 * cnt); + if (likely(mondo_delivered || target_cpu_busy)) { + tot_retries += retries; + retries = 0; + } else if (unlikely(retries > MONDO_RETRY_LIMIT)) { + goto fatal_mondo_timeout; } - } while (1); - if (unlikely(saw_cpu_error)) - goto fatal_mondo_cpu_error; + /* Delay a little bit to let other cpus catch up on + * their cpu mondo queue work. + */ + if (!mondo_delivered) + udelay(usec_wait); - return; + retries++; + } while (1); -fatal_mondo_cpu_error: - printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus " - "(including %d) were in error state\n", - this_cpu, saw_cpu_error - 1); +xcall_done: + if (unlikely(ecpuerror_id > 0)) { + pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n", + this_cpu, ecpuerror_id - 1); + } else if (unlikely(enocpu_id > 0)) { + pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n", + this_cpu, enocpu_id - 1); + } return; +fatal_errors: + /* fatal errors include bad alignment, etc */ + pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n", + this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa); + panic("Unexpected SUN4V mondo error %lu\n", status); + fatal_mondo_timeout: - printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward " - " progress after %d retries.\n", - this_cpu, retries); - goto dump_cpu_list_and_out; - -fatal_mondo_error: - printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n", - this_cpu, status); - printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) " - "mondo_block_pa(%lx)\n", - this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa); - -dump_cpu_list_and_out: - printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu); - for (i = 0; i < cnt; i++) - printk("%u ", cpu_list[i]); - printk("]\n"); + /* some cpus being non-responsive to the cpu mondo */ + pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n", + this_cpu, first_cpu, (tot_retries + retries), tot_cpus); + panic("SUN4V mondo timeout panic\n"); } static void (*xcall_deliver_impl)(struct trap_per_cpu *, int); diff --git a/arch/sparc/kernel/sun4v_ivec.S b/arch/sparc/kernel/sun4v_ivec.S index 559bc5e9c199..34631995859a 100644 --- a/arch/sparc/kernel/sun4v_ivec.S +++ b/arch/sparc/kernel/sun4v_ivec.S @@ -26,6 +26,21 @@ sun4v_cpu_mondo: ldxa [%g0] ASI_SCRATCHPAD, %g4 sub %g4, TRAP_PER_CPU_FAULT_INFO, %g4 + /* Get smp_processor_id() into %g3 */ + sethi %hi(trap_block), %g5 + or %g5, %lo(trap_block), %g5 + sub %g4, %g5, %g3 + srlx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 + + /* Increment cpu_mondo_counter[smp_processor_id()] */ + sethi %hi(cpu_mondo_counter), %g5 + or %g5, %lo(cpu_mondo_counter), %g5 + sllx %g3, 3, %g3 + add %g5, %g3, %g5 + ldx [%g5], %g3 + add %g3, 1, %g3 + stx %g3, [%g5] + /* Get CPU mondo queue base phys address into %g7. */ ldx [%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7 diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index cc97a43268ee..d883c5951e8b 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2659,6 +2659,7 @@ void do_getpsr(struct pt_regs *regs) } } +u64 cpu_mondo_counter[NR_CPUS] = {0}; struct trap_per_cpu trap_block[NR_CPUS]; EXPORT_SYMBOL(trap_block); diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index 395ec1800530..7d961f6e3907 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S @@ -375,6 +375,7 @@ tsb_flush: * %o1: TSB base config pointer * %o2: TSB huge config pointer, or NULL if none * %o3: Hypervisor TSB descriptor physical address + * %o4: Secondary context to load, if non-zero * * We have to run this whole thing with interrupts * disabled so that the current cpu doesn't change @@ -387,6 +388,17 @@ __tsb_context_switch: rdpr %pstate, %g1 wrpr %g1, PSTATE_IE, %pstate + brz,pn %o4, 1f + mov SECONDARY_CONTEXT, %o5 + +661: stxa %o4, [%o5] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %o4, [%o5] ASI_MMU + .previous + flush %g6 + +1: TRAP_LOAD_TRAP_BLOCK(%g2, %g3) stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR] diff --git a/arch/sparc/power/hibernate.c b/arch/sparc/power/hibernate.c index 17bd2e167e07..df707a8ad311 100644 --- a/arch/sparc/power/hibernate.c +++ b/arch/sparc/power/hibernate.c @@ -35,6 +35,5 @@ void restore_processor_state(void) { struct mm_struct *mm = current->active_mm; - load_secondary_context(mm); - tsb_context_switch(mm); + tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context)); } diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index dc1fb28d9636..489b15016303 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -78,7 +78,7 @@ struct thread_info { #ifndef __ASSEMBLY__ -void arch_release_thread_info(struct thread_info *info); +void arch_release_thread_stack(unsigned long *stack); /* How to get the thread information struct from C. */ register unsigned long stack_pointer __asm__("sp"); diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 7d5769310bef..a97ab1a69a90 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -73,8 +73,9 @@ void arch_cpu_idle(void) /* * Release a thread_info structure */ -void arch_release_thread_info(struct thread_info *info) +void arch_release_thread_stack(unsigned long *stack) { + struct thread_info *info = (void *)stack; struct single_step_state *step_state = info->step_state; if (step_state) { diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 318b8465d302..06ceddb3a22e 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -14,6 +14,7 @@ #include <linux/types.h> #include "ctype.h" +#include "string.h" int memcmp(const void *s1, const void *s2, size_t len) { diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h index 725e820602b1..113588ddb43f 100644 --- a/arch/x86/boot/string.h +++ b/arch/x86/boot/string.h @@ -18,4 +18,13 @@ int memcmp(const void *s1, const void *s2, size_t len); #define memset(d,c,l) __builtin_memset(d,c,l) #define memcmp __builtin_memcmp +extern int strcmp(const char *str1, const char *str2); +extern int strncmp(const char *cs, const char *ct, size_t count); +extern size_t strlen(const char *s); +extern char *strstr(const char *s1, const char *s2); +extern size_t strnlen(const char *s, size_t maxlen); +extern unsigned int atou(const char *s); +extern unsigned long long simple_strtoull(const char *cp, char **endp, + unsigned int base); + #endif /* BOOT_STRING_H */ diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index dd14616b7739..7de207a11014 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -201,7 +201,7 @@ asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, static bool avx2_usable(void) { - if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) + if (false && avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI1) && boot_cpu_has(X86_FEATURE_BMI2)) return true; diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index d262f985bbc8..07cf288b692e 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -245,12 +245,13 @@ extern int force_personality32; #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE 4096 -/* This is the location that an ET_DYN program is loaded if exec'ed. Typical - use of this is to invoke "./ld.so someprog" to test out a new version of - the loader. We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. */ - -#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) +/* + * This is the base location for PIE (ET_DYN with INTERP) loads. On + * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * space open for things that want to use the area for 32-bit pointers. + */ +#define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ + 0x100000000UL) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 690b4027e17c..37db36fddc88 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -405,6 +405,8 @@ #define MSR_IA32_TSC_ADJUST 0x0000003b #define MSR_IA32_BNDCFGS 0x00000d90 +#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc + #define MSR_IA32_XSS 0x00000da0 #define FEATURE_CONTROL_LOCKED (1<<0) diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 4c20dd333412..85133b2b8e99 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -43,6 +43,7 @@ #include <asm/page.h> #include <asm/pgtable.h> +#include <asm/smap.h> #include <xen/interface/xen.h> #include <xen/interface/sched.h> @@ -213,10 +214,12 @@ privcmd_call(unsigned call, __HYPERCALL_DECLS; __HYPERCALL_5ARG(a1, a2, a3, a4, a5); + stac(); asm volatile("call *%[call]" : __HYPERCALL_5PARAM : [call] "a" (&hypercall_page[call]) : __HYPERCALL_CLOBBER5); + clac(); return (long)__res; } diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index e75907601a41..1e5eb9f2ff5f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -329,6 +329,14 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, struct mpc_intsrc mp_irq; /* + * Check bus_irq boundary. + */ + if (bus_irq >= NR_IRQS_LEGACY) { + pr_warn("Invalid bus_irq %u for legacy override\n", bus_irq); + return; + } + + /* * Convert 'gsi' to 'ioapic.pin'. */ ioapic = mp_find_ioapic(gsi); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1e5d2f07416b..fc91c98bee01 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2115,7 +2115,7 @@ static inline void __init check_timer(void) int idx; idx = find_irq_entry(apic1, pin1, mp_INT); if (idx != -1 && irq_trigger(idx)) - unmask_ioapic_irq(irq_get_chip_data(0)); + unmask_ioapic_irq(irq_get_irq_data(0)); } irq_domain_deactivate_irq(irq_data); irq_domain_activate_irq(irq_data); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 62aca448726a..2116176c1721 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -682,6 +682,9 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) const char *name = th_names[bank]; int err = 0; + if (!dev) + return -ENODEV; + if (is_shared_bank(bank)) { nb = node_to_amd_nb(amd_get_nb_id(cpu)); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index cec49ecf5f31..32187f8a49b4 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -151,6 +151,8 @@ void kvm_async_pf_task_wait(u32 token) if (hlist_unhashed(&n.link)) break; + rcu_irq_exit(); + if (!n.halted) { local_irq_enable(); schedule(); @@ -159,11 +161,11 @@ void kvm_async_pf_task_wait(u32 token) /* * We cannot reschedule. So halt. */ - rcu_irq_exit(); native_safe_halt(); local_irq_disable(); - rcu_irq_enter(); } + + rcu_irq_enter(); } if (!n.halted) finish_wait(&n.wq, &wait); diff --git a/arch/x86/kernel/livepatch.c b/arch/x86/kernel/livepatch.c index d1d35ccffed3..bcc06e82a593 100644 --- a/arch/x86/kernel/livepatch.c +++ b/arch/x86/kernel/livepatch.c @@ -41,8 +41,8 @@ int klp_write_module_reloc(struct module *mod, unsigned long type, int ret, numpages, size = 4; bool readonly; unsigned long val; - unsigned long core = (unsigned long)mod->module_core; - unsigned long core_size = mod->core_size; + unsigned long core = (unsigned long)mod->core_layout.base; + unsigned long core_size = mod->core_layout.size; switch (type) { case R_X86_64_NONE: @@ -72,7 +72,7 @@ int klp_write_module_reloc(struct module *mod, unsigned long type, readonly = false; #ifdef CONFIG_DEBUG_SET_MODULE_RONX - if (loc < core + mod->core_ro_size) + if (loc < core + mod->core_layout.ro_size) readonly = true; #endif diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 9357b29de9bc..83d6369c45f5 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -46,11 +46,18 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted) return ret; } +bool kvm_mpx_supported(void) +{ + return ((host_xcr0 & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)) + && kvm_x86_ops->mpx_supported()); +} +EXPORT_SYMBOL_GPL(kvm_mpx_supported); + u64 kvm_supported_xcr0(void) { u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0; - if (!kvm_x86_ops->mpx_supported()) + if (!kvm_mpx_supported()) xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); return xcr0; @@ -97,7 +104,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); - vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu); + vcpu->arch.eager_fpu = use_eager_fpu(); if (vcpu->arch.eager_fpu) kvm_x86_ops->fpu_activate(vcpu); @@ -295,7 +302,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, #endif unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; - unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0; + unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0; unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0; /* cpuid 1.edx */ diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 3f5c48ddba45..d1534feefcfe 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -4,6 +4,7 @@ #include "x86.h" int kvm_update_cpuid(struct kvm_vcpu *vcpu); +bool kvm_mpx_supported(void); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function, u32 index); int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, @@ -134,20 +135,20 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) return best && (best->ebx & bit(X86_FEATURE_RTM)); } -static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) +static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_MPX)); + return best && (best->ebx & bit(X86_FEATURE_PCOMMIT)); } -static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu) +static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_PCOMMIT)); + return best && (best->ebx & bit(X86_FEATURE_MPX)); } static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bbaa11f4e74b..b12391119ce8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -863,7 +863,6 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); static u64 construct_eptp(unsigned long root_hpa); static void kvm_cpu_vmxon(u64 addr); static void kvm_cpu_vmxoff(void); -static bool vmx_mpx_supported(void); static bool vmx_xsaves_supported(void); static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu); static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); @@ -2541,7 +2540,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; - if (vmx_mpx_supported()) + if (kvm_mpx_supported()) vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; /* We support free control of debug control saving. */ @@ -2562,7 +2561,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) VM_ENTRY_LOAD_IA32_PAT; vmx->nested.nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); - if (vmx_mpx_supported()) + if (kvm_mpx_supported()) vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; /* We support free control of debug control loading. */ @@ -2813,7 +2812,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); break; case MSR_IA32_BNDCFGS: - if (!vmx_mpx_supported()) + if (!kvm_mpx_supported() || + (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) return 1; msr_info->data = vmcs_read64(GUEST_BNDCFGS); break; @@ -2890,7 +2890,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmcs_writel(GUEST_SYSENTER_ESP, data); break; case MSR_IA32_BNDCFGS: - if (!vmx_mpx_supported()) + if (!kvm_mpx_supported() || + (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) + return 1; + if (is_noncanonical_address(data & PAGE_MASK) || + (data & MSR_IA32_BNDCFGS_RSVD)) return 1; vmcs_write64(GUEST_BNDCFGS, data); break; @@ -3363,7 +3367,7 @@ static void init_vmcs_shadow_fields(void) for (i = j = 0; i < max_shadow_read_write_fields; i++) { switch (shadow_read_write_fields[i]) { case GUEST_BNDCFGS: - if (!vmx_mpx_supported()) + if (!kvm_mpx_supported()) continue; break; default: @@ -6253,7 +6257,6 @@ static __init int hardware_setup(void) vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); - vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true); memcpy(vmx_msr_bitmap_legacy_x2apic, vmx_msr_bitmap_legacy, PAGE_SIZE); @@ -10265,7 +10268,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); - if (vmx_mpx_supported()) + if (kvm_mpx_supported()) vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); if (nested_cpu_has_xsaves(vmcs12)) vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP); diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 0c0468869e25..52154ef21b5e 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -245,6 +245,9 @@ static int crypto_authenc_esn_decrypt_tail(struct aead_request *req, u8 *ihash = ohash + crypto_ahash_digestsize(auth); u32 tmp[2]; + if (!authsize) + goto decrypt; + /* Move high-order bits of sequence number back. */ scatterwalk_map_and_copy(tmp, dst, 4, 4, 0); scatterwalk_map_and_copy(tmp + 1, dst, assoclen + cryptlen, 4, 0); @@ -253,6 +256,8 @@ static int crypto_authenc_esn_decrypt_tail(struct aead_request *req, if (crypto_memneq(ihash, ohash, authsize)) return -EBADMSG; +decrypt: + sg_init_table(areq_ctx->dst, 2); dst = scatterwalk_ffwd(areq_ctx->dst, dst, assoclen); diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 5ea5dc219f56..73c9c7fa9001 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -98,7 +98,15 @@ static int find_child_checks(struct acpi_device *adev, bool check_children) if (check_children && list_empty(&adev->children)) return -ENODEV; - return sta_present ? FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE; + /* + * If the device has a _HID (or _CID) returning a valid ACPI/PNP + * device ID, it is better to make it look less attractive here, so that + * the other device with the same _ADR value (that may not have a valid + * device ID) can be matched going forward. [This means a second spec + * violation in a row, so whatever we do here is best effort anyway.] + */ + return sta_present && list_empty(&adev->pnp.ids) ? + FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE; } struct acpi_device *acpi_find_child_device(struct acpi_device *parent, diff --git a/drivers/android/Makefile b/drivers/android/Makefile index 3b7e4b072c58..4b7c726bb560 100644 --- a/drivers/android/Makefile +++ b/drivers/android/Makefile @@ -1,3 +1,3 @@ ccflags-y += -I$(src) # needed for trace events -obj-$(CONFIG_ANDROID_BINDER_IPC) += binder.o +obj-$(CONFIG_ANDROID_BINDER_IPC) += binder.o binder_alloc.o diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 48cae6cb9b7b..bfdd52ea0d1c 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -15,17 +15,49 @@ * */ +/* + * Locking overview + * + * There are 3 main spinlocks which must be acquired in the + * order shown: + * + * 1) proc->outer_lock : protects binder_ref + * binder_proc_lock() and binder_proc_unlock() are + * used to acq/rel. + * 2) node->lock : protects most fields of binder_node. + * binder_node_lock() and binder_node_unlock() are + * used to acq/rel + * 3) proc->inner_lock : protects the thread and node lists + * (proc->threads, proc->waiting_threads, proc->nodes) + * and all todo lists associated with the binder_proc + * (proc->todo, thread->todo, proc->delivered_death and + * node->async_todo), as well as thread->transaction_stack + * binder_inner_proc_lock() and binder_inner_proc_unlock() + * are used to acq/rel + * + * Any lock under procA must never be nested under any lock at the same + * level or below on procB. + * + * Functions that require a lock held on entry indicate which lock + * in the suffix of the function name: + * + * foo_olocked() : requires node->outer_lock + * foo_nlocked() : requires node->lock + * foo_ilocked() : requires proc->inner_lock + * foo_oilocked(): requires proc->outer_lock and proc->inner_lock + * foo_nilocked(): requires node->lock and proc->inner_lock + * ... + */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <asm/cacheflush.h> -#include <linux/atomic.h> #include <linux/fdtable.h> #include <linux/file.h> #include <linux/freezer.h> #include <linux/fs.h> #include <linux/list.h> #include <linux/miscdevice.h> -#include <linux/mm.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/nsproxy.h> @@ -35,23 +67,32 @@ #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/uaccess.h> -#include <linux/vmalloc.h> -#include <linux/slab.h> #include <linux/pid_namespace.h> #include <linux/security.h> +#include <linux/spinlock.h> #ifdef CONFIG_ANDROID_BINDER_IPC_32BIT #define BINDER_IPC_32BIT 1 #endif #include <uapi/linux/android/binder.h> +#include "binder_alloc.h" #include "binder_trace.h" +static HLIST_HEAD(binder_deferred_list); +static DEFINE_MUTEX(binder_deferred_lock); + static HLIST_HEAD(binder_devices); +static HLIST_HEAD(binder_procs); +static DEFINE_MUTEX(binder_procs_lock); + +static HLIST_HEAD(binder_dead_nodes); +static DEFINE_SPINLOCK(binder_dead_nodes_lock); static struct dentry *binder_debugfs_dir_entry_root; static struct dentry *binder_debugfs_dir_entry_proc; -atomic_t binder_last_id; +static atomic_t binder_last_id; +static struct workqueue_struct *binder_deferred_workqueue; #define BINDER_DEBUG_ENTRY(name) \ static int binder_##name##_open(struct inode *inode, struct file *file) \ @@ -97,17 +138,13 @@ enum { BINDER_DEBUG_TRANSACTION_COMPLETE = 1U << 10, BINDER_DEBUG_FREE_BUFFER = 1U << 11, BINDER_DEBUG_INTERNAL_REFS = 1U << 12, - BINDER_DEBUG_BUFFER_ALLOC = 1U << 13, - BINDER_DEBUG_PRIORITY_CAP = 1U << 14, - BINDER_DEBUG_BUFFER_ALLOC_ASYNC = 1U << 15, + BINDER_DEBUG_PRIORITY_CAP = 1U << 13, + BINDER_DEBUG_SPINLOCKS = 1U << 14, }; static uint32_t binder_debug_mask = BINDER_DEBUG_USER_ERROR | BINDER_DEBUG_FAILED_TRANSACTION | BINDER_DEBUG_DEAD_TRANSACTION; module_param_named(debug_mask, binder_debug_mask, uint, S_IWUSR | S_IRUGO); -static bool binder_debug_no_lock; -module_param_named(proc_no_lock, binder_debug_no_lock, bool, S_IWUSR | S_IRUGO); - static char *binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES; module_param_named(devices, binder_devices_param, charp, S_IRUGO); @@ -164,30 +201,27 @@ enum binder_stat_types { }; struct binder_stats { - int br[_IOC_NR(BR_FAILED_REPLY) + 1]; - int bc[_IOC_NR(BC_REPLY_SG) + 1]; -}; - -/* These are still global, since it's not always easy to get the context */ -struct binder_obj_stats { + atomic_t br[_IOC_NR(BR_FAILED_REPLY) + 1]; + atomic_t bc[_IOC_NR(BC_REPLY_SG) + 1]; atomic_t obj_created[BINDER_STAT_COUNT]; atomic_t obj_deleted[BINDER_STAT_COUNT]; }; -static struct binder_obj_stats binder_obj_stats; +static struct binder_stats binder_stats; static inline void binder_stats_deleted(enum binder_stat_types type) { - atomic_inc(&binder_obj_stats.obj_deleted[type]); + atomic_inc(&binder_stats.obj_deleted[type]); } static inline void binder_stats_created(enum binder_stat_types type) { - atomic_inc(&binder_obj_stats.obj_created[type]); + atomic_inc(&binder_stats.obj_created[type]); } struct binder_transaction_log_entry { int debug_id; + int debug_id_done; int call_type; int from_proc; int from_thread; @@ -197,48 +231,45 @@ struct binder_transaction_log_entry { int to_node; int data_size; int offsets_size; + int return_error_line; + uint32_t return_error; + uint32_t return_error_param; const char *context_name; }; struct binder_transaction_log { - int next; - int full; + atomic_t cur; + bool full; struct binder_transaction_log_entry entry[32]; }; +static struct binder_transaction_log binder_transaction_log; +static struct binder_transaction_log binder_transaction_log_failed; static struct binder_transaction_log_entry *binder_transaction_log_add( struct binder_transaction_log *log) { struct binder_transaction_log_entry *e; + unsigned int cur = atomic_inc_return(&log->cur); - e = &log->entry[log->next]; - memset(e, 0, sizeof(*e)); - log->next++; - if (log->next == ARRAY_SIZE(log->entry)) { - log->next = 0; + if (cur >= ARRAY_SIZE(log->entry)) log->full = 1; - } + e = &log->entry[cur % ARRAY_SIZE(log->entry)]; + WRITE_ONCE(e->debug_id_done, 0); + /* + * write-barrier to synchronize access to e->debug_id_done. + * We make sure the initialized 0 value is seen before + * memset() other fields are zeroed by memset. + */ + smp_wmb(); + memset(e, 0, sizeof(*e)); return e; } struct binder_context { struct binder_node *binder_context_mgr_node; + struct mutex context_mgr_node_lock; + kuid_t binder_context_mgr_uid; const char *name; - - struct mutex binder_main_lock; - struct mutex binder_deferred_lock; - struct mutex binder_mmap_lock; - - struct hlist_head binder_procs; - struct hlist_head binder_dead_nodes; - struct hlist_head binder_deferred_list; - - struct work_struct deferred_work; - struct workqueue_struct *binder_deferred_workqueue; - struct binder_transaction_log transaction_log; - struct binder_transaction_log transaction_log_failed; - - struct binder_stats binder_stats; }; struct binder_device { @@ -247,11 +278,20 @@ struct binder_device { struct binder_context context; }; +/** + * struct binder_work - work enqueued on a worklist + * @entry: node enqueued on list + * @type: type of work to be performed + * + * There are separate work lists for proc, thread, and node (async). + */ struct binder_work { struct list_head entry; + enum { BINDER_WORK_TRANSACTION = 1, BINDER_WORK_TRANSACTION_COMPLETE, + BINDER_WORK_RETURN_ERROR, BINDER_WORK_NODE, BINDER_WORK_DEAD_BINDER, BINDER_WORK_DEAD_BINDER_AND_CLEAR, @@ -259,8 +299,76 @@ struct binder_work { } type; }; +struct binder_error { + struct binder_work work; + uint32_t cmd; +}; + +/** + * struct binder_node - binder node bookkeeping + * @debug_id: unique ID for debugging + * (invariant after initialized) + * @lock: lock for node fields + * @work: worklist element for node work + * (protected by @proc->inner_lock) + * @rb_node: element for proc->nodes tree + * (protected by @proc->inner_lock) + * @dead_node: element for binder_dead_nodes list + * (protected by binder_dead_nodes_lock) + * @proc: binder_proc that owns this node + * (invariant after initialized) + * @refs: list of references on this node + * (protected by @lock) + * @internal_strong_refs: used to take strong references when + * initiating a transaction + * (protected by @proc->inner_lock if @proc + * and by @lock) + * @local_weak_refs: weak user refs from local process + * (protected by @proc->inner_lock if @proc + * and by @lock) + * @local_strong_refs: strong user refs from local process + * (protected by @proc->inner_lock if @proc + * and by @lock) + * @tmp_refs: temporary kernel refs + * (protected by @proc->inner_lock while @proc + * is valid, and by binder_dead_nodes_lock + * if @proc is NULL. During inc/dec and node release + * it is also protected by @lock to provide safety + * as the node dies and @proc becomes NULL) + * @ptr: userspace pointer for node + * (invariant, no lock needed) + * @cookie: userspace cookie for node + * (invariant, no lock needed) + * @has_strong_ref: userspace notified of strong ref + * (protected by @proc->inner_lock if @proc + * and by @lock) + * @pending_strong_ref: userspace has acked notification of strong ref + * (protected by @proc->inner_lock if @proc + * and by @lock) + * @has_weak_ref: userspace notified of weak ref + * (protected by @proc->inner_lock if @proc + * and by @lock) + * @pending_weak_ref: userspace has acked notification of weak ref + * (protected by @proc->inner_lock if @proc + * and by @lock) + * @has_async_transaction: async transaction to node in progress + * (protected by @lock) + * @sched_policy: minimum scheduling policy for node + * (invariant after initialized) + * @accept_fds: file descriptor operations supported for node + * (invariant after initialized) + * @min_priority: minimum scheduling priority + * (invariant after initialized) + * @inherit_rt: inherit RT scheduling policy from caller + * (invariant after initialized) + * @async_todo: list of async work items + * (protected by @proc->inner_lock) + * + * Bookkeeping structure for binder nodes. + */ struct binder_node { int debug_id; + spinlock_t lock; struct binder_work work; union { struct rb_node rb_node; @@ -271,88 +379,185 @@ struct binder_node { int internal_strong_refs; int local_weak_refs; int local_strong_refs; + int tmp_refs; binder_uintptr_t ptr; binder_uintptr_t cookie; - unsigned has_strong_ref:1; - unsigned pending_strong_ref:1; - unsigned has_weak_ref:1; - unsigned pending_weak_ref:1; - unsigned has_async_transaction:1; - unsigned accept_fds:1; - unsigned min_priority:8; + struct { + /* + * bitfield elements protected by + * proc inner_lock + */ + u8 has_strong_ref:1; + u8 pending_strong_ref:1; + u8 has_weak_ref:1; + u8 pending_weak_ref:1; + }; + struct { + /* + * invariant after initialization + */ + u8 sched_policy:2; + u8 inherit_rt:1; + u8 accept_fds:1; + u8 min_priority; + }; + bool has_async_transaction; struct list_head async_todo; }; struct binder_ref_death { + /** + * @work: worklist element for death notifications + * (protected by inner_lock of the proc that + * this ref belongs to) + */ struct binder_work work; binder_uintptr_t cookie; }; +/** + * struct binder_ref_data - binder_ref counts and id + * @debug_id: unique ID for the ref + * @desc: unique userspace handle for ref + * @strong: strong ref count (debugging only if not locked) + * @weak: weak ref count (debugging only if not locked) + * + * Structure to hold ref count and ref id information. Since + * the actual ref can only be accessed with a lock, this structure + * is used to return information about the ref to callers of + * ref inc/dec functions. + */ +struct binder_ref_data { + int debug_id; + uint32_t desc; + int strong; + int weak; +}; + +/** + * struct binder_ref - struct to track references on nodes + * @data: binder_ref_data containing id, handle, and current refcounts + * @rb_node_desc: node for lookup by @data.desc in proc's rb_tree + * @rb_node_node: node for lookup by @node in proc's rb_tree + * @node_entry: list entry for node->refs list in target node + * (protected by @node->lock) + * @proc: binder_proc containing ref + * @node: binder_node of target node. When cleaning up a + * ref for deletion in binder_cleanup_ref, a non-NULL + * @node indicates the node must be freed + * @death: pointer to death notification (ref_death) if requested + * (protected by @node->lock) + * + * Structure to track references from procA to target node (on procB). This + * structure is unsafe to access without holding @proc->outer_lock. + */ struct binder_ref { /* Lookups needed: */ /* node + proc => ref (transaction) */ /* desc + proc => ref (transaction, inc/dec ref) */ /* node => refs + procs (proc exit) */ - int debug_id; + struct binder_ref_data data; struct rb_node rb_node_desc; struct rb_node rb_node_node; struct hlist_node node_entry; struct binder_proc *proc; struct binder_node *node; - uint32_t desc; - int strong; - int weak; struct binder_ref_death *death; }; -struct binder_buffer { - struct list_head entry; /* free and allocated entries by address */ - struct rb_node rb_node; /* free entry by size or allocated entry */ - /* by address */ - unsigned free:1; - unsigned allow_user_free:1; - unsigned async_transaction:1; - unsigned debug_id:29; - - struct binder_transaction *transaction; - - struct binder_node *target_node; - size_t data_size; - size_t offsets_size; - size_t extra_buffers_size; - uint8_t data[0]; -}; - enum binder_deferred_state { BINDER_DEFERRED_PUT_FILES = 0x01, BINDER_DEFERRED_FLUSH = 0x02, BINDER_DEFERRED_RELEASE = 0x04, }; +/** + * struct binder_priority - scheduler policy and priority + * @sched_policy scheduler policy + * @prio [100..139] for SCHED_NORMAL, [0..99] for FIFO/RT + * + * The binder driver supports inheriting the following scheduler policies: + * SCHED_NORMAL + * SCHED_BATCH + * SCHED_FIFO + * SCHED_RR + */ +struct binder_priority { + unsigned int sched_policy; + int prio; +}; + +/** + * struct binder_proc - binder process bookkeeping + * @proc_node: element for binder_procs list + * @threads: rbtree of binder_threads in this proc + * (protected by @inner_lock) + * @nodes: rbtree of binder nodes associated with + * this proc ordered by node->ptr + * (protected by @inner_lock) + * @refs_by_desc: rbtree of refs ordered by ref->desc + * (protected by @outer_lock) + * @refs_by_node: rbtree of refs ordered by ref->node + * (protected by @outer_lock) + * @waiting_threads: threads currently waiting for proc work + * (protected by @inner_lock) + * @pid PID of group_leader of process + * (invariant after initialized) + * @tsk task_struct for group_leader of process + * (invariant after initialized) + * @files files_struct for process + * (invariant after initialized) + * @deferred_work_node: element for binder_deferred_list + * (protected by binder_deferred_lock) + * @deferred_work: bitmap of deferred work to perform + * (protected by binder_deferred_lock) + * @is_dead: process is dead and awaiting free + * when outstanding transactions are cleaned up + * (protected by @inner_lock) + * @todo: list of work for this process + * (protected by @inner_lock) + * @wait: wait queue head to wait for proc work + * (invariant after initialized) + * @stats: per-process binder statistics + * (atomics, no lock needed) + * @delivered_death: list of delivered death notification + * (protected by @inner_lock) + * @max_threads: cap on number of binder threads + * (protected by @inner_lock) + * @requested_threads: number of binder threads requested but not + * yet started. In current implementation, can + * only be 0 or 1. + * (protected by @inner_lock) + * @requested_threads_started: number binder threads started + * (protected by @inner_lock) + * @tmp_ref: temporary reference to indicate proc is in use + * (protected by @inner_lock) + * @default_priority: default scheduler priority + * (invariant after initialized) + * @debugfs_entry: debugfs node + * @alloc: binder allocator bookkeeping + * @context: binder_context for this proc + * (invariant after initialized) + * @inner_lock: can nest under outer_lock and/or node lock + * @outer_lock: no nesting under innor or node lock + * Lock order: 1) outer, 2) node, 3) inner + * + * Bookkeeping structure for binder processes + */ struct binder_proc { struct hlist_node proc_node; struct rb_root threads; struct rb_root nodes; struct rb_root refs_by_desc; struct rb_root refs_by_node; + struct list_head waiting_threads; int pid; - struct vm_area_struct *vma; - struct mm_struct *vma_vm_mm; struct task_struct *tsk; struct files_struct *files; struct hlist_node deferred_work_node; int deferred_work; - void *buffer; - ptrdiff_t user_buffer_offset; + bool is_dead; - struct list_head buffers; - struct rb_root free_buffers; - struct rb_root allocated_buffers; - size_t free_async_space; - - struct page **pages; - size_t buffer_size; - uint32_t buffer_free; struct list_head todo; wait_queue_head_t wait; struct binder_stats stats; @@ -360,10 +565,13 @@ struct binder_proc { int max_threads; int requested_threads; int requested_threads_started; - int ready_threads; - long default_priority; + int tmp_ref; + struct binder_priority default_priority; struct dentry *debugfs_entry; + struct binder_alloc alloc; struct binder_context *context; + spinlock_t inner_lock; + spinlock_t outer_lock; }; enum { @@ -372,22 +580,60 @@ enum { BINDER_LOOPER_STATE_EXITED = 0x04, BINDER_LOOPER_STATE_INVALID = 0x08, BINDER_LOOPER_STATE_WAITING = 0x10, - BINDER_LOOPER_STATE_NEED_RETURN = 0x20 + BINDER_LOOPER_STATE_POLL = 0x20, }; +/** + * struct binder_thread - binder thread bookkeeping + * @proc: binder process for this thread + * (invariant after initialization) + * @rb_node: element for proc->threads rbtree + * (protected by @proc->inner_lock) + * @waiting_thread_node: element for @proc->waiting_threads list + * (protected by @proc->inner_lock) + * @pid: PID for this thread + * (invariant after initialization) + * @looper: bitmap of looping state + * (only accessed by this thread) + * @looper_needs_return: looping thread needs to exit driver + * (no lock needed) + * @transaction_stack: stack of in-progress transactions for this thread + * (protected by @proc->inner_lock) + * @todo: list of work to do for this thread + * (protected by @proc->inner_lock) + * @return_error: transaction errors reported by this thread + * (only accessed by this thread) + * @reply_error: transaction errors reported by target thread + * (protected by @proc->inner_lock) + * @wait: wait queue for thread work + * @stats: per-thread statistics + * (atomics, no lock needed) + * @tmp_ref: temporary reference to indicate thread is in use + * (atomic since @proc->inner_lock cannot + * always be acquired) + * @is_dead: thread is dead and awaiting free + * when outstanding transactions are cleaned up + * (protected by @proc->inner_lock) + * @task: struct task_struct for this thread + * + * Bookkeeping structure for binder threads. + */ struct binder_thread { struct binder_proc *proc; struct rb_node rb_node; + struct list_head waiting_thread_node; int pid; - int looper; + int looper; /* only modified by this thread */ + bool looper_need_return; /* can be written by other thread */ struct binder_transaction *transaction_stack; struct list_head todo; - uint32_t return_error; /* Write failed, return error code in read buf */ - uint32_t return_error2; /* Write failed, return error code in read */ - /* buffer. Used when sending a reply to a dead process that */ - /* we are also waiting on */ + struct binder_error return_error; + struct binder_error reply_error; wait_queue_head_t wait; struct binder_stats stats; + atomic_t tmp_ref; + bool is_dead; + struct task_struct *task; }; struct binder_transaction { @@ -404,13 +650,257 @@ struct binder_transaction { struct binder_buffer *buffer; unsigned int code; unsigned int flags; - long priority; - long saved_priority; + struct binder_priority priority; + struct binder_priority saved_priority; + bool set_priority_called; kuid_t sender_euid; + /** + * @lock: protects @from, @to_proc, and @to_thread + * + * @from, @to_proc, and @to_thread can be set to NULL + * during thread teardown + */ + spinlock_t lock; }; +/** + * binder_proc_lock() - Acquire outer lock for given binder_proc + * @proc: struct binder_proc to acquire + * + * Acquires proc->outer_lock. Used to protect binder_ref + * structures associated with the given proc. + */ +#define binder_proc_lock(proc) _binder_proc_lock(proc, __LINE__) +static void +_binder_proc_lock(struct binder_proc *proc, int line) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_lock(&proc->outer_lock); +} + +/** + * binder_proc_unlock() - Release spinlock for given binder_proc + * @proc: struct binder_proc to acquire + * + * Release lock acquired via binder_proc_lock() + */ +#define binder_proc_unlock(_proc) _binder_proc_unlock(_proc, __LINE__) +static void +_binder_proc_unlock(struct binder_proc *proc, int line) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_unlock(&proc->outer_lock); +} + +/** + * binder_inner_proc_lock() - Acquire inner lock for given binder_proc + * @proc: struct binder_proc to acquire + * + * Acquires proc->inner_lock. Used to protect todo lists + */ +#define binder_inner_proc_lock(proc) _binder_inner_proc_lock(proc, __LINE__) +static void +_binder_inner_proc_lock(struct binder_proc *proc, int line) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_lock(&proc->inner_lock); +} + +/** + * binder_inner_proc_unlock() - Release inner lock for given binder_proc + * @proc: struct binder_proc to acquire + * + * Release lock acquired via binder_inner_proc_lock() + */ +#define binder_inner_proc_unlock(proc) _binder_inner_proc_unlock(proc, __LINE__) +static void +_binder_inner_proc_unlock(struct binder_proc *proc, int line) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_unlock(&proc->inner_lock); +} + +/** + * binder_node_lock() - Acquire spinlock for given binder_node + * @node: struct binder_node to acquire + * + * Acquires node->lock. Used to protect binder_node fields + */ +#define binder_node_lock(node) _binder_node_lock(node, __LINE__) +static void +_binder_node_lock(struct binder_node *node, int line) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_lock(&node->lock); +} + +/** + * binder_node_unlock() - Release spinlock for given binder_proc + * @node: struct binder_node to acquire + * + * Release lock acquired via binder_node_lock() + */ +#define binder_node_unlock(node) _binder_node_unlock(node, __LINE__) +static void +_binder_node_unlock(struct binder_node *node, int line) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_unlock(&node->lock); +} + +/** + * binder_node_inner_lock() - Acquire node and inner locks + * @node: struct binder_node to acquire + * + * Acquires node->lock. If node->proc also acquires + * proc->inner_lock. Used to protect binder_node fields + */ +#define binder_node_inner_lock(node) _binder_node_inner_lock(node, __LINE__) +static void +_binder_node_inner_lock(struct binder_node *node, int line) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_lock(&node->lock); + if (node->proc) + binder_inner_proc_lock(node->proc); +} + +/** + * binder_node_unlock() - Release node and inner locks + * @node: struct binder_node to acquire + * + * Release lock acquired via binder_node_lock() + */ +#define binder_node_inner_unlock(node) _binder_node_inner_unlock(node, __LINE__) +static void +_binder_node_inner_unlock(struct binder_node *node, int line) +{ + struct binder_proc *proc = node->proc; + + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + if (proc) + binder_inner_proc_unlock(proc); + spin_unlock(&node->lock); +} + +static bool binder_worklist_empty_ilocked(struct list_head *list) +{ + return list_empty(list); +} + +/** + * binder_worklist_empty() - Check if no items on the work list + * @proc: binder_proc associated with list + * @list: list to check + * + * Return: true if there are no items on list, else false + */ +static bool binder_worklist_empty(struct binder_proc *proc, + struct list_head *list) +{ + bool ret; + + binder_inner_proc_lock(proc); + ret = binder_worklist_empty_ilocked(list); + binder_inner_proc_unlock(proc); + return ret; +} + +static void +binder_enqueue_work_ilocked(struct binder_work *work, + struct list_head *target_list) +{ + BUG_ON(target_list == NULL); + BUG_ON(work->entry.next && !list_empty(&work->entry)); + list_add_tail(&work->entry, target_list); +} + +/** + * binder_enqueue_work() - Add an item to the work list + * @proc: binder_proc associated with list + * @work: struct binder_work to add to list + * @target_list: list to add work to + * + * Adds the work to the specified list. Asserts that work + * is not already on a list. + */ +static void +binder_enqueue_work(struct binder_proc *proc, + struct binder_work *work, + struct list_head *target_list) +{ + binder_inner_proc_lock(proc); + binder_enqueue_work_ilocked(work, target_list); + binder_inner_proc_unlock(proc); +} + +static void +binder_dequeue_work_ilocked(struct binder_work *work) +{ + list_del_init(&work->entry); +} + +/** + * binder_dequeue_work() - Removes an item from the work list + * @proc: binder_proc associated with list + * @work: struct binder_work to remove from list + * + * Removes the specified work item from whatever list it is on. + * Can safely be called if work is not on any list. + */ +static void +binder_dequeue_work(struct binder_proc *proc, struct binder_work *work) +{ + binder_inner_proc_lock(proc); + binder_dequeue_work_ilocked(work); + binder_inner_proc_unlock(proc); +} + +static struct binder_work *binder_dequeue_work_head_ilocked( + struct list_head *list) +{ + struct binder_work *w; + + w = list_first_entry_or_null(list, struct binder_work, entry); + if (w) + list_del_init(&w->entry); + return w; +} + +/** + * binder_dequeue_work_head() - Dequeues the item at head of list + * @proc: binder_proc associated with list + * @list: list to dequeue head + * + * Removes the head of the list if there are items on the list + * + * Return: pointer dequeued binder_work, NULL if list was empty + */ +static struct binder_work *binder_dequeue_work_head( + struct binder_proc *proc, + struct list_head *list) +{ + struct binder_work *w; + + binder_inner_proc_lock(proc); + w = binder_dequeue_work_head_ilocked(list); + binder_inner_proc_unlock(proc); + return w; +} + static void binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer); +static void binder_free_thread(struct binder_thread *thread); +static void binder_free_proc(struct binder_proc *proc); +static void binder_inc_node_tmpref_ilocked(struct binder_node *node); static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) { @@ -461,463 +951,281 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) return retval; } -static inline void binder_lock(struct binder_context *context, const char *tag) +static bool binder_has_work_ilocked(struct binder_thread *thread, + bool do_proc_work) { - trace_binder_lock(tag); - mutex_lock(&context->binder_main_lock); - trace_binder_locked(tag); + return !binder_worklist_empty_ilocked(&thread->todo) || + thread->looper_need_return || + (do_proc_work && + !binder_worklist_empty_ilocked(&thread->proc->todo)); } -static inline void binder_unlock(struct binder_context *context, - const char *tag) +static bool binder_has_work(struct binder_thread *thread, bool do_proc_work) { - trace_binder_unlock(tag); - mutex_unlock(&context->binder_main_lock); -} + bool has_work; -static void binder_set_nice(long nice) -{ - long min_nice; + binder_inner_proc_lock(thread->proc); + has_work = binder_has_work_ilocked(thread, do_proc_work); + binder_inner_proc_unlock(thread->proc); - if (can_nice(current, nice)) { - set_user_nice(current, nice); - return; - } - min_nice = rlimit_to_nice(current->signal->rlim[RLIMIT_NICE].rlim_cur); - binder_debug(BINDER_DEBUG_PRIORITY_CAP, - "%d: nice value %ld not allowed use %ld instead\n", - current->pid, nice, min_nice); - set_user_nice(current, min_nice); - if (min_nice <= MAX_NICE) - return; - binder_user_error("%d RLIMIT_NICE not set\n", current->pid); + return has_work; } -static size_t binder_buffer_size(struct binder_proc *proc, - struct binder_buffer *buffer) +static bool binder_available_for_proc_work_ilocked(struct binder_thread *thread) { - if (list_is_last(&buffer->entry, &proc->buffers)) - return proc->buffer + proc->buffer_size - (void *)buffer->data; - return (size_t)list_entry(buffer->entry.next, - struct binder_buffer, entry) - (size_t)buffer->data; + return !thread->transaction_stack && + binder_worklist_empty_ilocked(&thread->todo) && + (thread->looper & (BINDER_LOOPER_STATE_ENTERED | + BINDER_LOOPER_STATE_REGISTERED)); } -static void binder_insert_free_buffer(struct binder_proc *proc, - struct binder_buffer *new_buffer) +static void binder_wakeup_poll_threads_ilocked(struct binder_proc *proc, + bool sync) { - struct rb_node **p = &proc->free_buffers.rb_node; - struct rb_node *parent = NULL; - struct binder_buffer *buffer; - size_t buffer_size; - size_t new_buffer_size; - - BUG_ON(!new_buffer->free); - - new_buffer_size = binder_buffer_size(proc, new_buffer); - - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%d: add free buffer, size %zd, at %p\n", - proc->pid, new_buffer_size, new_buffer); - - while (*p) { - parent = *p; - buffer = rb_entry(parent, struct binder_buffer, rb_node); - BUG_ON(!buffer->free); - - buffer_size = binder_buffer_size(proc, buffer); + struct rb_node *n; + struct binder_thread *thread; - if (new_buffer_size < buffer_size) - p = &parent->rb_left; - else - p = &parent->rb_right; + for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) { + thread = rb_entry(n, struct binder_thread, rb_node); + if (thread->looper & BINDER_LOOPER_STATE_POLL && + binder_available_for_proc_work_ilocked(thread)) { + if (sync) + wake_up_interruptible_sync(&thread->wait); + else + wake_up_interruptible(&thread->wait); + } } - rb_link_node(&new_buffer->rb_node, parent, p); - rb_insert_color(&new_buffer->rb_node, &proc->free_buffers); } -static void binder_insert_allocated_buffer(struct binder_proc *proc, - struct binder_buffer *new_buffer) +/** + * binder_select_thread_ilocked() - selects a thread for doing proc work. + * @proc: process to select a thread from + * + * Note that calling this function moves the thread off the waiting_threads + * list, so it can only be woken up by the caller of this function, or a + * signal. Therefore, callers *should* always wake up the thread this function + * returns. + * + * Return: If there's a thread currently waiting for process work, + * returns that thread. Otherwise returns NULL. + */ +static struct binder_thread * +binder_select_thread_ilocked(struct binder_proc *proc) { - struct rb_node **p = &proc->allocated_buffers.rb_node; - struct rb_node *parent = NULL; - struct binder_buffer *buffer; + struct binder_thread *thread; - BUG_ON(new_buffer->free); + assert_spin_locked(&proc->inner_lock); + thread = list_first_entry_or_null(&proc->waiting_threads, + struct binder_thread, + waiting_thread_node); - while (*p) { - parent = *p; - buffer = rb_entry(parent, struct binder_buffer, rb_node); - BUG_ON(buffer->free); + if (thread) + list_del_init(&thread->waiting_thread_node); - if (new_buffer < buffer) - p = &parent->rb_left; - else if (new_buffer > buffer) - p = &parent->rb_right; - else - BUG(); - } - rb_link_node(&new_buffer->rb_node, parent, p); - rb_insert_color(&new_buffer->rb_node, &proc->allocated_buffers); + return thread; } -static struct binder_buffer *binder_buffer_lookup(struct binder_proc *proc, - uintptr_t user_ptr) +/** + * binder_wakeup_thread_ilocked() - wakes up a thread for doing proc work. + * @proc: process to wake up a thread in + * @thread: specific thread to wake-up (may be NULL) + * @sync: whether to do a synchronous wake-up + * + * This function wakes up a thread in the @proc process. + * The caller may provide a specific thread to wake-up in + * the @thread parameter. If @thread is NULL, this function + * will wake up threads that have called poll(). + * + * Note that for this function to work as expected, callers + * should first call binder_select_thread() to find a thread + * to handle the work (if they don't have a thread already), + * and pass the result into the @thread parameter. + */ +static void binder_wakeup_thread_ilocked(struct binder_proc *proc, + struct binder_thread *thread, + bool sync) { - struct rb_node *n = proc->allocated_buffers.rb_node; - struct binder_buffer *buffer; - struct binder_buffer *kern_ptr; - - kern_ptr = (struct binder_buffer *)(user_ptr - proc->user_buffer_offset - - offsetof(struct binder_buffer, data)); - - while (n) { - buffer = rb_entry(n, struct binder_buffer, rb_node); - BUG_ON(buffer->free); + assert_spin_locked(&proc->inner_lock); - if (kern_ptr < buffer) - n = n->rb_left; - else if (kern_ptr > buffer) - n = n->rb_right; + if (thread) { + if (sync) + wake_up_interruptible_sync(&thread->wait); else - return buffer; + wake_up_interruptible(&thread->wait); + return; } - return NULL; + + /* Didn't find a thread waiting for proc work; this can happen + * in two scenarios: + * 1. All threads are busy handling transactions + * In that case, one of those threads should call back into + * the kernel driver soon and pick up this work. + * 2. Threads are using the (e)poll interface, in which case + * they may be blocked on the waitqueue without having been + * added to waiting_threads. For this case, we just iterate + * over all threads not handling transaction work, and + * wake them all up. We wake all because we don't know whether + * a thread that called into (e)poll is handling non-binder + * work currently. + */ + binder_wakeup_poll_threads_ilocked(proc, sync); } -static int binder_update_page_range(struct binder_proc *proc, int allocate, - void *start, void *end, - struct vm_area_struct *vma) +static void binder_wakeup_proc_ilocked(struct binder_proc *proc) { - void *page_addr; - unsigned long user_page_addr; - struct page **page; - struct mm_struct *mm; - - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%d: %s pages %p-%p\n", proc->pid, - allocate ? "allocate" : "free", start, end); + struct binder_thread *thread = binder_select_thread_ilocked(proc); - if (end <= start) - return 0; - - trace_binder_update_page_range(proc, allocate, start, end); - - if (vma) - mm = NULL; - else - mm = get_task_mm(proc->tsk); - - if (mm) { - down_write(&mm->mmap_sem); - vma = proc->vma; - if (vma && mm != proc->vma_vm_mm) { - pr_err("%d: vma mm and task mm mismatch\n", - proc->pid); - vma = NULL; - } - } - - if (allocate == 0) - goto free_range; + binder_wakeup_thread_ilocked(proc, thread, /* sync = */false); +} - if (vma == NULL) { - pr_err("%d: binder_alloc_buf failed to map pages in userspace, no vma\n", - proc->pid); - goto err_no_vma; - } +static bool is_rt_policy(int policy) +{ + return policy == SCHED_FIFO || policy == SCHED_RR; +} - for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) { - int ret; +static bool is_fair_policy(int policy) +{ + return policy == SCHED_NORMAL || policy == SCHED_BATCH; +} - page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE]; +static bool binder_supported_policy(int policy) +{ + return is_fair_policy(policy) || is_rt_policy(policy); +} - BUG_ON(*page); - *page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); - if (*page == NULL) { - pr_err("%d: binder_alloc_buf failed for page at %p\n", - proc->pid, page_addr); - goto err_alloc_page_failed; - } - ret = map_kernel_range_noflush((unsigned long)page_addr, - PAGE_SIZE, PAGE_KERNEL, page); - flush_cache_vmap((unsigned long)page_addr, - (unsigned long)page_addr + PAGE_SIZE); - if (ret != 1) { - pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n", - proc->pid, page_addr); - goto err_map_kernel_failed; - } - user_page_addr = - (uintptr_t)page_addr + proc->user_buffer_offset; - ret = vm_insert_page(vma, user_page_addr, page[0]); - if (ret) { - pr_err("%d: binder_alloc_buf failed to map page at %lx in userspace\n", - proc->pid, user_page_addr); - goto err_vm_insert_page_failed; - } - /* vm_insert_page does not seem to increment the refcount */ - } - if (mm) { - up_write(&mm->mmap_sem); - mmput(mm); - } - return 0; +static int to_userspace_prio(int policy, int kernel_priority) +{ + if (is_fair_policy(policy)) + return PRIO_TO_NICE(kernel_priority); + else + return MAX_USER_RT_PRIO - 1 - kernel_priority; +} -free_range: - for (page_addr = end - PAGE_SIZE; page_addr >= start; - page_addr -= PAGE_SIZE) { - page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE]; - if (vma) - zap_page_range(vma, (uintptr_t)page_addr + - proc->user_buffer_offset, PAGE_SIZE, NULL); -err_vm_insert_page_failed: - unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); -err_map_kernel_failed: - __free_page(*page); - *page = NULL; -err_alloc_page_failed: - ; - } -err_no_vma: - if (mm) { - up_write(&mm->mmap_sem); - mmput(mm); - } - return -ENOMEM; +static int to_kernel_prio(int policy, int user_priority) +{ + if (is_fair_policy(policy)) + return NICE_TO_PRIO(user_priority); + else + return MAX_USER_RT_PRIO - 1 - user_priority; } -static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, - size_t data_size, - size_t offsets_size, - size_t extra_buffers_size, - int is_async) +static void binder_do_set_priority(struct task_struct *task, + struct binder_priority desired, + bool verify) { - struct rb_node *n = proc->free_buffers.rb_node; - struct binder_buffer *buffer; - size_t buffer_size; - struct rb_node *best_fit = NULL; - void *has_page_addr; - void *end_page_addr; - size_t size, data_offsets_size; - - if (proc->vma == NULL) { - pr_err("%d: binder_alloc_buf, no vma\n", - proc->pid); - return NULL; - } + int priority; /* user-space prio value */ + bool has_cap_nice; + unsigned int policy = desired.sched_policy; - data_offsets_size = ALIGN(data_size, sizeof(void *)) + - ALIGN(offsets_size, sizeof(void *)); + if (task->policy == policy && task->normal_prio == desired.prio) + return; - if (data_offsets_size < data_size || data_offsets_size < offsets_size) { - binder_user_error("%d: got transaction with invalid size %zd-%zd\n", - proc->pid, data_size, offsets_size); - return NULL; - } - size = data_offsets_size + ALIGN(extra_buffers_size, sizeof(void *)); - if (size < data_offsets_size || size < extra_buffers_size) { - binder_user_error("%d: got transaction with invalid extra_buffers_size %zd\n", - proc->pid, extra_buffers_size); - return NULL; - } - if (is_async && - proc->free_async_space < size + sizeof(struct binder_buffer)) { - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%d: binder_alloc_buf size %zd failed, no async space left\n", - proc->pid, size); - return NULL; - } + has_cap_nice = has_capability_noaudit(task, CAP_SYS_NICE); - while (n) { - buffer = rb_entry(n, struct binder_buffer, rb_node); - BUG_ON(!buffer->free); - buffer_size = binder_buffer_size(proc, buffer); + priority = to_userspace_prio(policy, desired.prio); - if (size < buffer_size) { - best_fit = n; - n = n->rb_left; - } else if (size > buffer_size) - n = n->rb_right; - else { - best_fit = n; - break; + if (verify && is_rt_policy(policy) && !has_cap_nice) { + long max_rtprio = task_rlimit(task, RLIMIT_RTPRIO); + + if (max_rtprio == 0) { + policy = SCHED_NORMAL; + priority = MIN_NICE; + } else if (priority > max_rtprio) { + priority = max_rtprio; } } - if (best_fit == NULL) { - pr_err("%d: binder_alloc_buf size %zd failed, no address space\n", - proc->pid, size); - return NULL; - } - if (n == NULL) { - buffer = rb_entry(best_fit, struct binder_buffer, rb_node); - buffer_size = binder_buffer_size(proc, buffer); - } - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%d: binder_alloc_buf size %zd got buffer %p size %zd\n", - proc->pid, size, buffer, buffer_size); + if (verify && is_fair_policy(policy) && !has_cap_nice) { + long min_nice = rlimit_to_nice(task_rlimit(task, RLIMIT_NICE)); - has_page_addr = - (void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK); - if (n == NULL) { - if (size + sizeof(struct binder_buffer) + 4 >= buffer_size) - buffer_size = size; /* no room for other buffers */ - else - buffer_size = size + sizeof(struct binder_buffer); + if (min_nice > MAX_NICE) { + binder_user_error("%d RLIMIT_NICE not set\n", + task->pid); + return; + } else if (priority < min_nice) { + priority = min_nice; + } } - end_page_addr = - (void *)PAGE_ALIGN((uintptr_t)buffer->data + buffer_size); - if (end_page_addr > has_page_addr) - end_page_addr = has_page_addr; - if (binder_update_page_range(proc, 1, - (void *)PAGE_ALIGN((uintptr_t)buffer->data), end_page_addr, NULL)) - return NULL; - rb_erase(best_fit, &proc->free_buffers); - buffer->free = 0; - binder_insert_allocated_buffer(proc, buffer); - if (buffer_size != size) { - struct binder_buffer *new_buffer = (void *)buffer->data + size; + if (policy != desired.sched_policy || + to_kernel_prio(policy, priority) != desired.prio) + binder_debug(BINDER_DEBUG_PRIORITY_CAP, + "%d: priority %d not allowed, using %d instead\n", + task->pid, desired.prio, + to_kernel_prio(policy, priority)); - list_add(&new_buffer->entry, &buffer->entry); - new_buffer->free = 1; - binder_insert_free_buffer(proc, new_buffer); - } - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%d: binder_alloc_buf size %zd got %p\n", - proc->pid, size, buffer); - buffer->data_size = data_size; - buffer->offsets_size = offsets_size; - buffer->extra_buffers_size = extra_buffers_size; - buffer->async_transaction = is_async; - if (is_async) { - proc->free_async_space -= size + sizeof(struct binder_buffer); - binder_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, - "%d: binder_alloc_buf size %zd async free %zd\n", - proc->pid, size, proc->free_async_space); - } + /* Set the actual priority */ + if (task->policy != policy || is_rt_policy(policy)) { + struct sched_param params; - return buffer; -} + params.sched_priority = is_rt_policy(policy) ? priority : 0; -static void *buffer_start_page(struct binder_buffer *buffer) -{ - return (void *)((uintptr_t)buffer & PAGE_MASK); + sched_setscheduler_nocheck(task, + policy | SCHED_RESET_ON_FORK, + ¶ms); + } + if (is_fair_policy(policy)) + set_user_nice(task, priority); } -static void *buffer_end_page(struct binder_buffer *buffer) +static void binder_set_priority(struct task_struct *task, + struct binder_priority desired) { - return (void *)(((uintptr_t)(buffer + 1) - 1) & PAGE_MASK); + binder_do_set_priority(task, desired, /* verify = */ true); } -static void binder_delete_free_buffer(struct binder_proc *proc, - struct binder_buffer *buffer) +static void binder_restore_priority(struct task_struct *task, + struct binder_priority desired) { - struct binder_buffer *prev, *next = NULL; - int free_page_end = 1; - int free_page_start = 1; - - BUG_ON(proc->buffers.next == &buffer->entry); - prev = list_entry(buffer->entry.prev, struct binder_buffer, entry); - BUG_ON(!prev->free); - if (buffer_end_page(prev) == buffer_start_page(buffer)) { - free_page_start = 0; - if (buffer_end_page(prev) == buffer_end_page(buffer)) - free_page_end = 0; - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%d: merge free, buffer %p share page with %p\n", - proc->pid, buffer, prev); - } - - if (!list_is_last(&buffer->entry, &proc->buffers)) { - next = list_entry(buffer->entry.next, - struct binder_buffer, entry); - if (buffer_start_page(next) == buffer_end_page(buffer)) { - free_page_end = 0; - if (buffer_start_page(next) == - buffer_start_page(buffer)) - free_page_start = 0; - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%d: merge free, buffer %p share page with %p\n", - proc->pid, buffer, prev); - } - } - list_del(&buffer->entry); - if (free_page_start || free_page_end) { - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%d: merge free, buffer %p do not share page%s%s with %p or %p\n", - proc->pid, buffer, free_page_start ? "" : " end", - free_page_end ? "" : " start", prev, next); - binder_update_page_range(proc, 0, free_page_start ? - buffer_start_page(buffer) : buffer_end_page(buffer), - (free_page_end ? buffer_end_page(buffer) : - buffer_start_page(buffer)) + PAGE_SIZE, NULL); - } + binder_do_set_priority(task, desired, /* verify = */ false); } -static void binder_free_buf(struct binder_proc *proc, - struct binder_buffer *buffer) +static void binder_transaction_priority(struct task_struct *task, + struct binder_transaction *t, + struct binder_priority node_prio, + bool inherit_rt) { - size_t size, buffer_size; - - buffer_size = binder_buffer_size(proc, buffer); + struct binder_priority desired_prio; - size = ALIGN(buffer->data_size, sizeof(void *)) + - ALIGN(buffer->offsets_size, sizeof(void *)) + - ALIGN(buffer->extra_buffers_size, sizeof(void *)); - - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%d: binder_free_buf %p size %zd buffer_size %zd\n", - proc->pid, buffer, size, buffer_size); - - BUG_ON(buffer->free); - BUG_ON(size > buffer_size); - BUG_ON(buffer->transaction != NULL); - BUG_ON((void *)buffer < proc->buffer); - BUG_ON((void *)buffer > proc->buffer + proc->buffer_size); + if (t->set_priority_called) + return; - if (buffer->async_transaction) { - proc->free_async_space += size + sizeof(struct binder_buffer); + t->set_priority_called = true; + t->saved_priority.sched_policy = task->policy; + t->saved_priority.prio = task->normal_prio; - binder_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, - "%d: binder_free_buf size %zd async free %zd\n", - proc->pid, size, proc->free_async_space); + if (!inherit_rt && is_rt_policy(desired_prio.sched_policy)) { + desired_prio.prio = NICE_TO_PRIO(0); + desired_prio.sched_policy = SCHED_NORMAL; + } else { + desired_prio.prio = t->priority.prio; + desired_prio.sched_policy = t->priority.sched_policy; } - binder_update_page_range(proc, 0, - (void *)PAGE_ALIGN((uintptr_t)buffer->data), - (void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK), - NULL); - rb_erase(&buffer->rb_node, &proc->allocated_buffers); - buffer->free = 1; - if (!list_is_last(&buffer->entry, &proc->buffers)) { - struct binder_buffer *next = list_entry(buffer->entry.next, - struct binder_buffer, entry); - - if (next->free) { - rb_erase(&next->rb_node, &proc->free_buffers); - binder_delete_free_buffer(proc, next); - } - } - if (proc->buffers.next != &buffer->entry) { - struct binder_buffer *prev = list_entry(buffer->entry.prev, - struct binder_buffer, entry); - - if (prev->free) { - binder_delete_free_buffer(proc, buffer); - rb_erase(&prev->rb_node, &proc->free_buffers); - buffer = prev; - } + if (node_prio.prio < t->priority.prio || + (node_prio.prio == t->priority.prio && + node_prio.sched_policy == SCHED_FIFO)) { + /* + * In case the minimum priority on the node is + * higher (lower value), use that priority. If + * the priority is the same, but the node uses + * SCHED_FIFO, prefer SCHED_FIFO, since it can + * run unbounded, unlike SCHED_RR. + */ + desired_prio = node_prio; } - binder_insert_free_buffer(proc, buffer); + + binder_set_priority(task, desired_prio); } -static struct binder_node *binder_get_node(struct binder_proc *proc, - binder_uintptr_t ptr) +static struct binder_node *binder_get_node_ilocked(struct binder_proc *proc, + binder_uintptr_t ptr) { struct rb_node *n = proc->nodes.rb_node; struct binder_node *node; + assert_spin_locked(&proc->inner_lock); + while (n) { node = rb_entry(n, struct binder_node, rb_node); @@ -925,21 +1233,47 @@ static struct binder_node *binder_get_node(struct binder_proc *proc, n = n->rb_left; else if (ptr > node->ptr) n = n->rb_right; - else + else { + /* + * take an implicit weak reference + * to ensure node stays alive until + * call to binder_put_node() + */ + binder_inc_node_tmpref_ilocked(node); return node; + } } return NULL; } -static struct binder_node *binder_new_node(struct binder_proc *proc, - binder_uintptr_t ptr, - binder_uintptr_t cookie) +static struct binder_node *binder_get_node(struct binder_proc *proc, + binder_uintptr_t ptr) +{ + struct binder_node *node; + + binder_inner_proc_lock(proc); + node = binder_get_node_ilocked(proc, ptr); + binder_inner_proc_unlock(proc); + return node; +} + +static struct binder_node *binder_init_node_ilocked( + struct binder_proc *proc, + struct binder_node *new_node, + struct flat_binder_object *fp) { struct rb_node **p = &proc->nodes.rb_node; struct rb_node *parent = NULL; struct binder_node *node; + binder_uintptr_t ptr = fp ? fp->binder : 0; + binder_uintptr_t cookie = fp ? fp->cookie : 0; + __u32 flags = fp ? fp->flags : 0; + s8 priority; + + assert_spin_locked(&proc->inner_lock); while (*p) { + parent = *p; node = rb_entry(parent, struct binder_node, rb_node); @@ -947,14 +1281,19 @@ static struct binder_node *binder_new_node(struct binder_proc *proc, p = &(*p)->rb_left; else if (ptr > node->ptr) p = &(*p)->rb_right; - else - return NULL; + else { + /* + * A matching node is already in + * the rb tree. Abandon the init + * and return it. + */ + binder_inc_node_tmpref_ilocked(node); + return node; + } } - - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (node == NULL) - return NULL; + node = new_node; binder_stats_created(BINDER_STAT_NODE); + node->tmp_refs++; rb_link_node(&node->rb_node, parent, p); rb_insert_color(&node->rb_node, &proc->nodes); node->debug_id = atomic_inc_return(&binder_last_id); @@ -962,18 +1301,58 @@ static struct binder_node *binder_new_node(struct binder_proc *proc, node->ptr = ptr; node->cookie = cookie; node->work.type = BINDER_WORK_NODE; + priority = flags & FLAT_BINDER_FLAG_PRIORITY_MASK; + node->sched_policy = (flags & FLAT_BINDER_FLAG_PRIORITY_MASK) >> + FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT; + node->min_priority = to_kernel_prio(node->sched_policy, priority); + node->accept_fds = !!(flags & FLAT_BINDER_FLAG_ACCEPTS_FDS); + node->inherit_rt = !!(flags & FLAT_BINDER_FLAG_INHERIT_RT); + spin_lock_init(&node->lock); INIT_LIST_HEAD(&node->work.entry); INIT_LIST_HEAD(&node->async_todo); binder_debug(BINDER_DEBUG_INTERNAL_REFS, "%d:%d node %d u%016llx c%016llx created\n", proc->pid, current->pid, node->debug_id, (u64)node->ptr, (u64)node->cookie); + return node; } -static int binder_inc_node(struct binder_node *node, int strong, int internal, - struct list_head *target_list) +static struct binder_node *binder_new_node(struct binder_proc *proc, + struct flat_binder_object *fp) +{ + struct binder_node *node; + struct binder_node *new_node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (!new_node) + return NULL; + binder_inner_proc_lock(proc); + node = binder_init_node_ilocked(proc, new_node, fp); + binder_inner_proc_unlock(proc); + if (node != new_node) + /* + * The node was already added by another thread + */ + kfree(new_node); + + return node; +} + +static void binder_free_node(struct binder_node *node) +{ + kfree(node); + binder_stats_deleted(BINDER_STAT_NODE); +} + +static int binder_inc_node_nilocked(struct binder_node *node, int strong, + int internal, + struct list_head *target_list) { + struct binder_proc *proc = node->proc; + + assert_spin_locked(&node->lock); + if (proc) + assert_spin_locked(&proc->inner_lock); if (strong) { if (internal) { if (target_list == NULL && @@ -990,8 +1369,8 @@ static int binder_inc_node(struct binder_node *node, int strong, int internal, } else node->local_strong_refs++; if (!node->has_strong_ref && target_list) { - list_del_init(&node->work.entry); - list_add_tail(&node->work.entry, target_list); + binder_dequeue_work_ilocked(&node->work); + binder_enqueue_work_ilocked(&node->work, target_list); } } else { if (!internal) @@ -1002,58 +1381,169 @@ static int binder_inc_node(struct binder_node *node, int strong, int internal, node->debug_id); return -EINVAL; } - list_add_tail(&node->work.entry, target_list); + binder_enqueue_work_ilocked(&node->work, target_list); } } return 0; } -static int binder_dec_node(struct binder_node *node, int strong, int internal) +static int binder_inc_node(struct binder_node *node, int strong, int internal, + struct list_head *target_list) +{ + int ret; + + binder_node_inner_lock(node); + ret = binder_inc_node_nilocked(node, strong, internal, target_list); + binder_node_inner_unlock(node); + + return ret; +} + +static bool binder_dec_node_nilocked(struct binder_node *node, + int strong, int internal) { + struct binder_proc *proc = node->proc; + + assert_spin_locked(&node->lock); + if (proc) + assert_spin_locked(&proc->inner_lock); if (strong) { if (internal) node->internal_strong_refs--; else node->local_strong_refs--; if (node->local_strong_refs || node->internal_strong_refs) - return 0; + return false; } else { if (!internal) node->local_weak_refs--; - if (node->local_weak_refs || !hlist_empty(&node->refs)) - return 0; + if (node->local_weak_refs || node->tmp_refs || + !hlist_empty(&node->refs)) + return false; } - if (node->proc && (node->has_strong_ref || node->has_weak_ref)) { + + if (proc && (node->has_strong_ref || node->has_weak_ref)) { if (list_empty(&node->work.entry)) { - list_add_tail(&node->work.entry, &node->proc->todo); - wake_up_interruptible(&node->proc->wait); + binder_enqueue_work_ilocked(&node->work, &proc->todo); + binder_wakeup_proc_ilocked(proc); } } else { if (hlist_empty(&node->refs) && !node->local_strong_refs && - !node->local_weak_refs) { - list_del_init(&node->work.entry); - if (node->proc) { - rb_erase(&node->rb_node, &node->proc->nodes); + !node->local_weak_refs && !node->tmp_refs) { + if (proc) { + binder_dequeue_work_ilocked(&node->work); + rb_erase(&node->rb_node, &proc->nodes); binder_debug(BINDER_DEBUG_INTERNAL_REFS, "refless node %d deleted\n", node->debug_id); } else { + BUG_ON(!list_empty(&node->work.entry)); + spin_lock(&binder_dead_nodes_lock); + /* + * tmp_refs could have changed so + * check it again + */ + if (node->tmp_refs) { + spin_unlock(&binder_dead_nodes_lock); + return false; + } hlist_del(&node->dead_node); + spin_unlock(&binder_dead_nodes_lock); binder_debug(BINDER_DEBUG_INTERNAL_REFS, "dead node %d deleted\n", node->debug_id); } - kfree(node); - binder_stats_deleted(BINDER_STAT_NODE); + return true; } } + return false; +} - return 0; +static void binder_dec_node(struct binder_node *node, int strong, int internal) +{ + bool free_node; + + binder_node_inner_lock(node); + free_node = binder_dec_node_nilocked(node, strong, internal); + binder_node_inner_unlock(node); + if (free_node) + binder_free_node(node); +} + +static void binder_inc_node_tmpref_ilocked(struct binder_node *node) +{ + /* + * No call to binder_inc_node() is needed since we + * don't need to inform userspace of any changes to + * tmp_refs + */ + node->tmp_refs++; +} + +/** + * binder_inc_node_tmpref() - take a temporary reference on node + * @node: node to reference + * + * Take reference on node to prevent the node from being freed + * while referenced only by a local variable. The inner lock is + * needed to serialize with the node work on the queue (which + * isn't needed after the node is dead). If the node is dead + * (node->proc is NULL), use binder_dead_nodes_lock to protect + * node->tmp_refs against dead-node-only cases where the node + * lock cannot be acquired (eg traversing the dead node list to + * print nodes) + */ +static void binder_inc_node_tmpref(struct binder_node *node) +{ + binder_node_lock(node); + if (node->proc) + binder_inner_proc_lock(node->proc); + else + spin_lock(&binder_dead_nodes_lock); + binder_inc_node_tmpref_ilocked(node); + if (node->proc) + binder_inner_proc_unlock(node->proc); + else + spin_unlock(&binder_dead_nodes_lock); + binder_node_unlock(node); +} + +/** + * binder_dec_node_tmpref() - remove a temporary reference on node + * @node: node to reference + * + * Release temporary reference on node taken via binder_inc_node_tmpref() + */ +static void binder_dec_node_tmpref(struct binder_node *node) +{ + bool free_node; + + binder_node_inner_lock(node); + if (!node->proc) + spin_lock(&binder_dead_nodes_lock); + node->tmp_refs--; + BUG_ON(node->tmp_refs < 0); + if (!node->proc) + spin_unlock(&binder_dead_nodes_lock); + /* + * Call binder_dec_node() to check if all refcounts are 0 + * and cleanup is needed. Calling with strong=0 and internal=1 + * causes no actual reference to be released in binder_dec_node(). + * If that changes, a change is needed here too. + */ + free_node = binder_dec_node_nilocked(node, 0, 1); + binder_node_inner_unlock(node); + if (free_node) + binder_free_node(node); } +static void binder_put_node(struct binder_node *node) +{ + binder_dec_node_tmpref(node); +} -static struct binder_ref *binder_get_ref(struct binder_proc *proc, - u32 desc, bool need_strong_ref) +static struct binder_ref *binder_get_ref_olocked(struct binder_proc *proc, + u32 desc, bool need_strong_ref) { struct rb_node *n = proc->refs_by_desc.rb_node; struct binder_ref *ref; @@ -1061,11 +1551,11 @@ static struct binder_ref *binder_get_ref(struct binder_proc *proc, while (n) { ref = rb_entry(n, struct binder_ref, rb_node_desc); - if (desc < ref->desc) { + if (desc < ref->data.desc) { n = n->rb_left; - } else if (desc > ref->desc) { + } else if (desc > ref->data.desc) { n = n->rb_right; - } else if (need_strong_ref && !ref->strong) { + } else if (need_strong_ref && !ref->data.strong) { binder_user_error("tried to use weak ref as strong ref\n"); return NULL; } else { @@ -1075,14 +1565,34 @@ static struct binder_ref *binder_get_ref(struct binder_proc *proc, return NULL; } -static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, - struct binder_node *node) +/** + * binder_get_ref_for_node_olocked() - get the ref associated with given node + * @proc: binder_proc that owns the ref + * @node: binder_node of target + * @new_ref: newly allocated binder_ref to be initialized or %NULL + * + * Look up the ref for the given node and return it if it exists + * + * If it doesn't exist and the caller provides a newly allocated + * ref, initialize the fields of the newly allocated ref and insert + * into the given proc rb_trees and node refs list. + * + * Return: the ref for node. It is possible that another thread + * allocated/initialized the ref first in which case the + * returned ref would be different than the passed-in + * new_ref. new_ref must be kfree'd by the caller in + * this case. + */ +static struct binder_ref *binder_get_ref_for_node_olocked( + struct binder_proc *proc, + struct binder_node *node, + struct binder_ref *new_ref) { - struct rb_node *n; + struct binder_context *context = proc->context; struct rb_node **p = &proc->refs_by_node.rb_node; struct rb_node *parent = NULL; - struct binder_ref *ref, *new_ref; - struct binder_context *context = proc->context; + struct binder_ref *ref; + struct rb_node *n; while (*p) { parent = *p; @@ -1095,22 +1605,22 @@ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, else return ref; } - new_ref = kzalloc(sizeof(*ref), GFP_KERNEL); - if (new_ref == NULL) + if (!new_ref) return NULL; + binder_stats_created(BINDER_STAT_REF); - new_ref->debug_id = atomic_inc_return(&binder_last_id); + new_ref->data.debug_id = atomic_inc_return(&binder_last_id); new_ref->proc = proc; new_ref->node = node; rb_link_node(&new_ref->rb_node_node, parent, p); rb_insert_color(&new_ref->rb_node_node, &proc->refs_by_node); - new_ref->desc = (node == context->binder_context_mgr_node) ? 0 : 1; + new_ref->data.desc = (node == context->binder_context_mgr_node) ? 0 : 1; for (n = rb_first(&proc->refs_by_desc); n != NULL; n = rb_next(n)) { ref = rb_entry(n, struct binder_ref, rb_node_desc); - if (ref->desc > new_ref->desc) + if (ref->data.desc > new_ref->data.desc) break; - new_ref->desc = ref->desc + 1; + new_ref->data.desc = ref->data.desc + 1; } p = &proc->refs_by_desc.rb_node; @@ -1118,121 +1628,423 @@ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, parent = *p; ref = rb_entry(parent, struct binder_ref, rb_node_desc); - if (new_ref->desc < ref->desc) + if (new_ref->data.desc < ref->data.desc) p = &(*p)->rb_left; - else if (new_ref->desc > ref->desc) + else if (new_ref->data.desc > ref->data.desc) p = &(*p)->rb_right; else BUG(); } rb_link_node(&new_ref->rb_node_desc, parent, p); rb_insert_color(&new_ref->rb_node_desc, &proc->refs_by_desc); - if (node) { - hlist_add_head(&new_ref->node_entry, &node->refs); - binder_debug(BINDER_DEBUG_INTERNAL_REFS, - "%d new ref %d desc %d for node %d\n", - proc->pid, new_ref->debug_id, new_ref->desc, - node->debug_id); - } else { - binder_debug(BINDER_DEBUG_INTERNAL_REFS, - "%d new ref %d desc %d for dead node\n", - proc->pid, new_ref->debug_id, new_ref->desc); - } + binder_node_lock(node); + hlist_add_head(&new_ref->node_entry, &node->refs); + + binder_debug(BINDER_DEBUG_INTERNAL_REFS, + "%d new ref %d desc %d for node %d\n", + proc->pid, new_ref->data.debug_id, new_ref->data.desc, + node->debug_id); + binder_node_unlock(node); return new_ref; } -static void binder_delete_ref(struct binder_ref *ref) +static void binder_cleanup_ref_olocked(struct binder_ref *ref) { + bool delete_node = false; + binder_debug(BINDER_DEBUG_INTERNAL_REFS, "%d delete ref %d desc %d for node %d\n", - ref->proc->pid, ref->debug_id, ref->desc, + ref->proc->pid, ref->data.debug_id, ref->data.desc, ref->node->debug_id); rb_erase(&ref->rb_node_desc, &ref->proc->refs_by_desc); rb_erase(&ref->rb_node_node, &ref->proc->refs_by_node); - if (ref->strong) - binder_dec_node(ref->node, 1, 1); + + binder_node_inner_lock(ref->node); + if (ref->data.strong) + binder_dec_node_nilocked(ref->node, 1, 1); + hlist_del(&ref->node_entry); - binder_dec_node(ref->node, 0, 1); + delete_node = binder_dec_node_nilocked(ref->node, 0, 1); + binder_node_inner_unlock(ref->node); + /* + * Clear ref->node unless we want the caller to free the node + */ + if (!delete_node) { + /* + * The caller uses ref->node to determine + * whether the node needs to be freed. Clear + * it since the node is still alive. + */ + ref->node = NULL; + } + if (ref->death) { binder_debug(BINDER_DEBUG_DEAD_BINDER, "%d delete ref %d desc %d has death notification\n", - ref->proc->pid, ref->debug_id, ref->desc); - list_del(&ref->death->work.entry); - kfree(ref->death); + ref->proc->pid, ref->data.debug_id, + ref->data.desc); + binder_dequeue_work(ref->proc, &ref->death->work); binder_stats_deleted(BINDER_STAT_DEATH); } - kfree(ref); binder_stats_deleted(BINDER_STAT_REF); } -static int binder_inc_ref(struct binder_ref *ref, int strong, - struct list_head *target_list) +/** + * binder_inc_ref_olocked() - increment the ref for given handle + * @ref: ref to be incremented + * @strong: if true, strong increment, else weak + * @target_list: list to queue node work on + * + * Increment the ref. @ref->proc->outer_lock must be held on entry + * + * Return: 0, if successful, else errno + */ +static int binder_inc_ref_olocked(struct binder_ref *ref, int strong, + struct list_head *target_list) { int ret; if (strong) { - if (ref->strong == 0) { + if (ref->data.strong == 0) { ret = binder_inc_node(ref->node, 1, 1, target_list); if (ret) return ret; } - ref->strong++; + ref->data.strong++; } else { - if (ref->weak == 0) { + if (ref->data.weak == 0) { ret = binder_inc_node(ref->node, 0, 1, target_list); if (ret) return ret; } - ref->weak++; + ref->data.weak++; } return 0; } - -static int binder_dec_ref(struct binder_ref *ref, int strong) +/** + * binder_dec_ref() - dec the ref for given handle + * @ref: ref to be decremented + * @strong: if true, strong decrement, else weak + * + * Decrement the ref. + * + * Return: true if ref is cleaned up and ready to be freed + */ +static bool binder_dec_ref_olocked(struct binder_ref *ref, int strong) { if (strong) { - if (ref->strong == 0) { + if (ref->data.strong == 0) { binder_user_error("%d invalid dec strong, ref %d desc %d s %d w %d\n", - ref->proc->pid, ref->debug_id, - ref->desc, ref->strong, ref->weak); - return -EINVAL; - } - ref->strong--; - if (ref->strong == 0) { - int ret; - - ret = binder_dec_node(ref->node, strong, 1); - if (ret) - return ret; + ref->proc->pid, ref->data.debug_id, + ref->data.desc, ref->data.strong, + ref->data.weak); + return false; } + ref->data.strong--; + if (ref->data.strong == 0) + binder_dec_node(ref->node, strong, 1); } else { - if (ref->weak == 0) { + if (ref->data.weak == 0) { binder_user_error("%d invalid dec weak, ref %d desc %d s %d w %d\n", - ref->proc->pid, ref->debug_id, - ref->desc, ref->strong, ref->weak); - return -EINVAL; + ref->proc->pid, ref->data.debug_id, + ref->data.desc, ref->data.strong, + ref->data.weak); + return false; } - ref->weak--; + ref->data.weak--; } - if (ref->strong == 0 && ref->weak == 0) - binder_delete_ref(ref); - return 0; + if (ref->data.strong == 0 && ref->data.weak == 0) { + binder_cleanup_ref_olocked(ref); + return true; + } + return false; } -static void binder_pop_transaction(struct binder_thread *target_thread, - struct binder_transaction *t) +/** + * binder_get_node_from_ref() - get the node from the given proc/desc + * @proc: proc containing the ref + * @desc: the handle associated with the ref + * @need_strong_ref: if true, only return node if ref is strong + * @rdata: the id/refcount data for the ref + * + * Given a proc and ref handle, return the associated binder_node + * + * Return: a binder_node or NULL if not found or not strong when strong required + */ +static struct binder_node *binder_get_node_from_ref( + struct binder_proc *proc, + u32 desc, bool need_strong_ref, + struct binder_ref_data *rdata) { - if (target_thread) { - BUG_ON(target_thread->transaction_stack != t); - BUG_ON(target_thread->transaction_stack->from != target_thread); - target_thread->transaction_stack = - target_thread->transaction_stack->from_parent; - t->from = NULL; + struct binder_node *node; + struct binder_ref *ref; + + binder_proc_lock(proc); + ref = binder_get_ref_olocked(proc, desc, need_strong_ref); + if (!ref) + goto err_no_ref; + node = ref->node; + /* + * Take an implicit reference on the node to ensure + * it stays alive until the call to binder_put_node() + */ + binder_inc_node_tmpref(node); + if (rdata) + *rdata = ref->data; + binder_proc_unlock(proc); + + return node; + +err_no_ref: + binder_proc_unlock(proc); + return NULL; +} + +/** + * binder_free_ref() - free the binder_ref + * @ref: ref to free + * + * Free the binder_ref. Free the binder_node indicated by ref->node + * (if non-NULL) and the binder_ref_death indicated by ref->death. + */ +static void binder_free_ref(struct binder_ref *ref) +{ + if (ref->node) + binder_free_node(ref->node); + kfree(ref->death); + kfree(ref); +} + +/** + * binder_update_ref_for_handle() - inc/dec the ref for given handle + * @proc: proc containing the ref + * @desc: the handle associated with the ref + * @increment: true=inc reference, false=dec reference + * @strong: true=strong reference, false=weak reference + * @rdata: the id/refcount data for the ref + * + * Given a proc and ref handle, increment or decrement the ref + * according to "increment" arg. + * + * Return: 0 if successful, else errno + */ +static int binder_update_ref_for_handle(struct binder_proc *proc, + uint32_t desc, bool increment, bool strong, + struct binder_ref_data *rdata) +{ + int ret = 0; + struct binder_ref *ref; + bool delete_ref = false; + + binder_proc_lock(proc); + ref = binder_get_ref_olocked(proc, desc, strong); + if (!ref) { + ret = -EINVAL; + goto err_no_ref; } - t->need_reply = 0; + if (increment) + ret = binder_inc_ref_olocked(ref, strong, NULL); + else + delete_ref = binder_dec_ref_olocked(ref, strong); + + if (rdata) + *rdata = ref->data; + binder_proc_unlock(proc); + + if (delete_ref) + binder_free_ref(ref); + return ret; + +err_no_ref: + binder_proc_unlock(proc); + return ret; +} + +/** + * binder_dec_ref_for_handle() - dec the ref for given handle + * @proc: proc containing the ref + * @desc: the handle associated with the ref + * @strong: true=strong reference, false=weak reference + * @rdata: the id/refcount data for the ref + * + * Just calls binder_update_ref_for_handle() to decrement the ref. + * + * Return: 0 if successful, else errno + */ +static int binder_dec_ref_for_handle(struct binder_proc *proc, + uint32_t desc, bool strong, struct binder_ref_data *rdata) +{ + return binder_update_ref_for_handle(proc, desc, false, strong, rdata); +} + + +/** + * binder_inc_ref_for_node() - increment the ref for given proc/node + * @proc: proc containing the ref + * @node: target node + * @strong: true=strong reference, false=weak reference + * @target_list: worklist to use if node is incremented + * @rdata: the id/refcount data for the ref + * + * Given a proc and node, increment the ref. Create the ref if it + * doesn't already exist + * + * Return: 0 if successful, else errno + */ +static int binder_inc_ref_for_node(struct binder_proc *proc, + struct binder_node *node, + bool strong, + struct list_head *target_list, + struct binder_ref_data *rdata) +{ + struct binder_ref *ref; + struct binder_ref *new_ref = NULL; + int ret = 0; + + binder_proc_lock(proc); + ref = binder_get_ref_for_node_olocked(proc, node, NULL); + if (!ref) { + binder_proc_unlock(proc); + new_ref = kzalloc(sizeof(*ref), GFP_KERNEL); + if (!new_ref) + return -ENOMEM; + binder_proc_lock(proc); + ref = binder_get_ref_for_node_olocked(proc, node, new_ref); + } + ret = binder_inc_ref_olocked(ref, strong, target_list); + *rdata = ref->data; + binder_proc_unlock(proc); + if (new_ref && ref != new_ref) + /* + * Another thread created the ref first so + * free the one we allocated + */ + kfree(new_ref); + return ret; +} + +static void binder_pop_transaction_ilocked(struct binder_thread *target_thread, + struct binder_transaction *t) +{ + BUG_ON(!target_thread); + assert_spin_locked(&target_thread->proc->inner_lock); + BUG_ON(target_thread->transaction_stack != t); + BUG_ON(target_thread->transaction_stack->from != target_thread); + target_thread->transaction_stack = + target_thread->transaction_stack->from_parent; + t->from = NULL; +} + +/** + * binder_thread_dec_tmpref() - decrement thread->tmp_ref + * @thread: thread to decrement + * + * A thread needs to be kept alive while being used to create or + * handle a transaction. binder_get_txn_from() is used to safely + * extract t->from from a binder_transaction and keep the thread + * indicated by t->from from being freed. When done with that + * binder_thread, this function is called to decrement the + * tmp_ref and free if appropriate (thread has been released + * and no transaction being processed by the driver) + */ +static void binder_thread_dec_tmpref(struct binder_thread *thread) +{ + /* + * atomic is used to protect the counter value while + * it cannot reach zero or thread->is_dead is false + */ + binder_inner_proc_lock(thread->proc); + atomic_dec(&thread->tmp_ref); + if (thread->is_dead && !atomic_read(&thread->tmp_ref)) { + binder_inner_proc_unlock(thread->proc); + binder_free_thread(thread); + return; + } + binder_inner_proc_unlock(thread->proc); +} + +/** + * binder_proc_dec_tmpref() - decrement proc->tmp_ref + * @proc: proc to decrement + * + * A binder_proc needs to be kept alive while being used to create or + * handle a transaction. proc->tmp_ref is incremented when + * creating a new transaction or the binder_proc is currently in-use + * by threads that are being released. When done with the binder_proc, + * this function is called to decrement the counter and free the + * proc if appropriate (proc has been released, all threads have + * been released and not currenly in-use to process a transaction). + */ +static void binder_proc_dec_tmpref(struct binder_proc *proc) +{ + binder_inner_proc_lock(proc); + proc->tmp_ref--; + if (proc->is_dead && RB_EMPTY_ROOT(&proc->threads) && + !proc->tmp_ref) { + binder_inner_proc_unlock(proc); + binder_free_proc(proc); + return; + } + binder_inner_proc_unlock(proc); +} + +/** + * binder_get_txn_from() - safely extract the "from" thread in transaction + * @t: binder transaction for t->from + * + * Atomically return the "from" thread and increment the tmp_ref + * count for the thread to ensure it stays alive until + * binder_thread_dec_tmpref() is called. + * + * Return: the value of t->from + */ +static struct binder_thread *binder_get_txn_from( + struct binder_transaction *t) +{ + struct binder_thread *from; + + spin_lock(&t->lock); + from = t->from; + if (from) + atomic_inc(&from->tmp_ref); + spin_unlock(&t->lock); + return from; +} + +/** + * binder_get_txn_from_and_acq_inner() - get t->from and acquire inner lock + * @t: binder transaction for t->from + * + * Same as binder_get_txn_from() except it also acquires the proc->inner_lock + * to guarantee that the thread cannot be released while operating on it. + * The caller must call binder_inner_proc_unlock() to release the inner lock + * as well as call binder_dec_thread_txn() to release the reference. + * + * Return: the value of t->from + */ +static struct binder_thread *binder_get_txn_from_and_acq_inner( + struct binder_transaction *t) +{ + struct binder_thread *from; + + from = binder_get_txn_from(t); + if (!from) + return NULL; + binder_inner_proc_lock(from->proc); + if (t->from) { + BUG_ON(from != t->from); + return from; + } + binder_inner_proc_unlock(from->proc); + binder_thread_dec_tmpref(from); + return NULL; +} + +static void binder_free_transaction(struct binder_transaction *t) +{ if (t->buffer) t->buffer->transaction = NULL; kfree(t); @@ -1247,30 +2059,28 @@ static void binder_send_failed_reply(struct binder_transaction *t, BUG_ON(t->flags & TF_ONE_WAY); while (1) { - target_thread = t->from; + target_thread = binder_get_txn_from_and_acq_inner(t); if (target_thread) { - if (target_thread->return_error != BR_OK && - target_thread->return_error2 == BR_OK) { - target_thread->return_error2 = - target_thread->return_error; - target_thread->return_error = BR_OK; - } - if (target_thread->return_error == BR_OK) { - binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, - "send failed reply for transaction %d to %d:%d\n", - t->debug_id, - target_thread->proc->pid, - target_thread->pid); - - binder_pop_transaction(target_thread, t); - target_thread->return_error = error_code; + binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, + "send failed reply for transaction %d to %d:%d\n", + t->debug_id, + target_thread->proc->pid, + target_thread->pid); + + binder_pop_transaction_ilocked(target_thread, t); + if (target_thread->reply_error.cmd == BR_OK) { + target_thread->reply_error.cmd = error_code; + binder_enqueue_work_ilocked( + &target_thread->reply_error.work, + &target_thread->todo); wake_up_interruptible(&target_thread->wait); } else { - pr_err("reply failed, target thread, %d:%d, has error code %d already\n", - target_thread->proc->pid, - target_thread->pid, - target_thread->return_error); + WARN(1, "Unexpected reply error: %u\n", + target_thread->reply_error.cmd); } + binder_inner_proc_unlock(target_thread->proc); + binder_thread_dec_tmpref(target_thread); + binder_free_transaction(t); return; } next = t->from_parent; @@ -1279,7 +2089,7 @@ static void binder_send_failed_reply(struct binder_transaction *t, "send failed reply for transaction %d, target dead\n", t->debug_id); - binder_pop_transaction(target_thread, t); + binder_free_transaction(t); if (next == NULL) { binder_debug(BINDER_DEBUG_DEAD_BINDER, "reply failed, no target thread at root\n"); @@ -1488,25 +2298,26 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, node->debug_id, (u64)node->ptr); binder_dec_node(node, hdr->type == BINDER_TYPE_BINDER, 0); + binder_put_node(node); } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { struct flat_binder_object *fp; - struct binder_ref *ref; + struct binder_ref_data rdata; + int ret; fp = to_flat_binder_object(hdr); - ref = binder_get_ref(proc, fp->handle, - hdr->type == BINDER_TYPE_HANDLE); + ret = binder_dec_ref_for_handle(proc, fp->handle, + hdr->type == BINDER_TYPE_HANDLE, &rdata); - if (ref == NULL) { - pr_err("transaction release %d bad handle %d\n", - debug_id, fp->handle); + if (ret) { + pr_err("transaction release %d bad handle %d, ret = %d\n", + debug_id, fp->handle, ret); break; } binder_debug(BINDER_DEBUG_TRANSACTION, - " ref %d desc %d (node %d)\n", - ref->debug_id, ref->desc, ref->node->debug_id); - binder_dec_ref(ref, hdr->type == BINDER_TYPE_HANDLE); + " ref %d desc %d\n", + rdata.debug_id, rdata.desc); } break; case BINDER_TYPE_FD: { @@ -1545,7 +2356,8 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, * back to kernel address space to access it */ parent_buffer = parent->buffer - - proc->user_buffer_offset; + binder_alloc_get_user_buffer_offset( + &proc->alloc); fd_buf_size = sizeof(u32) * fda->num_fds; if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { @@ -1577,102 +2389,122 @@ static int binder_translate_binder(struct flat_binder_object *fp, struct binder_thread *thread) { struct binder_node *node; - struct binder_ref *ref; struct binder_proc *proc = thread->proc; struct binder_proc *target_proc = t->to_proc; + struct binder_ref_data rdata; + int ret = 0; node = binder_get_node(proc, fp->binder); if (!node) { - node = binder_new_node(proc, fp->binder, fp->cookie); + node = binder_new_node(proc, fp); if (!node) return -ENOMEM; - - node->min_priority = fp->flags & FLAT_BINDER_FLAG_PRIORITY_MASK; - node->accept_fds = !!(fp->flags & FLAT_BINDER_FLAG_ACCEPTS_FDS); } if (fp->cookie != node->cookie) { binder_user_error("%d:%d sending u%016llx node %d, cookie mismatch %016llx != %016llx\n", proc->pid, thread->pid, (u64)fp->binder, node->debug_id, (u64)fp->cookie, (u64)node->cookie); - return -EINVAL; + ret = -EINVAL; + goto done; + } + if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) { + ret = -EPERM; + goto done; } - if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) - return -EPERM; - ref = binder_get_ref_for_node(target_proc, node); - if (!ref) - return -EINVAL; + ret = binder_inc_ref_for_node(target_proc, node, + fp->hdr.type == BINDER_TYPE_BINDER, + &thread->todo, &rdata); + if (ret) + goto done; if (fp->hdr.type == BINDER_TYPE_BINDER) fp->hdr.type = BINDER_TYPE_HANDLE; else fp->hdr.type = BINDER_TYPE_WEAK_HANDLE; fp->binder = 0; - fp->handle = ref->desc; + fp->handle = rdata.desc; fp->cookie = 0; - binder_inc_ref(ref, fp->hdr.type == BINDER_TYPE_HANDLE, &thread->todo); - trace_binder_transaction_node_to_ref(t, node, ref); + trace_binder_transaction_node_to_ref(t, node, &rdata); binder_debug(BINDER_DEBUG_TRANSACTION, " node %d u%016llx -> ref %d desc %d\n", node->debug_id, (u64)node->ptr, - ref->debug_id, ref->desc); - - return 0; + rdata.debug_id, rdata.desc); +done: + binder_put_node(node); + return ret; } static int binder_translate_handle(struct flat_binder_object *fp, struct binder_transaction *t, struct binder_thread *thread) { - struct binder_ref *ref; struct binder_proc *proc = thread->proc; struct binder_proc *target_proc = t->to_proc; + struct binder_node *node; + struct binder_ref_data src_rdata; + int ret = 0; - ref = binder_get_ref(proc, fp->handle, - fp->hdr.type == BINDER_TYPE_HANDLE); - if (!ref) { + node = binder_get_node_from_ref(proc, fp->handle, + fp->hdr.type == BINDER_TYPE_HANDLE, &src_rdata); + if (!node) { binder_user_error("%d:%d got transaction with invalid handle, %d\n", proc->pid, thread->pid, fp->handle); return -EINVAL; } - if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) - return -EPERM; + if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) { + ret = -EPERM; + goto done; + } - if (ref->node->proc == target_proc) { + binder_node_lock(node); + if (node->proc == target_proc) { if (fp->hdr.type == BINDER_TYPE_HANDLE) fp->hdr.type = BINDER_TYPE_BINDER; else fp->hdr.type = BINDER_TYPE_WEAK_BINDER; - fp->binder = ref->node->ptr; - fp->cookie = ref->node->cookie; - binder_inc_node(ref->node, fp->hdr.type == BINDER_TYPE_BINDER, - 0, NULL); - trace_binder_transaction_ref_to_node(t, ref); + fp->binder = node->ptr; + fp->cookie = node->cookie; + if (node->proc) + binder_inner_proc_lock(node->proc); + binder_inc_node_nilocked(node, + fp->hdr.type == BINDER_TYPE_BINDER, + 0, NULL); + if (node->proc) + binder_inner_proc_unlock(node->proc); + trace_binder_transaction_ref_to_node(t, node, &src_rdata); binder_debug(BINDER_DEBUG_TRANSACTION, " ref %d desc %d -> node %d u%016llx\n", - ref->debug_id, ref->desc, ref->node->debug_id, - (u64)ref->node->ptr); + src_rdata.debug_id, src_rdata.desc, node->debug_id, + (u64)node->ptr); + binder_node_unlock(node); } else { - struct binder_ref *new_ref; + int ret; + struct binder_ref_data dest_rdata; - new_ref = binder_get_ref_for_node(target_proc, ref->node); - if (!new_ref) - return -EINVAL; + binder_node_unlock(node); + ret = binder_inc_ref_for_node(target_proc, node, + fp->hdr.type == BINDER_TYPE_HANDLE, + NULL, &dest_rdata); + if (ret) + goto done; fp->binder = 0; - fp->handle = new_ref->desc; + fp->handle = dest_rdata.desc; fp->cookie = 0; - binder_inc_ref(new_ref, fp->hdr.type == BINDER_TYPE_HANDLE, - NULL); - trace_binder_transaction_ref_to_ref(t, ref, new_ref); + trace_binder_transaction_ref_to_ref(t, node, &src_rdata, + &dest_rdata); binder_debug(BINDER_DEBUG_TRANSACTION, " ref %d desc %d -> ref %d desc %d (node %d)\n", - ref->debug_id, ref->desc, new_ref->debug_id, - new_ref->desc, ref->node->debug_id); + src_rdata.debug_id, src_rdata.desc, + dest_rdata.debug_id, dest_rdata.desc, + node->debug_id); } - return 0; +done: + binder_put_node(node); + return ret; } static int binder_translate_fd(int fd, @@ -1763,7 +2595,8 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, * Since the parent was already fixed up, convert it * back to the kernel address space to access it */ - parent_buffer = parent->buffer - target_proc->user_buffer_offset; + parent_buffer = parent->buffer - + binder_alloc_get_user_buffer_offset(&target_proc->alloc); fd_array = (u32 *)(parent_buffer + fda->parent_offset); if (!IS_ALIGNED((unsigned long)fd_array, sizeof(u32))) { binder_user_error("%d:%d parent offset not aligned correctly.\n", @@ -1831,12 +2664,87 @@ static int binder_fixup_parent(struct binder_transaction *t, return -EINVAL; } parent_buffer = (u8 *)(parent->buffer - - target_proc->user_buffer_offset); + binder_alloc_get_user_buffer_offset( + &target_proc->alloc)); *(binder_uintptr_t *)(parent_buffer + bp->parent_offset) = bp->buffer; return 0; } +/** + * binder_proc_transaction() - sends a transaction to a process and wakes it up + * @t: transaction to send + * @proc: process to send the transaction to + * @thread: thread in @proc to send the transaction to (may be NULL) + * + * This function queues a transaction to the specified process. It will try + * to find a thread in the target process to handle the transaction and + * wake it up. If no thread is found, the work is queued to the proc + * waitqueue. + * + * If the @thread parameter is not NULL, the transaction is always queued + * to the waitlist of that specific thread. + * + * Return: true if the transactions was successfully queued + * false if the target process or thread is dead + */ +static bool binder_proc_transaction(struct binder_transaction *t, + struct binder_proc *proc, + struct binder_thread *thread) +{ + struct list_head *target_list = NULL; + struct binder_node *node = t->buffer->target_node; + struct binder_priority node_prio; + bool oneway = !!(t->flags & TF_ONE_WAY); + bool wakeup = true; + + BUG_ON(!node); + binder_node_lock(node); + node_prio.prio = node->min_priority; + node_prio.sched_policy = node->sched_policy; + + if (oneway) { + BUG_ON(thread); + if (node->has_async_transaction) { + target_list = &node->async_todo; + wakeup = false; + } else { + node->has_async_transaction = 1; + } + } + + binder_inner_proc_lock(proc); + + if (proc->is_dead || (thread && thread->is_dead)) { + binder_inner_proc_unlock(proc); + binder_node_unlock(node); + return false; + } + + if (!thread && !target_list) + thread = binder_select_thread_ilocked(proc); + + if (thread) { + target_list = &thread->todo; + binder_transaction_priority(thread->task, t, node_prio, + node->inherit_rt); + } else if (!target_list) { + target_list = &proc->todo; + } else { + BUG_ON(target_list != &node->async_todo); + } + + binder_enqueue_work_ilocked(&t->work, target_list); + + if (wakeup) + binder_wakeup_thread_ilocked(proc, thread, !oneway /* sync */); + + binder_inner_proc_unlock(proc); + binder_node_unlock(node); + + return true; +} + static void binder_transaction(struct binder_proc *proc, struct binder_thread *thread, struct binder_transaction_data *tr, int reply, @@ -1848,19 +2756,21 @@ static void binder_transaction(struct binder_proc *proc, binder_size_t *offp, *off_end, *off_start; binder_size_t off_min; u8 *sg_bufp, *sg_buf_end; - struct binder_proc *target_proc; + struct binder_proc *target_proc = NULL; struct binder_thread *target_thread = NULL; struct binder_node *target_node = NULL; - struct list_head *target_list; - wait_queue_head_t *target_wait; struct binder_transaction *in_reply_to = NULL; struct binder_transaction_log_entry *e; - uint32_t return_error; + uint32_t return_error = 0; + uint32_t return_error_param = 0; + uint32_t return_error_line = 0; struct binder_buffer_object *last_fixup_obj = NULL; binder_size_t last_fixup_min_off = 0; struct binder_context *context = proc->context; + int t_debug_id = atomic_inc_return(&binder_last_id); - e = binder_transaction_log_add(&context->transaction_log); + e = binder_transaction_log_add(&binder_transaction_log); + e->debug_id = t_debug_id; e->call_type = reply ? 2 : !!(tr->flags & TF_ONE_WAY); e->from_proc = proc->pid; e->from_thread = thread->pid; @@ -1870,29 +2780,39 @@ static void binder_transaction(struct binder_proc *proc, e->context_name = proc->context->name; if (reply) { + binder_inner_proc_lock(proc); in_reply_to = thread->transaction_stack; if (in_reply_to == NULL) { + binder_inner_proc_unlock(proc); binder_user_error("%d:%d got reply transaction with no transaction stack\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; + return_error_param = -EPROTO; + return_error_line = __LINE__; goto err_empty_call_stack; } - binder_set_nice(in_reply_to->saved_priority); if (in_reply_to->to_thread != thread) { + spin_lock(&in_reply_to->lock); binder_user_error("%d:%d got reply transaction with bad transaction stack, transaction %d has target %d:%d\n", proc->pid, thread->pid, in_reply_to->debug_id, in_reply_to->to_proc ? in_reply_to->to_proc->pid : 0, in_reply_to->to_thread ? in_reply_to->to_thread->pid : 0); + spin_unlock(&in_reply_to->lock); + binder_inner_proc_unlock(proc); return_error = BR_FAILED_REPLY; + return_error_param = -EPROTO; + return_error_line = __LINE__; in_reply_to = NULL; goto err_bad_call_stack; } thread->transaction_stack = in_reply_to->to_parent; - target_thread = in_reply_to->from; + binder_inner_proc_unlock(proc); + target_thread = binder_get_txn_from_and_acq_inner(in_reply_to); if (target_thread == NULL) { return_error = BR_DEAD_REPLY; + return_error_line = __LINE__; goto err_dead_binder; } if (target_thread->transaction_stack != in_reply_to) { @@ -1901,89 +2821,137 @@ static void binder_transaction(struct binder_proc *proc, target_thread->transaction_stack ? target_thread->transaction_stack->debug_id : 0, in_reply_to->debug_id); + binder_inner_proc_unlock(target_thread->proc); return_error = BR_FAILED_REPLY; + return_error_param = -EPROTO; + return_error_line = __LINE__; in_reply_to = NULL; target_thread = NULL; goto err_dead_binder; } target_proc = target_thread->proc; + target_proc->tmp_ref++; + binder_inner_proc_unlock(target_thread->proc); } else { if (tr->target.handle) { struct binder_ref *ref; - ref = binder_get_ref(proc, tr->target.handle, true); - if (ref == NULL) { + /* + * There must already be a strong ref + * on this node. If so, do a strong + * increment on the node to ensure it + * stays alive until the transaction is + * done. + */ + binder_proc_lock(proc); + ref = binder_get_ref_olocked(proc, tr->target.handle, + true); + if (ref) { + binder_inc_node(ref->node, 1, 0, NULL); + target_node = ref->node; + } + binder_proc_unlock(proc); + if (target_node == NULL) { binder_user_error("%d:%d got transaction to invalid handle\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; goto err_invalid_target_handle; } - target_node = ref->node; } else { + mutex_lock(&context->context_mgr_node_lock); target_node = context->binder_context_mgr_node; if (target_node == NULL) { return_error = BR_DEAD_REPLY; + mutex_unlock(&context->context_mgr_node_lock); + return_error_line = __LINE__; goto err_no_context_mgr_node; } + binder_inc_node(target_node, 1, 0, NULL); + mutex_unlock(&context->context_mgr_node_lock); } e->to_node = target_node->debug_id; + binder_node_lock(target_node); target_proc = target_node->proc; if (target_proc == NULL) { + binder_node_unlock(target_node); return_error = BR_DEAD_REPLY; + return_error_line = __LINE__; goto err_dead_binder; } + binder_inner_proc_lock(target_proc); + target_proc->tmp_ref++; + binder_inner_proc_unlock(target_proc); + binder_node_unlock(target_node); if (security_binder_transaction(proc->tsk, target_proc->tsk) < 0) { return_error = BR_FAILED_REPLY; + return_error_param = -EPERM; + return_error_line = __LINE__; goto err_invalid_target_handle; } + binder_inner_proc_lock(proc); if (!(tr->flags & TF_ONE_WAY) && thread->transaction_stack) { struct binder_transaction *tmp; tmp = thread->transaction_stack; if (tmp->to_thread != thread) { + spin_lock(&tmp->lock); binder_user_error("%d:%d got new transaction with bad transaction stack, transaction %d has target %d:%d\n", proc->pid, thread->pid, tmp->debug_id, tmp->to_proc ? tmp->to_proc->pid : 0, tmp->to_thread ? tmp->to_thread->pid : 0); + spin_unlock(&tmp->lock); + binder_inner_proc_unlock(proc); return_error = BR_FAILED_REPLY; + return_error_param = -EPROTO; + return_error_line = __LINE__; goto err_bad_call_stack; } while (tmp) { - if (tmp->from && tmp->from->proc == target_proc) - target_thread = tmp->from; + struct binder_thread *from; + + spin_lock(&tmp->lock); + from = tmp->from; + if (from && from->proc == target_proc) { + atomic_inc(&from->tmp_ref); + target_thread = from; + spin_unlock(&tmp->lock); + break; + } + spin_unlock(&tmp->lock); tmp = tmp->from_parent; } } + binder_inner_proc_unlock(proc); } - if (target_thread) { + if (target_thread) e->to_thread = target_thread->pid; - target_list = &target_thread->todo; - target_wait = &target_thread->wait; - } else { - target_list = &target_proc->todo; - target_wait = &target_proc->wait; - } e->to_proc = target_proc->pid; /* TODO: reuse incoming transaction for reply */ t = kzalloc(sizeof(*t), GFP_KERNEL); if (t == NULL) { return_error = BR_FAILED_REPLY; + return_error_param = -ENOMEM; + return_error_line = __LINE__; goto err_alloc_t_failed; } binder_stats_created(BINDER_STAT_TRANSACTION); + spin_lock_init(&t->lock); tcomplete = kzalloc(sizeof(*tcomplete), GFP_KERNEL); if (tcomplete == NULL) { return_error = BR_FAILED_REPLY; + return_error_param = -ENOMEM; + return_error_line = __LINE__; goto err_alloc_tcomplete_failed; } binder_stats_created(BINDER_STAT_TRANSACTION_COMPLETE); - t->debug_id = atomic_inc_return(&binder_last_id); - e->debug_id = t->debug_id; + t->debug_id = t_debug_id; if (reply) binder_debug(BINDER_DEBUG_TRANSACTION, @@ -2013,15 +2981,30 @@ static void binder_transaction(struct binder_proc *proc, t->to_thread = target_thread; t->code = tr->code; t->flags = tr->flags; - t->priority = task_nice(current); + if (!(t->flags & TF_ONE_WAY) && + binder_supported_policy(current->policy)) { + /* Inherit supported policies for synchronous transactions */ + t->priority.sched_policy = current->policy; + t->priority.prio = current->normal_prio; + } else { + /* Otherwise, fall back to the default priority */ + t->priority = target_proc->default_priority; + } trace_binder_transaction(reply, t, target_node); - t->buffer = binder_alloc_buf(target_proc, tr->data_size, + t->buffer = binder_alloc_new_buf(&target_proc->alloc, tr->data_size, tr->offsets_size, extra_buffers_size, !reply && (t->flags & TF_ONE_WAY)); - if (t->buffer == NULL) { - return_error = BR_FAILED_REPLY; + if (IS_ERR(t->buffer)) { + /* + * -ESRCH indicates VMA cleared. The target is dying. + */ + return_error_param = PTR_ERR(t->buffer); + return_error = return_error_param == -ESRCH ? + BR_DEAD_REPLY : BR_FAILED_REPLY; + return_error_line = __LINE__; + t->buffer = NULL; goto err_binder_alloc_buf_failed; } t->buffer->allow_user_free = 0; @@ -2029,9 +3012,6 @@ static void binder_transaction(struct binder_proc *proc, t->buffer->transaction = t; t->buffer->target_node = target_node; trace_binder_transaction_alloc_buf(t->buffer); - if (target_node) - binder_inc_node(target_node, 1, 0, NULL); - off_start = (binder_size_t *)(t->buffer->data + ALIGN(tr->data_size, sizeof(void *))); offp = off_start; @@ -2041,6 +3021,8 @@ static void binder_transaction(struct binder_proc *proc, binder_user_error("%d:%d got transaction with invalid data ptr\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; + return_error_param = -EFAULT; + return_error_line = __LINE__; goto err_copy_data_failed; } if (copy_from_user(offp, (const void __user *)(uintptr_t) @@ -2048,12 +3030,16 @@ static void binder_transaction(struct binder_proc *proc, binder_user_error("%d:%d got transaction with invalid offsets ptr\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; + return_error_param = -EFAULT; + return_error_line = __LINE__; goto err_copy_data_failed; } if (!IS_ALIGNED(tr->offsets_size, sizeof(binder_size_t))) { binder_user_error("%d:%d got transaction with invalid offsets size, %lld\n", proc->pid, thread->pid, (u64)tr->offsets_size); return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; goto err_bad_offset; } if (!IS_ALIGNED(extra_buffers_size, sizeof(u64))) { @@ -2061,6 +3047,8 @@ static void binder_transaction(struct binder_proc *proc, proc->pid, thread->pid, (u64)extra_buffers_size); return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; goto err_bad_offset; } off_end = (void *)off_start + tr->offsets_size; @@ -2077,6 +3065,8 @@ static void binder_transaction(struct binder_proc *proc, (u64)off_min, (u64)t->buffer->data_size); return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; goto err_bad_offset; } @@ -2091,6 +3081,8 @@ static void binder_transaction(struct binder_proc *proc, ret = binder_translate_binder(fp, t, thread); if (ret < 0) { return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; goto err_translate_failed; } } break; @@ -2102,6 +3094,8 @@ static void binder_transaction(struct binder_proc *proc, ret = binder_translate_handle(fp, t, thread); if (ret < 0) { return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; goto err_translate_failed; } } break; @@ -2113,6 +3107,8 @@ static void binder_transaction(struct binder_proc *proc, if (target_fd < 0) { return_error = BR_FAILED_REPLY; + return_error_param = target_fd; + return_error_line = __LINE__; goto err_translate_failed; } fp->pad_binder = 0; @@ -2129,6 +3125,8 @@ static void binder_transaction(struct binder_proc *proc, binder_user_error("%d:%d got transaction with invalid parent offset or type\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; goto err_bad_parent; } if (!binder_validate_fixup(t->buffer, off_start, @@ -2138,12 +3136,16 @@ static void binder_transaction(struct binder_proc *proc, binder_user_error("%d:%d got transaction with out-of-order buffer fixup\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; goto err_bad_parent; } ret = binder_translate_fd_array(fda, parent, t, thread, in_reply_to); if (ret < 0) { return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; goto err_translate_failed; } last_fixup_obj = parent; @@ -2159,6 +3161,8 @@ static void binder_transaction(struct binder_proc *proc, binder_user_error("%d:%d got transaction with too large buffer\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; goto err_bad_offset; } if (copy_from_user(sg_bufp, @@ -2166,12 +3170,15 @@ static void binder_transaction(struct binder_proc *proc, bp->buffer, bp->length)) { binder_user_error("%d:%d got transaction with invalid offsets ptr\n", proc->pid, thread->pid); + return_error_param = -EFAULT; return_error = BR_FAILED_REPLY; + return_error_line = __LINE__; goto err_copy_data_failed; } /* Fixup buffer pointer to target proc address space */ bp->buffer = (uintptr_t)sg_bufp + - target_proc->user_buffer_offset; + binder_alloc_get_user_buffer_offset( + &target_proc->alloc); sg_bufp += ALIGN(bp->length, sizeof(u64)); ret = binder_fixup_parent(t, thread, bp, off_start, @@ -2180,6 +3187,8 @@ static void binder_transaction(struct binder_proc *proc, last_fixup_min_off); if (ret < 0) { return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; goto err_translate_failed; } last_fixup_obj = bp; @@ -2189,34 +3198,61 @@ static void binder_transaction(struct binder_proc *proc, binder_user_error("%d:%d got transaction with invalid object type, %x\n", proc->pid, thread->pid, hdr->type); return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; goto err_bad_object_type; } } + tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; + binder_enqueue_work(proc, tcomplete, &thread->todo); + t->work.type = BINDER_WORK_TRANSACTION; + if (reply) { + binder_inner_proc_lock(target_proc); + if (target_thread->is_dead) { + binder_inner_proc_unlock(target_proc); + goto err_dead_proc_or_thread; + } BUG_ON(t->buffer->async_transaction != 0); - binder_pop_transaction(target_thread, in_reply_to); + binder_pop_transaction_ilocked(target_thread, in_reply_to); + binder_enqueue_work_ilocked(&t->work, &target_thread->todo); + binder_inner_proc_unlock(target_proc); + wake_up_interruptible_sync(&target_thread->wait); + binder_restore_priority(current, in_reply_to->saved_priority); + binder_free_transaction(in_reply_to); } else if (!(t->flags & TF_ONE_WAY)) { BUG_ON(t->buffer->async_transaction != 0); + binder_inner_proc_lock(proc); t->need_reply = 1; t->from_parent = thread->transaction_stack; thread->transaction_stack = t; + binder_inner_proc_unlock(proc); + if (!binder_proc_transaction(t, target_proc, target_thread)) { + binder_inner_proc_lock(proc); + binder_pop_transaction_ilocked(thread, t); + binder_inner_proc_unlock(proc); + goto err_dead_proc_or_thread; + } } else { BUG_ON(target_node == NULL); BUG_ON(t->buffer->async_transaction != 1); - if (target_node->has_async_transaction) { - target_list = &target_node->async_todo; - target_wait = NULL; - } else - target_node->has_async_transaction = 1; + if (!binder_proc_transaction(t, target_proc, NULL)) + goto err_dead_proc_or_thread; } - t->work.type = BINDER_WORK_TRANSACTION; - list_add_tail(&t->work.entry, target_list); - tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; - list_add_tail(&tcomplete->entry, &thread->todo); - if (target_wait) - wake_up_interruptible(target_wait); + if (target_thread) + binder_thread_dec_tmpref(target_thread); + binder_proc_dec_tmpref(target_proc); + /* + * write barrier to synchronize with initialization + * of log entry + */ + smp_wmb(); + WRITE_ONCE(e->debug_id_done, t_debug_id); return; +err_dead_proc_or_thread: + return_error = BR_DEAD_REPLY; + return_error_line = __LINE__; err_translate_failed: err_bad_object_type: err_bad_offset: @@ -2224,8 +3260,9 @@ err_bad_parent: err_copy_data_failed: trace_binder_transaction_failed_buffer_release(t->buffer); binder_transaction_buffer_release(target_proc, t->buffer, offp); + target_node = NULL; t->buffer->transaction = NULL; - binder_free_buf(target_proc, t->buffer); + binder_alloc_free_buf(&target_proc->alloc, t->buffer); err_binder_alloc_buf_failed: kfree(tcomplete); binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); @@ -2238,25 +3275,50 @@ err_empty_call_stack: err_dead_binder: err_invalid_target_handle: err_no_context_mgr_node: + if (target_thread) + binder_thread_dec_tmpref(target_thread); + if (target_proc) + binder_proc_dec_tmpref(target_proc); + if (target_node) + binder_dec_node(target_node, 1, 0); + binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, - "%d:%d transaction failed %d, size %lld-%lld\n", - proc->pid, thread->pid, return_error, - (u64)tr->data_size, (u64)tr->offsets_size); + "%d:%d transaction failed %d/%d, size %lld-%lld line %d\n", + proc->pid, thread->pid, return_error, return_error_param, + (u64)tr->data_size, (u64)tr->offsets_size, + return_error_line); { struct binder_transaction_log_entry *fe; - fe = binder_transaction_log_add( - &context->transaction_log_failed); + e->return_error = return_error; + e->return_error_param = return_error_param; + e->return_error_line = return_error_line; + fe = binder_transaction_log_add(&binder_transaction_log_failed); *fe = *e; + /* + * write barrier to synchronize with initialization + * of log entry + */ + smp_wmb(); + WRITE_ONCE(e->debug_id_done, t_debug_id); + WRITE_ONCE(fe->debug_id_done, t_debug_id); } - BUG_ON(thread->return_error != BR_OK); + BUG_ON(thread->return_error.cmd != BR_OK); if (in_reply_to) { - thread->return_error = BR_TRANSACTION_COMPLETE; + binder_restore_priority(current, in_reply_to->saved_priority); + thread->return_error.cmd = BR_TRANSACTION_COMPLETE; + binder_enqueue_work(thread->proc, + &thread->return_error.work, + &thread->todo); binder_send_failed_reply(in_reply_to, return_error); - } else - thread->return_error = return_error; + } else { + thread->return_error.cmd = return_error; + binder_enqueue_work(thread->proc, + &thread->return_error.work, + &thread->todo); + } } static int binder_thread_write(struct binder_proc *proc, @@ -2270,15 +3332,17 @@ static int binder_thread_write(struct binder_proc *proc, void __user *ptr = buffer + *consumed; void __user *end = buffer + size; - while (ptr < end && thread->return_error == BR_OK) { + while (ptr < end && thread->return_error.cmd == BR_OK) { + int ret; + if (get_user(cmd, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); trace_binder_command(cmd); - if (_IOC_NR(cmd) < ARRAY_SIZE(context->binder_stats.bc)) { - context->binder_stats.bc[_IOC_NR(cmd)]++; - proc->stats.bc[_IOC_NR(cmd)]++; - thread->stats.bc[_IOC_NR(cmd)]++; + if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.bc)) { + atomic_inc(&binder_stats.bc[_IOC_NR(cmd)]); + atomic_inc(&proc->stats.bc[_IOC_NR(cmd)]); + atomic_inc(&thread->stats.bc[_IOC_NR(cmd)]); } switch (cmd) { case BC_INCREFS: @@ -2286,53 +3350,61 @@ static int binder_thread_write(struct binder_proc *proc, case BC_RELEASE: case BC_DECREFS: { uint32_t target; - struct binder_ref *ref; const char *debug_string; + bool strong = cmd == BC_ACQUIRE || cmd == BC_RELEASE; + bool increment = cmd == BC_INCREFS || cmd == BC_ACQUIRE; + struct binder_ref_data rdata; if (get_user(target, (uint32_t __user *)ptr)) return -EFAULT; + ptr += sizeof(uint32_t); - if (target == 0 && context->binder_context_mgr_node && - (cmd == BC_INCREFS || cmd == BC_ACQUIRE)) { - ref = binder_get_ref_for_node(proc, - context->binder_context_mgr_node); - if (ref->desc != target) { - binder_user_error("%d:%d tried to acquire reference to desc 0, got %d instead\n", - proc->pid, thread->pid, - ref->desc); - } - } else - ref = binder_get_ref(proc, target, - cmd == BC_ACQUIRE || - cmd == BC_RELEASE); - if (ref == NULL) { - binder_user_error("%d:%d refcount change on invalid ref %d\n", - proc->pid, thread->pid, target); - break; + ret = -1; + if (increment && !target) { + struct binder_node *ctx_mgr_node; + mutex_lock(&context->context_mgr_node_lock); + ctx_mgr_node = context->binder_context_mgr_node; + if (ctx_mgr_node) + ret = binder_inc_ref_for_node( + proc, ctx_mgr_node, + strong, NULL, &rdata); + mutex_unlock(&context->context_mgr_node_lock); + } + if (ret) + ret = binder_update_ref_for_handle( + proc, target, increment, strong, + &rdata); + if (!ret && rdata.desc != target) { + binder_user_error("%d:%d tried to acquire reference to desc %d, got %d instead\n", + proc->pid, thread->pid, + target, rdata.desc); } switch (cmd) { case BC_INCREFS: debug_string = "IncRefs"; - binder_inc_ref(ref, 0, NULL); break; case BC_ACQUIRE: debug_string = "Acquire"; - binder_inc_ref(ref, 1, NULL); break; case BC_RELEASE: debug_string = "Release"; - binder_dec_ref(ref, 1); break; case BC_DECREFS: default: debug_string = "DecRefs"; - binder_dec_ref(ref, 0); + break; + } + if (ret) { + binder_user_error("%d:%d %s %d refcount change on invalid ref %d ret %d\n", + proc->pid, thread->pid, debug_string, + strong, target, ret); break; } binder_debug(BINDER_DEBUG_USER_REFS, - "%d:%d %s ref %d desc %d s %d w %d for node %d\n", - proc->pid, thread->pid, debug_string, ref->debug_id, - ref->desc, ref->strong, ref->weak, ref->node->debug_id); + "%d:%d %s ref %d desc %d s %d w %d\n", + proc->pid, thread->pid, debug_string, + rdata.debug_id, rdata.desc, rdata.strong, + rdata.weak); break; } case BC_INCREFS_DONE: @@ -2340,6 +3412,7 @@ static int binder_thread_write(struct binder_proc *proc, binder_uintptr_t node_ptr; binder_uintptr_t cookie; struct binder_node *node; + bool free_node; if (get_user(node_ptr, (binder_uintptr_t __user *)ptr)) return -EFAULT; @@ -2364,13 +3437,17 @@ static int binder_thread_write(struct binder_proc *proc, "BC_INCREFS_DONE" : "BC_ACQUIRE_DONE", (u64)node_ptr, node->debug_id, (u64)cookie, (u64)node->cookie); + binder_put_node(node); break; } + binder_node_inner_lock(node); if (cmd == BC_ACQUIRE_DONE) { if (node->pending_strong_ref == 0) { binder_user_error("%d:%d BC_ACQUIRE_DONE node %d has no pending acquire request\n", proc->pid, thread->pid, node->debug_id); + binder_node_inner_unlock(node); + binder_put_node(node); break; } node->pending_strong_ref = 0; @@ -2379,16 +3456,23 @@ static int binder_thread_write(struct binder_proc *proc, binder_user_error("%d:%d BC_INCREFS_DONE node %d has no pending increfs request\n", proc->pid, thread->pid, node->debug_id); + binder_node_inner_unlock(node); + binder_put_node(node); break; } node->pending_weak_ref = 0; } - binder_dec_node(node, cmd == BC_ACQUIRE_DONE, 0); + free_node = binder_dec_node_nilocked(node, + cmd == BC_ACQUIRE_DONE, 0); + WARN_ON(free_node); binder_debug(BINDER_DEBUG_USER_REFS, - "%d:%d %s node %d ls %d lw %d\n", + "%d:%d %s node %d ls %d lw %d tr %d\n", proc->pid, thread->pid, cmd == BC_INCREFS_DONE ? "BC_INCREFS_DONE" : "BC_ACQUIRE_DONE", - node->debug_id, node->local_strong_refs, node->local_weak_refs); + node->debug_id, node->local_strong_refs, + node->local_weak_refs, node->tmp_refs); + binder_node_inner_unlock(node); + binder_put_node(node); break; } case BC_ATTEMPT_ACQUIRE: @@ -2406,7 +3490,8 @@ static int binder_thread_write(struct binder_proc *proc, return -EFAULT; ptr += sizeof(binder_uintptr_t); - buffer = binder_buffer_lookup(proc, data_ptr); + buffer = binder_alloc_prepare_to_free(&proc->alloc, + data_ptr); if (buffer == NULL) { binder_user_error("%d:%d BC_FREE_BUFFER u%016llx no match\n", proc->pid, thread->pid, (u64)data_ptr); @@ -2428,15 +3513,27 @@ static int binder_thread_write(struct binder_proc *proc, buffer->transaction = NULL; } if (buffer->async_transaction && buffer->target_node) { - BUG_ON(!buffer->target_node->has_async_transaction); - if (list_empty(&buffer->target_node->async_todo)) - buffer->target_node->has_async_transaction = 0; - else - list_move_tail(buffer->target_node->async_todo.next, &thread->todo); + struct binder_node *buf_node; + struct binder_work *w; + + buf_node = buffer->target_node; + binder_node_inner_lock(buf_node); + BUG_ON(!buf_node->has_async_transaction); + BUG_ON(buf_node->proc != proc); + w = binder_dequeue_work_head_ilocked( + &buf_node->async_todo); + if (!w) { + buf_node->has_async_transaction = 0; + } else { + binder_enqueue_work_ilocked( + w, &proc->todo); + binder_wakeup_proc_ilocked(proc); + } + binder_node_inner_unlock(buf_node); } trace_binder_transaction_buffer_release(buffer); binder_transaction_buffer_release(proc, buffer, NULL); - binder_free_buf(proc, buffer); + binder_alloc_free_buf(&proc->alloc, buffer); break; } @@ -2467,6 +3564,7 @@ static int binder_thread_write(struct binder_proc *proc, binder_debug(BINDER_DEBUG_THREADS, "%d:%d BC_REGISTER_LOOPER\n", proc->pid, thread->pid); + binder_inner_proc_lock(proc); if (thread->looper & BINDER_LOOPER_STATE_ENTERED) { thread->looper |= BINDER_LOOPER_STATE_INVALID; binder_user_error("%d:%d ERROR: BC_REGISTER_LOOPER called after BC_ENTER_LOOPER\n", @@ -2480,6 +3578,7 @@ static int binder_thread_write(struct binder_proc *proc, proc->requested_threads_started++; } thread->looper |= BINDER_LOOPER_STATE_REGISTERED; + binder_inner_proc_unlock(proc); break; case BC_ENTER_LOOPER: binder_debug(BINDER_DEBUG_THREADS, @@ -2504,7 +3603,7 @@ static int binder_thread_write(struct binder_proc *proc, uint32_t target; binder_uintptr_t cookie; struct binder_ref *ref; - struct binder_ref_death *death; + struct binder_ref_death *death = NULL; if (get_user(target, (uint32_t __user *)ptr)) return -EFAULT; @@ -2512,7 +3611,29 @@ static int binder_thread_write(struct binder_proc *proc, if (get_user(cookie, (binder_uintptr_t __user *)ptr)) return -EFAULT; ptr += sizeof(binder_uintptr_t); - ref = binder_get_ref(proc, target, false); + if (cmd == BC_REQUEST_DEATH_NOTIFICATION) { + /* + * Allocate memory for death notification + * before taking lock + */ + death = kzalloc(sizeof(*death), GFP_KERNEL); + if (death == NULL) { + WARN_ON(thread->return_error.cmd != + BR_OK); + thread->return_error.cmd = BR_ERROR; + binder_enqueue_work( + thread->proc, + &thread->return_error.work, + &thread->todo); + binder_debug( + BINDER_DEBUG_FAILED_TRANSACTION, + "%d:%d BC_REQUEST_DEATH_NOTIFICATION failed\n", + proc->pid, thread->pid); + break; + } + } + binder_proc_lock(proc); + ref = binder_get_ref_olocked(proc, target, false); if (ref == NULL) { binder_user_error("%d:%d %s invalid ref %d\n", proc->pid, thread->pid, @@ -2520,6 +3641,8 @@ static int binder_thread_write(struct binder_proc *proc, "BC_REQUEST_DEATH_NOTIFICATION" : "BC_CLEAR_DEATH_NOTIFICATION", target); + binder_proc_unlock(proc); + kfree(death); break; } @@ -2529,21 +3652,18 @@ static int binder_thread_write(struct binder_proc *proc, cmd == BC_REQUEST_DEATH_NOTIFICATION ? "BC_REQUEST_DEATH_NOTIFICATION" : "BC_CLEAR_DEATH_NOTIFICATION", - (u64)cookie, ref->debug_id, ref->desc, - ref->strong, ref->weak, ref->node->debug_id); + (u64)cookie, ref->data.debug_id, + ref->data.desc, ref->data.strong, + ref->data.weak, ref->node->debug_id); + binder_node_lock(ref->node); if (cmd == BC_REQUEST_DEATH_NOTIFICATION) { if (ref->death) { binder_user_error("%d:%d BC_REQUEST_DEATH_NOTIFICATION death notification already set\n", proc->pid, thread->pid); - break; - } - death = kzalloc(sizeof(*death), GFP_KERNEL); - if (death == NULL) { - thread->return_error = BR_ERROR; - binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, - "%d:%d BC_REQUEST_DEATH_NOTIFICATION failed\n", - proc->pid, thread->pid); + binder_node_unlock(ref->node); + binder_proc_unlock(proc); + kfree(death); break; } binder_stats_created(BINDER_STAT_DEATH); @@ -2552,17 +3672,19 @@ static int binder_thread_write(struct binder_proc *proc, ref->death = death; if (ref->node->proc == NULL) { ref->death->work.type = BINDER_WORK_DEAD_BINDER; - if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) { - list_add_tail(&ref->death->work.entry, &thread->todo); - } else { - list_add_tail(&ref->death->work.entry, &proc->todo); - wake_up_interruptible(&proc->wait); - } + + binder_inner_proc_lock(proc); + binder_enqueue_work_ilocked( + &ref->death->work, &proc->todo); + binder_wakeup_proc_ilocked(proc); + binder_inner_proc_unlock(proc); } } else { if (ref->death == NULL) { binder_user_error("%d:%d BC_CLEAR_DEATH_NOTIFICATION death notification not active\n", proc->pid, thread->pid); + binder_node_unlock(ref->node); + binder_proc_unlock(proc); break; } death = ref->death; @@ -2571,22 +3693,35 @@ static int binder_thread_write(struct binder_proc *proc, proc->pid, thread->pid, (u64)death->cookie, (u64)cookie); + binder_node_unlock(ref->node); + binder_proc_unlock(proc); break; } ref->death = NULL; + binder_inner_proc_lock(proc); if (list_empty(&death->work.entry)) { death->work.type = BINDER_WORK_CLEAR_DEATH_NOTIFICATION; - if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) { - list_add_tail(&death->work.entry, &thread->todo); - } else { - list_add_tail(&death->work.entry, &proc->todo); - wake_up_interruptible(&proc->wait); + if (thread->looper & + (BINDER_LOOPER_STATE_REGISTERED | + BINDER_LOOPER_STATE_ENTERED)) + binder_enqueue_work_ilocked( + &death->work, + &thread->todo); + else { + binder_enqueue_work_ilocked( + &death->work, + &proc->todo); + binder_wakeup_proc_ilocked( + proc); } } else { BUG_ON(death->work.type != BINDER_WORK_DEAD_BINDER); death->work.type = BINDER_WORK_DEAD_BINDER_AND_CLEAR; } + binder_inner_proc_unlock(proc); } + binder_node_unlock(ref->node); + binder_proc_unlock(proc); } break; case BC_DEAD_BINDER_DONE: { struct binder_work *w; @@ -2597,8 +3732,13 @@ static int binder_thread_write(struct binder_proc *proc, return -EFAULT; ptr += sizeof(cookie); - list_for_each_entry(w, &proc->delivered_death, entry) { - struct binder_ref_death *tmp_death = container_of(w, struct binder_ref_death, work); + binder_inner_proc_lock(proc); + list_for_each_entry(w, &proc->delivered_death, + entry) { + struct binder_ref_death *tmp_death = + container_of(w, + struct binder_ref_death, + work); if (tmp_death->cookie == cookie) { death = tmp_death; @@ -2612,19 +3752,25 @@ static int binder_thread_write(struct binder_proc *proc, if (death == NULL) { binder_user_error("%d:%d BC_DEAD_BINDER_DONE %016llx not found\n", proc->pid, thread->pid, (u64)cookie); + binder_inner_proc_unlock(proc); break; } - - list_del_init(&death->work.entry); + binder_dequeue_work_ilocked(&death->work); if (death->work.type == BINDER_WORK_DEAD_BINDER_AND_CLEAR) { death->work.type = BINDER_WORK_CLEAR_DEATH_NOTIFICATION; - if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) { - list_add_tail(&death->work.entry, &thread->todo); - } else { - list_add_tail(&death->work.entry, &proc->todo); - wake_up_interruptible(&proc->wait); + if (thread->looper & + (BINDER_LOOPER_STATE_REGISTERED | + BINDER_LOOPER_STATE_ENTERED)) + binder_enqueue_work_ilocked( + &death->work, &thread->todo); + else { + binder_enqueue_work_ilocked( + &death->work, + &proc->todo); + binder_wakeup_proc_ilocked(proc); } } + binder_inner_proc_unlock(proc); } break; default: @@ -2641,24 +3787,74 @@ static void binder_stat_br(struct binder_proc *proc, struct binder_thread *thread, uint32_t cmd) { trace_binder_return(cmd); - if (_IOC_NR(cmd) < ARRAY_SIZE(proc->stats.br)) { - proc->context->binder_stats.br[_IOC_NR(cmd)]++; - proc->stats.br[_IOC_NR(cmd)]++; - thread->stats.br[_IOC_NR(cmd)]++; + if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.br)) { + atomic_inc(&binder_stats.br[_IOC_NR(cmd)]); + atomic_inc(&proc->stats.br[_IOC_NR(cmd)]); + atomic_inc(&thread->stats.br[_IOC_NR(cmd)]); } } -static int binder_has_proc_work(struct binder_proc *proc, - struct binder_thread *thread) +static int binder_put_node_cmd(struct binder_proc *proc, + struct binder_thread *thread, + void __user **ptrp, + binder_uintptr_t node_ptr, + binder_uintptr_t node_cookie, + int node_debug_id, + uint32_t cmd, const char *cmd_name) { - return !list_empty(&proc->todo) || - (thread->looper & BINDER_LOOPER_STATE_NEED_RETURN); + void __user *ptr = *ptrp; + + if (put_user(cmd, (uint32_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(uint32_t); + + if (put_user(node_ptr, (binder_uintptr_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(binder_uintptr_t); + + if (put_user(node_cookie, (binder_uintptr_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(binder_uintptr_t); + + binder_stat_br(proc, thread, cmd); + binder_debug(BINDER_DEBUG_USER_REFS, "%d:%d %s %d u%016llx c%016llx\n", + proc->pid, thread->pid, cmd_name, node_debug_id, + (u64)node_ptr, (u64)node_cookie); + + *ptrp = ptr; + return 0; } -static int binder_has_thread_work(struct binder_thread *thread) +static int binder_wait_for_work(struct binder_thread *thread, + bool do_proc_work) { - return !list_empty(&thread->todo) || thread->return_error != BR_OK || - (thread->looper & BINDER_LOOPER_STATE_NEED_RETURN); + DEFINE_WAIT(wait); + struct binder_proc *proc = thread->proc; + int ret = 0; + + freezer_do_not_count(); + binder_inner_proc_lock(proc); + for (;;) { + prepare_to_wait(&thread->wait, &wait, TASK_INTERRUPTIBLE); + if (binder_has_work_ilocked(thread, do_proc_work)) + break; + if (do_proc_work) + list_add(&thread->waiting_thread_node, + &proc->waiting_threads); + binder_inner_proc_unlock(proc); + schedule(); + binder_inner_proc_lock(proc); + list_del_init(&thread->waiting_thread_node); + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + } + finish_wait(&thread->wait, &wait); + binder_inner_proc_unlock(proc); + freezer_count(); + + return ret; } static int binder_thread_read(struct binder_proc *proc, @@ -2680,37 +3876,15 @@ static int binder_thread_read(struct binder_proc *proc, } retry: - wait_for_proc_work = thread->transaction_stack == NULL && - list_empty(&thread->todo); - - if (thread->return_error != BR_OK && ptr < end) { - if (thread->return_error2 != BR_OK) { - if (put_user(thread->return_error2, (uint32_t __user *)ptr)) - return -EFAULT; - ptr += sizeof(uint32_t); - binder_stat_br(proc, thread, thread->return_error2); - if (ptr == end) - goto done; - thread->return_error2 = BR_OK; - } - if (put_user(thread->return_error, (uint32_t __user *)ptr)) - return -EFAULT; - ptr += sizeof(uint32_t); - binder_stat_br(proc, thread, thread->return_error); - thread->return_error = BR_OK; - goto done; - } - + binder_inner_proc_lock(proc); + wait_for_proc_work = binder_available_for_proc_work_ilocked(thread); + binder_inner_proc_unlock(proc); thread->looper |= BINDER_LOOPER_STATE_WAITING; - if (wait_for_proc_work) - proc->ready_threads++; - - binder_unlock(proc->context, __func__); trace_binder_wait_for_work(wait_for_proc_work, !!thread->transaction_stack, - !list_empty(&thread->todo)); + !binder_worklist_empty(proc, &thread->todo)); if (wait_for_proc_work) { if (!(thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED))) { @@ -2719,24 +3893,16 @@ retry: wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); } - binder_set_nice(proc->default_priority); - if (non_block) { - if (!binder_has_proc_work(proc, thread)) - ret = -EAGAIN; - } else - ret = wait_event_freezable_exclusive(proc->wait, binder_has_proc_work(proc, thread)); - } else { - if (non_block) { - if (!binder_has_thread_work(thread)) - ret = -EAGAIN; - } else - ret = wait_event_freezable(thread->wait, binder_has_thread_work(thread)); + binder_restore_priority(current, proc->default_priority); } - binder_lock(proc->context, __func__); + if (non_block) { + if (!binder_has_work(thread, wait_for_proc_work)) + ret = -EAGAIN; + } else { + ret = binder_wait_for_work(thread, wait_for_proc_work); + } - if (wait_for_proc_work) - proc->ready_threads--; thread->looper &= ~BINDER_LOOPER_STATE_WAITING; if (ret) @@ -2745,31 +3911,52 @@ retry: while (1) { uint32_t cmd; struct binder_transaction_data tr; - struct binder_work *w; + struct binder_work *w = NULL; + struct list_head *list = NULL; struct binder_transaction *t = NULL; + struct binder_thread *t_from; + + binder_inner_proc_lock(proc); + if (!binder_worklist_empty_ilocked(&thread->todo)) + list = &thread->todo; + else if (!binder_worklist_empty_ilocked(&proc->todo) && + wait_for_proc_work) + list = &proc->todo; + else { + binder_inner_proc_unlock(proc); - if (!list_empty(&thread->todo)) { - w = list_first_entry(&thread->todo, struct binder_work, - entry); - } else if (!list_empty(&proc->todo) && wait_for_proc_work) { - w = list_first_entry(&proc->todo, struct binder_work, - entry); - } else { /* no data added */ - if (ptr - buffer == 4 && - !(thread->looper & BINDER_LOOPER_STATE_NEED_RETURN)) + if (ptr - buffer == 4 && !thread->looper_need_return) goto retry; break; } - if (end - ptr < sizeof(tr) + 4) + if (end - ptr < sizeof(tr) + 4) { + binder_inner_proc_unlock(proc); break; + } + w = binder_dequeue_work_head_ilocked(list); switch (w->type) { case BINDER_WORK_TRANSACTION: { + binder_inner_proc_unlock(proc); t = container_of(w, struct binder_transaction, work); } break; + case BINDER_WORK_RETURN_ERROR: { + struct binder_error *e = container_of( + w, struct binder_error, work); + + WARN_ON(e->cmd == BR_OK); + binder_inner_proc_unlock(proc); + if (put_user(e->cmd, (uint32_t __user *)ptr)) + return -EFAULT; + e->cmd = BR_OK; + ptr += sizeof(uint32_t); + + binder_stat_br(proc, thread, cmd); + } break; case BINDER_WORK_TRANSACTION_COMPLETE: { + binder_inner_proc_unlock(proc); cmd = BR_TRANSACTION_COMPLETE; if (put_user(cmd, (uint32_t __user *)ptr)) return -EFAULT; @@ -2779,113 +3966,134 @@ retry: binder_debug(BINDER_DEBUG_TRANSACTION_COMPLETE, "%d:%d BR_TRANSACTION_COMPLETE\n", proc->pid, thread->pid); - - list_del(&w->entry); kfree(w); binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); } break; case BINDER_WORK_NODE: { struct binder_node *node = container_of(w, struct binder_node, work); - uint32_t cmd = BR_NOOP; - const char *cmd_name; - int strong = node->internal_strong_refs || node->local_strong_refs; - int weak = !hlist_empty(&node->refs) || node->local_weak_refs || strong; - - if (weak && !node->has_weak_ref) { - cmd = BR_INCREFS; - cmd_name = "BR_INCREFS"; + int strong, weak; + binder_uintptr_t node_ptr = node->ptr; + binder_uintptr_t node_cookie = node->cookie; + int node_debug_id = node->debug_id; + int has_weak_ref; + int has_strong_ref; + void __user *orig_ptr = ptr; + + BUG_ON(proc != node->proc); + strong = node->internal_strong_refs || + node->local_strong_refs; + weak = !hlist_empty(&node->refs) || + node->local_weak_refs || + node->tmp_refs || strong; + has_strong_ref = node->has_strong_ref; + has_weak_ref = node->has_weak_ref; + + if (weak && !has_weak_ref) { node->has_weak_ref = 1; node->pending_weak_ref = 1; node->local_weak_refs++; - } else if (strong && !node->has_strong_ref) { - cmd = BR_ACQUIRE; - cmd_name = "BR_ACQUIRE"; + } + if (strong && !has_strong_ref) { node->has_strong_ref = 1; node->pending_strong_ref = 1; node->local_strong_refs++; - } else if (!strong && node->has_strong_ref) { - cmd = BR_RELEASE; - cmd_name = "BR_RELEASE"; + } + if (!strong && has_strong_ref) node->has_strong_ref = 0; - } else if (!weak && node->has_weak_ref) { - cmd = BR_DECREFS; - cmd_name = "BR_DECREFS"; + if (!weak && has_weak_ref) node->has_weak_ref = 0; - } - if (cmd != BR_NOOP) { - if (put_user(cmd, (uint32_t __user *)ptr)) - return -EFAULT; - ptr += sizeof(uint32_t); - if (put_user(node->ptr, - (binder_uintptr_t __user *)ptr)) - return -EFAULT; - ptr += sizeof(binder_uintptr_t); - if (put_user(node->cookie, - (binder_uintptr_t __user *)ptr)) - return -EFAULT; - ptr += sizeof(binder_uintptr_t); - - binder_stat_br(proc, thread, cmd); - binder_debug(BINDER_DEBUG_USER_REFS, - "%d:%d %s %d u%016llx c%016llx\n", - proc->pid, thread->pid, cmd_name, - node->debug_id, - (u64)node->ptr, (u64)node->cookie); - } else { - list_del_init(&w->entry); - if (!weak && !strong) { - binder_debug(BINDER_DEBUG_INTERNAL_REFS, - "%d:%d node %d u%016llx c%016llx deleted\n", - proc->pid, thread->pid, - node->debug_id, - (u64)node->ptr, - (u64)node->cookie); - rb_erase(&node->rb_node, &proc->nodes); - kfree(node); - binder_stats_deleted(BINDER_STAT_NODE); - } else { - binder_debug(BINDER_DEBUG_INTERNAL_REFS, - "%d:%d node %d u%016llx c%016llx state unchanged\n", - proc->pid, thread->pid, - node->debug_id, - (u64)node->ptr, - (u64)node->cookie); - } - } + if (!weak && !strong) { + binder_debug(BINDER_DEBUG_INTERNAL_REFS, + "%d:%d node %d u%016llx c%016llx deleted\n", + proc->pid, thread->pid, + node_debug_id, + (u64)node_ptr, + (u64)node_cookie); + rb_erase(&node->rb_node, &proc->nodes); + binder_inner_proc_unlock(proc); + binder_node_lock(node); + /* + * Acquire the node lock before freeing the + * node to serialize with other threads that + * may have been holding the node lock while + * decrementing this node (avoids race where + * this thread frees while the other thread + * is unlocking the node after the final + * decrement) + */ + binder_node_unlock(node); + binder_free_node(node); + } else + binder_inner_proc_unlock(proc); + + if (weak && !has_weak_ref) + ret = binder_put_node_cmd( + proc, thread, &ptr, node_ptr, + node_cookie, node_debug_id, + BR_INCREFS, "BR_INCREFS"); + if (!ret && strong && !has_strong_ref) + ret = binder_put_node_cmd( + proc, thread, &ptr, node_ptr, + node_cookie, node_debug_id, + BR_ACQUIRE, "BR_ACQUIRE"); + if (!ret && !strong && has_strong_ref) + ret = binder_put_node_cmd( + proc, thread, &ptr, node_ptr, + node_cookie, node_debug_id, + BR_RELEASE, "BR_RELEASE"); + if (!ret && !weak && has_weak_ref) + ret = binder_put_node_cmd( + proc, thread, &ptr, node_ptr, + node_cookie, node_debug_id, + BR_DECREFS, "BR_DECREFS"); + if (orig_ptr == ptr) + binder_debug(BINDER_DEBUG_INTERNAL_REFS, + "%d:%d node %d u%016llx c%016llx state unchanged\n", + proc->pid, thread->pid, + node_debug_id, + (u64)node_ptr, + (u64)node_cookie); + if (ret) + return ret; } break; case BINDER_WORK_DEAD_BINDER: case BINDER_WORK_DEAD_BINDER_AND_CLEAR: case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: { struct binder_ref_death *death; uint32_t cmd; + binder_uintptr_t cookie; death = container_of(w, struct binder_ref_death, work); if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION) cmd = BR_CLEAR_DEATH_NOTIFICATION_DONE; else cmd = BR_DEAD_BINDER; - if (put_user(cmd, (uint32_t __user *)ptr)) - return -EFAULT; - ptr += sizeof(uint32_t); - if (put_user(death->cookie, - (binder_uintptr_t __user *)ptr)) - return -EFAULT; - ptr += sizeof(binder_uintptr_t); - binder_stat_br(proc, thread, cmd); + cookie = death->cookie; + binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION, "%d:%d %s %016llx\n", proc->pid, thread->pid, cmd == BR_DEAD_BINDER ? "BR_DEAD_BINDER" : "BR_CLEAR_DEATH_NOTIFICATION_DONE", - (u64)death->cookie); - + (u64)cookie); if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION) { - list_del(&w->entry); + binder_inner_proc_unlock(proc); kfree(death); binder_stats_deleted(BINDER_STAT_DEATH); - } else - list_move(&w->entry, &proc->delivered_death); + } else { + binder_enqueue_work_ilocked( + w, &proc->delivered_death); + binder_inner_proc_unlock(proc); + } + if (put_user(cmd, (uint32_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(uint32_t); + if (put_user(cookie, + (binder_uintptr_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(binder_uintptr_t); + binder_stat_br(proc, thread, cmd); if (cmd == BR_DEAD_BINDER) goto done; /* DEAD_BINDER notifications can cause transactions */ } break; @@ -2897,16 +4105,14 @@ retry: BUG_ON(t->buffer == NULL); if (t->buffer->target_node) { struct binder_node *target_node = t->buffer->target_node; + struct binder_priority node_prio; tr.target.ptr = target_node->ptr; tr.cookie = target_node->cookie; - t->saved_priority = task_nice(current); - if (t->priority < target_node->min_priority && - !(t->flags & TF_ONE_WAY)) - binder_set_nice(t->priority); - else if (!(t->flags & TF_ONE_WAY) || - t->saved_priority > target_node->min_priority) - binder_set_nice(target_node->min_priority); + node_prio.sched_policy = target_node->sched_policy; + node_prio.prio = target_node->min_priority; + binder_transaction_priority(current, t, node_prio, + target_node->inherit_rt); cmd = BR_TRANSACTION; } else { tr.target.ptr = 0; @@ -2917,8 +4123,9 @@ retry: tr.flags = t->flags; tr.sender_euid = from_kuid(current_user_ns(), t->sender_euid); - if (t->from) { - struct task_struct *sender = t->from->proc->tsk; + t_from = binder_get_txn_from(t); + if (t_from) { + struct task_struct *sender = t_from->proc->tsk; tr.sender_pid = task_tgid_nr_ns(sender, task_active_pid_ns(current)); @@ -2928,18 +4135,24 @@ retry: tr.data_size = t->buffer->data_size; tr.offsets_size = t->buffer->offsets_size; - tr.data.ptr.buffer = (binder_uintptr_t)( - (uintptr_t)t->buffer->data + - proc->user_buffer_offset); + tr.data.ptr.buffer = (binder_uintptr_t) + ((uintptr_t)t->buffer->data + + binder_alloc_get_user_buffer_offset(&proc->alloc)); tr.data.ptr.offsets = tr.data.ptr.buffer + ALIGN(t->buffer->data_size, sizeof(void *)); - if (put_user(cmd, (uint32_t __user *)ptr)) + if (put_user(cmd, (uint32_t __user *)ptr)) { + if (t_from) + binder_thread_dec_tmpref(t_from); return -EFAULT; + } ptr += sizeof(uint32_t); - if (copy_to_user(ptr, &tr, sizeof(tr))) + if (copy_to_user(ptr, &tr, sizeof(tr))) { + if (t_from) + binder_thread_dec_tmpref(t_from); return -EFAULT; + } ptr += sizeof(tr); trace_binder_transaction_received(t); @@ -2949,21 +4162,22 @@ retry: proc->pid, thread->pid, (cmd == BR_TRANSACTION) ? "BR_TRANSACTION" : "BR_REPLY", - t->debug_id, t->from ? t->from->proc->pid : 0, - t->from ? t->from->pid : 0, cmd, + t->debug_id, t_from ? t_from->proc->pid : 0, + t_from ? t_from->pid : 0, cmd, t->buffer->data_size, t->buffer->offsets_size, (u64)tr.data.ptr.buffer, (u64)tr.data.ptr.offsets); - list_del(&t->work.entry); + if (t_from) + binder_thread_dec_tmpref(t_from); t->buffer->allow_user_free = 1; if (cmd == BR_TRANSACTION && !(t->flags & TF_ONE_WAY)) { + binder_inner_proc_lock(thread->proc); t->to_parent = thread->transaction_stack; t->to_thread = thread; thread->transaction_stack = t; + binder_inner_proc_unlock(thread->proc); } else { - t->buffer->transaction = NULL; - kfree(t); - binder_stats_deleted(BINDER_STAT_TRANSACTION); + binder_free_transaction(t); } break; } @@ -2971,29 +4185,36 @@ retry: done: *consumed = ptr - buffer; - if (proc->requested_threads + proc->ready_threads == 0 && + binder_inner_proc_lock(proc); + if (proc->requested_threads == 0 && + list_empty(&thread->proc->waiting_threads) && proc->requested_threads_started < proc->max_threads && (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) /* the user-space code fails to */ /*spawn a new thread if we leave this out */) { proc->requested_threads++; + binder_inner_proc_unlock(proc); binder_debug(BINDER_DEBUG_THREADS, "%d:%d BR_SPAWN_LOOPER\n", proc->pid, thread->pid); if (put_user(BR_SPAWN_LOOPER, (uint32_t __user *)buffer)) return -EFAULT; binder_stat_br(proc, thread, BR_SPAWN_LOOPER); - } + } else + binder_inner_proc_unlock(proc); return 0; } -static void binder_release_work(struct list_head *list) +static void binder_release_work(struct binder_proc *proc, + struct list_head *list) { struct binder_work *w; - while (!list_empty(list)) { - w = list_first_entry(list, struct binder_work, entry); - list_del_init(&w->entry); + while (1) { + w = binder_dequeue_work_head(proc, list); + if (!w) + return; + switch (w->type) { case BINDER_WORK_TRANSACTION: { struct binder_transaction *t; @@ -3006,11 +4227,17 @@ static void binder_release_work(struct list_head *list) binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, "undelivered transaction %d\n", t->debug_id); - t->buffer->transaction = NULL; - kfree(t); - binder_stats_deleted(BINDER_STAT_TRANSACTION); + binder_free_transaction(t); } } break; + case BINDER_WORK_RETURN_ERROR: { + struct binder_error *e = container_of( + w, struct binder_error, work); + + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "undelivered TRANSACTION_ERROR: %u\n", + e->cmd); + } break; case BINDER_WORK_TRANSACTION_COMPLETE: { binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, "undelivered TRANSACTION_COMPLETE\n"); @@ -3037,7 +4264,8 @@ static void binder_release_work(struct list_head *list) } -static struct binder_thread *binder_get_thread(struct binder_proc *proc) +static struct binder_thread *binder_get_thread_ilocked( + struct binder_proc *proc, struct binder_thread *new_thread) { struct binder_thread *thread = NULL; struct rb_node *parent = NULL; @@ -3052,38 +4280,102 @@ static struct binder_thread *binder_get_thread(struct binder_proc *proc) else if (current->pid > thread->pid) p = &(*p)->rb_right; else - break; + return thread; } - if (*p == NULL) { - thread = kzalloc(sizeof(*thread), GFP_KERNEL); - if (thread == NULL) + if (!new_thread) + return NULL; + thread = new_thread; + binder_stats_created(BINDER_STAT_THREAD); + thread->proc = proc; + thread->pid = current->pid; + get_task_struct(current); + thread->task = current; + atomic_set(&thread->tmp_ref, 0); + init_waitqueue_head(&thread->wait); + INIT_LIST_HEAD(&thread->todo); + rb_link_node(&thread->rb_node, parent, p); + rb_insert_color(&thread->rb_node, &proc->threads); + thread->looper_need_return = true; + thread->return_error.work.type = BINDER_WORK_RETURN_ERROR; + thread->return_error.cmd = BR_OK; + thread->reply_error.work.type = BINDER_WORK_RETURN_ERROR; + thread->reply_error.cmd = BR_OK; + INIT_LIST_HEAD(&new_thread->waiting_thread_node); + return thread; +} + +static struct binder_thread *binder_get_thread(struct binder_proc *proc) +{ + struct binder_thread *thread; + struct binder_thread *new_thread; + + binder_inner_proc_lock(proc); + thread = binder_get_thread_ilocked(proc, NULL); + binder_inner_proc_unlock(proc); + if (!thread) { + new_thread = kzalloc(sizeof(*thread), GFP_KERNEL); + if (new_thread == NULL) return NULL; - binder_stats_created(BINDER_STAT_THREAD); - thread->proc = proc; - thread->pid = current->pid; - init_waitqueue_head(&thread->wait); - INIT_LIST_HEAD(&thread->todo); - rb_link_node(&thread->rb_node, parent, p); - rb_insert_color(&thread->rb_node, &proc->threads); - thread->looper |= BINDER_LOOPER_STATE_NEED_RETURN; - thread->return_error = BR_OK; - thread->return_error2 = BR_OK; + binder_inner_proc_lock(proc); + thread = binder_get_thread_ilocked(proc, new_thread); + binder_inner_proc_unlock(proc); + if (thread != new_thread) + kfree(new_thread); } return thread; } -static int binder_free_thread(struct binder_proc *proc, - struct binder_thread *thread) +static void binder_free_proc(struct binder_proc *proc) +{ + BUG_ON(!list_empty(&proc->todo)); + BUG_ON(!list_empty(&proc->delivered_death)); + binder_alloc_deferred_release(&proc->alloc); + put_task_struct(proc->tsk); + binder_stats_deleted(BINDER_STAT_PROC); + kfree(proc); +} + +static void binder_free_thread(struct binder_thread *thread) +{ + BUG_ON(!list_empty(&thread->todo)); + binder_stats_deleted(BINDER_STAT_THREAD); + binder_proc_dec_tmpref(thread->proc); + put_task_struct(thread->task); + kfree(thread); +} + +static int binder_thread_release(struct binder_proc *proc, + struct binder_thread *thread) { struct binder_transaction *t; struct binder_transaction *send_reply = NULL; int active_transactions = 0; + struct binder_transaction *last_t = NULL; + binder_inner_proc_lock(thread->proc); + /* + * take a ref on the proc so it survives + * after we remove this thread from proc->threads. + * The corresponding dec is when we actually + * free the thread in binder_free_thread() + */ + proc->tmp_ref++; + /* + * take a ref on this thread to ensure it + * survives while we are releasing it + */ + atomic_inc(&thread->tmp_ref); rb_erase(&thread->rb_node, &proc->threads); t = thread->transaction_stack; - if (t && t->to_thread == thread) - send_reply = t; + if (t) { + spin_lock(&t->lock); + if (t->to_thread == thread) + send_reply = t; + } + thread->is_dead = true; + while (t) { + last_t = t; active_transactions++; binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, "release %d:%d transaction %d %s, still active\n", @@ -3104,12 +4396,16 @@ static int binder_free_thread(struct binder_proc *proc, t = t->from_parent; } else BUG(); + spin_unlock(&last_t->lock); + if (t) + spin_lock(&t->lock); } + binder_inner_proc_unlock(thread->proc); + if (send_reply) binder_send_failed_reply(send_reply, BR_DEAD_REPLY); - binder_release_work(&thread->todo); - kfree(thread); - binder_stats_deleted(BINDER_STAT_THREAD); + binder_release_work(proc, &thread->todo); + binder_thread_dec_tmpref(thread); return active_transactions; } @@ -3118,30 +4414,21 @@ static unsigned int binder_poll(struct file *filp, { struct binder_proc *proc = filp->private_data; struct binder_thread *thread = NULL; - int wait_for_proc_work; - - binder_lock(proc->context, __func__); + bool wait_for_proc_work; thread = binder_get_thread(proc); - wait_for_proc_work = thread->transaction_stack == NULL && - list_empty(&thread->todo) && thread->return_error == BR_OK; + binder_inner_proc_lock(thread->proc); + thread->looper |= BINDER_LOOPER_STATE_POLL; + wait_for_proc_work = binder_available_for_proc_work_ilocked(thread); - binder_unlock(proc->context, __func__); + binder_inner_proc_unlock(thread->proc); + + poll_wait(filp, &thread->wait, wait); + + if (binder_has_work(thread, wait_for_proc_work)) + return POLLIN; - if (wait_for_proc_work) { - if (binder_has_proc_work(proc, thread)) - return POLLIN; - poll_wait(filp, &proc->wait, wait); - if (binder_has_proc_work(proc, thread)) - return POLLIN; - } else { - if (binder_has_thread_work(thread)) - return POLLIN; - poll_wait(filp, &thread->wait, wait); - if (binder_has_thread_work(thread)) - return POLLIN; - } return 0; } @@ -3188,8 +4475,10 @@ static int binder_ioctl_write_read(struct file *filp, &bwr.read_consumed, filp->f_flags & O_NONBLOCK); trace_binder_read_done(ret); - if (!list_empty(&proc->todo)) - wake_up_interruptible(&proc->wait); + binder_inner_proc_lock(proc); + if (!binder_worklist_empty_ilocked(&proc->todo)) + binder_wakeup_proc_ilocked(proc); + binder_inner_proc_unlock(proc); if (ret < 0) { if (copy_to_user(ubuf, &bwr, sizeof(bwr))) ret = -EFAULT; @@ -3214,9 +4503,10 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp) int ret = 0; struct binder_proc *proc = filp->private_data; struct binder_context *context = proc->context; - + struct binder_node *new_node; kuid_t curr_euid = current_euid(); + mutex_lock(&context->context_mgr_node_lock); if (context->binder_context_mgr_node) { pr_err("BINDER_SET_CONTEXT_MGR already set\n"); ret = -EBUSY; @@ -3237,24 +4527,52 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp) } else { context->binder_context_mgr_uid = curr_euid; } - context->binder_context_mgr_node = binder_new_node(proc, 0, 0); - if (!context->binder_context_mgr_node) { + new_node = binder_new_node(proc, NULL); + if (!new_node) { ret = -ENOMEM; goto out; } - context->binder_context_mgr_node->local_weak_refs++; - context->binder_context_mgr_node->local_strong_refs++; - context->binder_context_mgr_node->has_strong_ref = 1; - context->binder_context_mgr_node->has_weak_ref = 1; + binder_node_lock(new_node); + new_node->local_weak_refs++; + new_node->local_strong_refs++; + new_node->has_strong_ref = 1; + new_node->has_weak_ref = 1; + context->binder_context_mgr_node = new_node; + binder_node_unlock(new_node); + binder_put_node(new_node); out: + mutex_unlock(&context->context_mgr_node_lock); return ret; } +static int binder_ioctl_get_node_debug_info(struct binder_proc *proc, + struct binder_node_debug_info *info) { + struct rb_node *n; + binder_uintptr_t ptr = info->ptr; + + memset(info, 0, sizeof(*info)); + + binder_inner_proc_lock(proc); + for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) { + struct binder_node *node = rb_entry(n, struct binder_node, + rb_node); + if (node->ptr > ptr) { + info->ptr = node->ptr; + info->cookie = node->cookie; + info->has_strong_ref = node->has_strong_ref; + info->has_weak_ref = node->has_weak_ref; + break; + } + } + binder_inner_proc_unlock(proc); + + return 0; +} + static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { int ret; struct binder_proc *proc = filp->private_data; - struct binder_context *context = proc->context; struct binder_thread *thread; unsigned int size = _IOC_SIZE(cmd); void __user *ubuf = (void __user *)arg; @@ -3268,7 +4586,6 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (ret) goto err_unlocked; - binder_lock(context, __func__); thread = binder_get_thread(proc); if (thread == NULL) { ret = -ENOMEM; @@ -3281,12 +4598,19 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (ret) goto err; break; - case BINDER_SET_MAX_THREADS: - if (copy_from_user(&proc->max_threads, ubuf, sizeof(proc->max_threads))) { + case BINDER_SET_MAX_THREADS: { + int max_threads; + + if (copy_from_user(&max_threads, ubuf, + sizeof(max_threads))) { ret = -EINVAL; goto err; } + binder_inner_proc_lock(proc); + proc->max_threads = max_threads; + binder_inner_proc_unlock(proc); break; + } case BINDER_SET_CONTEXT_MGR: ret = binder_ioctl_set_ctx_mgr(filp); if (ret) @@ -3295,7 +4619,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case BINDER_THREAD_EXIT: binder_debug(BINDER_DEBUG_THREADS, "%d:%d exit\n", proc->pid, thread->pid); - binder_free_thread(proc, thread); + binder_thread_release(proc, thread); thread = NULL; break; case BINDER_VERSION: { @@ -3312,6 +4636,24 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } break; } + case BINDER_GET_NODE_DEBUG_INFO: { + struct binder_node_debug_info info; + + if (copy_from_user(&info, ubuf, sizeof(info))) { + ret = -EFAULT; + goto err; + } + + ret = binder_ioctl_get_node_debug_info(proc, &info); + if (ret < 0) + goto err; + + if (copy_to_user(ubuf, &info, sizeof(info))) { + ret = -EFAULT; + goto err; + } + break; + } default: ret = -EINVAL; goto err; @@ -3319,8 +4661,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ret = 0; err: if (thread) - thread->looper &= ~BINDER_LOOPER_STATE_NEED_RETURN; - binder_unlock(context, __func__); + thread->looper_need_return = false; wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); if (ret && ret != -ERESTARTSYS) pr_info("%d:%d ioctl %x %lx returned %d\n", proc->pid, current->pid, cmd, arg, ret); @@ -3349,8 +4690,7 @@ static void binder_vma_close(struct vm_area_struct *vma) proc->pid, vma->vm_start, vma->vm_end, (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags, (unsigned long)pgprot_val(vma->vm_page_prot)); - proc->vma = NULL; - proc->vma_vm_mm = NULL; + binder_alloc_vma_close(&proc->alloc); binder_defer_work(proc, BINDER_DEFERRED_PUT_FILES); } @@ -3368,10 +4708,8 @@ static const struct vm_operations_struct binder_vm_ops = { static int binder_mmap(struct file *filp, struct vm_area_struct *vma) { int ret; - struct vm_struct *area; struct binder_proc *proc = filp->private_data; const char *failure_string; - struct binder_buffer *buffer; if (proc->tsk != current->group_leader) return -EINVAL; @@ -3380,8 +4718,8 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_end = vma->vm_start + SZ_4M; binder_debug(BINDER_DEBUG_OPEN_CLOSE, - "binder_mmap: %d %lx-%lx (%ld K) vma %lx pagep %lx\n", - proc->pid, vma->vm_start, vma->vm_end, + "%s: %d %lx-%lx (%ld K) vma %lx pagep %lx\n", + __func__, proc->pid, vma->vm_start, vma->vm_end, (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags, (unsigned long)pgprot_val(vma->vm_page_prot)); @@ -3391,73 +4729,15 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) goto err_bad_arg; } vma->vm_flags = (vma->vm_flags | VM_DONTCOPY) & ~VM_MAYWRITE; - - mutex_lock(&proc->context->binder_mmap_lock); - if (proc->buffer) { - ret = -EBUSY; - failure_string = "already mapped"; - goto err_already_mapped; - } - - area = get_vm_area(vma->vm_end - vma->vm_start, VM_IOREMAP); - if (area == NULL) { - ret = -ENOMEM; - failure_string = "get_vm_area"; - goto err_get_vm_area_failed; - } - proc->buffer = area->addr; - proc->user_buffer_offset = vma->vm_start - (uintptr_t)proc->buffer; - mutex_unlock(&proc->context->binder_mmap_lock); - -#ifdef CONFIG_CPU_CACHE_VIPT - if (cache_is_vipt_aliasing()) { - while (CACHE_COLOUR((vma->vm_start ^ (uint32_t)proc->buffer))) { - pr_info("binder_mmap: %d %lx-%lx maps %p bad alignment\n", proc->pid, vma->vm_start, vma->vm_end, proc->buffer); - vma->vm_start += PAGE_SIZE; - } - } -#endif - proc->pages = kzalloc(sizeof(proc->pages[0]) * ((vma->vm_end - vma->vm_start) / PAGE_SIZE), GFP_KERNEL); - if (proc->pages == NULL) { - ret = -ENOMEM; - failure_string = "alloc page array"; - goto err_alloc_pages_failed; - } - proc->buffer_size = vma->vm_end - vma->vm_start; - vma->vm_ops = &binder_vm_ops; vma->vm_private_data = proc; - if (binder_update_page_range(proc, 1, proc->buffer, proc->buffer + PAGE_SIZE, vma)) { - ret = -ENOMEM; - failure_string = "alloc small buf"; - goto err_alloc_small_buf_failed; - } - buffer = proc->buffer; - INIT_LIST_HEAD(&proc->buffers); - list_add(&buffer->entry, &proc->buffers); - buffer->free = 1; - binder_insert_free_buffer(proc, buffer); - proc->free_async_space = proc->buffer_size / 2; - barrier(); + ret = binder_alloc_mmap_handler(&proc->alloc, vma); + if (ret) + return ret; proc->files = get_files_struct(current); - proc->vma = vma; - proc->vma_vm_mm = vma->vm_mm; - - /*pr_info("binder_mmap: %d %lx-%lx maps %p\n", - proc->pid, vma->vm_start, vma->vm_end, proc->buffer);*/ return 0; -err_alloc_small_buf_failed: - kfree(proc->pages); - proc->pages = NULL; -err_alloc_pages_failed: - mutex_lock(&proc->context->binder_mmap_lock); - vfree(proc->buffer); - proc->buffer = NULL; -err_get_vm_area_failed: -err_already_mapped: - mutex_unlock(&proc->context->binder_mmap_lock); err_bad_arg: pr_err("binder_mmap: %d %lx-%lx %s failed %d\n", proc->pid, vma->vm_start, vma->vm_end, failure_string, ret); @@ -3475,24 +4755,33 @@ static int binder_open(struct inode *nodp, struct file *filp) proc = kzalloc(sizeof(*proc), GFP_KERNEL); if (proc == NULL) return -ENOMEM; + spin_lock_init(&proc->inner_lock); + spin_lock_init(&proc->outer_lock); get_task_struct(current->group_leader); proc->tsk = current->group_leader; INIT_LIST_HEAD(&proc->todo); - init_waitqueue_head(&proc->wait); - proc->default_priority = task_nice(current); + if (binder_supported_policy(current->policy)) { + proc->default_priority.sched_policy = current->policy; + proc->default_priority.prio = current->normal_prio; + } else { + proc->default_priority.sched_policy = SCHED_NORMAL; + proc->default_priority.prio = NICE_TO_PRIO(0); + } + binder_dev = container_of(filp->private_data, struct binder_device, miscdev); proc->context = &binder_dev->context; - - binder_lock(proc->context, __func__); + binder_alloc_init(&proc->alloc); binder_stats_created(BINDER_STAT_PROC); - hlist_add_head(&proc->proc_node, &proc->context->binder_procs); proc->pid = current->group_leader->pid; INIT_LIST_HEAD(&proc->delivered_death); + INIT_LIST_HEAD(&proc->waiting_threads); filp->private_data = proc; - binder_unlock(proc->context, __func__); + mutex_lock(&binder_procs_lock); + hlist_add_head(&proc->proc_node, &binder_procs); + mutex_unlock(&binder_procs_lock); if (binder_debugfs_dir_entry_proc) { char strbuf[11]; @@ -3528,16 +4817,17 @@ static void binder_deferred_flush(struct binder_proc *proc) struct rb_node *n; int wake_count = 0; + binder_inner_proc_lock(proc); for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) { struct binder_thread *thread = rb_entry(n, struct binder_thread, rb_node); - thread->looper |= BINDER_LOOPER_STATE_NEED_RETURN; + thread->looper_need_return = true; if (thread->looper & BINDER_LOOPER_STATE_WAITING) { wake_up_interruptible(&thread->wait); wake_count++; } } - wake_up_interruptible_all(&proc->wait); + binder_inner_proc_unlock(proc); binder_debug(BINDER_DEBUG_OPEN_CLOSE, "binder_flush: %d woke %d threads\n", proc->pid, @@ -3557,15 +4847,22 @@ static int binder_release(struct inode *nodp, struct file *filp) static int binder_node_release(struct binder_node *node, int refs) { struct binder_ref *ref; - struct binder_context *context = node->proc->context; int death = 0; + struct binder_proc *proc = node->proc; - list_del_init(&node->work.entry); - binder_release_work(&node->async_todo); + binder_release_work(proc, &node->async_todo); - if (hlist_empty(&node->refs)) { - kfree(node); - binder_stats_deleted(BINDER_STAT_NODE); + binder_node_lock(node); + binder_inner_proc_lock(proc); + binder_dequeue_work_ilocked(&node->work); + /* + * The caller must have taken a temporary ref on the node, + */ + BUG_ON(!node->tmp_refs); + if (hlist_empty(&node->refs) && node->tmp_refs == 1) { + binder_inner_proc_unlock(proc); + binder_node_unlock(node); + binder_free_node(node); return refs; } @@ -3573,45 +4870,58 @@ static int binder_node_release(struct binder_node *node, int refs) node->proc = NULL; node->local_strong_refs = 0; node->local_weak_refs = 0; - hlist_add_head(&node->dead_node, &context->binder_dead_nodes); + binder_inner_proc_unlock(proc); + + spin_lock(&binder_dead_nodes_lock); + hlist_add_head(&node->dead_node, &binder_dead_nodes); + spin_unlock(&binder_dead_nodes_lock); hlist_for_each_entry(ref, &node->refs, node_entry) { refs++; - - if (!ref->death) + /* + * Need the node lock to synchronize + * with new notification requests and the + * inner lock to synchronize with queued + * death notifications. + */ + binder_inner_proc_lock(ref->proc); + if (!ref->death) { + binder_inner_proc_unlock(ref->proc); continue; + } death++; - if (list_empty(&ref->death->work.entry)) { - ref->death->work.type = BINDER_WORK_DEAD_BINDER; - list_add_tail(&ref->death->work.entry, - &ref->proc->todo); - wake_up_interruptible(&ref->proc->wait); - } else - BUG(); + BUG_ON(!list_empty(&ref->death->work.entry)); + ref->death->work.type = BINDER_WORK_DEAD_BINDER; + binder_enqueue_work_ilocked(&ref->death->work, + &ref->proc->todo); + binder_wakeup_proc_ilocked(ref->proc); + binder_inner_proc_unlock(ref->proc); } binder_debug(BINDER_DEBUG_DEAD_BINDER, "node %d now dead, refs %d, death %d\n", node->debug_id, refs, death); + binder_node_unlock(node); + binder_put_node(node); return refs; } static void binder_deferred_release(struct binder_proc *proc) { - struct binder_transaction *t; struct binder_context *context = proc->context; struct rb_node *n; - int threads, nodes, incoming_refs, outgoing_refs, buffers, - active_transactions, page_count; + int threads, nodes, incoming_refs, outgoing_refs, active_transactions; - BUG_ON(proc->vma); BUG_ON(proc->files); + mutex_lock(&binder_procs_lock); hlist_del(&proc->proc_node); + mutex_unlock(&binder_procs_lock); + mutex_lock(&context->context_mgr_node_lock); if (context->binder_context_mgr_node && context->binder_context_mgr_node->proc == proc) { binder_debug(BINDER_DEBUG_DEAD_BINDER, @@ -3619,15 +4929,25 @@ static void binder_deferred_release(struct binder_proc *proc) __func__, proc->pid); context->binder_context_mgr_node = NULL; } + mutex_unlock(&context->context_mgr_node_lock); + binder_inner_proc_lock(proc); + /* + * Make sure proc stays alive after we + * remove all the threads + */ + proc->tmp_ref++; + proc->is_dead = true; threads = 0; active_transactions = 0; while ((n = rb_first(&proc->threads))) { struct binder_thread *thread; thread = rb_entry(n, struct binder_thread, rb_node); + binder_inner_proc_unlock(proc); threads++; - active_transactions += binder_free_thread(proc, thread); + active_transactions += binder_thread_release(proc, thread); + binder_inner_proc_lock(proc); } nodes = 0; @@ -3637,90 +4957,55 @@ static void binder_deferred_release(struct binder_proc *proc) node = rb_entry(n, struct binder_node, rb_node); nodes++; + /* + * take a temporary ref on the node before + * calling binder_node_release() which will either + * kfree() the node or call binder_put_node() + */ + binder_inc_node_tmpref_ilocked(node); rb_erase(&node->rb_node, &proc->nodes); - incoming_refs = binder_node_release(node, - incoming_refs); + binder_inner_proc_unlock(proc); + incoming_refs = binder_node_release(node, incoming_refs); + binder_inner_proc_lock(proc); } + binder_inner_proc_unlock(proc); outgoing_refs = 0; + binder_proc_lock(proc); while ((n = rb_first(&proc->refs_by_desc))) { struct binder_ref *ref; ref = rb_entry(n, struct binder_ref, rb_node_desc); outgoing_refs++; - binder_delete_ref(ref); - } - - binder_release_work(&proc->todo); - binder_release_work(&proc->delivered_death); - - buffers = 0; - while ((n = rb_first(&proc->allocated_buffers))) { - struct binder_buffer *buffer; - - buffer = rb_entry(n, struct binder_buffer, rb_node); - - t = buffer->transaction; - if (t) { - t->buffer = NULL; - buffer->transaction = NULL; - pr_err("release proc %d, transaction %d, not freed\n", - proc->pid, t->debug_id); - /*BUG();*/ - } - - binder_free_buf(proc, buffer); - buffers++; + binder_cleanup_ref_olocked(ref); + binder_proc_unlock(proc); + binder_free_ref(ref); + binder_proc_lock(proc); } + binder_proc_unlock(proc); - binder_stats_deleted(BINDER_STAT_PROC); - - page_count = 0; - if (proc->pages) { - int i; - - for (i = 0; i < proc->buffer_size / PAGE_SIZE; i++) { - void *page_addr; - - if (!proc->pages[i]) - continue; - - page_addr = proc->buffer + i * PAGE_SIZE; - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%s: %d: page %d at %p not freed\n", - __func__, proc->pid, i, page_addr); - unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); - __free_page(proc->pages[i]); - page_count++; - } - kfree(proc->pages); - vfree(proc->buffer); - } - - put_task_struct(proc->tsk); + binder_release_work(proc, &proc->todo); + binder_release_work(proc, &proc->delivered_death); binder_debug(BINDER_DEBUG_OPEN_CLOSE, - "%s: %d threads %d, nodes %d (ref %d), refs %d, active transactions %d, buffers %d, pages %d\n", + "%s: %d threads %d, nodes %d (ref %d), refs %d, active transactions %d\n", __func__, proc->pid, threads, nodes, incoming_refs, - outgoing_refs, active_transactions, buffers, page_count); + outgoing_refs, active_transactions); - kfree(proc); + binder_proc_dec_tmpref(proc); } static void binder_deferred_func(struct work_struct *work) { struct binder_proc *proc; struct files_struct *files; - struct binder_context *context = - container_of(work, struct binder_context, deferred_work); int defer; do { - binder_lock(context, __func__); - mutex_lock(&context->binder_deferred_lock); - if (!hlist_empty(&context->binder_deferred_list)) { - proc = hlist_entry(context->binder_deferred_list.first, + mutex_lock(&binder_deferred_lock); + if (!hlist_empty(&binder_deferred_list)) { + proc = hlist_entry(binder_deferred_list.first, struct binder_proc, deferred_work_node); hlist_del_init(&proc->deferred_work_node); defer = proc->deferred_work; @@ -3729,7 +5014,7 @@ static void binder_deferred_func(struct work_struct *work) proc = NULL; defer = 0; } - mutex_unlock(&context->binder_deferred_lock); + mutex_unlock(&binder_deferred_lock); files = NULL; if (defer & BINDER_DEFERRED_PUT_FILES) { @@ -3744,61 +5029,71 @@ static void binder_deferred_func(struct work_struct *work) if (defer & BINDER_DEFERRED_RELEASE) binder_deferred_release(proc); /* frees proc */ - binder_unlock(context, __func__); if (files) put_files_struct(files); } while (proc); } +static DECLARE_WORK(binder_deferred_work, binder_deferred_func); static void binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer) { - mutex_lock(&proc->context->binder_deferred_lock); + mutex_lock(&binder_deferred_lock); proc->deferred_work |= defer; if (hlist_unhashed(&proc->deferred_work_node)) { hlist_add_head(&proc->deferred_work_node, - &proc->context->binder_deferred_list); - queue_work(proc->context->binder_deferred_workqueue, - &proc->context->deferred_work); + &binder_deferred_list); + queue_work(binder_deferred_workqueue, &binder_deferred_work); } - mutex_unlock(&proc->context->binder_deferred_lock); + mutex_unlock(&binder_deferred_lock); } -static void print_binder_transaction(struct seq_file *m, const char *prefix, - struct binder_transaction *t) +static void print_binder_transaction_ilocked(struct seq_file *m, + struct binder_proc *proc, + const char *prefix, + struct binder_transaction *t) { + struct binder_proc *to_proc; + struct binder_buffer *buffer = t->buffer; + + spin_lock(&t->lock); + to_proc = t->to_proc; seq_printf(m, - "%s %d: %p from %d:%d to %d:%d code %x flags %x pri %ld r%d", + "%s %d: %p from %d:%d to %d:%d code %x flags %x pri %d:%d r%d", prefix, t->debug_id, t, t->from ? t->from->proc->pid : 0, t->from ? t->from->pid : 0, - t->to_proc ? t->to_proc->pid : 0, + to_proc ? to_proc->pid : 0, t->to_thread ? t->to_thread->pid : 0, - t->code, t->flags, t->priority, t->need_reply); - if (t->buffer == NULL) { + t->code, t->flags, t->priority.sched_policy, + t->priority.prio, t->need_reply); + spin_unlock(&t->lock); + + if (proc != to_proc) { + /* + * Can only safely deref buffer if we are holding the + * correct proc inner lock for this node + */ + seq_puts(m, "\n"); + return; + } + + if (buffer == NULL) { seq_puts(m, " buffer free\n"); return; } - if (t->buffer->target_node) - seq_printf(m, " node %d", - t->buffer->target_node->debug_id); + if (buffer->target_node) + seq_printf(m, " node %d", buffer->target_node->debug_id); seq_printf(m, " size %zd:%zd data %p\n", - t->buffer->data_size, t->buffer->offsets_size, - t->buffer->data); -} - -static void print_binder_buffer(struct seq_file *m, const char *prefix, - struct binder_buffer *buffer) -{ - seq_printf(m, "%s %d: %p size %zd:%zd %s\n", - prefix, buffer->debug_id, buffer->data, buffer->data_size, buffer->offsets_size, - buffer->transaction ? "active" : "delivered"); + buffer->data); } -static void print_binder_work(struct seq_file *m, const char *prefix, - const char *transaction_prefix, - struct binder_work *w) +static void print_binder_work_ilocked(struct seq_file *m, + struct binder_proc *proc, + const char *prefix, + const char *transaction_prefix, + struct binder_work *w) { struct binder_node *node; struct binder_transaction *t; @@ -3806,8 +5101,16 @@ static void print_binder_work(struct seq_file *m, const char *prefix, switch (w->type) { case BINDER_WORK_TRANSACTION: t = container_of(w, struct binder_transaction, work); - print_binder_transaction(m, transaction_prefix, t); + print_binder_transaction_ilocked( + m, proc, transaction_prefix, t); break; + case BINDER_WORK_RETURN_ERROR: { + struct binder_error *e = container_of( + w, struct binder_error, work); + + seq_printf(m, "%stransaction error: %u\n", + prefix, e->cmd); + } break; case BINDER_WORK_TRANSACTION_COMPLETE: seq_printf(m, "%stransaction complete\n", prefix); break; @@ -3832,40 +5135,46 @@ static void print_binder_work(struct seq_file *m, const char *prefix, } } -static void print_binder_thread(struct seq_file *m, - struct binder_thread *thread, - int print_always) +static void print_binder_thread_ilocked(struct seq_file *m, + struct binder_thread *thread, + int print_always) { struct binder_transaction *t; struct binder_work *w; size_t start_pos = m->count; size_t header_pos; - seq_printf(m, " thread %d: l %02x\n", thread->pid, thread->looper); + seq_printf(m, " thread %d: l %02x need_return %d tr %d\n", + thread->pid, thread->looper, + thread->looper_need_return, + atomic_read(&thread->tmp_ref)); header_pos = m->count; t = thread->transaction_stack; while (t) { if (t->from == thread) { - print_binder_transaction(m, - " outgoing transaction", t); + print_binder_transaction_ilocked(m, thread->proc, + " outgoing transaction", t); t = t->from_parent; } else if (t->to_thread == thread) { - print_binder_transaction(m, + print_binder_transaction_ilocked(m, thread->proc, " incoming transaction", t); t = t->to_parent; } else { - print_binder_transaction(m, " bad transaction", t); + print_binder_transaction_ilocked(m, thread->proc, + " bad transaction", t); t = NULL; } } list_for_each_entry(w, &thread->todo, entry) { - print_binder_work(m, " ", " pending transaction", w); + print_binder_work_ilocked(m, thread->proc, " ", + " pending transaction", w); } if (!print_always && m->count == header_pos) m->count = start_pos; } -static void print_binder_node(struct seq_file *m, struct binder_node *node) +static void print_binder_node_nilocked(struct seq_file *m, + struct binder_node *node) { struct binder_ref *ref; struct binder_work *w; @@ -3875,27 +5184,35 @@ static void print_binder_node(struct seq_file *m, struct binder_node *node) hlist_for_each_entry(ref, &node->refs, node_entry) count++; - seq_printf(m, " node %d: u%016llx c%016llx hs %d hw %d ls %d lw %d is %d iw %d", + seq_printf(m, " node %d: u%016llx c%016llx pri %d:%d hs %d hw %d ls %d lw %d is %d iw %d tr %d", node->debug_id, (u64)node->ptr, (u64)node->cookie, + node->sched_policy, node->min_priority, node->has_strong_ref, node->has_weak_ref, node->local_strong_refs, node->local_weak_refs, - node->internal_strong_refs, count); + node->internal_strong_refs, count, node->tmp_refs); if (count) { seq_puts(m, " proc"); hlist_for_each_entry(ref, &node->refs, node_entry) seq_printf(m, " %d", ref->proc->pid); } seq_puts(m, "\n"); - list_for_each_entry(w, &node->async_todo, entry) - print_binder_work(m, " ", - " pending async transaction", w); + if (node->proc) { + list_for_each_entry(w, &node->async_todo, entry) + print_binder_work_ilocked(m, node->proc, " ", + " pending async transaction", w); + } } -static void print_binder_ref(struct seq_file *m, struct binder_ref *ref) +static void print_binder_ref_olocked(struct seq_file *m, + struct binder_ref *ref) { - seq_printf(m, " ref %d: desc %d %snode %d s %d w %d d %p\n", - ref->debug_id, ref->desc, ref->node->proc ? "" : "dead ", - ref->node->debug_id, ref->strong, ref->weak, ref->death); + binder_node_lock(ref->node); + seq_printf(m, " ref %d: desc %d %snode %d s %d w %d d %pK\n", + ref->data.debug_id, ref->data.desc, + ref->node->proc ? "" : "dead ", + ref->node->debug_id, ref->data.strong, + ref->data.weak, ref->death); + binder_node_unlock(ref->node); } static void print_binder_proc(struct seq_file *m, @@ -3905,36 +5222,60 @@ static void print_binder_proc(struct seq_file *m, struct rb_node *n; size_t start_pos = m->count; size_t header_pos; + struct binder_node *last_node = NULL; seq_printf(m, "proc %d\n", proc->pid); seq_printf(m, "context %s\n", proc->context->name); header_pos = m->count; + binder_inner_proc_lock(proc); for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) - print_binder_thread(m, rb_entry(n, struct binder_thread, + print_binder_thread_ilocked(m, rb_entry(n, struct binder_thread, rb_node), print_all); + for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) { struct binder_node *node = rb_entry(n, struct binder_node, rb_node); - if (print_all || node->has_async_transaction) - print_binder_node(m, node); - } + /* + * take a temporary reference on the node so it + * survives and isn't removed from the tree + * while we print it. + */ + binder_inc_node_tmpref_ilocked(node); + /* Need to drop inner lock to take node lock */ + binder_inner_proc_unlock(proc); + if (last_node) + binder_put_node(last_node); + binder_node_inner_lock(node); + print_binder_node_nilocked(m, node); + binder_node_inner_unlock(node); + last_node = node; + binder_inner_proc_lock(proc); + } + binder_inner_proc_unlock(proc); + if (last_node) + binder_put_node(last_node); + if (print_all) { + binder_proc_lock(proc); for (n = rb_first(&proc->refs_by_desc); n != NULL; n = rb_next(n)) - print_binder_ref(m, rb_entry(n, struct binder_ref, - rb_node_desc)); + print_binder_ref_olocked(m, rb_entry(n, + struct binder_ref, + rb_node_desc)); + binder_proc_unlock(proc); } - for (n = rb_first(&proc->allocated_buffers); n != NULL; n = rb_next(n)) - print_binder_buffer(m, " buffer", - rb_entry(n, struct binder_buffer, rb_node)); + binder_alloc_print_allocated(m, &proc->alloc); + binder_inner_proc_lock(proc); list_for_each_entry(w, &proc->todo, entry) - print_binder_work(m, " ", " pending transaction", w); + print_binder_work_ilocked(m, proc, " ", + " pending transaction", w); list_for_each_entry(w, &proc->delivered_death, entry) { seq_puts(m, " has delivered dead binder\n"); break; } + binder_inner_proc_unlock(proc); if (!print_all && m->count == header_pos) m->count = start_pos; } @@ -3992,54 +5333,45 @@ static const char * const binder_objstat_strings[] = { "transaction_complete" }; -static void add_binder_stats(struct binder_stats *from, struct binder_stats *to) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(to->bc); i++) - to->bc[i] += from->bc[i]; - - for (i = 0; i < ARRAY_SIZE(to->br); i++) - to->br[i] += from->br[i]; -} - static void print_binder_stats(struct seq_file *m, const char *prefix, - struct binder_stats *stats, - struct binder_obj_stats *obj_stats) + struct binder_stats *stats) { int i; BUILD_BUG_ON(ARRAY_SIZE(stats->bc) != ARRAY_SIZE(binder_command_strings)); for (i = 0; i < ARRAY_SIZE(stats->bc); i++) { - if (stats->bc[i]) + int temp = atomic_read(&stats->bc[i]); + + if (temp) seq_printf(m, "%s%s: %d\n", prefix, - binder_command_strings[i], stats->bc[i]); + binder_command_strings[i], temp); } BUILD_BUG_ON(ARRAY_SIZE(stats->br) != ARRAY_SIZE(binder_return_strings)); for (i = 0; i < ARRAY_SIZE(stats->br); i++) { - if (stats->br[i]) + int temp = atomic_read(&stats->br[i]); + + if (temp) seq_printf(m, "%s%s: %d\n", prefix, - binder_return_strings[i], stats->br[i]); + binder_return_strings[i], temp); } - if (!obj_stats) - return; - - BUILD_BUG_ON(ARRAY_SIZE(obj_stats->obj_created) != + BUILD_BUG_ON(ARRAY_SIZE(stats->obj_created) != ARRAY_SIZE(binder_objstat_strings)); - BUILD_BUG_ON(ARRAY_SIZE(obj_stats->obj_created) != - ARRAY_SIZE(obj_stats->obj_deleted)); - for (i = 0; i < ARRAY_SIZE(obj_stats->obj_created); i++) { - int obj_created = atomic_read(&obj_stats->obj_created[i]); - int obj_deleted = atomic_read(&obj_stats->obj_deleted[i]); - - if (obj_created || obj_deleted) - seq_printf(m, "%s%s: active %d total %d\n", prefix, - binder_objstat_strings[i], - obj_created - obj_deleted, obj_created); + BUILD_BUG_ON(ARRAY_SIZE(stats->obj_created) != + ARRAY_SIZE(stats->obj_deleted)); + for (i = 0; i < ARRAY_SIZE(stats->obj_created); i++) { + int created = atomic_read(&stats->obj_created[i]); + int deleted = atomic_read(&stats->obj_deleted[i]); + + if (created || deleted) + seq_printf(m, "%s%s: active %d total %d\n", + prefix, + binder_objstat_strings[i], + created - deleted, + created); } } @@ -4047,226 +5379,193 @@ static void print_binder_proc_stats(struct seq_file *m, struct binder_proc *proc) { struct binder_work *w; + struct binder_thread *thread; struct rb_node *n; - int count, strong, weak; + int count, strong, weak, ready_threads; + size_t free_async_space = + binder_alloc_get_free_async_space(&proc->alloc); seq_printf(m, "proc %d\n", proc->pid); seq_printf(m, "context %s\n", proc->context->name); count = 0; + ready_threads = 0; + binder_inner_proc_lock(proc); for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) count++; + + list_for_each_entry(thread, &proc->waiting_threads, waiting_thread_node) + ready_threads++; + seq_printf(m, " threads: %d\n", count); seq_printf(m, " requested threads: %d+%d/%d\n" " ready threads %d\n" " free async space %zd\n", proc->requested_threads, proc->requested_threads_started, proc->max_threads, - proc->ready_threads, proc->free_async_space); + ready_threads, + free_async_space); count = 0; for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) count++; + binder_inner_proc_unlock(proc); seq_printf(m, " nodes: %d\n", count); count = 0; strong = 0; weak = 0; + binder_proc_lock(proc); for (n = rb_first(&proc->refs_by_desc); n != NULL; n = rb_next(n)) { struct binder_ref *ref = rb_entry(n, struct binder_ref, rb_node_desc); count++; - strong += ref->strong; - weak += ref->weak; + strong += ref->data.strong; + weak += ref->data.weak; } + binder_proc_unlock(proc); seq_printf(m, " refs: %d s %d w %d\n", count, strong, weak); - count = 0; - for (n = rb_first(&proc->allocated_buffers); n != NULL; n = rb_next(n)) - count++; + count = binder_alloc_get_allocated_count(&proc->alloc); seq_printf(m, " buffers: %d\n", count); count = 0; + binder_inner_proc_lock(proc); list_for_each_entry(w, &proc->todo, entry) { - switch (w->type) { - case BINDER_WORK_TRANSACTION: + if (w->type == BINDER_WORK_TRANSACTION) count++; - break; - default: - break; - } } + binder_inner_proc_unlock(proc); seq_printf(m, " pending transactions: %d\n", count); - print_binder_stats(m, " ", &proc->stats, NULL); + print_binder_stats(m, " ", &proc->stats); } static int binder_state_show(struct seq_file *m, void *unused) { - struct binder_device *device; - struct binder_context *context; struct binder_proc *proc; struct binder_node *node; - int do_lock = !binder_debug_no_lock; - bool wrote_dead_nodes_header = false; + struct binder_node *last_node = NULL; seq_puts(m, "binder state:\n"); - hlist_for_each_entry(device, &binder_devices, hlist) { - context = &device->context; - if (do_lock) - binder_lock(context, __func__); - if (!wrote_dead_nodes_header && - !hlist_empty(&context->binder_dead_nodes)) { - seq_puts(m, "dead nodes:\n"); - wrote_dead_nodes_header = true; - } - hlist_for_each_entry(node, &context->binder_dead_nodes, - dead_node) - print_binder_node(m, node); - - if (do_lock) - binder_unlock(context, __func__); - } - - hlist_for_each_entry(device, &binder_devices, hlist) { - context = &device->context; - if (do_lock) - binder_lock(context, __func__); + spin_lock(&binder_dead_nodes_lock); + if (!hlist_empty(&binder_dead_nodes)) + seq_puts(m, "dead nodes:\n"); + hlist_for_each_entry(node, &binder_dead_nodes, dead_node) { + /* + * take a temporary reference on the node so it + * survives and isn't removed from the list + * while we print it. + */ + node->tmp_refs++; + spin_unlock(&binder_dead_nodes_lock); + if (last_node) + binder_put_node(last_node); + binder_node_lock(node); + print_binder_node_nilocked(m, node); + binder_node_unlock(node); + last_node = node; + spin_lock(&binder_dead_nodes_lock); + } + spin_unlock(&binder_dead_nodes_lock); + if (last_node) + binder_put_node(last_node); + + mutex_lock(&binder_procs_lock); + hlist_for_each_entry(proc, &binder_procs, proc_node) + print_binder_proc(m, proc, 1); + mutex_unlock(&binder_procs_lock); - hlist_for_each_entry(proc, &context->binder_procs, proc_node) - print_binder_proc(m, proc, 1); - if (do_lock) - binder_unlock(context, __func__); - } return 0; } static int binder_stats_show(struct seq_file *m, void *unused) { - struct binder_device *device; - struct binder_context *context; struct binder_proc *proc; - struct binder_stats total_binder_stats; - int do_lock = !binder_debug_no_lock; - - memset(&total_binder_stats, 0, sizeof(struct binder_stats)); - - hlist_for_each_entry(device, &binder_devices, hlist) { - context = &device->context; - if (do_lock) - binder_lock(context, __func__); - - add_binder_stats(&context->binder_stats, &total_binder_stats); - - if (do_lock) - binder_unlock(context, __func__); - } seq_puts(m, "binder stats:\n"); - print_binder_stats(m, "", &total_binder_stats, &binder_obj_stats); - hlist_for_each_entry(device, &binder_devices, hlist) { - context = &device->context; - if (do_lock) - binder_lock(context, __func__); + print_binder_stats(m, "", &binder_stats); + + mutex_lock(&binder_procs_lock); + hlist_for_each_entry(proc, &binder_procs, proc_node) + print_binder_proc_stats(m, proc); + mutex_unlock(&binder_procs_lock); - hlist_for_each_entry(proc, &context->binder_procs, proc_node) - print_binder_proc_stats(m, proc); - if (do_lock) - binder_unlock(context, __func__); - } return 0; } static int binder_transactions_show(struct seq_file *m, void *unused) { - struct binder_device *device; - struct binder_context *context; struct binder_proc *proc; - int do_lock = !binder_debug_no_lock; seq_puts(m, "binder transactions:\n"); - hlist_for_each_entry(device, &binder_devices, hlist) { - context = &device->context; - if (do_lock) - binder_lock(context, __func__); - - hlist_for_each_entry(proc, &context->binder_procs, proc_node) - print_binder_proc(m, proc, 0); - if (do_lock) - binder_unlock(context, __func__); - } + mutex_lock(&binder_procs_lock); + hlist_for_each_entry(proc, &binder_procs, proc_node) + print_binder_proc(m, proc, 0); + mutex_unlock(&binder_procs_lock); + return 0; } static int binder_proc_show(struct seq_file *m, void *unused) { - struct binder_device *device; - struct binder_context *context; struct binder_proc *itr; int pid = (unsigned long)m->private; - int do_lock = !binder_debug_no_lock; - hlist_for_each_entry(device, &binder_devices, hlist) { - context = &device->context; - if (do_lock) - binder_lock(context, __func__); - - hlist_for_each_entry(itr, &context->binder_procs, proc_node) { - if (itr->pid == pid) { - seq_puts(m, "binder proc state:\n"); - print_binder_proc(m, itr, 1); - } + mutex_lock(&binder_procs_lock); + hlist_for_each_entry(itr, &binder_procs, proc_node) { + if (itr->pid == pid) { + seq_puts(m, "binder proc state:\n"); + print_binder_proc(m, itr, 1); } - if (do_lock) - binder_unlock(context, __func__); } + mutex_unlock(&binder_procs_lock); + return 0; } static void print_binder_transaction_log_entry(struct seq_file *m, struct binder_transaction_log_entry *e) { + int debug_id = READ_ONCE(e->debug_id_done); + /* + * read barrier to guarantee debug_id_done read before + * we print the log values + */ + smp_rmb(); seq_printf(m, - "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d\n", + "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d ret %d/%d l=%d", e->debug_id, (e->call_type == 2) ? "reply" : ((e->call_type == 1) ? "async" : "call "), e->from_proc, e->from_thread, e->to_proc, e->to_thread, e->context_name, - e->to_node, e->target_handle, e->data_size, e->offsets_size); -} - -static int print_binder_transaction_log(struct seq_file *m, - struct binder_transaction_log *log) -{ - int i; - if (log->full) { - for (i = log->next; i < ARRAY_SIZE(log->entry); i++) - print_binder_transaction_log_entry(m, &log->entry[i]); - } - for (i = 0; i < log->next; i++) - print_binder_transaction_log_entry(m, &log->entry[i]); - return 0; + e->to_node, e->target_handle, e->data_size, e->offsets_size, + e->return_error, e->return_error_param, + e->return_error_line); + /* + * read-barrier to guarantee read of debug_id_done after + * done printing the fields of the entry + */ + smp_rmb(); + seq_printf(m, debug_id && debug_id == READ_ONCE(e->debug_id_done) ? + "\n" : " (incomplete)\n"); } static int binder_transaction_log_show(struct seq_file *m, void *unused) { - struct binder_device *device; - struct binder_context *context; - - hlist_for_each_entry(device, &binder_devices, hlist) { - context = &device->context; - print_binder_transaction_log(m, &context->transaction_log); - } - return 0; -} + struct binder_transaction_log *log = m->private; + unsigned int log_cur = atomic_read(&log->cur); + unsigned int count; + unsigned int cur; + int i; -static int binder_failed_transaction_log_show(struct seq_file *m, void *unused) -{ - struct binder_device *device; - struct binder_context *context; + count = log_cur + 1; + cur = count < ARRAY_SIZE(log->entry) && !log->full ? + 0 : count % ARRAY_SIZE(log->entry); + if (count > ARRAY_SIZE(log->entry) || log->full) + count = ARRAY_SIZE(log->entry); + for (i = 0; i < count; i++) { + unsigned int index = cur++ % ARRAY_SIZE(log->entry); - hlist_for_each_entry(device, &binder_devices, hlist) { - context = &device->context; - print_binder_transaction_log(m, - &context->transaction_log_failed); + print_binder_transaction_log_entry(m, &log->entry[index]); } return 0; } @@ -4286,20 +5585,11 @@ BINDER_DEBUG_ENTRY(state); BINDER_DEBUG_ENTRY(stats); BINDER_DEBUG_ENTRY(transactions); BINDER_DEBUG_ENTRY(transaction_log); -BINDER_DEBUG_ENTRY(failed_transaction_log); - -static void __init free_binder_device(struct binder_device *device) -{ - if (device->context.binder_deferred_workqueue) - destroy_workqueue(device->context.binder_deferred_workqueue); - kfree(device); -} static int __init init_binder_device(const char *name) { int ret; struct binder_device *binder_device; - struct binder_context *context; binder_device = kzalloc(sizeof(*binder_device), GFP_KERNEL); if (!binder_device) @@ -4309,65 +5599,34 @@ static int __init init_binder_device(const char *name) binder_device->miscdev.minor = MISC_DYNAMIC_MINOR; binder_device->miscdev.name = name; - context = &binder_device->context; - context->binder_context_mgr_uid = INVALID_UID; - context->name = name; - - mutex_init(&context->binder_main_lock); - mutex_init(&context->binder_deferred_lock); - mutex_init(&context->binder_mmap_lock); - - context->binder_deferred_workqueue = - create_singlethread_workqueue(name); - - if (!context->binder_deferred_workqueue) { - ret = -ENOMEM; - goto err_create_singlethread_workqueue_failed; - } - - INIT_HLIST_HEAD(&context->binder_procs); - INIT_HLIST_HEAD(&context->binder_dead_nodes); - INIT_HLIST_HEAD(&context->binder_deferred_list); - INIT_WORK(&context->deferred_work, binder_deferred_func); + binder_device->context.binder_context_mgr_uid = INVALID_UID; + binder_device->context.name = name; + mutex_init(&binder_device->context.context_mgr_node_lock); ret = misc_register(&binder_device->miscdev); if (ret < 0) { - goto err_misc_register_failed; + kfree(binder_device); + return ret; } hlist_add_head(&binder_device->hlist, &binder_devices); - return ret; - -err_create_singlethread_workqueue_failed: -err_misc_register_failed: - free_binder_device(binder_device); return ret; } static int __init binder_init(void) { - int ret = 0; + int ret; char *device_name, *device_names; struct binder_device *device; struct hlist_node *tmp; - /* - * Copy the module_parameter string, because we don't want to - * tokenize it in-place. - */ - device_names = kzalloc(strlen(binder_devices_param) + 1, GFP_KERNEL); - if (!device_names) + atomic_set(&binder_transaction_log.cur, ~0U); + atomic_set(&binder_transaction_log_failed.cur, ~0U); + binder_deferred_workqueue = create_singlethread_workqueue("binder"); + if (!binder_deferred_workqueue) return -ENOMEM; - strcpy(device_names, binder_devices_param); - - while ((device_name = strsep(&device_names, ","))) { - ret = init_binder_device(device_name); - if (ret) - goto err_init_binder_device_failed; - } - binder_debugfs_dir_entry_root = debugfs_create_dir("binder", NULL); if (binder_debugfs_dir_entry_root) binder_debugfs_dir_entry_proc = debugfs_create_dir("proc", @@ -4392,13 +5651,30 @@ static int __init binder_init(void) debugfs_create_file("transaction_log", S_IRUGO, binder_debugfs_dir_entry_root, - NULL, + &binder_transaction_log, &binder_transaction_log_fops); debugfs_create_file("failed_transaction_log", S_IRUGO, binder_debugfs_dir_entry_root, - NULL, - &binder_failed_transaction_log_fops); + &binder_transaction_log_failed, + &binder_transaction_log_fops); + } + + /* + * Copy the module_parameter string, because we don't want to + * tokenize it in-place. + */ + device_names = kzalloc(strlen(binder_devices_param) + 1, GFP_KERNEL); + if (!device_names) { + ret = -ENOMEM; + goto err_alloc_device_names_failed; + } + strcpy(device_names, binder_devices_param); + + while ((device_name = strsep(&device_names, ","))) { + ret = init_binder_device(device_name); + if (ret) + goto err_init_binder_device_failed; } return ret; @@ -4407,8 +5683,12 @@ err_init_binder_device_failed: hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) { misc_deregister(&device->miscdev); hlist_del(&device->hlist); - free_binder_device(device); + kfree(device); } +err_alloc_device_names_failed: + debugfs_remove_recursive(binder_debugfs_dir_entry_root); + + destroy_workqueue(binder_deferred_workqueue); return ret; } diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c new file mode 100644 index 000000000000..aabfebac6e57 --- /dev/null +++ b/drivers/android/binder_alloc.c @@ -0,0 +1,802 @@ +/* binder_alloc.c + * + * Android IPC Subsystem + * + * Copyright (C) 2007-2017 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <asm/cacheflush.h> +#include <linux/list.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/rtmutex.h> +#include <linux/rbtree.h> +#include <linux/seq_file.h> +#include <linux/vmalloc.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include "binder_alloc.h" +#include "binder_trace.h" + +static DEFINE_MUTEX(binder_alloc_mmap_lock); + +enum { + BINDER_DEBUG_OPEN_CLOSE = 1U << 1, + BINDER_DEBUG_BUFFER_ALLOC = 1U << 2, + BINDER_DEBUG_BUFFER_ALLOC_ASYNC = 1U << 3, +}; +static uint32_t binder_alloc_debug_mask; + +module_param_named(debug_mask, binder_alloc_debug_mask, + uint, S_IWUSR | S_IRUGO); + +#define binder_alloc_debug(mask, x...) \ + do { \ + if (binder_alloc_debug_mask & mask) \ + pr_info(x); \ + } while (0) + +static size_t binder_alloc_buffer_size(struct binder_alloc *alloc, + struct binder_buffer *buffer) +{ + if (list_is_last(&buffer->entry, &alloc->buffers)) + return alloc->buffer + + alloc->buffer_size - (void *)buffer->data; + return (size_t)list_entry(buffer->entry.next, + struct binder_buffer, entry) - (size_t)buffer->data; +} + +static void binder_insert_free_buffer(struct binder_alloc *alloc, + struct binder_buffer *new_buffer) +{ + struct rb_node **p = &alloc->free_buffers.rb_node; + struct rb_node *parent = NULL; + struct binder_buffer *buffer; + size_t buffer_size; + size_t new_buffer_size; + + BUG_ON(!new_buffer->free); + + new_buffer_size = binder_alloc_buffer_size(alloc, new_buffer); + + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: add free buffer, size %zd, at %pK\n", + alloc->pid, new_buffer_size, new_buffer); + + while (*p) { + parent = *p; + buffer = rb_entry(parent, struct binder_buffer, rb_node); + BUG_ON(!buffer->free); + + buffer_size = binder_alloc_buffer_size(alloc, buffer); + + if (new_buffer_size < buffer_size) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + rb_link_node(&new_buffer->rb_node, parent, p); + rb_insert_color(&new_buffer->rb_node, &alloc->free_buffers); +} + +static void binder_insert_allocated_buffer_locked( + struct binder_alloc *alloc, struct binder_buffer *new_buffer) +{ + struct rb_node **p = &alloc->allocated_buffers.rb_node; + struct rb_node *parent = NULL; + struct binder_buffer *buffer; + + BUG_ON(new_buffer->free); + + while (*p) { + parent = *p; + buffer = rb_entry(parent, struct binder_buffer, rb_node); + BUG_ON(buffer->free); + + if (new_buffer < buffer) + p = &parent->rb_left; + else if (new_buffer > buffer) + p = &parent->rb_right; + else + BUG(); + } + rb_link_node(&new_buffer->rb_node, parent, p); + rb_insert_color(&new_buffer->rb_node, &alloc->allocated_buffers); +} + +static struct binder_buffer *binder_alloc_prepare_to_free_locked( + struct binder_alloc *alloc, + uintptr_t user_ptr) +{ + struct rb_node *n = alloc->allocated_buffers.rb_node; + struct binder_buffer *buffer; + struct binder_buffer *kern_ptr; + + kern_ptr = (struct binder_buffer *)(user_ptr - alloc->user_buffer_offset + - offsetof(struct binder_buffer, data)); + + while (n) { + buffer = rb_entry(n, struct binder_buffer, rb_node); + BUG_ON(buffer->free); + + if (kern_ptr < buffer) + n = n->rb_left; + else if (kern_ptr > buffer) + n = n->rb_right; + else { + /* + * Guard against user threads attempting to + * free the buffer twice + */ + if (buffer->free_in_progress) { + pr_err("%d:%d FREE_BUFFER u%016llx user freed buffer twice\n", + alloc->pid, current->pid, (u64)user_ptr); + return NULL; + } + buffer->free_in_progress = 1; + return buffer; + } + } + return NULL; +} + +/** + * binder_alloc_buffer_lookup() - get buffer given user ptr + * @alloc: binder_alloc for this proc + * @user_ptr: User pointer to buffer data + * + * Validate userspace pointer to buffer data and return buffer corresponding to + * that user pointer. Search the rb tree for buffer that matches user data + * pointer. + * + * Return: Pointer to buffer or NULL + */ +struct binder_buffer *binder_alloc_prepare_to_free(struct binder_alloc *alloc, + uintptr_t user_ptr) +{ + struct binder_buffer *buffer; + + mutex_lock(&alloc->mutex); + buffer = binder_alloc_prepare_to_free_locked(alloc, user_ptr); + mutex_unlock(&alloc->mutex); + return buffer; +} + +static int binder_update_page_range(struct binder_alloc *alloc, int allocate, + void *start, void *end, + struct vm_area_struct *vma) +{ + void *page_addr; + unsigned long user_page_addr; + struct page **page; + struct mm_struct *mm; + + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: %s pages %pK-%pK\n", alloc->pid, + allocate ? "allocate" : "free", start, end); + + if (end <= start) + return 0; + + trace_binder_update_page_range(alloc, allocate, start, end); + + if (vma) + mm = NULL; + else + mm = get_task_mm(alloc->tsk); + + if (mm) { + down_write(&mm->mmap_sem); + vma = alloc->vma; + if (vma && mm != alloc->vma_vm_mm) { + pr_err("%d: vma mm and task mm mismatch\n", + alloc->pid); + vma = NULL; + } + } + + if (allocate == 0) + goto free_range; + + if (vma == NULL) { + pr_err("%d: binder_alloc_buf failed to map pages in userspace, no vma\n", + alloc->pid); + goto err_no_vma; + } + + for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) { + int ret; + + page = &alloc->pages[(page_addr - alloc->buffer) / PAGE_SIZE]; + + BUG_ON(*page); + *page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); + if (*page == NULL) { + pr_err("%d: binder_alloc_buf failed for page at %pK\n", + alloc->pid, page_addr); + goto err_alloc_page_failed; + } + ret = map_kernel_range_noflush((unsigned long)page_addr, + PAGE_SIZE, PAGE_KERNEL, page); + flush_cache_vmap((unsigned long)page_addr, + (unsigned long)page_addr + PAGE_SIZE); + if (ret != 1) { + pr_err("%d: binder_alloc_buf failed to map page at %pK in kernel\n", + alloc->pid, page_addr); + goto err_map_kernel_failed; + } + user_page_addr = + (uintptr_t)page_addr + alloc->user_buffer_offset; + ret = vm_insert_page(vma, user_page_addr, page[0]); + if (ret) { + pr_err("%d: binder_alloc_buf failed to map page at %lx in userspace\n", + alloc->pid, user_page_addr); + goto err_vm_insert_page_failed; + } + /* vm_insert_page does not seem to increment the refcount */ + } + if (mm) { + up_write(&mm->mmap_sem); + mmput(mm); + } + return 0; + +free_range: + for (page_addr = end - PAGE_SIZE; page_addr >= start; + page_addr -= PAGE_SIZE) { + page = &alloc->pages[(page_addr - alloc->buffer) / PAGE_SIZE]; + if (vma) + zap_page_range(vma, (uintptr_t)page_addr + + alloc->user_buffer_offset, PAGE_SIZE, NULL); +err_vm_insert_page_failed: + unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); +err_map_kernel_failed: + __free_page(*page); + *page = NULL; +err_alloc_page_failed: + ; + } +err_no_vma: + if (mm) { + up_write(&mm->mmap_sem); + mmput(mm); + } + return vma ? -ENOMEM : -ESRCH; +} + +struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc, + size_t data_size, + size_t offsets_size, + size_t extra_buffers_size, + int is_async) +{ + struct rb_node *n = alloc->free_buffers.rb_node; + struct binder_buffer *buffer; + size_t buffer_size; + struct rb_node *best_fit = NULL; + void *has_page_addr; + void *end_page_addr; + size_t size, data_offsets_size; + int ret; + + if (alloc->vma == NULL) { + pr_err("%d: binder_alloc_buf, no vma\n", + alloc->pid); + return ERR_PTR(-ESRCH); + } + + data_offsets_size = ALIGN(data_size, sizeof(void *)) + + ALIGN(offsets_size, sizeof(void *)); + + if (data_offsets_size < data_size || data_offsets_size < offsets_size) { + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: got transaction with invalid size %zd-%zd\n", + alloc->pid, data_size, offsets_size); + return ERR_PTR(-EINVAL); + } + size = data_offsets_size + ALIGN(extra_buffers_size, sizeof(void *)); + if (size < data_offsets_size || size < extra_buffers_size) { + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: got transaction with invalid extra_buffers_size %zd\n", + alloc->pid, extra_buffers_size); + return ERR_PTR(-EINVAL); + } + if (is_async && + alloc->free_async_space < size + sizeof(struct binder_buffer)) { + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: binder_alloc_buf size %zd failed, no async space left\n", + alloc->pid, size); + return ERR_PTR(-ENOSPC); + } + + while (n) { + buffer = rb_entry(n, struct binder_buffer, rb_node); + BUG_ON(!buffer->free); + buffer_size = binder_alloc_buffer_size(alloc, buffer); + + if (size < buffer_size) { + best_fit = n; + n = n->rb_left; + } else if (size > buffer_size) + n = n->rb_right; + else { + best_fit = n; + break; + } + } + if (best_fit == NULL) { + size_t allocated_buffers = 0; + size_t largest_alloc_size = 0; + size_t total_alloc_size = 0; + size_t free_buffers = 0; + size_t largest_free_size = 0; + size_t total_free_size = 0; + + for (n = rb_first(&alloc->allocated_buffers); n != NULL; + n = rb_next(n)) { + buffer = rb_entry(n, struct binder_buffer, rb_node); + buffer_size = binder_alloc_buffer_size(alloc, buffer); + allocated_buffers++; + total_alloc_size += buffer_size; + if (buffer_size > largest_alloc_size) + largest_alloc_size = buffer_size; + } + for (n = rb_first(&alloc->free_buffers); n != NULL; + n = rb_next(n)) { + buffer = rb_entry(n, struct binder_buffer, rb_node); + buffer_size = binder_alloc_buffer_size(alloc, buffer); + free_buffers++; + total_free_size += buffer_size; + if (buffer_size > largest_free_size) + largest_free_size = buffer_size; + } + pr_err("%d: binder_alloc_buf size %zd failed, no address space\n", + alloc->pid, size); + pr_err("allocated: %zd (num: %zd largest: %zd), free: %zd (num: %zd largest: %zd)\n", + total_alloc_size, allocated_buffers, largest_alloc_size, + total_free_size, free_buffers, largest_free_size); + return ERR_PTR(-ENOSPC); + } + if (n == NULL) { + buffer = rb_entry(best_fit, struct binder_buffer, rb_node); + buffer_size = binder_alloc_buffer_size(alloc, buffer); + } + + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: binder_alloc_buf size %zd got buffer %pK size %zd\n", + alloc->pid, size, buffer, buffer_size); + + has_page_addr = + (void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK); + if (n == NULL) { + if (size + sizeof(struct binder_buffer) + 4 >= buffer_size) + buffer_size = size; /* no room for other buffers */ + else + buffer_size = size + sizeof(struct binder_buffer); + } + end_page_addr = + (void *)PAGE_ALIGN((uintptr_t)buffer->data + buffer_size); + if (end_page_addr > has_page_addr) + end_page_addr = has_page_addr; + ret = binder_update_page_range(alloc, 1, + (void *)PAGE_ALIGN((uintptr_t)buffer->data), end_page_addr, NULL); + if (ret) + return ERR_PTR(ret); + + rb_erase(best_fit, &alloc->free_buffers); + buffer->free = 0; + buffer->free_in_progress = 0; + binder_insert_allocated_buffer_locked(alloc, buffer); + if (buffer_size != size) { + struct binder_buffer *new_buffer = (void *)buffer->data + size; + + list_add(&new_buffer->entry, &buffer->entry); + new_buffer->free = 1; + binder_insert_free_buffer(alloc, new_buffer); + } + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: binder_alloc_buf size %zd got %pK\n", + alloc->pid, size, buffer); + buffer->data_size = data_size; + buffer->offsets_size = offsets_size; + buffer->async_transaction = is_async; + buffer->extra_buffers_size = extra_buffers_size; + if (is_async) { + alloc->free_async_space -= size + sizeof(struct binder_buffer); + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, + "%d: binder_alloc_buf size %zd async free %zd\n", + alloc->pid, size, alloc->free_async_space); + } + return buffer; +} + +/** + * binder_alloc_new_buf() - Allocate a new binder buffer + * @alloc: binder_alloc for this proc + * @data_size: size of user data buffer + * @offsets_size: user specified buffer offset + * @extra_buffers_size: size of extra space for meta-data (eg, security context) + * @is_async: buffer for async transaction + * + * Allocate a new buffer given the requested sizes. Returns + * the kernel version of the buffer pointer. The size allocated + * is the sum of the three given sizes (each rounded up to + * pointer-sized boundary) + * + * Return: The allocated buffer or %NULL if error + */ +struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc, + size_t data_size, + size_t offsets_size, + size_t extra_buffers_size, + int is_async) +{ + struct binder_buffer *buffer; + + mutex_lock(&alloc->mutex); + buffer = binder_alloc_new_buf_locked(alloc, data_size, offsets_size, + extra_buffers_size, is_async); + mutex_unlock(&alloc->mutex); + return buffer; +} + +static void *buffer_start_page(struct binder_buffer *buffer) +{ + return (void *)((uintptr_t)buffer & PAGE_MASK); +} + +static void *buffer_end_page(struct binder_buffer *buffer) +{ + return (void *)(((uintptr_t)(buffer + 1) - 1) & PAGE_MASK); +} + +static void binder_delete_free_buffer(struct binder_alloc *alloc, + struct binder_buffer *buffer) +{ + struct binder_buffer *prev, *next = NULL; + int free_page_end = 1; + int free_page_start = 1; + + BUG_ON(alloc->buffers.next == &buffer->entry); + prev = list_entry(buffer->entry.prev, struct binder_buffer, entry); + BUG_ON(!prev->free); + if (buffer_end_page(prev) == buffer_start_page(buffer)) { + free_page_start = 0; + if (buffer_end_page(prev) == buffer_end_page(buffer)) + free_page_end = 0; + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: merge free, buffer %pK share page with %pK\n", + alloc->pid, buffer, prev); + } + + if (!list_is_last(&buffer->entry, &alloc->buffers)) { + next = list_entry(buffer->entry.next, + struct binder_buffer, entry); + if (buffer_start_page(next) == buffer_end_page(buffer)) { + free_page_end = 0; + if (buffer_start_page(next) == + buffer_start_page(buffer)) + free_page_start = 0; + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: merge free, buffer %pK share page with %pK\n", + alloc->pid, buffer, prev); + } + } + list_del(&buffer->entry); + if (free_page_start || free_page_end) { + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: merge free, buffer %pK do not share page%s%s with %pK or %pK\n", + alloc->pid, buffer, free_page_start ? "" : " end", + free_page_end ? "" : " start", prev, next); + binder_update_page_range(alloc, 0, free_page_start ? + buffer_start_page(buffer) : buffer_end_page(buffer), + (free_page_end ? buffer_end_page(buffer) : + buffer_start_page(buffer)) + PAGE_SIZE, NULL); + } +} + +static void binder_free_buf_locked(struct binder_alloc *alloc, + struct binder_buffer *buffer) +{ + size_t size, buffer_size; + + buffer_size = binder_alloc_buffer_size(alloc, buffer); + + size = ALIGN(buffer->data_size, sizeof(void *)) + + ALIGN(buffer->offsets_size, sizeof(void *)) + + ALIGN(buffer->extra_buffers_size, sizeof(void *)); + + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: binder_free_buf %pK size %zd buffer_size %zd\n", + alloc->pid, buffer, size, buffer_size); + + BUG_ON(buffer->free); + BUG_ON(size > buffer_size); + BUG_ON(buffer->transaction != NULL); + BUG_ON((void *)buffer < alloc->buffer); + BUG_ON((void *)buffer > alloc->buffer + alloc->buffer_size); + + if (buffer->async_transaction) { + alloc->free_async_space += size + sizeof(struct binder_buffer); + + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, + "%d: binder_free_buf size %zd async free %zd\n", + alloc->pid, size, alloc->free_async_space); + } + + binder_update_page_range(alloc, 0, + (void *)PAGE_ALIGN((uintptr_t)buffer->data), + (void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK), + NULL); + + rb_erase(&buffer->rb_node, &alloc->allocated_buffers); + buffer->free = 1; + if (!list_is_last(&buffer->entry, &alloc->buffers)) { + struct binder_buffer *next = list_entry(buffer->entry.next, + struct binder_buffer, entry); + + if (next->free) { + rb_erase(&next->rb_node, &alloc->free_buffers); + binder_delete_free_buffer(alloc, next); + } + } + if (alloc->buffers.next != &buffer->entry) { + struct binder_buffer *prev = list_entry(buffer->entry.prev, + struct binder_buffer, entry); + + if (prev->free) { + binder_delete_free_buffer(alloc, buffer); + rb_erase(&prev->rb_node, &alloc->free_buffers); + buffer = prev; + } + } + binder_insert_free_buffer(alloc, buffer); +} + +/** + * binder_alloc_free_buf() - free a binder buffer + * @alloc: binder_alloc for this proc + * @buffer: kernel pointer to buffer + * + * Free the buffer allocated via binder_alloc_new_buffer() + */ +void binder_alloc_free_buf(struct binder_alloc *alloc, + struct binder_buffer *buffer) +{ + mutex_lock(&alloc->mutex); + binder_free_buf_locked(alloc, buffer); + mutex_unlock(&alloc->mutex); +} + +/** + * binder_alloc_mmap_handler() - map virtual address space for proc + * @alloc: alloc structure for this proc + * @vma: vma passed to mmap() + * + * Called by binder_mmap() to initialize the space specified in + * vma for allocating binder buffers + * + * Return: + * 0 = success + * -EBUSY = address space already mapped + * -ENOMEM = failed to map memory to given address space + */ +int binder_alloc_mmap_handler(struct binder_alloc *alloc, + struct vm_area_struct *vma) +{ + int ret; + struct vm_struct *area; + const char *failure_string; + struct binder_buffer *buffer; + + mutex_lock(&binder_alloc_mmap_lock); + if (alloc->buffer) { + ret = -EBUSY; + failure_string = "already mapped"; + goto err_already_mapped; + } + + area = get_vm_area(vma->vm_end - vma->vm_start, VM_IOREMAP); + if (area == NULL) { + ret = -ENOMEM; + failure_string = "get_vm_area"; + goto err_get_vm_area_failed; + } + alloc->buffer = area->addr; + alloc->user_buffer_offset = + vma->vm_start - (uintptr_t)alloc->buffer; + mutex_unlock(&binder_alloc_mmap_lock); + +#ifdef CONFIG_CPU_CACHE_VIPT + if (cache_is_vipt_aliasing()) { + while (CACHE_COLOUR( + (vma->vm_start ^ (uint32_t)alloc->buffer))) { + pr_info("binder_mmap: %d %lx-%lx maps %pK bad alignment\n", + alloc->pid, vma->vm_start, vma->vm_end, + alloc->buffer); + vma->vm_start += PAGE_SIZE; + } + } +#endif + alloc->pages = kzalloc(sizeof(alloc->pages[0]) * + ((vma->vm_end - vma->vm_start) / PAGE_SIZE), + GFP_KERNEL); + if (alloc->pages == NULL) { + ret = -ENOMEM; + failure_string = "alloc page array"; + goto err_alloc_pages_failed; + } + alloc->buffer_size = vma->vm_end - vma->vm_start; + + if (binder_update_page_range(alloc, 1, alloc->buffer, + alloc->buffer + PAGE_SIZE, vma)) { + ret = -ENOMEM; + failure_string = "alloc small buf"; + goto err_alloc_small_buf_failed; + } + buffer = alloc->buffer; + INIT_LIST_HEAD(&alloc->buffers); + list_add(&buffer->entry, &alloc->buffers); + buffer->free = 1; + binder_insert_free_buffer(alloc, buffer); + alloc->free_async_space = alloc->buffer_size / 2; + barrier(); + alloc->vma = vma; + alloc->vma_vm_mm = vma->vm_mm; + + return 0; + +err_alloc_small_buf_failed: + kfree(alloc->pages); + alloc->pages = NULL; +err_alloc_pages_failed: + mutex_lock(&binder_alloc_mmap_lock); + vfree(alloc->buffer); + alloc->buffer = NULL; +err_get_vm_area_failed: +err_already_mapped: + mutex_unlock(&binder_alloc_mmap_lock); + pr_err("%s: %d %lx-%lx %s failed %d\n", __func__, + alloc->pid, vma->vm_start, vma->vm_end, failure_string, ret); + return ret; +} + + +void binder_alloc_deferred_release(struct binder_alloc *alloc) +{ + struct rb_node *n; + int buffers, page_count; + + BUG_ON(alloc->vma); + + buffers = 0; + mutex_lock(&alloc->mutex); + while ((n = rb_first(&alloc->allocated_buffers))) { + struct binder_buffer *buffer; + + buffer = rb_entry(n, struct binder_buffer, rb_node); + + /* Transaction should already have been freed */ + BUG_ON(buffer->transaction); + + binder_free_buf_locked(alloc, buffer); + buffers++; + } + + page_count = 0; + if (alloc->pages) { + int i; + + for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { + void *page_addr; + + if (!alloc->pages[i]) + continue; + + page_addr = alloc->buffer + i * PAGE_SIZE; + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%s: %d: page %d at %pK not freed\n", + __func__, alloc->pid, i, page_addr); + unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); + __free_page(alloc->pages[i]); + page_count++; + } + kfree(alloc->pages); + vfree(alloc->buffer); + } + mutex_unlock(&alloc->mutex); + + binder_alloc_debug(BINDER_DEBUG_OPEN_CLOSE, + "%s: %d buffers %d, pages %d\n", + __func__, alloc->pid, buffers, page_count); +} + +static void print_binder_buffer(struct seq_file *m, const char *prefix, + struct binder_buffer *buffer) +{ + seq_printf(m, "%s %d: %pK size %zd:%zd:%zd %s\n", + prefix, buffer->debug_id, buffer->data, + buffer->data_size, buffer->offsets_size, + buffer->extra_buffers_size, + buffer->transaction ? "active" : "delivered"); +} + +/** + * binder_alloc_print_allocated() - print buffer info + * @m: seq_file for output via seq_printf() + * @alloc: binder_alloc for this proc + * + * Prints information about every buffer associated with + * the binder_alloc state to the given seq_file + */ +void binder_alloc_print_allocated(struct seq_file *m, + struct binder_alloc *alloc) +{ + struct rb_node *n; + + mutex_lock(&alloc->mutex); + for (n = rb_first(&alloc->allocated_buffers); n != NULL; n = rb_next(n)) + print_binder_buffer(m, " buffer", + rb_entry(n, struct binder_buffer, rb_node)); + mutex_unlock(&alloc->mutex); +} + +/** + * binder_alloc_get_allocated_count() - return count of buffers + * @alloc: binder_alloc for this proc + * + * Return: count of allocated buffers + */ +int binder_alloc_get_allocated_count(struct binder_alloc *alloc) +{ + struct rb_node *n; + int count = 0; + + mutex_lock(&alloc->mutex); + for (n = rb_first(&alloc->allocated_buffers); n != NULL; n = rb_next(n)) + count++; + mutex_unlock(&alloc->mutex); + return count; +} + + +/** + * binder_alloc_vma_close() - invalidate address space + * @alloc: binder_alloc for this proc + * + * Called from binder_vma_close() when releasing address space. + * Clears alloc->vma to prevent new incoming transactions from + * allocating more buffers. + */ +void binder_alloc_vma_close(struct binder_alloc *alloc) +{ + WRITE_ONCE(alloc->vma, NULL); + WRITE_ONCE(alloc->vma_vm_mm, NULL); +} + +/** + * binder_alloc_init() - called by binder_open() for per-proc initialization + * @alloc: binder_alloc for this proc + * + * Called from binder_open() to initialize binder_alloc fields for + * new binder proc + */ +void binder_alloc_init(struct binder_alloc *alloc) +{ + alloc->tsk = current->group_leader; + alloc->pid = current->group_leader->pid; + mutex_init(&alloc->mutex); +} + diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h new file mode 100644 index 000000000000..088e4ffc6230 --- /dev/null +++ b/drivers/android/binder_alloc.h @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2017 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_BINDER_ALLOC_H +#define _LINUX_BINDER_ALLOC_H + +#include <linux/rbtree.h> +#include <linux/list.h> +#include <linux/mm.h> +#include <linux/rtmutex.h> +#include <linux/vmalloc.h> +#include <linux/slab.h> + +struct binder_transaction; + +/** + * struct binder_buffer - buffer used for binder transactions + * @entry: entry alloc->buffers + * @rb_node: node for allocated_buffers/free_buffers rb trees + * @free: true if buffer is free + * @allow_user_free: describe the second member of struct blah, + * @async_transaction: describe the second member of struct blah, + * @debug_id: describe the second member of struct blah, + * @transaction: describe the second member of struct blah, + * @target_node: describe the second member of struct blah, + * @data_size: describe the second member of struct blah, + * @offsets_size: describe the second member of struct blah, + * @extra_buffers_size: describe the second member of struct blah, + * @data:i describe the second member of struct blah, + * + * Bookkeeping structure for binder transaction buffers + */ +struct binder_buffer { + struct list_head entry; /* free and allocated entries by address */ + struct rb_node rb_node; /* free entry by size or allocated entry */ + /* by address */ + unsigned free:1; + unsigned allow_user_free:1; + unsigned async_transaction:1; + unsigned free_in_progress:1; + unsigned debug_id:28; + + struct binder_transaction *transaction; + + struct binder_node *target_node; + size_t data_size; + size_t offsets_size; + size_t extra_buffers_size; + uint8_t data[0]; +}; + +/** + * struct binder_alloc - per-binder proc state for binder allocator + * @vma: vm_area_struct passed to mmap_handler + * (invarient after mmap) + * @tsk: tid for task that called init for this proc + * (invariant after init) + * @vma_vm_mm: copy of vma->vm_mm (invarient after mmap) + * @buffer: base of per-proc address space mapped via mmap + * @user_buffer_offset: offset between user and kernel VAs for buffer + * @buffers: list of all buffers for this proc + * @free_buffers: rb tree of buffers available for allocation + * sorted by size + * @allocated_buffers: rb tree of allocated buffers sorted by address + * @free_async_space: VA space available for async buffers. This is + * initialized at mmap time to 1/2 the full VA space + * @pages: array of physical page addresses for each + * page of mmap'd space + * @buffer_size: size of address space specified via mmap + * @pid: pid for associated binder_proc (invariant after init) + * + * Bookkeeping structure for per-proc address space management for binder + * buffers. It is normally initialized during binder_init() and binder_mmap() + * calls. The address space is used for both user-visible buffers and for + * struct binder_buffer objects used to track the user buffers + */ +struct binder_alloc { + struct mutex mutex; + struct task_struct *tsk; + struct vm_area_struct *vma; + struct mm_struct *vma_vm_mm; + void *buffer; + ptrdiff_t user_buffer_offset; + struct list_head buffers; + struct rb_root free_buffers; + struct rb_root allocated_buffers; + size_t free_async_space; + struct page **pages; + size_t buffer_size; + uint32_t buffer_free; + int pid; +}; + +extern struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc, + size_t data_size, + size_t offsets_size, + size_t extra_buffers_size, + int is_async); +extern void binder_alloc_init(struct binder_alloc *alloc); +extern void binder_alloc_vma_close(struct binder_alloc *alloc); +extern struct binder_buffer * +binder_alloc_prepare_to_free(struct binder_alloc *alloc, + uintptr_t user_ptr); +extern void binder_alloc_free_buf(struct binder_alloc *alloc, + struct binder_buffer *buffer); +extern int binder_alloc_mmap_handler(struct binder_alloc *alloc, + struct vm_area_struct *vma); +extern void binder_alloc_deferred_release(struct binder_alloc *alloc); +extern int binder_alloc_get_allocated_count(struct binder_alloc *alloc); +extern void binder_alloc_print_allocated(struct seq_file *m, + struct binder_alloc *alloc); + +/** + * binder_alloc_get_free_async_space() - get free space available for async + * @alloc: binder_alloc for this proc + * + * Return: the bytes remaining in the address-space for async transactions + */ +static inline size_t +binder_alloc_get_free_async_space(struct binder_alloc *alloc) +{ + size_t free_async_space; + + mutex_lock(&alloc->mutex); + free_async_space = alloc->free_async_space; + mutex_unlock(&alloc->mutex); + return free_async_space; +} + +/** + * binder_alloc_get_user_buffer_offset() - get offset between kernel/user addrs + * @alloc: binder_alloc for this proc + * + * Return: the offset between kernel and user-space addresses to use for + * virtual address conversion + */ +static inline ptrdiff_t +binder_alloc_get_user_buffer_offset(struct binder_alloc *alloc) +{ + /* + * user_buffer_offset is constant if vma is set and + * undefined if vma is not set. It is possible to + * get here with !alloc->vma if the target process + * is dying while a transaction is being initiated. + * Returning the old value is ok in this case and + * the transaction will fail. + */ + return alloc->user_buffer_offset; +} + +#endif /* _LINUX_BINDER_ALLOC_H */ + diff --git a/drivers/android/binder_trace.h b/drivers/android/binder_trace.h index 7f20f3dc8369..7967db16ba5a 100644 --- a/drivers/android/binder_trace.h +++ b/drivers/android/binder_trace.h @@ -23,7 +23,8 @@ struct binder_buffer; struct binder_node; struct binder_proc; -struct binder_ref; +struct binder_alloc; +struct binder_ref_data; struct binder_thread; struct binder_transaction; @@ -146,8 +147,8 @@ TRACE_EVENT(binder_transaction_received, TRACE_EVENT(binder_transaction_node_to_ref, TP_PROTO(struct binder_transaction *t, struct binder_node *node, - struct binder_ref *ref), - TP_ARGS(t, node, ref), + struct binder_ref_data *rdata), + TP_ARGS(t, node, rdata), TP_STRUCT__entry( __field(int, debug_id) @@ -160,8 +161,8 @@ TRACE_EVENT(binder_transaction_node_to_ref, __entry->debug_id = t->debug_id; __entry->node_debug_id = node->debug_id; __entry->node_ptr = node->ptr; - __entry->ref_debug_id = ref->debug_id; - __entry->ref_desc = ref->desc; + __entry->ref_debug_id = rdata->debug_id; + __entry->ref_desc = rdata->desc; ), TP_printk("transaction=%d node=%d src_ptr=0x%016llx ==> dest_ref=%d dest_desc=%d", __entry->debug_id, __entry->node_debug_id, @@ -170,8 +171,9 @@ TRACE_EVENT(binder_transaction_node_to_ref, ); TRACE_EVENT(binder_transaction_ref_to_node, - TP_PROTO(struct binder_transaction *t, struct binder_ref *ref), - TP_ARGS(t, ref), + TP_PROTO(struct binder_transaction *t, struct binder_node *node, + struct binder_ref_data *rdata), + TP_ARGS(t, node, rdata), TP_STRUCT__entry( __field(int, debug_id) @@ -182,10 +184,10 @@ TRACE_EVENT(binder_transaction_ref_to_node, ), TP_fast_assign( __entry->debug_id = t->debug_id; - __entry->ref_debug_id = ref->debug_id; - __entry->ref_desc = ref->desc; - __entry->node_debug_id = ref->node->debug_id; - __entry->node_ptr = ref->node->ptr; + __entry->ref_debug_id = rdata->debug_id; + __entry->ref_desc = rdata->desc; + __entry->node_debug_id = node->debug_id; + __entry->node_ptr = node->ptr; ), TP_printk("transaction=%d node=%d src_ref=%d src_desc=%d ==> dest_ptr=0x%016llx", __entry->debug_id, __entry->node_debug_id, @@ -194,9 +196,10 @@ TRACE_EVENT(binder_transaction_ref_to_node, ); TRACE_EVENT(binder_transaction_ref_to_ref, - TP_PROTO(struct binder_transaction *t, struct binder_ref *src_ref, - struct binder_ref *dest_ref), - TP_ARGS(t, src_ref, dest_ref), + TP_PROTO(struct binder_transaction *t, struct binder_node *node, + struct binder_ref_data *src_ref, + struct binder_ref_data *dest_ref), + TP_ARGS(t, node, src_ref, dest_ref), TP_STRUCT__entry( __field(int, debug_id) @@ -208,7 +211,7 @@ TRACE_EVENT(binder_transaction_ref_to_ref, ), TP_fast_assign( __entry->debug_id = t->debug_id; - __entry->node_debug_id = src_ref->node->debug_id; + __entry->node_debug_id = node->debug_id; __entry->src_ref_debug_id = src_ref->debug_id; __entry->src_ref_desc = src_ref->desc; __entry->dest_ref_debug_id = dest_ref->debug_id; @@ -268,9 +271,9 @@ DEFINE_EVENT(binder_buffer_class, binder_transaction_failed_buffer_release, TP_ARGS(buffer)); TRACE_EVENT(binder_update_page_range, - TP_PROTO(struct binder_proc *proc, bool allocate, + TP_PROTO(struct binder_alloc *alloc, bool allocate, void *start, void *end), - TP_ARGS(proc, allocate, start, end), + TP_ARGS(alloc, allocate, start, end), TP_STRUCT__entry( __field(int, proc) __field(bool, allocate) @@ -278,9 +281,9 @@ TRACE_EVENT(binder_update_page_range, __field(size_t, size) ), TP_fast_assign( - __entry->proc = proc->pid; + __entry->proc = alloc->pid; __entry->allocate = allocate; - __entry->offset = start - proc->buffer; + __entry->offset = start - alloc->buffer; __entry->size = end - start; ), TP_printk("proc=%d allocate=%d offset=%zu size=%zu", diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index e417e1a1d02c..5b2aee83d776 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2832,10 +2832,12 @@ static unsigned int atapi_xlat(struct ata_queued_cmd *qc) static struct ata_device *ata_find_dev(struct ata_port *ap, int devno) { if (!sata_pmp_attached(ap)) { - if (likely(devno < ata_link_max_devices(&ap->link))) + if (likely(devno >= 0 && + devno < ata_link_max_devices(&ap->link))) return &ap->link.device[devno]; } else { - if (likely(devno < ap->nr_pmp_links)) + if (likely(devno >= 0 && + devno < ap->nr_pmp_links)) return &ap->pmp_link[devno].device[0]; } diff --git a/drivers/base/core.c b/drivers/base/core.c index f18856f5954b..afe045792796 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2094,7 +2094,11 @@ void device_shutdown(void) pm_runtime_get_noresume(dev); pm_runtime_barrier(dev); - if (dev->bus && dev->bus->shutdown) { + if (dev->class && dev->class->shutdown) { + if (initcall_debug) + dev_info(dev, "shutdown\n"); + dev->class->shutdown(dev); + } else if (dev->bus && dev->bus->shutdown) { if (initcall_debug) dev_info(dev, "shutdown\n"); dev->bus->shutdown(dev); diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 331ecf0b5fbf..894e815be4fa 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -965,7 +965,6 @@ static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev, } dev->power.subsys_data->domain_data = &gpd_data->base; - dev->pm_domain = &genpd->domain; spin_unlock_irq(&dev->power.lock); @@ -984,7 +983,6 @@ static void genpd_free_dev_data(struct device *dev, { spin_lock_irq(&dev->power.lock); - dev->pm_domain = NULL; dev->power.subsys_data->domain_data = NULL; spin_unlock_irq(&dev->power.lock); @@ -1025,6 +1023,8 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, if (ret) goto out; + dev->pm_domain = &genpd->domain; + genpd->device_count++; genpd->max_off_time_changed = true; @@ -1076,6 +1076,8 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, if (genpd->detach_dev) genpd->detach_dev(genpd, dev); + dev->pm_domain = NULL; + list_del_init(&pdd->list_node); mutex_unlock(&genpd->lock); @@ -1150,7 +1152,7 @@ EXPORT_SYMBOL_GPL(pm_genpd_add_subdomain); int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *subdomain) { - struct gpd_link *link; + struct gpd_link *l, *link; int ret = -EINVAL; if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain)) @@ -1166,7 +1168,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, goto out; } - list_for_each_entry(link, &genpd->master_links, master_node) { + list_for_each_entry_safe(link, l, &genpd->master_links, master_node) { if (link->slave != subdomain) continue; @@ -1417,10 +1419,10 @@ EXPORT_SYMBOL_GPL(__of_genpd_add_provider); */ void of_genpd_del_provider(struct device_node *np) { - struct of_genpd_provider *cp; + struct of_genpd_provider *cp, *tmp; mutex_lock(&of_genpd_mutex); - list_for_each_entry(cp, &of_genpd_providers, link) { + list_for_each_entry_safe(cp, tmp, &of_genpd_providers, link) { if (cp->node == np) { list_del(&cp->link); of_node_put(cp->node); diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index a7b46798c81d..39efa7e6c0c0 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -268,6 +268,8 @@ static ssize_t pm_qos_latency_tolerance_store(struct device *dev, value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; else if (!strcmp(buf, "any") || !strcmp(buf, "any\n")) value = PM_QOS_LATENCY_ANY; + else + return -EINVAL; } ret = dev_pm_qos_update_user_latency_tolerance(dev, value); return ret < 0 ? ret : n; diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 0e494108c20c..7af116e12e53 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -61,6 +61,8 @@ static LIST_HEAD(wakeup_sources); static DECLARE_WAIT_QUEUE_HEAD(wakeup_count_wait_queue); +DEFINE_STATIC_SRCU(wakeup_srcu); + static struct wakeup_source deleted_ws = { .name = "deleted", .lock = __SPIN_LOCK_UNLOCKED(deleted_ws.lock), @@ -199,7 +201,7 @@ void wakeup_source_remove(struct wakeup_source *ws) spin_lock_irqsave(&events_lock, flags); list_del_rcu(&ws->entry); spin_unlock_irqrestore(&events_lock, flags); - synchronize_rcu(); + synchronize_srcu(&wakeup_srcu); } EXPORT_SYMBOL_GPL(wakeup_source_remove); @@ -331,13 +333,14 @@ void device_wakeup_detach_irq(struct device *dev) void device_wakeup_arm_wake_irqs(void) { struct wakeup_source *ws; + int srcuidx; - rcu_read_lock(); + srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu(ws, &wakeup_sources, entry) { if (ws->wakeirq) dev_pm_arm_wake_irq(ws->wakeirq); } - rcu_read_unlock(); + srcu_read_unlock(&wakeup_srcu, srcuidx); } /** @@ -348,13 +351,14 @@ void device_wakeup_arm_wake_irqs(void) void device_wakeup_disarm_wake_irqs(void) { struct wakeup_source *ws; + int srcuidx; - rcu_read_lock(); + srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu(ws, &wakeup_sources, entry) { if (ws->wakeirq) dev_pm_disarm_wake_irq(ws->wakeirq); } - rcu_read_unlock(); + srcu_read_unlock(&wakeup_srcu, srcuidx); } /** @@ -839,10 +843,10 @@ EXPORT_SYMBOL_GPL(pm_get_active_wakeup_sources); void pm_print_active_wakeup_sources(void) { struct wakeup_source *ws; - int active = 0; + int srcuidx, active = 0; struct wakeup_source *last_activity_ws = NULL; - rcu_read_lock(); + srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu(ws, &wakeup_sources, entry) { if (ws->active) { pr_info("active wakeup source: %s\n", ws->name); @@ -858,7 +862,7 @@ void pm_print_active_wakeup_sources(void) if (!active && last_activity_ws) pr_info("last active wakeup source: %s\n", last_activity_ws->name); - rcu_read_unlock(); + srcu_read_unlock(&wakeup_srcu, srcuidx); } EXPORT_SYMBOL_GPL(pm_print_active_wakeup_sources); @@ -985,8 +989,9 @@ void pm_wakep_autosleep_enabled(bool set) { struct wakeup_source *ws; ktime_t now = ktime_get(); + int srcuidx; - rcu_read_lock(); + srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu(ws, &wakeup_sources, entry) { spin_lock_irq(&ws->lock); if (ws->autosleep_enabled != set) { @@ -1000,7 +1005,7 @@ void pm_wakep_autosleep_enabled(bool set) } spin_unlock_irq(&ws->lock); } - rcu_read_unlock(); + srcu_read_unlock(&wakeup_srcu, srcuidx); } #endif /* CONFIG_PM_AUTOSLEEP */ @@ -1061,15 +1066,16 @@ static int print_wakeup_source_stats(struct seq_file *m, static int wakeup_sources_stats_show(struct seq_file *m, void *unused) { struct wakeup_source *ws; + int srcuidx; seq_puts(m, "name\t\t\t\t\tactive_count\tevent_count\twakeup_count\t" "expire_count\tactive_since\ttotal_time\tmax_time\t" "last_change\tprevent_suspend_time\n"); - rcu_read_lock(); + srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu(ws, &wakeup_sources, entry) print_wakeup_source_stats(m, ws); - rcu_read_unlock(); + srcu_read_unlock(&wakeup_srcu, srcuidx); print_wakeup_source_stats(m, &deleted_ws); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6ca35495a5be..1e5cd39d0cc2 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -641,11 +641,12 @@ static int virtblk_probe(struct virtio_device *vdev) if (err) goto out_put_disk; - q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set); + q = blk_mq_init_queue(&vblk->tag_set); if (IS_ERR(q)) { err = -ENOMEM; goto out_free_tags; } + vblk->disk->queue = q; q->queuedata = vblk; diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 41fb1a917b17..33e23a7a691f 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -595,8 +595,6 @@ int xen_blkif_schedule(void *arg) unsigned long timeout; int ret; - xen_blkif_get(blkif); - while (!kthread_should_stop()) { if (try_to_freeze()) continue; @@ -650,7 +648,6 @@ purge_gnt_list: print_stats(blkif); blkif->xenblkd = NULL; - xen_blkif_put(blkif); return 0; } diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index f53cff42f8da..923308201375 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -221,7 +221,6 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) if (blkif->xenblkd) { kthread_stop(blkif->xenblkd); wake_up(&blkif->shutdown_wq); - blkif->xenblkd = NULL; } /* The above kthread_stop() guarantees that at this point we @@ -266,9 +265,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) static void xen_blkif_free(struct xen_blkif *blkif) { - - xen_blkif_disconnect(blkif); + WARN_ON(xen_blkif_disconnect(blkif)); xen_vbd_free(&blkif->vbd); + kfree(blkif->be->mode); + kfree(blkif->be); /* Make sure everything is drained before shutting down */ BUG_ON(blkif->persistent_gnt_c != 0); @@ -445,8 +445,6 @@ static int xen_blkbk_remove(struct xenbus_device *dev) xen_blkif_put(be->blkif); } - kfree(be->mode); - kfree(be); return 0; } diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index a084a4751fa9..25372dc381d4 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -3877,6 +3877,9 @@ static void smi_recv_tasklet(unsigned long val) * because the lower layer is allowed to hold locks while calling * message delivery. */ + + rcu_read_lock(); + if (!run_to_completion) spin_lock_irqsave(&intf->xmit_msgs_lock, flags); if (intf->curr_msg == NULL && !intf->in_shutdown) { @@ -3899,6 +3902,8 @@ static void smi_recv_tasklet(unsigned long val) if (newmsg) intf->handlers->sender(intf->send_info, newmsg); + rcu_read_unlock(); + handle_new_recv_msgs(intf); } diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 0d83cfb9708f..f53e8ba2c718 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -758,6 +758,11 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, result, len, data[2]); } else if (data[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2 || data[1] != IPMI_GET_MSG_FLAGS_CMD) { + /* + * Don't abort here, maybe it was a queued + * response to a previous command. + */ + ipmi_ssif_unlock_cond(ssif_info, flags); pr_warn(PFX "Invalid response getting flags: %x %x\n", data[0], data[1]); } else { diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 096f0cef4da1..40d400fe5bb7 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -1162,10 +1162,11 @@ static int wdog_reboot_handler(struct notifier_block *this, ipmi_watchdog_state = WDOG_TIMEOUT_NONE; ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB); } else if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) { - /* Set a long timer to let the reboot happens, but - reboot if it hangs, but only if the watchdog + /* Set a long timer to let the reboot happen or + reset if it hangs, but only if the watchdog timer was already running. */ - timeout = 120; + if (timeout < 120) + timeout = 120; pretimeout = 0; ipmi_watchdog_state = WDOG_TIMEOUT_RESET; ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB); diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 252142524ff2..a0d9ac6b6cc9 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -29,33 +29,92 @@ #include "tpm.h" #include "tpm_eventlog.h" -static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES); -static LIST_HEAD(tpm_chip_list); -static DEFINE_SPINLOCK(driver_lock); +DEFINE_IDR(dev_nums_idr); +static DEFINE_MUTEX(idr_lock); struct class *tpm_class; dev_t tpm_devt; -/* - * tpm_chip_find_get - return tpm_chip for a given chip number - * @chip_num the device number for the chip +/** + * tpm_try_get_ops() - Get a ref to the tpm_chip + * @chip: Chip to ref + * + * The caller must already have some kind of locking to ensure that chip is + * valid. This function will lock the chip so that the ops member can be + * accessed safely. The locking prevents tpm_chip_unregister from + * completing, so it should not be held for long periods. + * + * Returns -ERRNO if the chip could not be got. */ -struct tpm_chip *tpm_chip_find_get(int chip_num) +int tpm_try_get_ops(struct tpm_chip *chip) { - struct tpm_chip *pos, *chip = NULL; + int rc = -EIO; - rcu_read_lock(); - list_for_each_entry_rcu(pos, &tpm_chip_list, list) { - if (chip_num != TPM_ANY_NUM && chip_num != pos->dev_num) - continue; + get_device(&chip->dev); - if (try_module_get(pos->pdev->driver->owner)) { - chip = pos; - break; - } + down_read(&chip->ops_sem); + if (!chip->ops) + goto out_lock; + + if (!try_module_get(chip->dev.parent->driver->owner)) + goto out_lock; + + return 0; +out_lock: + up_read(&chip->ops_sem); + put_device(&chip->dev); + return rc; +} +EXPORT_SYMBOL_GPL(tpm_try_get_ops); + +/** + * tpm_put_ops() - Release a ref to the tpm_chip + * @chip: Chip to put + * + * This is the opposite pair to tpm_try_get_ops(). After this returns chip may + * be kfree'd. + */ +void tpm_put_ops(struct tpm_chip *chip) +{ + module_put(chip->dev.parent->driver->owner); + up_read(&chip->ops_sem); + put_device(&chip->dev); +} +EXPORT_SYMBOL_GPL(tpm_put_ops); + +/** + * tpm_chip_find_get() - return tpm_chip for a given chip number + * @chip_num: id to find + * + * The return'd chip has been tpm_try_get_ops'd and must be released via + * tpm_put_ops + */ +struct tpm_chip *tpm_chip_find_get(int chip_num) +{ + struct tpm_chip *chip, *res = NULL; + int chip_prev; + + mutex_lock(&idr_lock); + + if (chip_num == TPM_ANY_NUM) { + chip_num = 0; + do { + chip_prev = chip_num; + chip = idr_get_next(&dev_nums_idr, &chip_num); + if (chip && !tpm_try_get_ops(chip)) { + res = chip; + break; + } + } while (chip_prev != chip_num); + } else { + chip = idr_find_slowpath(&dev_nums_idr, chip_num); + if (chip && !tpm_try_get_ops(chip)) + res = chip; } - rcu_read_unlock(); - return chip; + + mutex_unlock(&idr_lock); + + return res; } /** @@ -68,12 +127,48 @@ static void tpm_dev_release(struct device *dev) { struct tpm_chip *chip = container_of(dev, struct tpm_chip, dev); - spin_lock(&driver_lock); - clear_bit(chip->dev_num, dev_mask); - spin_unlock(&driver_lock); + mutex_lock(&idr_lock); + idr_remove(&dev_nums_idr, chip->dev_num); + mutex_unlock(&idr_lock); + kfree(chip); } + +/** + * tpm_class_shutdown() - prepare the TPM device for loss of power. + * @dev: device to which the chip is associated. + * + * Issues a TPM2_Shutdown command prior to loss of power, as required by the + * TPM 2.0 spec. + * Then, calls bus- and device- specific shutdown code. + * + * XXX: This codepath relies on the fact that sysfs is not enabled for + * TPM2: sysfs uses an implicit lock on chip->ops, so this could race if TPM2 + * has sysfs support enabled before TPM sysfs's implicit locking is fixed. + */ +static int tpm_class_shutdown(struct device *dev) +{ + struct tpm_chip *chip = container_of(dev, struct tpm_chip, dev); + + if (chip->flags & TPM_CHIP_FLAG_TPM2) { + down_write(&chip->ops_sem); + tpm2_shutdown(chip, TPM2_SU_CLEAR); + chip->ops = NULL; + up_write(&chip->ops_sem); + } + /* Allow bus- and device-specific code to run. Note: since chip->ops + * is NULL, more-specific shutdown code will not be able to issue TPM + * commands. + */ + if (dev->bus && dev->bus->shutdown) + dev->bus->shutdown(dev); + else if (dev->driver && dev->driver->shutdown) + dev->driver->shutdown(dev); + return 0; +} + + /** * tpmm_chip_alloc() - allocate a new struct tpm_chip instance * @dev: device to which the chip is associated @@ -88,37 +183,35 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev, const struct tpm_class_ops *ops) { struct tpm_chip *chip; + int rc; chip = kzalloc(sizeof(*chip), GFP_KERNEL); if (chip == NULL) return ERR_PTR(-ENOMEM); mutex_init(&chip->tpm_mutex); - INIT_LIST_HEAD(&chip->list); + init_rwsem(&chip->ops_sem); chip->ops = ops; - spin_lock(&driver_lock); - chip->dev_num = find_first_zero_bit(dev_mask, TPM_NUM_DEVICES); - spin_unlock(&driver_lock); - - if (chip->dev_num >= TPM_NUM_DEVICES) { + mutex_lock(&idr_lock); + rc = idr_alloc(&dev_nums_idr, NULL, 0, TPM_NUM_DEVICES, GFP_KERNEL); + mutex_unlock(&idr_lock); + if (rc < 0) { dev_err(dev, "No available tpm device numbers\n"); kfree(chip); - return ERR_PTR(-ENOMEM); + return ERR_PTR(rc); } - - set_bit(chip->dev_num, dev_mask); + chip->dev_num = rc; scnprintf(chip->devname, sizeof(chip->devname), "tpm%d", chip->dev_num); - chip->pdev = dev; - dev_set_drvdata(dev, chip); chip->dev.class = tpm_class; + chip->dev.class->shutdown = tpm_class_shutdown; chip->dev.release = tpm_dev_release; - chip->dev.parent = chip->pdev; + chip->dev.parent = dev; #ifdef CONFIG_ACPI chip->dev.groups = chip->groups; #endif @@ -133,7 +226,7 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev, device_initialize(&chip->dev); cdev_init(&chip->cdev, &tpm_fops); - chip->cdev.owner = chip->pdev->driver->owner; + chip->cdev.owner = dev->driver->owner; chip->cdev.kobj.parent = &chip->dev.kobj; devm_add_action(dev, (void (*)(void *)) put_device, &chip->dev); @@ -167,6 +260,11 @@ static int tpm_add_char_device(struct tpm_chip *chip) return rc; } + /* Make the chip available. */ + mutex_lock(&idr_lock); + idr_replace(&dev_nums_idr, chip, chip->dev_num); + mutex_unlock(&idr_lock); + return rc; } @@ -174,6 +272,16 @@ static void tpm_del_char_device(struct tpm_chip *chip) { cdev_del(&chip->cdev); device_del(&chip->dev); + + /* Make the chip unavailable. */ + mutex_lock(&idr_lock); + idr_replace(&dev_nums_idr, NULL, chip->dev_num); + mutex_unlock(&idr_lock); + + /* Make the driver uncallable. */ + down_write(&chip->ops_sem); + chip->ops = NULL; + up_write(&chip->ops_sem); } static int tpm1_chip_register(struct tpm_chip *chip) @@ -228,17 +336,11 @@ int tpm_chip_register(struct tpm_chip *chip) if (rc) goto out_err; - /* Make the chip available. */ - spin_lock(&driver_lock); - list_add_tail_rcu(&chip->list, &tpm_chip_list); - spin_unlock(&driver_lock); - chip->flags |= TPM_CHIP_FLAG_REGISTERED; if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) { - rc = __compat_only_sysfs_link_entry_to_kobj(&chip->pdev->kobj, - &chip->dev.kobj, - "ppi"); + rc = __compat_only_sysfs_link_entry_to_kobj( + &chip->dev.parent->kobj, &chip->dev.kobj, "ppi"); if (rc && rc != -ENOENT) { tpm_chip_unregister(chip); return rc; @@ -259,6 +361,9 @@ EXPORT_SYMBOL_GPL(tpm_chip_register); * Takes the chip first away from the list of available TPM chips and then * cleans up all the resources reserved by tpm_chip_register(). * + * Once this function returns the driver call backs in 'op's will not be + * running and will no longer start. + * * NOTE: This function should be only called before deinitializing chip * resources. */ @@ -267,13 +372,8 @@ void tpm_chip_unregister(struct tpm_chip *chip) if (!(chip->flags & TPM_CHIP_FLAG_REGISTERED)) return; - spin_lock(&driver_lock); - list_del_rcu(&chip->list); - spin_unlock(&driver_lock); - synchronize_rcu(); - if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) - sysfs_remove_link(&chip->pdev->kobj, "ppi"); + sysfs_remove_link(&chip->dev.parent->kobj, "ppi"); tpm1_chip_unregister(chip); tpm_del_char_device(chip); diff --git a/drivers/char/tpm/tpm-dev.c b/drivers/char/tpm/tpm-dev.c index 4f3137d9a35e..912ad30be585 100644 --- a/drivers/char/tpm/tpm-dev.c +++ b/drivers/char/tpm/tpm-dev.c @@ -61,7 +61,7 @@ static int tpm_open(struct inode *inode, struct file *file) * by the check of is_open variable, which is protected * by driver_lock. */ if (test_and_set_bit(0, &chip->is_open)) { - dev_dbg(chip->pdev, "Another process owns this TPM\n"); + dev_dbg(&chip->dev, "Another process owns this TPM\n"); return -EBUSY; } @@ -79,7 +79,6 @@ static int tpm_open(struct inode *inode, struct file *file) INIT_WORK(&priv->work, timeout_work); file->private_data = priv; - get_device(chip->pdev); return 0; } @@ -137,9 +136,18 @@ static ssize_t tpm_write(struct file *file, const char __user *buf, return -EFAULT; } - /* atomic tpm command send and result receive */ + /* atomic tpm command send and result receive. We only hold the ops + * lock during this period so that the tpm can be unregistered even if + * the char dev is held open. + */ + if (tpm_try_get_ops(priv->chip)) { + mutex_unlock(&priv->buffer_mutex); + return -EPIPE; + } out_size = tpm_transmit(priv->chip, priv->data_buffer, sizeof(priv->data_buffer), 0); + + tpm_put_ops(priv->chip); if (out_size < 0) { mutex_unlock(&priv->buffer_mutex); return out_size; @@ -166,7 +174,6 @@ static int tpm_release(struct inode *inode, struct file *file) file->private_data = NULL; atomic_set(&priv->data_pending, 0); clear_bit(0, &priv->chip->is_open); - put_device(priv->chip->pdev); kfree(priv); return 0; } diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 17abe52e6365..aaa5fa95dede 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -343,7 +343,7 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const u8 *buf, size_t bufsiz, if (count == 0) return -ENODATA; if (count > bufsiz) { - dev_err(chip->pdev, + dev_err(&chip->dev, "invalid count value %x %zx\n", count, bufsiz); return -E2BIG; } @@ -353,7 +353,7 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const u8 *buf, size_t bufsiz, rc = chip->ops->send(chip, (u8 *) buf, count); if (rc < 0) { - dev_err(chip->pdev, + dev_err(&chip->dev, "tpm_transmit: tpm_send: error %zd\n", rc); goto out; } @@ -372,7 +372,7 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const u8 *buf, size_t bufsiz, goto out_recv; if (chip->ops->req_canceled(chip, status)) { - dev_err(chip->pdev, "Operation Canceled\n"); + dev_err(&chip->dev, "Operation Canceled\n"); rc = -ECANCELED; goto out; } @@ -382,14 +382,14 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const u8 *buf, size_t bufsiz, } while (time_before(jiffies, stop)); chip->ops->cancel(chip); - dev_err(chip->pdev, "Operation Timed out\n"); + dev_err(&chip->dev, "Operation Timed out\n"); rc = -ETIME; goto out; out_recv: rc = chip->ops->recv(chip, (u8 *) buf, bufsiz); if (rc < 0) - dev_err(chip->pdev, + dev_err(&chip->dev, "tpm_transmit: tpm_recv: error %zd\n", rc); out: if (!(flags & TPM_TRANSMIT_UNLOCKED)) @@ -416,7 +416,7 @@ ssize_t tpm_transmit_cmd(struct tpm_chip *chip, const void *cmd, err = be32_to_cpu(header->return_code); if (err != 0 && desc) - dev_err(chip->pdev, "A TPM error (%d) occurred %s\n", err, + dev_err(&chip->dev, "A TPM error (%d) occurred %s\n", err, desc); return err; @@ -514,7 +514,7 @@ int tpm_get_timeouts(struct tpm_chip *chip) if (rc == TPM_ERR_INVALID_POSTINIT) { /* The TPM is not started, we are the first to talk to it. Execute a startup command. */ - dev_info(chip->pdev, "Issuing TPM_STARTUP"); + dev_info(&chip->dev, "Issuing TPM_STARTUP"); if (tpm_startup(chip, TPM_ST_CLEAR)) return rc; @@ -526,7 +526,7 @@ int tpm_get_timeouts(struct tpm_chip *chip) 0, NULL); } if (rc) { - dev_err(chip->pdev, + dev_err(&chip->dev, "A TPM error (%zd) occurred attempting to determine the timeouts\n", rc); goto duration; @@ -565,7 +565,7 @@ int tpm_get_timeouts(struct tpm_chip *chip) /* Report adjusted timeouts */ if (chip->vendor.timeout_adjusted) { - dev_info(chip->pdev, + dev_info(&chip->dev, HW_ERR "Adjusting reported timeouts: A %lu->%luus B %lu->%luus C %lu->%luus D %lu->%luus\n", old_timeout[0], new_timeout[0], old_timeout[1], new_timeout[1], @@ -612,7 +612,7 @@ duration: chip->vendor.duration[TPM_MEDIUM] *= 1000; chip->vendor.duration[TPM_LONG] *= 1000; chip->vendor.duration_adjusted = true; - dev_info(chip->pdev, "Adjusting TPM timeout parameters."); + dev_info(&chip->dev, "Adjusting TPM timeout parameters."); } return 0; } @@ -687,7 +687,7 @@ int tpm_is_tpm2(u32 chip_num) rc = (chip->flags & TPM_CHIP_FLAG_TPM2) != 0; - tpm_chip_put(chip); + tpm_put_ops(chip); return rc; } @@ -716,7 +716,7 @@ int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf) rc = tpm2_pcr_read(chip, pcr_idx, res_buf); else rc = tpm_pcr_read_dev(chip, pcr_idx, res_buf); - tpm_chip_put(chip); + tpm_put_ops(chip); return rc; } EXPORT_SYMBOL_GPL(tpm_pcr_read); @@ -751,7 +751,7 @@ int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash) if (chip->flags & TPM_CHIP_FLAG_TPM2) { rc = tpm2_pcr_extend(chip, pcr_idx, hash); - tpm_chip_put(chip); + tpm_put_ops(chip); return rc; } @@ -761,7 +761,7 @@ int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash) rc = tpm_transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE, 0, "attempting extend a PCR value"); - tpm_chip_put(chip); + tpm_put_ops(chip); return rc; } EXPORT_SYMBOL_GPL(tpm_pcr_extend); @@ -802,7 +802,9 @@ int tpm_do_selftest(struct tpm_chip *chip) * around 300ms while the self test is ongoing, keep trying * until the self test duration expires. */ if (rc == -ETIME) { - dev_info(chip->pdev, HW_ERR "TPM command timed out during continue self test"); + dev_info( + &chip->dev, HW_ERR + "TPM command timed out during continue self test"); msleep(delay_msec); continue; } @@ -812,7 +814,7 @@ int tpm_do_selftest(struct tpm_chip *chip) rc = be32_to_cpu(cmd.header.out.return_code); if (rc == TPM_ERR_DISABLED || rc == TPM_ERR_DEACTIVATED) { - dev_info(chip->pdev, + dev_info(&chip->dev, "TPM is disabled/deactivated (0x%X)\n", rc); /* TPM is disabled and/or deactivated; driver can * proceed and TPM does handle commands for @@ -840,7 +842,7 @@ int tpm_send(u32 chip_num, void *cmd, size_t buflen) rc = tpm_transmit_cmd(chip, cmd, buflen, 0, "attempting tpm_cmd"); - tpm_chip_put(chip); + tpm_put_ops(chip); return rc; } EXPORT_SYMBOL_GPL(tpm_send); @@ -966,10 +968,10 @@ int tpm_pm_suspend(struct device *dev) } if (rc) - dev_err(chip->pdev, + dev_err(&chip->dev, "Error (%d) sending savestate before suspend\n", rc); else if (try > 0) - dev_warn(chip->pdev, "TPM savestate took %dms\n", + dev_warn(&chip->dev, "TPM savestate took %dms\n", try * TPM_TIMEOUT_RETRY); return rc; @@ -1023,7 +1025,7 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max) if (chip->flags & TPM_CHIP_FLAG_TPM2) { err = tpm2_get_random(chip, out, max); - tpm_chip_put(chip); + tpm_put_ops(chip); return err; } @@ -1045,7 +1047,7 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max) num_bytes -= recd; } while (retries-- && total < max); - tpm_chip_put(chip); + tpm_put_ops(chip); return total ? total : -EIO; } EXPORT_SYMBOL_GPL(tpm_get_random); @@ -1071,7 +1073,7 @@ int tpm_seal_trusted(u32 chip_num, struct trusted_key_payload *payload, rc = tpm2_seal_trusted(chip, payload, options); - tpm_chip_put(chip); + tpm_put_ops(chip); return rc; } EXPORT_SYMBOL_GPL(tpm_seal_trusted); @@ -1097,7 +1099,8 @@ int tpm_unseal_trusted(u32 chip_num, struct trusted_key_payload *payload, rc = tpm2_unseal_trusted(chip, payload, options); - tpm_chip_put(chip); + tpm_put_ops(chip); + return rc; } EXPORT_SYMBOL_GPL(tpm_unseal_trusted); @@ -1124,6 +1127,7 @@ static int __init tpm_init(void) static void __exit tpm_exit(void) { + idr_destroy(&dev_nums_idr); class_destroy(tpm_class); unregister_chrdev_region(tpm_devt, TPM_NUM_DEVICES); } diff --git a/drivers/char/tpm/tpm-sysfs.c b/drivers/char/tpm/tpm-sysfs.c index f880856aa75e..06ac6e9657d2 100644 --- a/drivers/char/tpm/tpm-sysfs.c +++ b/drivers/char/tpm/tpm-sysfs.c @@ -38,6 +38,8 @@ static ssize_t pubek_show(struct device *dev, struct device_attribute *attr, struct tpm_chip *chip = dev_get_drvdata(dev); + memset(&tpm_cmd, 0, sizeof(tpm_cmd)); + tpm_cmd.header.in = tpm_readpubek_header; err = tpm_transmit_cmd(chip, &tpm_cmd, READ_PUBEK_RESULT_SIZE, 0, "attempting to read the PUBEK"); @@ -284,16 +286,28 @@ static const struct attribute_group tpm_dev_group = { int tpm_sysfs_add_device(struct tpm_chip *chip) { int err; - err = sysfs_create_group(&chip->pdev->kobj, + + /* XXX: If you wish to remove this restriction, you must first update + * tpm_sysfs to explicitly lock chip->ops. + */ + if (chip->flags & TPM_CHIP_FLAG_TPM2) + return 0; + + err = sysfs_create_group(&chip->dev.parent->kobj, &tpm_dev_group); if (err) - dev_err(chip->pdev, + dev_err(&chip->dev, "failed to create sysfs attributes, %d\n", err); return err; } void tpm_sysfs_del_device(struct tpm_chip *chip) { - sysfs_remove_group(&chip->pdev->kobj, &tpm_dev_group); + /* The sysfs routines rely on an implicit tpm_try_get_ops, this + * function is called before ops is null'd and the sysfs core + * synchronizes this removal so that no callbacks are running or can + * run again + */ + sysfs_remove_group(&chip->dev.parent->kobj, &tpm_dev_group); } diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 2216861f89f1..772d99b3a8e4 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -34,7 +34,7 @@ enum tpm_const { TPM_MINOR = 224, /* officially assigned */ TPM_BUFSIZE = 4096, - TPM_NUM_DEVICES = 256, + TPM_NUM_DEVICES = 65536, TPM_RETRY = 50, /* 5 seconds */ }; @@ -171,11 +171,16 @@ enum tpm_chip_flags { }; struct tpm_chip { - struct device *pdev; /* Device stuff */ struct device dev; struct cdev cdev; + /* A driver callback under ops cannot be run unless ops_sem is held + * (sometimes implicitly, eg for the sysfs code). ops becomes null + * when the driver is unregistered, see tpm_try_get_ops. + */ + struct rw_semaphore ops_sem; const struct tpm_class_ops *ops; + unsigned int flags; int dev_num; /* /dev/tpm# */ @@ -195,17 +200,10 @@ struct tpm_chip { acpi_handle acpi_dev_handle; char ppi_version[TPM_PPI_VERSION_LEN + 1]; #endif /* CONFIG_ACPI */ - - struct list_head list; }; #define to_tpm_chip(d) container_of(d, struct tpm_chip, dev) -static inline void tpm_chip_put(struct tpm_chip *chip) -{ - module_put(chip->pdev->driver->owner); -} - static inline int tpm_read_index(int base, int index) { outb(index, base); @@ -497,6 +495,7 @@ static inline void tpm_buf_append_u32(struct tpm_buf *buf, const u32 value) extern struct class *tpm_class; extern dev_t tpm_devt; extern const struct file_operations tpm_fops; +extern struct idr dev_nums_idr; enum tpm_transmit_flags { TPM_TRANSMIT_UNLOCKED = BIT(0), @@ -517,6 +516,9 @@ extern int wait_for_tpm_stat(struct tpm_chip *, u8, unsigned long, wait_queue_head_t *, bool); struct tpm_chip *tpm_chip_find_get(int chip_num); +__must_check int tpm_try_get_ops(struct tpm_chip *chip); +void tpm_put_ops(struct tpm_chip *chip); + extern struct tpm_chip *tpmm_chip_alloc(struct device *dev, const struct tpm_class_ops *ops); extern int tpm_chip_register(struct tpm_chip *chip); diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index cb7e4f6b70ba..286bd090a488 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -570,7 +570,7 @@ static void tpm2_flush_context_cmd(struct tpm_chip *chip, u32 handle, rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_FLUSH_CONTEXT); if (rc) { - dev_warn(chip->pdev, "0x%08x was not flushed, out of memory\n", + dev_warn(&chip->dev, "0x%08x was not flushed, out of memory\n", handle); return; } @@ -580,7 +580,7 @@ static void tpm2_flush_context_cmd(struct tpm_chip *chip, u32 handle, rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, flags, "flushing context"); if (rc) - dev_warn(chip->pdev, "0x%08x was not flushed, rc=%d\n", handle, + dev_warn(&chip->dev, "0x%08x was not flushed, rc=%d\n", handle, rc); tpm_buf_destroy(&buf); @@ -753,7 +753,7 @@ void tpm2_shutdown(struct tpm_chip *chip, u16 shutdown_type) * except print the error code on a system failure. */ if (rc < 0) - dev_warn(chip->pdev, "transmit returned %d while stopping the TPM", + dev_warn(&chip->dev, "transmit returned %d while stopping the TPM", rc); } EXPORT_SYMBOL_GPL(tpm2_shutdown); @@ -820,7 +820,7 @@ static int tpm2_start_selftest(struct tpm_chip *chip, bool full) * immediately. This is a workaround for that. */ if (rc == TPM2_RC_TESTING) { - dev_warn(chip->pdev, "Got RC_TESTING, ignoring\n"); + dev_warn(&chip->dev, "Got RC_TESTING, ignoring\n"); rc = 0; } diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c index dfadad0916a1..a48a878f791d 100644 --- a/drivers/char/tpm/tpm_atmel.c +++ b/drivers/char/tpm/tpm_atmel.c @@ -49,7 +49,7 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count) for (i = 0; i < 6; i++) { status = ioread8(chip->vendor.iobase + 1); if ((status & ATML_STATUS_DATA_AVAIL) == 0) { - dev_err(chip->pdev, "error reading header\n"); + dev_err(&chip->dev, "error reading header\n"); return -EIO; } *buf++ = ioread8(chip->vendor.iobase); @@ -60,12 +60,12 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count) size = be32_to_cpu(*native_size); if (count < size) { - dev_err(chip->pdev, + dev_err(&chip->dev, "Recv size(%d) less than available space\n", size); for (; i < size; i++) { /* clear the waiting data anyway */ status = ioread8(chip->vendor.iobase + 1); if ((status & ATML_STATUS_DATA_AVAIL) == 0) { - dev_err(chip->pdev, "error reading data\n"); + dev_err(&chip->dev, "error reading data\n"); return -EIO; } } @@ -76,7 +76,7 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count) for (; i < size; i++) { status = ioread8(chip->vendor.iobase + 1); if ((status & ATML_STATUS_DATA_AVAIL) == 0) { - dev_err(chip->pdev, "error reading data\n"); + dev_err(&chip->dev, "error reading data\n"); return -EIO; } *buf++ = ioread8(chip->vendor.iobase); @@ -86,7 +86,7 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count) status = ioread8(chip->vendor.iobase + 1); if (status & ATML_STATUS_DATA_AVAIL) { - dev_err(chip->pdev, "data available is stuck\n"); + dev_err(&chip->dev, "data available is stuck\n"); return -EIO; } @@ -97,9 +97,9 @@ static int tpm_atml_send(struct tpm_chip *chip, u8 *buf, size_t count) { int i; - dev_dbg(chip->pdev, "tpm_atml_send:\n"); + dev_dbg(&chip->dev, "tpm_atml_send:\n"); for (i = 0; i < count; i++) { - dev_dbg(chip->pdev, "%d 0x%x(%d)\n", i, buf[i], buf[i]); + dev_dbg(&chip->dev, "%d 0x%x(%d)\n", i, buf[i], buf[i]); iowrite8(buf[i], chip->vendor.iobase); } diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c index 8dfb88b9739c..dd8f0eb3170a 100644 --- a/drivers/char/tpm/tpm_i2c_atmel.c +++ b/drivers/char/tpm/tpm_i2c_atmel.c @@ -52,7 +52,7 @@ struct priv_data { static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len) { struct priv_data *priv = chip->vendor.priv; - struct i2c_client *client = to_i2c_client(chip->pdev); + struct i2c_client *client = to_i2c_client(chip->dev.parent); s32 status; priv->len = 0; @@ -62,7 +62,7 @@ static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len) status = i2c_master_send(client, buf, len); - dev_dbg(chip->pdev, + dev_dbg(&chip->dev, "%s(buf=%*ph len=%0zx) -> sts=%d\n", __func__, (int)min_t(size_t, 64, len), buf, len, status); return status; @@ -71,7 +71,7 @@ static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len) static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count) { struct priv_data *priv = chip->vendor.priv; - struct i2c_client *client = to_i2c_client(chip->pdev); + struct i2c_client *client = to_i2c_client(chip->dev.parent); struct tpm_output_header *hdr = (struct tpm_output_header *)priv->buffer; u32 expected_len; @@ -88,7 +88,7 @@ static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count) return -ENOMEM; if (priv->len >= expected_len) { - dev_dbg(chip->pdev, + dev_dbg(&chip->dev, "%s early(buf=%*ph count=%0zx) -> ret=%d\n", __func__, (int)min_t(size_t, 64, expected_len), buf, count, expected_len); @@ -97,7 +97,7 @@ static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count) } rc = i2c_master_recv(client, buf, expected_len); - dev_dbg(chip->pdev, + dev_dbg(&chip->dev, "%s reread(buf=%*ph count=%0zx) -> ret=%d\n", __func__, (int)min_t(size_t, 64, expected_len), buf, count, expected_len); @@ -106,13 +106,13 @@ static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count) static void i2c_atmel_cancel(struct tpm_chip *chip) { - dev_err(chip->pdev, "TPM operation cancellation was requested, but is not supported"); + dev_err(&chip->dev, "TPM operation cancellation was requested, but is not supported"); } static u8 i2c_atmel_read_status(struct tpm_chip *chip) { struct priv_data *priv = chip->vendor.priv; - struct i2c_client *client = to_i2c_client(chip->pdev); + struct i2c_client *client = to_i2c_client(chip->dev.parent); int rc; /* The TPM fails the I2C read until it is ready, so we do the entire @@ -125,7 +125,7 @@ static u8 i2c_atmel_read_status(struct tpm_chip *chip) /* Once the TPM has completed the command the command remains readable * until another command is issued. */ rc = i2c_master_recv(client, priv->buffer, sizeof(priv->buffer)); - dev_dbg(chip->pdev, + dev_dbg(&chip->dev, "%s: sts=%d", __func__, rc); if (rc <= 0) return 0; diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c index 63d5d22e9e60..f2aa99e34b4b 100644 --- a/drivers/char/tpm/tpm_i2c_infineon.c +++ b/drivers/char/tpm/tpm_i2c_infineon.c @@ -446,7 +446,7 @@ static int tpm_tis_i2c_recv(struct tpm_chip *chip, u8 *buf, size_t count) /* read first 10 bytes, including tag, paramsize, and result */ size = recv_data(chip, buf, TPM_HEADER_SIZE); if (size < TPM_HEADER_SIZE) { - dev_err(chip->pdev, "Unable to read header\n"); + dev_err(&chip->dev, "Unable to read header\n"); goto out; } @@ -459,14 +459,14 @@ static int tpm_tis_i2c_recv(struct tpm_chip *chip, u8 *buf, size_t count) size += recv_data(chip, &buf[TPM_HEADER_SIZE], expected - TPM_HEADER_SIZE); if (size < expected) { - dev_err(chip->pdev, "Unable to read remainder of result\n"); + dev_err(&chip->dev, "Unable to read remainder of result\n"); size = -ETIME; goto out; } wait_for_stat(chip, TPM_STS_VALID, chip->vendor.timeout_c, &status); if (status & TPM_STS_DATA_AVAIL) { /* retry? */ - dev_err(chip->pdev, "Error left over data\n"); + dev_err(&chip->dev, "Error left over data\n"); size = -EIO; goto out; } diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c index 847f1597fe9b..a1e1474dda30 100644 --- a/drivers/char/tpm/tpm_i2c_nuvoton.c +++ b/drivers/char/tpm/tpm_i2c_nuvoton.c @@ -96,13 +96,13 @@ static s32 i2c_nuvoton_write_buf(struct i2c_client *client, u8 offset, u8 size, /* read TPM_STS register */ static u8 i2c_nuvoton_read_status(struct tpm_chip *chip) { - struct i2c_client *client = to_i2c_client(chip->pdev); + struct i2c_client *client = to_i2c_client(chip->dev.parent); s32 status; u8 data; status = i2c_nuvoton_read_buf(client, TPM_STS, 1, &data); if (status <= 0) { - dev_err(chip->pdev, "%s() error return %d\n", __func__, + dev_err(&chip->dev, "%s() error return %d\n", __func__, status); data = TPM_STS_ERR_VAL; } @@ -127,13 +127,13 @@ static s32 i2c_nuvoton_write_status(struct i2c_client *client, u8 data) /* write commandReady to TPM_STS register */ static void i2c_nuvoton_ready(struct tpm_chip *chip) { - struct i2c_client *client = to_i2c_client(chip->pdev); + struct i2c_client *client = to_i2c_client(chip->dev.parent); s32 status; /* this causes the current command to be aborted */ status = i2c_nuvoton_write_status(client, TPM_STS_COMMAND_READY); if (status < 0) - dev_err(chip->pdev, + dev_err(&chip->dev, "%s() fail to write TPM_STS.commandReady\n", __func__); } @@ -212,7 +212,7 @@ static int i2c_nuvoton_wait_for_stat(struct tpm_chip *chip, u8 mask, u8 value, return 0; } while (time_before(jiffies, stop)); } - dev_err(chip->pdev, "%s(%02x, %02x) -> timeout\n", __func__, mask, + dev_err(&chip->dev, "%s(%02x, %02x) -> timeout\n", __func__, mask, value); return -ETIMEDOUT; } @@ -240,7 +240,7 @@ static int i2c_nuvoton_recv_data(struct i2c_client *client, &chip->vendor.read_queue) == 0) { burst_count = i2c_nuvoton_get_burstcount(client, chip); if (burst_count < 0) { - dev_err(chip->pdev, + dev_err(&chip->dev, "%s() fail to read burstCount=%d\n", __func__, burst_count); return -EIO; @@ -249,12 +249,12 @@ static int i2c_nuvoton_recv_data(struct i2c_client *client, rc = i2c_nuvoton_read_buf(client, TPM_DATA_FIFO_R, bytes2read, &buf[size]); if (rc < 0) { - dev_err(chip->pdev, + dev_err(&chip->dev, "%s() fail on i2c_nuvoton_read_buf()=%d\n", __func__, rc); return -EIO; } - dev_dbg(chip->pdev, "%s(%d):", __func__, bytes2read); + dev_dbg(&chip->dev, "%s(%d):", __func__, bytes2read); size += bytes2read; } @@ -264,7 +264,7 @@ static int i2c_nuvoton_recv_data(struct i2c_client *client, /* Read TPM command results */ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count) { - struct device *dev = chip->pdev; + struct device *dev = chip->dev.parent; struct i2c_client *client = to_i2c_client(dev); s32 rc; int expected, status, burst_count, retries, size = 0; @@ -334,7 +334,7 @@ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count) break; } i2c_nuvoton_ready(chip); - dev_dbg(chip->pdev, "%s() -> %d\n", __func__, size); + dev_dbg(&chip->dev, "%s() -> %d\n", __func__, size); return size; } @@ -347,7 +347,7 @@ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count) */ static int i2c_nuvoton_send(struct tpm_chip *chip, u8 *buf, size_t len) { - struct device *dev = chip->pdev; + struct device *dev = chip->dev.parent; struct i2c_client *client = to_i2c_client(dev); u32 ordinal; size_t count = 0; diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c index 6c488e635fdd..e3cf9f3545c5 100644 --- a/drivers/char/tpm/tpm_infineon.c +++ b/drivers/char/tpm/tpm_infineon.c @@ -195,9 +195,9 @@ static int wait(struct tpm_chip *chip, int wait_for_bit) } if (i == TPM_MAX_TRIES) { /* timeout occurs */ if (wait_for_bit == STAT_XFE) - dev_err(chip->pdev, "Timeout in wait(STAT_XFE)\n"); + dev_err(&chip->dev, "Timeout in wait(STAT_XFE)\n"); if (wait_for_bit == STAT_RDA) - dev_err(chip->pdev, "Timeout in wait(STAT_RDA)\n"); + dev_err(&chip->dev, "Timeout in wait(STAT_RDA)\n"); return -EIO; } return 0; @@ -220,7 +220,7 @@ static void wait_and_send(struct tpm_chip *chip, u8 sendbyte) static void tpm_wtx(struct tpm_chip *chip) { number_of_wtx++; - dev_info(chip->pdev, "Granting WTX (%02d / %02d)\n", + dev_info(&chip->dev, "Granting WTX (%02d / %02d)\n", number_of_wtx, TPM_MAX_WTX_PACKAGES); wait_and_send(chip, TPM_VL_VER); wait_and_send(chip, TPM_CTRL_WTX); @@ -231,7 +231,7 @@ static void tpm_wtx(struct tpm_chip *chip) static void tpm_wtx_abort(struct tpm_chip *chip) { - dev_info(chip->pdev, "Aborting WTX\n"); + dev_info(&chip->dev, "Aborting WTX\n"); wait_and_send(chip, TPM_VL_VER); wait_and_send(chip, TPM_CTRL_WTX_ABORT); wait_and_send(chip, 0x00); @@ -257,7 +257,7 @@ recv_begin: } if (buf[0] != TPM_VL_VER) { - dev_err(chip->pdev, + dev_err(&chip->dev, "Wrong transport protocol implementation!\n"); return -EIO; } @@ -272,7 +272,7 @@ recv_begin: } if ((size == 0x6D00) && (buf[1] == 0x80)) { - dev_err(chip->pdev, "Error handling on vendor layer!\n"); + dev_err(&chip->dev, "Error handling on vendor layer!\n"); return -EIO; } @@ -284,7 +284,7 @@ recv_begin: } if (buf[1] == TPM_CTRL_WTX) { - dev_info(chip->pdev, "WTX-package received\n"); + dev_info(&chip->dev, "WTX-package received\n"); if (number_of_wtx < TPM_MAX_WTX_PACKAGES) { tpm_wtx(chip); goto recv_begin; @@ -295,14 +295,14 @@ recv_begin: } if (buf[1] == TPM_CTRL_WTX_ABORT_ACK) { - dev_info(chip->pdev, "WTX-abort acknowledged\n"); + dev_info(&chip->dev, "WTX-abort acknowledged\n"); return size; } if (buf[1] == TPM_CTRL_ERROR) { - dev_err(chip->pdev, "ERROR-package received:\n"); + dev_err(&chip->dev, "ERROR-package received:\n"); if (buf[4] == TPM_INF_NAK) - dev_err(chip->pdev, + dev_err(&chip->dev, "-> Negative acknowledgement" " - retransmit command!\n"); return -EIO; @@ -321,7 +321,7 @@ static int tpm_inf_send(struct tpm_chip *chip, u8 * buf, size_t count) ret = empty_fifo(chip, 1); if (ret) { - dev_err(chip->pdev, "Timeout while clearing FIFO\n"); + dev_err(&chip->dev, "Timeout while clearing FIFO\n"); return -EIO; } diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c index 289389ecef84..766370bed60c 100644 --- a/drivers/char/tpm/tpm_nsc.c +++ b/drivers/char/tpm/tpm_nsc.c @@ -113,7 +113,7 @@ static int nsc_wait_for_ready(struct tpm_chip *chip) } while (time_before(jiffies, stop)); - dev_info(chip->pdev, "wait for ready failed\n"); + dev_info(&chip->dev, "wait for ready failed\n"); return -EBUSY; } @@ -129,12 +129,12 @@ static int tpm_nsc_recv(struct tpm_chip *chip, u8 * buf, size_t count) return -EIO; if (wait_for_stat(chip, NSC_STATUS_F0, NSC_STATUS_F0, &data) < 0) { - dev_err(chip->pdev, "F0 timeout\n"); + dev_err(&chip->dev, "F0 timeout\n"); return -EIO; } if ((data = inb(chip->vendor.base + NSC_DATA)) != NSC_COMMAND_NORMAL) { - dev_err(chip->pdev, "not in normal mode (0x%x)\n", + dev_err(&chip->dev, "not in normal mode (0x%x)\n", data); return -EIO; } @@ -143,7 +143,7 @@ static int tpm_nsc_recv(struct tpm_chip *chip, u8 * buf, size_t count) for (p = buffer; p < &buffer[count]; p++) { if (wait_for_stat (chip, NSC_STATUS_OBF, NSC_STATUS_OBF, &data) < 0) { - dev_err(chip->pdev, + dev_err(&chip->dev, "OBF timeout (while reading data)\n"); return -EIO; } @@ -154,11 +154,11 @@ static int tpm_nsc_recv(struct tpm_chip *chip, u8 * buf, size_t count) if ((data & NSC_STATUS_F0) == 0 && (wait_for_stat(chip, NSC_STATUS_F0, NSC_STATUS_F0, &data) < 0)) { - dev_err(chip->pdev, "F0 not set\n"); + dev_err(&chip->dev, "F0 not set\n"); return -EIO; } if ((data = inb(chip->vendor.base + NSC_DATA)) != NSC_COMMAND_EOC) { - dev_err(chip->pdev, + dev_err(&chip->dev, "expected end of command(0x%x)\n", data); return -EIO; } @@ -189,19 +189,19 @@ static int tpm_nsc_send(struct tpm_chip *chip, u8 * buf, size_t count) return -EIO; if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { - dev_err(chip->pdev, "IBF timeout\n"); + dev_err(&chip->dev, "IBF timeout\n"); return -EIO; } outb(NSC_COMMAND_NORMAL, chip->vendor.base + NSC_COMMAND); if (wait_for_stat(chip, NSC_STATUS_IBR, NSC_STATUS_IBR, &data) < 0) { - dev_err(chip->pdev, "IBR timeout\n"); + dev_err(&chip->dev, "IBR timeout\n"); return -EIO; } for (i = 0; i < count; i++) { if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { - dev_err(chip->pdev, + dev_err(&chip->dev, "IBF timeout (while writing data)\n"); return -EIO; } @@ -209,7 +209,7 @@ static int tpm_nsc_send(struct tpm_chip *chip, u8 * buf, size_t count) } if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { - dev_err(chip->pdev, "IBF timeout\n"); + dev_err(&chip->dev, "IBF timeout\n"); return -EIO; } outb(NSC_COMMAND_EOC, chip->vendor.base + NSC_COMMAND); diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index f10a107614b4..7f13221aeb30 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -293,7 +293,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count) /* read first 10 bytes, including tag, paramsize, and result */ if ((size = recv_data(chip, buf, TPM_HEADER_SIZE)) < TPM_HEADER_SIZE) { - dev_err(chip->pdev, "Unable to read header\n"); + dev_err(&chip->dev, "Unable to read header\n"); goto out; } @@ -306,7 +306,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count) if ((size += recv_data(chip, &buf[TPM_HEADER_SIZE], expected - TPM_HEADER_SIZE)) < expected) { - dev_err(chip->pdev, "Unable to read remainder of result\n"); + dev_err(&chip->dev, "Unable to read remainder of result\n"); size = -ETIME; goto out; } @@ -315,7 +315,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count) &chip->vendor.int_queue, false); status = tpm_tis_status(chip); if (status & TPM_STS_DATA_AVAIL) { /* retry? */ - dev_err(chip->pdev, "Error left over data\n"); + dev_err(&chip->dev, "Error left over data\n"); size = -EIO; goto out; } @@ -401,7 +401,7 @@ static void disable_interrupts(struct tpm_chip *chip) iowrite32(intmask, chip->vendor.iobase + TPM_INT_ENABLE(chip->vendor.locality)); - devm_free_irq(chip->pdev, chip->vendor.irq, chip); + devm_free_irq(&chip->dev, chip->vendor.irq, chip); chip->vendor.irq = 0; } @@ -463,7 +463,7 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) msleep(1); if (!priv->irq_tested) { disable_interrupts(chip); - dev_err(chip->pdev, + dev_err(&chip->dev, FW_BUG "TPM interrupt not working, polling instead\n"); } priv->irq_tested = true; @@ -533,7 +533,7 @@ static int probe_itpm(struct tpm_chip *chip) rc = tpm_tis_send_data(chip, cmd_getticks, len); if (rc == 0) { - dev_info(chip->pdev, "Detected an iTPM.\n"); + dev_info(&chip->dev, "Detected an iTPM.\n"); rc = 1; } else rc = -EFAULT; @@ -766,7 +766,7 @@ static int tpm_tis_init(struct device *dev, struct tpm_info *tpm_info, if (devm_request_irq (dev, i, tis_int_probe, IRQF_SHARED, chip->devname, chip) != 0) { - dev_info(chip->pdev, + dev_info(&chip->dev, "Unable to request irq: %d for probe\n", i); continue; @@ -818,7 +818,7 @@ static int tpm_tis_init(struct device *dev, struct tpm_info *tpm_info, if (devm_request_irq (dev, chip->vendor.irq, tis_int_handler, IRQF_SHARED, chip->devname, chip) != 0) { - dev_info(chip->pdev, + dev_info(&chip->dev, "Unable to request irq: %d for use\n", chip->vendor.irq); chip->vendor.irq = 0; diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 0dadb6332f0e..7abe908427df 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -963,7 +963,9 @@ static int atmel_sha_finup(struct ahash_request *req) ctx->flags |= SHA_FLAGS_FINUP; err1 = atmel_sha_update(req); - if (err1 == -EINPROGRESS || err1 == -EBUSY) + if (err1 == -EINPROGRESS || + (err1 == -EBUSY && (ahash_request_flags(req) & + CRYPTO_TFM_REQ_MAY_BACKLOG))) return err1; /* diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 99d5e11db194..e06cc5df30be 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -498,7 +498,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result); if (!ret) { /* in progress */ - wait_for_completion_interruptible(&result.completion); + wait_for_completion(&result.completion); ret = result.err; #ifdef DEBUG print_hex_dump(KERN_ERR, diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c index e1eaf4ff9762..3ce1d5cdcbd2 100644 --- a/drivers/crypto/caam/key_gen.c +++ b/drivers/crypto/caam/key_gen.c @@ -103,7 +103,7 @@ int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result); if (!ret) { /* in progress */ - wait_for_completion_interruptible(&result.completion); + wait_for_completion(&result.completion); ret = result.err; #ifdef DEBUG print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ", diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 9a8a18aafd5c..6a60936b46e0 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -804,7 +804,7 @@ static void talitos_unregister_rng(struct device *dev) * crypto alg */ #define TALITOS_CRA_PRIORITY 3000 -#define TALITOS_MAX_KEY_SIZE 96 +#define TALITOS_MAX_KEY_SIZE (AES_MAX_KEY_SIZE + SHA512_BLOCK_SIZE) #define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ struct talitos_ctx { @@ -1388,6 +1388,11 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, { struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher); + if (keylen > TALITOS_MAX_KEY_SIZE) { + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + memcpy(&ctx->key, key, keylen); ctx->keylen = keylen; diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index 690e3b4f8202..b36da3c1073f 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -64,6 +64,8 @@ #define PCI_DEVICE_ID_INTEL_IOAT_BDX8 0x6f2e #define PCI_DEVICE_ID_INTEL_IOAT_BDX9 0x6f2f +#define PCI_DEVICE_ID_INTEL_IOAT_SKX 0x2021 + #define IOAT_VER_1_2 0x12 /* Version 1.2 */ #define IOAT_VER_2_0 0x20 /* Version 2.0 */ #define IOAT_VER_3_0 0x30 /* Version 3.0 */ diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index 4ef0c5e07912..abb75ebd65ea 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -105,6 +105,8 @@ static struct pci_device_id ioat_pci_tbl[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX8) }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX9) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SKX) }, + /* I/OAT v3.3 platforms */ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) }, @@ -250,10 +252,15 @@ static bool is_bdx_ioat(struct pci_dev *pdev) } } +static inline bool is_skx_ioat(struct pci_dev *pdev) +{ + return (pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SKX) ? true : false; +} + static bool is_xeon_cb32(struct pci_dev *pdev) { return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) || - is_hsw_ioat(pdev) || is_bdx_ioat(pdev); + is_hsw_ioat(pdev) || is_bdx_ioat(pdev) || is_skx_ioat(pdev); } bool is_bwd_ioat(struct pci_dev *pdev) @@ -1350,6 +1357,8 @@ static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) device->version = readb(device->reg_base + IOAT_VER_OFFSET); if (device->version >= IOAT_VER_3_0) { + if (is_skx_ioat(pdev)) + device->version = IOAT_VER_3_2; err = ioat3_dma_probe(device, ioat_dca_enabled); if (device->version >= IOAT_VER_3_3) diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c index a415edbe61b1..149ec2bd9bc6 100644 --- a/drivers/dma/ti-dma-crossbar.c +++ b/drivers/dma/ti-dma-crossbar.c @@ -146,6 +146,7 @@ static int ti_am335x_xbar_probe(struct platform_device *pdev) match = of_match_node(ti_am335x_master_match, dma_node); if (!match) { dev_err(&pdev->dev, "DMA master is not supported\n"); + of_node_put(dma_node); return -EINVAL; } @@ -310,6 +311,7 @@ static int ti_dra7_xbar_probe(struct platform_device *pdev) match = of_match_node(ti_dra7_master_match, dma_node); if (!match) { dev_err(&pdev->dev, "DMA master is not supported\n"); + of_node_put(dma_node); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 475c38fe9245..e40a6d8b0b92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1126,6 +1126,9 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + if (*pos >= adev->mc.mc_vram_size) + return -ENXIO; + while (size) { unsigned long flags; uint32_t value; diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index a3b96d691ac9..58bf94b69186 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -330,6 +330,13 @@ static bool drm_dp_sideband_msg_build(struct drm_dp_sideband_msg_rx *msg, return false; } + /* + * ignore out-of-order messages or messages that are part of a + * failed transaction + */ + if (!recv_hdr.somt && !msg->have_somt) + return false; + /* get length contained in this portion */ msg->curchunk_len = recv_hdr.msg_len; msg->curchunk_hdrlen = hdrlen; @@ -2163,7 +2170,7 @@ out_unlock: } EXPORT_SYMBOL(drm_dp_mst_topology_mgr_resume); -static void drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up) +static bool drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up) { int len; u8 replyblock[32]; @@ -2178,12 +2185,12 @@ static void drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up) replyblock, len); if (ret != len) { DRM_DEBUG_KMS("failed to read DPCD down rep %d %d\n", len, ret); - return; + return false; } ret = drm_dp_sideband_msg_build(msg, replyblock, len, true); if (!ret) { DRM_DEBUG_KMS("sideband msg build failed %d\n", replyblock[0]); - return; + return false; } replylen = msg->curchunk_len + msg->curchunk_hdrlen; @@ -2195,21 +2202,32 @@ static void drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up) ret = drm_dp_dpcd_read(mgr->aux, basereg + curreply, replyblock, len); if (ret != len) { - DRM_DEBUG_KMS("failed to read a chunk\n"); + DRM_DEBUG_KMS("failed to read a chunk (len %d, ret %d)\n", + len, ret); + return false; } + ret = drm_dp_sideband_msg_build(msg, replyblock, len, false); - if (ret == false) + if (!ret) { DRM_DEBUG_KMS("failed to build sideband msg\n"); + return false; + } + curreply += len; replylen -= len; } + return true; } static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr) { int ret = 0; - drm_dp_get_one_sb_msg(mgr, false); + if (!drm_dp_get_one_sb_msg(mgr, false)) { + memset(&mgr->down_rep_recv, 0, + sizeof(struct drm_dp_sideband_msg_rx)); + return 0; + } if (mgr->down_rep_recv.have_eomt) { struct drm_dp_sideband_msg_tx *txmsg; @@ -2265,7 +2283,12 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr) static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) { int ret = 0; - drm_dp_get_one_sb_msg(mgr, true); + + if (!drm_dp_get_one_sb_msg(mgr, true)) { + memset(&mgr->up_req_recv, 0, + sizeof(struct drm_dp_sideband_msg_rx)); + return 0; + } if (mgr->up_req_recv.have_eomt) { struct drm_dp_sideband_msg_req_body msg; @@ -2317,7 +2340,9 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) DRM_DEBUG_KMS("Got RSN: pn: %d avail_pbn %d\n", msg.u.resource_stat.port_number, msg.u.resource_stat.available_pbn); } - drm_dp_put_mst_branch_device(mstb); + if (mstb) + drm_dp_put_mst_branch_device(mstb); + memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); } return ret; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index a3b54cc76495..b66ffd44ff26 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -204,7 +204,14 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, void adreno_flush(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - uint32_t wptr = get_wptr(gpu->rb); + uint32_t wptr; + + /* + * Mask wptr value that we calculate to fit in the HW range. This is + * to account for the possibility that the last command fit exactly into + * the ringbuffer and rb->next hasn't wrapped to zero yet + */ + wptr = get_wptr(gpu->rb) & ((gpu->rb->size / 4) - 1); /* ensure writes to ringbuffer have hit system memory: */ mb(); diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index f2cc9690dbb7..ed1efae0c3f2 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -85,7 +85,8 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, pagefault_disable(); } - if (submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) { + if ((submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) || + !(submit_bo.flags & MSM_SUBMIT_BO_FLAGS)) { DRM_ERROR("invalid flags: %x\n", submit_bo.flags); ret = -EINVAL; goto out_unlock; diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 1f14b908b221..ae317271cf81 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -23,7 +23,8 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) struct msm_ringbuffer *ring; int ret; - size = ALIGN(size, 4); /* size should be dword aligned */ + if (WARN_ON(!is_power_of_2(size))) + return ERR_PTR(-EINVAL); ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) { diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c index c794b2c2d21e..6d8f21290aa2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c @@ -129,7 +129,7 @@ gf100_bar_init(struct nvkm_bar *base) if (bar->bar[0].mem) { addr = nvkm_memory_addr(bar->bar[0].mem) >> 12; - nvkm_wr32(device, 0x001714, 0xc0000000 | addr); + nvkm_wr32(device, 0x001714, 0x80000000 | addr); } return 0; diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 7c7242b256e0..062ef2607d0c 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -30,6 +30,7 @@ #include "radeon_audio.h" #include "atom.h" #include <linux/backlight.h> +#include <linux/dmi.h> extern int atom_debug; @@ -2183,9 +2184,17 @@ int radeon_atom_pick_dig_encoder(struct drm_encoder *encoder, int fe_idx) goto assigned; } - /* on DCE32 and encoder can driver any block so just crtc id */ + /* + * On DCE32 any encoder can drive any block so usually just use crtc id, + * but Apple thinks different at least on iMac10,1, so there use linkb, + * otherwise the internal eDP panel will stay dark. + */ if (ASIC_IS_DCE32(rdev)) { - enc_idx = radeon_crtc->crtc_id; + if (dmi_match(DMI_PRODUCT_NAME, "iMac10,1")) + enc_idx = (dig->linkb) ? 1 : 0; + else + enc_idx = radeon_crtc->crtc_id; + goto assigned; } diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 3c32f095a873..2ccf81168d1e 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -782,6 +782,12 @@ bool ci_dpm_vblank_too_short(struct radeon_device *rdev) if (r600_dpm_get_vrefresh(rdev) > 120) return true; + /* disable mclk switching if the refresh is >120Hz, even if the + * blanking period would allow it + */ + if (r600_dpm_get_vrefresh(rdev) > 120) + return true; + if (vblank_time < switch_limit) return true; else diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index 88a4b706be16..4ec80ae1fa99 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -282,26 +282,6 @@ static void rcar_du_crtc_update_planes(struct rcar_du_crtc *rcrtc) * Page Flip */ -void rcar_du_crtc_cancel_page_flip(struct rcar_du_crtc *rcrtc, - struct drm_file *file) -{ - struct drm_pending_vblank_event *event; - struct drm_device *dev = rcrtc->crtc.dev; - unsigned long flags; - - /* Destroy the pending vertical blanking event associated with the - * pending page flip, if any, and disable vertical blanking interrupts. - */ - spin_lock_irqsave(&dev->event_lock, flags); - event = rcrtc->event; - if (event && event->base.file_priv == file) { - rcrtc->event = NULL; - event->base.destroy(&event->base); - drm_crtc_vblank_put(&rcrtc->crtc); - } - spin_unlock_irqrestore(&dev->event_lock, flags); -} - static void rcar_du_crtc_finish_page_flip(struct rcar_du_crtc *rcrtc) { struct drm_pending_vblank_event *event; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h index 4b95d9d08c49..2bbe3f5aab65 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h @@ -67,8 +67,6 @@ enum rcar_du_output { int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index); void rcar_du_crtc_enable_vblank(struct rcar_du_crtc *rcrtc, bool enable); -void rcar_du_crtc_cancel_page_flip(struct rcar_du_crtc *rcrtc, - struct drm_file *file); void rcar_du_crtc_suspend(struct rcar_du_crtc *rcrtc); void rcar_du_crtc_resume(struct rcar_du_crtc *rcrtc); diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c index 40422f6b645e..bb9cd35d7fdf 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c @@ -144,91 +144,6 @@ MODULE_DEVICE_TABLE(of, rcar_du_of_table); * DRM operations */ -static int rcar_du_unload(struct drm_device *dev) -{ - struct rcar_du_device *rcdu = dev->dev_private; - - if (rcdu->fbdev) - drm_fbdev_cma_fini(rcdu->fbdev); - - drm_kms_helper_poll_fini(dev); - drm_mode_config_cleanup(dev); - drm_vblank_cleanup(dev); - - dev->irq_enabled = 0; - dev->dev_private = NULL; - - return 0; -} - -static int rcar_du_load(struct drm_device *dev, unsigned long flags) -{ - struct platform_device *pdev = dev->platformdev; - struct device_node *np = pdev->dev.of_node; - struct rcar_du_device *rcdu; - struct resource *mem; - int ret; - - if (np == NULL) { - dev_err(dev->dev, "no platform data\n"); - return -ENODEV; - } - - rcdu = devm_kzalloc(&pdev->dev, sizeof(*rcdu), GFP_KERNEL); - if (rcdu == NULL) { - dev_err(dev->dev, "failed to allocate private data\n"); - return -ENOMEM; - } - - init_waitqueue_head(&rcdu->commit.wait); - - rcdu->dev = &pdev->dev; - rcdu->info = of_match_device(rcar_du_of_table, rcdu->dev)->data; - rcdu->ddev = dev; - dev->dev_private = rcdu; - - /* I/O resources */ - mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - rcdu->mmio = devm_ioremap_resource(&pdev->dev, mem); - if (IS_ERR(rcdu->mmio)) - return PTR_ERR(rcdu->mmio); - - /* Initialize vertical blanking interrupts handling. Start with vblank - * disabled for all CRTCs. - */ - ret = drm_vblank_init(dev, (1 << rcdu->info->num_crtcs) - 1); - if (ret < 0) { - dev_err(&pdev->dev, "failed to initialize vblank\n"); - goto done; - } - - /* DRM/KMS objects */ - ret = rcar_du_modeset_init(rcdu); - if (ret < 0) { - dev_err(&pdev->dev, "failed to initialize DRM/KMS (%d)\n", ret); - goto done; - } - - dev->irq_enabled = 1; - - platform_set_drvdata(pdev, rcdu); - -done: - if (ret) - rcar_du_unload(dev); - - return ret; -} - -static void rcar_du_preclose(struct drm_device *dev, struct drm_file *file) -{ - struct rcar_du_device *rcdu = dev->dev_private; - unsigned int i; - - for (i = 0; i < rcdu->num_crtcs; ++i) - rcar_du_crtc_cancel_page_flip(&rcdu->crtcs[i], file); -} - static void rcar_du_lastclose(struct drm_device *dev) { struct rcar_du_device *rcdu = dev->dev_private; @@ -269,11 +184,7 @@ static const struct file_operations rcar_du_fops = { static struct drm_driver rcar_du_driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME | DRIVER_ATOMIC, - .load = rcar_du_load, - .unload = rcar_du_unload, - .preclose = rcar_du_preclose, .lastclose = rcar_du_lastclose, - .set_busid = drm_platform_set_busid, .get_vblank_counter = drm_vblank_no_hw_counter, .enable_vblank = rcar_du_enable_vblank, .disable_vblank = rcar_du_disable_vblank, @@ -333,18 +244,104 @@ static const struct dev_pm_ops rcar_du_pm_ops = { * Platform driver */ -static int rcar_du_probe(struct platform_device *pdev) +static int rcar_du_remove(struct platform_device *pdev) { - return drm_platform_init(&rcar_du_driver, pdev); + struct rcar_du_device *rcdu = platform_get_drvdata(pdev); + struct drm_device *ddev = rcdu->ddev; + + mutex_lock(&ddev->mode_config.mutex); + drm_connector_unplug_all(ddev); + mutex_unlock(&ddev->mode_config.mutex); + + drm_dev_unregister(ddev); + + if (rcdu->fbdev) + drm_fbdev_cma_fini(rcdu->fbdev); + + drm_kms_helper_poll_fini(ddev); + drm_mode_config_cleanup(ddev); + + drm_dev_unref(ddev); + + return 0; } -static int rcar_du_remove(struct platform_device *pdev) +static int rcar_du_probe(struct platform_device *pdev) { - struct rcar_du_device *rcdu = platform_get_drvdata(pdev); + struct device_node *np = pdev->dev.of_node; + struct rcar_du_device *rcdu; + struct drm_connector *connector; + struct drm_device *ddev; + struct resource *mem; + int ret; + + if (np == NULL) { + dev_err(&pdev->dev, "no device tree node\n"); + return -ENODEV; + } - drm_put_dev(rcdu->ddev); + /* Allocate and initialize the DRM and R-Car device structures. */ + rcdu = devm_kzalloc(&pdev->dev, sizeof(*rcdu), GFP_KERNEL); + if (rcdu == NULL) + return -ENOMEM; + + init_waitqueue_head(&rcdu->commit.wait); + + rcdu->dev = &pdev->dev; + rcdu->info = of_match_device(rcar_du_of_table, rcdu->dev)->data; + + platform_set_drvdata(pdev, rcdu); + + /* I/O resources */ + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + rcdu->mmio = devm_ioremap_resource(&pdev->dev, mem); + if (IS_ERR(rcdu->mmio)) + return PTR_ERR(rcdu->mmio); + + /* DRM/KMS objects */ + ddev = drm_dev_alloc(&rcar_du_driver, &pdev->dev); + if (!ddev) + return -ENOMEM; + + drm_dev_set_unique(ddev, dev_name(&pdev->dev)); + + rcdu->ddev = ddev; + ddev->dev_private = rcdu; + + ret = rcar_du_modeset_init(rcdu); + if (ret < 0) { + dev_err(&pdev->dev, "failed to initialize DRM/KMS (%d)\n", ret); + goto error; + } + + ddev->irq_enabled = 1; + + /* Register the DRM device with the core and the connectors with + * sysfs. + */ + ret = drm_dev_register(ddev, 0); + if (ret) + goto error; + + mutex_lock(&ddev->mode_config.mutex); + drm_for_each_connector(connector, ddev) { + ret = drm_connector_register(connector); + if (ret < 0) + break; + } + mutex_unlock(&ddev->mode_config.mutex); + + if (ret < 0) + goto error; + + DRM_INFO("Device %s probed\n", dev_name(&pdev->dev)); return 0; + +error: + rcar_du_remove(pdev); + + return ret; } static struct platform_driver rcar_du_platform_driver = { diff --git a/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.c b/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.c index 96f2eb43713c..6038be93c58d 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.c @@ -55,12 +55,6 @@ static const struct drm_connector_helper_funcs connector_helper_funcs = { .best_encoder = rcar_du_connector_best_encoder, }; -static void rcar_du_hdmi_connector_destroy(struct drm_connector *connector) -{ - drm_connector_unregister(connector); - drm_connector_cleanup(connector); -} - static enum drm_connector_status rcar_du_hdmi_connector_detect(struct drm_connector *connector, bool force) { @@ -79,7 +73,7 @@ static const struct drm_connector_funcs connector_funcs = { .reset = drm_atomic_helper_connector_reset, .detect = rcar_du_hdmi_connector_detect, .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = rcar_du_hdmi_connector_destroy, + .destroy = drm_connector_cleanup, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; @@ -108,9 +102,6 @@ int rcar_du_hdmi_connector_init(struct rcar_du_device *rcdu, return ret; drm_connector_helper_add(connector, &connector_helper_funcs); - ret = drm_connector_register(connector); - if (ret < 0) - return ret; connector->dpms = DRM_MODE_DPMS_OFF; drm_object_property_set_value(&connector->base, diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c index ca12e8ca5552..46429c4be8e5 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c @@ -761,6 +761,13 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) if (ret < 0) return ret; + /* Initialize vertical blanking interrupts handling. Start with vblank + * disabled for all CRTCs. + */ + ret = drm_vblank_init(dev, (1 << rcdu->info->num_crtcs) - 1); + if (ret < 0) + return ret; + /* Initialize the groups. */ num_groups = DIV_ROUND_UP(rcdu->num_crtcs, 2); diff --git a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c index 0c43032fc693..e905f5da7aaa 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c @@ -62,12 +62,6 @@ static const struct drm_connector_helper_funcs connector_helper_funcs = { .best_encoder = rcar_du_connector_best_encoder, }; -static void rcar_du_lvds_connector_destroy(struct drm_connector *connector) -{ - drm_connector_unregister(connector); - drm_connector_cleanup(connector); -} - static enum drm_connector_status rcar_du_lvds_connector_detect(struct drm_connector *connector, bool force) { @@ -79,7 +73,7 @@ static const struct drm_connector_funcs connector_funcs = { .reset = drm_atomic_helper_connector_reset, .detect = rcar_du_lvds_connector_detect, .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = rcar_du_lvds_connector_destroy, + .destroy = drm_connector_cleanup, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; @@ -117,9 +111,6 @@ int rcar_du_lvds_connector_init(struct rcar_du_device *rcdu, return ret; drm_connector_helper_add(connector, &connector_helper_funcs); - ret = drm_connector_register(connector); - if (ret < 0) - return ret; connector->dpms = DRM_MODE_DPMS_OFF; drm_object_property_set_value(&connector->base, diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c b/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c index e0a5d8f93963..9d7e5c99caf6 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c @@ -31,12 +31,6 @@ static const struct drm_connector_helper_funcs connector_helper_funcs = { .best_encoder = rcar_du_connector_best_encoder, }; -static void rcar_du_vga_connector_destroy(struct drm_connector *connector) -{ - drm_connector_unregister(connector); - drm_connector_cleanup(connector); -} - static enum drm_connector_status rcar_du_vga_connector_detect(struct drm_connector *connector, bool force) { @@ -48,7 +42,7 @@ static const struct drm_connector_funcs connector_funcs = { .reset = drm_atomic_helper_connector_reset, .detect = rcar_du_vga_connector_detect, .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = rcar_du_vga_connector_destroy, + .destroy = drm_connector_cleanup, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; @@ -76,9 +70,6 @@ int rcar_du_vga_connector_init(struct rcar_du_device *rcdu, return ret; drm_connector_helper_add(connector, &connector_helper_funcs); - ret = drm_connector_register(connector); - if (ret < 0) - return ret; connector->dpms = DRM_MODE_DPMS_OFF; drm_object_property_set_value(&connector->base, diff --git a/drivers/gpu/drm/virtio/virtgpu_fb.c b/drivers/gpu/drm/virtio/virtgpu_fb.c index 6a81e084593b..2b59d80a09b8 100644 --- a/drivers/gpu/drm/virtio/virtgpu_fb.c +++ b/drivers/gpu/drm/virtio/virtgpu_fb.c @@ -338,7 +338,7 @@ static int virtio_gpufb_create(struct drm_fb_helper *helper, info->fbops = &virtio_gpufb_ops; info->pixmap.flags = FB_PIXMAP_SYSTEM; - info->screen_base = obj->vmap; + info->screen_buffer = obj->vmap; info->screen_size = obj->gem_base.size; drm_fb_helper_fill_fix(info, fb->pitches[0], fb->depth); drm_fb_helper_fill_var(info, &vfbdev->helper, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index ecf15cf0c3fd..04fd0f2b6af0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -471,7 +471,7 @@ static int vmw_cmd_invalid(struct vmw_private *dev_priv, struct vmw_sw_context *sw_context, SVGA3dCmdHeader *header) { - return capable(CAP_SYS_ADMIN) ? : -EINVAL; + return -EINVAL; } static int vmw_cmd_ok(struct vmw_private *dev_priv, diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 10a8f98b3b49..902897669fb9 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2433,6 +2433,7 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0002) }, { HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0003) }, { HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0004) }, + { HID_USB_DEVICE(USB_VENDOR_ID_PETZL, USB_DEVICE_ID_PETZL_HEADLAMP) }, { HID_USB_DEVICE(USB_VENDOR_ID_PHILIPS, USB_DEVICE_ID_PHILIPS_IEEE802154_DONGLE) }, { HID_USB_DEVICE(USB_VENDOR_ID_POWERCOM, USB_DEVICE_ID_POWERCOM_UPS) }, #if defined(CONFIG_MOUSE_SYNAPTICS_USB) || defined(CONFIG_MOUSE_SYNAPTICS_USB_MODULE) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index ff9a518121ff..796a425fbba1 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -782,6 +782,9 @@ #define USB_VENDOR_ID_PETALYNX 0x18b1 #define USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE 0x0037 +#define USB_VENDOR_ID_PETZL 0x2122 +#define USB_DEVICE_ID_PETZL_HEADLAMP 0x1234 + #define USB_VENDOR_ID_PHILIPS 0x0471 #define USB_DEVICE_ID_PHILIPS_IEEE802154_DONGLE 0x0617 diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c index fa24d5196615..c7122919a8c0 100644 --- a/drivers/iio/accel/bmc150-accel-core.c +++ b/drivers/iio/accel/bmc150-accel-core.c @@ -194,7 +194,6 @@ struct bmc150_accel_data { struct device *dev; int irq; struct bmc150_accel_interrupt interrupts[BMC150_ACCEL_INTERRUPTS]; - atomic_t active_intr; struct bmc150_accel_trigger triggers[BMC150_ACCEL_TRIGGERS]; struct mutex mutex; u8 fifo_mode, watermark; @@ -489,11 +488,6 @@ static int bmc150_accel_set_interrupt(struct bmc150_accel_data *data, int i, goto out_fix_power_state; } - if (state) - atomic_inc(&data->active_intr); - else - atomic_dec(&data->active_intr); - return 0; out_fix_power_state: @@ -1704,8 +1698,7 @@ static int bmc150_accel_resume(struct device *dev) struct bmc150_accel_data *data = iio_priv(indio_dev); mutex_lock(&data->mutex); - if (atomic_read(&data->active_intr)) - bmc150_accel_set_mode(data, BMC150_ACCEL_SLEEP_MODE_NORMAL, 0); + bmc150_accel_set_mode(data, BMC150_ACCEL_SLEEP_MODE_NORMAL, 0); bmc150_accel_fifo_set_mode(data); mutex_unlock(&data->mutex); diff --git a/drivers/iio/adc/vf610_adc.c b/drivers/iio/adc/vf610_adc.c index b10f629cc44b..1dbc2143cdfc 100644 --- a/drivers/iio/adc/vf610_adc.c +++ b/drivers/iio/adc/vf610_adc.c @@ -77,7 +77,7 @@ #define VF610_ADC_ADSTS_MASK 0x300 #define VF610_ADC_ADLPC_EN 0x80 #define VF610_ADC_ADHSC_EN 0x400 -#define VF610_ADC_REFSEL_VALT 0x100 +#define VF610_ADC_REFSEL_VALT 0x800 #define VF610_ADC_REFSEL_VBG 0x1000 #define VF610_ADC_ADTRG_HARD 0x2000 #define VF610_ADC_AVGS_8 0x4000 diff --git a/drivers/iio/light/tsl2563.c b/drivers/iio/light/tsl2563.c index 12731d6b89ec..ec1b2e798cc1 100644 --- a/drivers/iio/light/tsl2563.c +++ b/drivers/iio/light/tsl2563.c @@ -626,7 +626,7 @@ static irqreturn_t tsl2563_event_handler(int irq, void *private) struct tsl2563_chip *chip = iio_priv(dev_info); iio_push_event(dev_info, - IIO_UNMOD_EVENT_CODE(IIO_LIGHT, + IIO_UNMOD_EVENT_CODE(IIO_INTENSITY, 0, IIO_EV_TYPE_THRESH, IIO_EV_DIR_EITHER), diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 3f5741a3e728..43d5166db4c6 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -857,6 +857,8 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, } else ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, qp_attr_mask); + qp_attr->port_num = id_priv->id.port_num; + *qp_attr_mask |= IB_QP_PORT; } else ret = -ENOSYS; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 9eca4b41fa0a..b7a73f1a8beb 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2287,8 +2287,9 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - if (cmd.port_num < rdma_start_port(ib_dev) || - cmd.port_num > rdma_end_port(ib_dev)) + if ((cmd.attr_mask & IB_QP_PORT) && + (cmd.port_num < rdma_start_port(ib_dev) || + cmd.port_num > rdma_end_port(ib_dev))) return -EINVAL; INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index b0edb66a291b..0b7f5a701c60 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1581,7 +1581,7 @@ isert_rcv_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn, u32 xfer_len) { - struct ib_device *ib_dev = isert_conn->cm_id->device; + struct ib_device *ib_dev = isert_conn->device->ib_device; struct iscsi_hdr *hdr; u64 rx_dma; int rx_buflen; diff --git a/drivers/input/misc/keychord.c b/drivers/input/misc/keychord.c index a5ea27ad0e16..fdcc14653b64 100644 --- a/drivers/input/misc/keychord.c +++ b/drivers/input/misc/keychord.c @@ -60,6 +60,10 @@ struct keychord_device { unsigned char head; unsigned char tail; __u16 buff[BUFFER_SIZE]; + /* Bit to serialize writes to this device */ +#define KEYCHORD_BUSY 0x01 + unsigned long flags; + wait_queue_head_t write_waitq; }; static int check_keychord(struct keychord_device *kdev, @@ -172,7 +176,6 @@ static int keychord_connect(struct input_handler *handler, goto err_input_open_device; pr_info("keychord: using input dev %s for fevent\n", dev->name); - return 0; err_input_open_device: @@ -225,6 +228,41 @@ static ssize_t keychord_read(struct file *file, char __user *buffer, } /* + * serializes writes on a device. can use mutex_lock_interruptible() + * for this particular use case as well - a matter of preference. + */ +static int +keychord_write_lock(struct keychord_device *kdev) +{ + int ret; + unsigned long flags; + + spin_lock_irqsave(&kdev->lock, flags); + while (kdev->flags & KEYCHORD_BUSY) { + spin_unlock_irqrestore(&kdev->lock, flags); + ret = wait_event_interruptible(kdev->write_waitq, + ((kdev->flags & KEYCHORD_BUSY) == 0)); + if (ret) + return ret; + spin_lock_irqsave(&kdev->lock, flags); + } + kdev->flags |= KEYCHORD_BUSY; + spin_unlock_irqrestore(&kdev->lock, flags); + return 0; +} + +static void +keychord_write_unlock(struct keychord_device *kdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kdev->lock, flags); + kdev->flags &= ~KEYCHORD_BUSY; + spin_unlock_irqrestore(&kdev->lock, flags); + wake_up_interruptible(&kdev->write_waitq); +} + +/* * keychord_write is used to configure the driver */ static ssize_t keychord_write(struct file *file, const char __user *buffer, @@ -232,9 +270,11 @@ static ssize_t keychord_write(struct file *file, const char __user *buffer, { struct keychord_device *kdev = file->private_data; struct input_keychord *keychords = 0; - struct input_keychord *keychord, *next, *end; + struct input_keychord *keychord; int ret, i, key; unsigned long flags; + size_t resid = count; + size_t key_bytes; if (count < sizeof(struct input_keychord)) return -EINVAL; @@ -248,6 +288,22 @@ static ssize_t keychord_write(struct file *file, const char __user *buffer, return -EFAULT; } + /* + * Serialize writes to this device to prevent various races. + * 1) writers racing here could do duplicate input_unregister_handler() + * calls, resulting in attempting to unlink a node from a list that + * does not exist. + * 2) writers racing here could do duplicate input_register_handler() calls + * below, resulting in a duplicate insertion of a node into the list. + * 3) a double kfree of keychords can occur (in the event that + * input_register_handler() fails below. + */ + ret = keychord_write_lock(kdev); + if (ret) { + kfree(keychords); + return ret; + } + /* unregister handler before changing configuration */ if (kdev->registered) { input_unregister_handler(&kdev->input_handler); @@ -265,15 +321,29 @@ static ssize_t keychord_write(struct file *file, const char __user *buffer, kdev->head = kdev->tail = 0; keychord = keychords; - end = (struct input_keychord *)((char *)keychord + count); - while (keychord < end) { - next = NEXT_KEYCHORD(keychord); - if (keychord->count <= 0 || next > end) { + while (resid > 0) { + /* Is the entire keychord entry header present ? */ + if (resid < sizeof(struct input_keychord)) { + pr_err("keychord: Insufficient bytes present for header %zu\n", + resid); + goto err_unlock_return; + } + resid -= sizeof(struct input_keychord); + if (keychord->count <= 0) { pr_err("keychord: invalid keycode count %d\n", keychord->count); goto err_unlock_return; } + key_bytes = keychord->count * sizeof(keychord->keycodes[0]); + /* Do we have all the expected keycodes ? */ + if (resid < key_bytes) { + pr_err("keychord: Insufficient bytes present for keycount %zu\n", + resid); + goto err_unlock_return; + } + resid -= key_bytes; + if (keychord->version != KEYCHORD_VERSION) { pr_err("keychord: unsupported version %d\n", keychord->version); @@ -292,7 +362,7 @@ static ssize_t keychord_write(struct file *file, const char __user *buffer, } kdev->keychord_count++; - keychord = next; + keychord = NEXT_KEYCHORD(keychord); } kdev->keychords = keychords; @@ -302,15 +372,19 @@ static ssize_t keychord_write(struct file *file, const char __user *buffer, if (ret) { kfree(keychords); kdev->keychords = 0; + keychord_write_unlock(kdev); return ret; } kdev->registered = 1; + keychord_write_unlock(kdev); + return count; err_unlock_return: spin_unlock_irqrestore(&kdev->lock, flags); kfree(keychords); + keychord_write_unlock(kdev); return -EINVAL; } @@ -336,6 +410,7 @@ static int keychord_open(struct inode *inode, struct file *file) spin_lock_init(&kdev->lock); init_waitqueue_head(&kdev->waitq); + init_waitqueue_head(&kdev->write_waitq); kdev->input_handler.event = keychord_event; kdev->input_handler.connect = keychord_connect; @@ -357,6 +432,7 @@ static int keychord_release(struct inode *inode, struct file *file) if (kdev->registered) input_unregister_handler(&kdev->input_handler); + kfree(kdev->keychords); kfree(kdev); return 0; diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 89abfdb539ac..c84c685056b9 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -434,8 +434,10 @@ static int i8042_start(struct serio *serio) { struct i8042_port *port = serio->port_data; + spin_lock_irq(&i8042_lock); port->exists = true; - mb(); + spin_unlock_irq(&i8042_lock); + return 0; } @@ -448,16 +450,20 @@ static void i8042_stop(struct serio *serio) { struct i8042_port *port = serio->port_data; + spin_lock_irq(&i8042_lock); port->exists = false; + port->serio = NULL; + spin_unlock_irq(&i8042_lock); /* + * We need to make sure that interrupt handler finishes using + * our serio port before we return from this function. * We synchronize with both AUX and KBD IRQs because there is * a (very unlikely) chance that AUX IRQ is raised for KBD port * and vice versa. */ synchronize_irq(I8042_AUX_IRQ); synchronize_irq(I8042_KBD_IRQ); - port->serio = NULL; } /* @@ -574,7 +580,7 @@ static irqreturn_t i8042_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&i8042_lock, flags); - if (likely(port->exists && !filtered)) + if (likely(serio && !filtered)) serio_interrupt(serio, data, dfl); out: diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 3d1514ac932d..0d65e8709036 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -644,6 +644,9 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, int enabled; u64 val; + if (cpu >= nr_cpu_ids) + return -EINVAL; + if (gic_irq_in_rdist(d)) return -EINVAL; diff --git a/drivers/irqchip/irq-keystone.c b/drivers/irqchip/irq-keystone.c index deb89d63a728..e684be1bb7c0 100644 --- a/drivers/irqchip/irq-keystone.c +++ b/drivers/irqchip/irq-keystone.c @@ -19,9 +19,9 @@ #include <linux/bitops.h> #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/interrupt.h> #include <linux/irqdomain.h> #include <linux/irqchip.h> -#include <linux/irqchip/chained_irq.h> #include <linux/of.h> #include <linux/of_platform.h> #include <linux/mfd/syscon.h> @@ -39,6 +39,7 @@ struct keystone_irq_device { struct irq_domain *irqd; struct regmap *devctrl_regs; u32 devctrl_offset; + raw_spinlock_t wa_lock; }; static inline u32 keystone_irq_readl(struct keystone_irq_device *kirq) @@ -83,17 +84,15 @@ static void keystone_irq_ack(struct irq_data *d) /* nothing to do here */ } -static void keystone_irq_handler(struct irq_desc *desc) +static irqreturn_t keystone_irq_handler(int irq, void *keystone_irq) { - unsigned int irq = irq_desc_get_irq(desc); - struct keystone_irq_device *kirq = irq_desc_get_handler_data(desc); + struct keystone_irq_device *kirq = keystone_irq; + unsigned long wa_lock_flags; unsigned long pending; int src, virq; dev_dbg(kirq->dev, "start irq %d\n", irq); - chained_irq_enter(irq_desc_get_chip(desc), desc); - pending = keystone_irq_readl(kirq); keystone_irq_writel(kirq, pending); @@ -111,13 +110,15 @@ static void keystone_irq_handler(struct irq_desc *desc) if (!virq) dev_warn(kirq->dev, "sporious irq detected hwirq %d, virq %d\n", src, virq); + raw_spin_lock_irqsave(&kirq->wa_lock, wa_lock_flags); generic_handle_irq(virq); + raw_spin_unlock_irqrestore(&kirq->wa_lock, + wa_lock_flags); } } - chained_irq_exit(irq_desc_get_chip(desc), desc); - dev_dbg(kirq->dev, "end irq %d\n", irq); + return IRQ_HANDLED; } static int keystone_irq_map(struct irq_domain *h, unsigned int virq, @@ -182,9 +183,16 @@ static int keystone_irq_probe(struct platform_device *pdev) return -ENODEV; } + raw_spin_lock_init(&kirq->wa_lock); + platform_set_drvdata(pdev, kirq); - irq_set_chained_handler_and_data(kirq->irq, keystone_irq_handler, kirq); + ret = request_irq(kirq->irq, keystone_irq_handler, + 0, dev_name(dev), kirq); + if (ret) { + irq_domain_remove(kirq->irqd); + return ret; + } /* clear all source bits */ keystone_irq_writel(kirq, ~0x0); @@ -199,6 +207,8 @@ static int keystone_irq_remove(struct platform_device *pdev) struct keystone_irq_device *kirq = platform_get_drvdata(pdev); int hwirq; + free_irq(kirq->irq, kirq); + for (hwirq = 0; hwirq < KEYSTONE_N_IRQ; hwirq++) irq_dispose_mapping(irq_find_mapping(kirq->irqd, hwirq)); diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c index 17304705f2cf..05fa9f7af53c 100644 --- a/drivers/irqchip/irq-mxs.c +++ b/drivers/irqchip/irq-mxs.c @@ -131,12 +131,16 @@ static struct irq_chip mxs_icoll_chip = { .irq_ack = icoll_ack_irq, .irq_mask = icoll_mask_irq, .irq_unmask = icoll_unmask_irq, + .flags = IRQCHIP_MASK_ON_SUSPEND | + IRQCHIP_SKIP_SET_WAKE, }; static struct irq_chip asm9260_icoll_chip = { .irq_ack = icoll_ack_irq, .irq_mask = asm9260_mask_irq, .irq_unmask = asm9260_unmask_irq, + .flags = IRQCHIP_MASK_ON_SUSPEND | + IRQCHIP_SKIP_SET_WAKE, }; asmlinkage void __exception_irq_entry icoll_handle_irq(struct pt_regs *regs) diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c index 9b856e1890d1..e4c43a17b333 100644 --- a/drivers/isdn/i4l/isdn_common.c +++ b/drivers/isdn/i4l/isdn_common.c @@ -1379,6 +1379,7 @@ isdn_ioctl(struct file *file, uint cmd, ulong arg) if (arg) { if (copy_from_user(bname, argp, sizeof(bname) - 1)) return -EFAULT; + bname[sizeof(bname)-1] = 0; } else return -EINVAL; ret = mutex_lock_interruptible(&dev->mtx); diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c index aa5dd5668528..dbad5c431bcb 100644 --- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c @@ -2611,10 +2611,9 @@ isdn_net_newslave(char *parm) char newname[10]; if (p) { - /* Slave-Name MUST not be empty */ - if (!strlen(p + 1)) + /* Slave-Name MUST not be empty or overflow 'newname' */ + if (strscpy(newname, p + 1, sizeof(newname)) <= 0) return NULL; - strcpy(newname, p + 1); *p = 0; /* Master must already exist */ if (!(n = isdn_net_findif(parm))) diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 9c1e8adaf4fc..bf3fbd00a091 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -2364,7 +2364,7 @@ static struct ippp_ccp_reset_state *isdn_ppp_ccp_reset_alloc_state(struct ippp_s id); return NULL; } else { - rs = kzalloc(sizeof(struct ippp_ccp_reset_state), GFP_KERNEL); + rs = kzalloc(sizeof(struct ippp_ccp_reset_state), GFP_ATOMIC); if (!rs) return NULL; rs->state = CCPResetIdle; diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index 6a4811f85705..9cf826df89b1 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -104,11 +104,14 @@ static void tx_tick(struct mbox_chan *chan, int r) /* Submit next message */ msg_submit(chan); + if (!mssg) + return; + /* Notify the client */ - if (mssg && chan->cl->tx_done) + if (chan->cl->tx_done) chan->cl->tx_done(chan->cl, mssg, r); - if (chan->cl->tx_block) + if (r != -ETIME && chan->cl->tx_block) complete(&chan->tx_complete); } @@ -261,7 +264,7 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg) msg_submit(chan); - if (chan->cl->tx_block && chan->active_req) { + if (chan->cl->tx_block) { unsigned long wait; int ret; @@ -272,8 +275,8 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg) ret = wait_for_completion_timeout(&chan->tx_complete, wait); if (ret == 0) { - t = -EIO; - tx_tick(chan, -EIO); + t = -ETIME; + tx_tick(chan, t); } } diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c index c3c9502baf18..c521df010ee3 100644 --- a/drivers/md/dm-android-verity.c +++ b/drivers/md/dm-android-verity.c @@ -645,6 +645,8 @@ static int add_as_linear_device(struct dm_target *ti, char *dev) android_verity_target.iterate_devices = dm_linear_iterate_devices, android_verity_target.io_hints = NULL; + set_disk_ro(dm_disk(dm_table_get_md(ti->table)), 0); + err = dm_linear_ctr(ti, DM_LINEAR_ARGS, linear_table_args); if (!err) { diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index d81be5e471d0..f24a9e14021d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1088,7 +1088,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) */ DEFINE_WAIT(w); for (;;) { - flush_signals(current); + sigset_t full, old; prepare_to_wait(&conf->wait_barrier, &w, TASK_INTERRUPTIBLE); if (bio_end_sector(bio) <= mddev->suspend_lo || @@ -1097,7 +1097,10 @@ static void make_request(struct mddev *mddev, struct bio * bio) !md_cluster_ops->area_resyncing(mddev, WRITE, bio->bi_iter.bi_sector, bio_end_sector(bio)))) break; + sigfillset(&full); + sigprocmask(SIG_BLOCK, &full, &old); schedule(); + sigprocmask(SIG_SETMASK, &old, NULL); } finish_wait(&conf->wait_barrier, &w); } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4384b46cee1a..8f60520c8392 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5279,12 +5279,15 @@ static void make_request(struct mddev *mddev, struct bio * bi) * userspace, we want an interruptible * wait. */ - flush_signals(current); prepare_to_wait(&conf->wait_for_overlap, &w, TASK_INTERRUPTIBLE); if (logical_sector >= mddev->suspend_lo && logical_sector < mddev->suspend_hi) { + sigset_t full, old; + sigfillset(&full); + sigprocmask(SIG_BLOCK, &full, &old); schedule(); + sigprocmask(SIG_SETMASK, &old, NULL); do_prepare = true; } goto retry; @@ -5818,6 +5821,8 @@ static void raid5_do_work(struct work_struct *work) pr_debug("%d stripes handled\n", handled); spin_unlock_irq(&conf->device_lock); + + async_tx_issue_pending_all(); blk_finish_plug(&plug); pr_debug("--- raid5worker inactive\n"); @@ -7528,12 +7533,10 @@ static void end_reshape(struct r5conf *conf) { if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { - struct md_rdev *rdev; spin_lock_irq(&conf->device_lock); conf->previous_raid_disks = conf->raid_disks; - rdev_for_each(rdev, conf->mddev) - rdev->data_offset = rdev->new_data_offset; + md_finish_reshape(conf->mddev); smp_wmb(); conf->reshape_progress = MaxSector; conf->mddev->reshape_position = MaxSector; diff --git a/drivers/media/i2c/s5c73m3/s5c73m3-ctrls.c b/drivers/media/i2c/s5c73m3/s5c73m3-ctrls.c index 8001cde1db1e..503135a4f47a 100644 --- a/drivers/media/i2c/s5c73m3/s5c73m3-ctrls.c +++ b/drivers/media/i2c/s5c73m3/s5c73m3-ctrls.c @@ -211,7 +211,7 @@ static int s5c73m3_3a_lock(struct s5c73m3 *state, struct v4l2_ctrl *ctrl) } if ((ctrl->val ^ ctrl->cur.val) & V4L2_LOCK_FOCUS) - ret = s5c73m3_af_run(state, ~af_lock); + ret = s5c73m3_af_run(state, !af_lock); return ret; } diff --git a/drivers/media/pci/cx88/cx88-cards.c b/drivers/media/pci/cx88/cx88-cards.c index 8f2556ec3971..61611d1682d1 100644 --- a/drivers/media/pci/cx88/cx88-cards.c +++ b/drivers/media/pci/cx88/cx88-cards.c @@ -3691,7 +3691,14 @@ struct cx88_core *cx88_core_create(struct pci_dev *pci, int nr) core->nr = nr; sprintf(core->name, "cx88[%d]", core->nr); - core->tvnorm = V4L2_STD_NTSC_M; + /* + * Note: Setting initial standard here would cause first call to + * cx88_set_tvnorm() to return without programming any registers. Leave + * it blank for at this point and it will get set later in + * cx8800_initdev() + */ + core->tvnorm = 0; + core->width = 320; core->height = 240; core->field = V4L2_FIELD_INTERLACED; diff --git a/drivers/media/pci/cx88/cx88-video.c b/drivers/media/pci/cx88/cx88-video.c index aef9acf351f6..abbf5b05b6f5 100644 --- a/drivers/media/pci/cx88/cx88-video.c +++ b/drivers/media/pci/cx88/cx88-video.c @@ -1429,7 +1429,7 @@ static int cx8800_initdev(struct pci_dev *pci_dev, /* initial device configuration */ mutex_lock(&core->lock); - cx88_set_tvnorm(core, core->tvnorm); + cx88_set_tvnorm(core, V4L2_STD_NTSC_M); v4l2_ctrl_handler_setup(&core->video_hdl); v4l2_ctrl_handler_setup(&core->audio_hdl); cx88_video_mux(core, 0); diff --git a/drivers/media/pci/saa7164/saa7164-bus.c b/drivers/media/pci/saa7164/saa7164-bus.c index a18fe5d47238..b4857cd7069e 100644 --- a/drivers/media/pci/saa7164/saa7164-bus.c +++ b/drivers/media/pci/saa7164/saa7164-bus.c @@ -393,11 +393,11 @@ int saa7164_bus_get(struct saa7164_dev *dev, struct tmComResInfo* msg, msg_tmp.size = le16_to_cpu((__force __le16)msg_tmp.size); msg_tmp.command = le32_to_cpu((__force __le32)msg_tmp.command); msg_tmp.controlselector = le16_to_cpu((__force __le16)msg_tmp.controlselector); + memcpy(msg, &msg_tmp, sizeof(*msg)); /* No need to update the read positions, because this was a peek */ /* If the caller specifically want to peek, return */ if (peekonly) { - memcpy(msg, &msg_tmp, sizeof(*msg)); goto peekout; } @@ -442,21 +442,15 @@ int saa7164_bus_get(struct saa7164_dev *dev, struct tmComResInfo* msg, space_rem = bus->m_dwSizeGetRing - curr_grp; if (space_rem < sizeof(*msg)) { - /* msg wraps around the ring */ - memcpy_fromio(msg, bus->m_pdwGetRing + curr_grp, space_rem); - memcpy_fromio((u8 *)msg + space_rem, bus->m_pdwGetRing, - sizeof(*msg) - space_rem); if (buf) memcpy_fromio(buf, bus->m_pdwGetRing + sizeof(*msg) - space_rem, buf_size); } else if (space_rem == sizeof(*msg)) { - memcpy_fromio(msg, bus->m_pdwGetRing + curr_grp, sizeof(*msg)); if (buf) memcpy_fromio(buf, bus->m_pdwGetRing, buf_size); } else { /* Additional data wraps around the ring */ - memcpy_fromio(msg, bus->m_pdwGetRing + curr_grp, sizeof(*msg)); if (buf) { memcpy_fromio(buf, bus->m_pdwGetRing + curr_grp + sizeof(*msg), space_rem - sizeof(*msg)); @@ -469,15 +463,10 @@ int saa7164_bus_get(struct saa7164_dev *dev, struct tmComResInfo* msg, } else { /* No wrapping */ - memcpy_fromio(msg, bus->m_pdwGetRing + curr_grp, sizeof(*msg)); if (buf) memcpy_fromio(buf, bus->m_pdwGetRing + curr_grp + sizeof(*msg), buf_size); } - /* Convert from little endian to CPU */ - msg->size = le16_to_cpu((__force __le16)msg->size); - msg->command = le32_to_cpu((__force __le32)msg->command); - msg->controlselector = le16_to_cpu((__force __le16)msg->controlselector); /* Update the read positions, adjusting the ring */ saa7164_writel(bus->m_dwGetReadPos, new_grp); diff --git a/drivers/media/platform/davinci/vpfe_capture.c b/drivers/media/platform/davinci/vpfe_capture.c index 7767e072d623..1f656a3a84b9 100644 --- a/drivers/media/platform/davinci/vpfe_capture.c +++ b/drivers/media/platform/davinci/vpfe_capture.c @@ -1709,27 +1709,9 @@ static long vpfe_param_handler(struct file *file, void *priv, switch (cmd) { case VPFE_CMD_S_CCDC_RAW_PARAMS: + ret = -EINVAL; v4l2_warn(&vpfe_dev->v4l2_dev, - "VPFE_CMD_S_CCDC_RAW_PARAMS: experimental ioctl\n"); - if (ccdc_dev->hw_ops.set_params) { - ret = ccdc_dev->hw_ops.set_params(param); - if (ret) { - v4l2_dbg(1, debug, &vpfe_dev->v4l2_dev, - "Error setting parameters in CCDC\n"); - goto unlock_out; - } - ret = vpfe_get_ccdc_image_format(vpfe_dev, - &vpfe_dev->fmt); - if (ret < 0) { - v4l2_dbg(1, debug, &vpfe_dev->v4l2_dev, - "Invalid image format at CCDC\n"); - goto unlock_out; - } - } else { - ret = -EINVAL; - v4l2_dbg(1, debug, &vpfe_dev->v4l2_dev, - "VPFE_CMD_S_CCDC_RAW_PARAMS not supported\n"); - } + "VPFE_CMD_S_CCDC_RAW_PARAMS not supported\n"); break; default: ret = -ENOTTY; diff --git a/drivers/media/platform/s5p-jpeg/jpeg-core.c b/drivers/media/platform/s5p-jpeg/jpeg-core.c index 4a608cbe0fdb..9c6fc09b88e0 100644 --- a/drivers/media/platform/s5p-jpeg/jpeg-core.c +++ b/drivers/media/platform/s5p-jpeg/jpeg-core.c @@ -1098,10 +1098,10 @@ static bool s5p_jpeg_parse_hdr(struct s5p_jpeg_q_data *result, struct s5p_jpeg_ctx *ctx) { int c, components = 0, notfound, n_dht = 0, n_dqt = 0; - unsigned int height, width, word, subsampling = 0, sos = 0, sof = 0, - sof_len = 0; - unsigned int dht[S5P_JPEG_MAX_MARKER], dht_len[S5P_JPEG_MAX_MARKER], - dqt[S5P_JPEG_MAX_MARKER], dqt_len[S5P_JPEG_MAX_MARKER]; + unsigned int height = 0, width = 0, word, subsampling = 0; + unsigned int sos = 0, sof = 0, sof_len = 0; + unsigned int dht[S5P_JPEG_MAX_MARKER], dht_len[S5P_JPEG_MAX_MARKER]; + unsigned int dqt[S5P_JPEG_MAX_MARKER], dqt_len[S5P_JPEG_MAX_MARKER]; long length; struct s5p_jpeg_buffer jpeg_buffer; diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c index 65f80b8b9f7a..eb9e7feb9b13 100644 --- a/drivers/media/rc/imon.c +++ b/drivers/media/rc/imon.c @@ -1629,7 +1629,7 @@ static void imon_incoming_packet(struct imon_context *ictx, if (kc == KEY_KEYBOARD && !ictx->release_code) { ictx->last_keycode = kc; if (!nomouse) { - ictx->pad_mouse = ~(ictx->pad_mouse) & 0x1; + ictx->pad_mouse = !ictx->pad_mouse; dev_dbg(dev, "toggling to %s mode\n", ictx->pad_mouse ? "mouse" : "keyboard"); spin_unlock_irqrestore(&ictx->kc_lock, flags); diff --git a/drivers/media/rc/ir-lirc-codec.c b/drivers/media/rc/ir-lirc-codec.c index a32659fcd266..efc21b1da211 100644 --- a/drivers/media/rc/ir-lirc-codec.c +++ b/drivers/media/rc/ir-lirc-codec.c @@ -254,7 +254,7 @@ static long ir_lirc_ioctl(struct file *filep, unsigned int cmd, return 0; case LIRC_GET_REC_RESOLUTION: - val = dev->rx_resolution; + val = dev->rx_resolution / 1000; break; case LIRC_SET_WIDEBAND_RECEIVER: diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c index 65fed7146e9b..cc91f7b3d90c 100644 --- a/drivers/misc/enclosure.c +++ b/drivers/misc/enclosure.c @@ -375,6 +375,7 @@ int enclosure_add_device(struct enclosure_device *edev, int component, struct device *dev) { struct enclosure_component *cdev; + int err; if (!edev || component >= edev->components) return -EINVAL; @@ -384,12 +385,17 @@ int enclosure_add_device(struct enclosure_device *edev, int component, if (cdev->dev == dev) return -EEXIST; - if (cdev->dev) + if (cdev->dev) { enclosure_remove_links(cdev); - - put_device(cdev->dev); + put_device(cdev->dev); + } cdev->dev = get_device(dev); - return enclosure_add_links(cdev); + err = enclosure_add_links(cdev); + if (err) { + put_device(cdev->dev); + cdev->dev = NULL; + } + return err; } EXPORT_SYMBOL_GPL(enclosure_add_device); diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c index ecc4a334c507..0a54e7dac0ab 100644 --- a/drivers/net/ethernet/aurora/nb8800.c +++ b/drivers/net/ethernet/aurora/nb8800.c @@ -608,7 +608,7 @@ static void nb8800_mac_config(struct net_device *dev) mac_mode |= HALF_DUPLEX; if (gigabit) { - if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII) + if (phy_interface_is_rgmii(dev->phydev)) mac_mode |= RGMII_MODE; mac_mode |= GMAC_MODE; @@ -1295,11 +1295,10 @@ static int nb8800_tangox_init(struct net_device *dev) break; case PHY_INTERFACE_MODE_RGMII: - pad_mode = PAD_MODE_RGMII; - break; - + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: - pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY; + pad_mode = PAD_MODE_RGMII; break; default: diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 21e5b9ed1ead..3613469dc5c6 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -8722,11 +8722,14 @@ static void tg3_free_consistent(struct tg3 *tp) tg3_mem_rx_release(tp); tg3_mem_tx_release(tp); + /* Protect tg3_get_stats64() from reading freed tp->hw_stats. */ + tg3_full_lock(tp, 0); if (tp->hw_stats) { dma_free_coherent(&tp->pdev->dev, sizeof(struct tg3_hw_stats), tp->hw_stats, tp->stats_mapping); tp->hw_stats = NULL; } + tg3_full_unlock(tp); } /* diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 2a9dd460a95f..e1f9e7cebf8f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -118,8 +118,13 @@ static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem, if (!buf) return -ENOMEM; + if (offset_in_page(buf)) { + dma_free_coherent(dev, PAGE_SIZE << order, + buf, sg_dma_address(mem)); + return -ENOMEM; + } + sg_set_buf(mem, buf, PAGE_SIZE << order); - BUG_ON(mem->offset); sg_dma_len(mem) = PAGE_SIZE << order; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index cc199063612a..6c66d2979795 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -630,6 +630,10 @@ static void dump_command(struct mlx5_core_dev *dev, pr_debug("\n"); } +static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg); +static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, + struct mlx5_cmd_msg *msg); + static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); @@ -638,16 +642,27 @@ static void cmd_work_handler(struct work_struct *work) struct mlx5_cmd_layout *lay; struct semaphore *sem; unsigned long flags; + int alloc_ret; sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); if (!ent->page_queue) { - ent->idx = alloc_ent(cmd); - if (ent->idx < 0) { + alloc_ret = alloc_ent(cmd); + if (alloc_ret < 0) { + if (ent->callback) { + ent->callback(-EAGAIN, ent->context); + mlx5_free_cmd_msg(dev, ent->out); + free_msg(dev, ent->in); + free_cmd(ent); + } else { + ent->ret = -EAGAIN; + complete(&ent->done); + } mlx5_core_err(dev, "failed to allocate command entry\n"); up(sem); return; } + ent->idx = alloc_ret; } else { ent->idx = cmd->max_reg_cmds; spin_lock_irqsave(&cmd->alloc_lock, flags); diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 79ef799f88ab..c5ea1018cb47 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -326,6 +326,7 @@ enum cfg_version { static const struct pci_device_id rtl8169_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8129), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8136), 0, 0, RTL_CFG_2 }, + { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8161), 0, 0, RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8167), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8168), 0, 0, RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), 0, 0, RTL_CFG_0 }, diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 480f3dae0780..479af106aaeb 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -750,6 +750,7 @@ static struct sh_eth_cpu_data sh7734_data = { .tsu = 1, .hw_crc = 1, .select_mii = 1, + .shift_rd0 = 1, }; /* SH7763 */ @@ -818,6 +819,7 @@ static struct sh_eth_cpu_data r8a7740_data = { .rpadir_value = 2 << 16, .no_trimd = 1, .no_ade = 1, + .hw_crc = 1, .tsu = 1, .select_mii = 1, .shift_rd0 = 1, diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c index bca6a1e72d1d..e1bb802d4a4d 100644 --- a/drivers/net/irda/mcs7780.c +++ b/drivers/net/irda/mcs7780.c @@ -141,9 +141,19 @@ static int mcs_set_reg(struct mcs_cb *mcs, __u16 reg, __u16 val) static int mcs_get_reg(struct mcs_cb *mcs, __u16 reg, __u16 * val) { struct usb_device *dev = mcs->usbdev; - int ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ, - MCS_RD_RTYPE, 0, reg, val, 2, - msecs_to_jiffies(MCS_CTRL_TIMEOUT)); + void *dmabuf; + int ret; + + dmabuf = kmalloc(sizeof(__u16), GFP_KERNEL); + if (!dmabuf) + return -ENOMEM; + + ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ, + MCS_RD_RTYPE, 0, reg, dmabuf, 2, + msecs_to_jiffies(MCS_CTRL_TIMEOUT)); + + memcpy(val, dmabuf, sizeof(__u16)); + kfree(dmabuf); return ret; } diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 84b9cca152eb..e83acc608678 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -907,7 +907,7 @@ static void decode_txts(struct dp83640_private *dp83640, if (overflow) { pr_debug("tx timestamp queue overflow, count %d\n", overflow); while (skb) { - skb_complete_tx_timestamp(skb, NULL); + kfree_skb(skb); skb = skb_dequeue(&dp83640->tx_queue); } return; diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 76eaa4e2ebe0..3fa8ab3194d1 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -323,6 +323,13 @@ static int dp83867_config_init(struct phy_device *phydev) phy_write(phydev, MII_DP83867_PHYCTRL, val); } + /* Enable Interrupt output INT_OE in CFG3 register */ + if (phy_interrupt_is_valid(phydev)) { + val = phy_read(phydev, DP83867_CFG3); + val |= BIT(7); + phy_write(phydev, DP83867_CFG3, val); + } + return 0; } diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 3e1e15b3e417..66e7e85ecc7b 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -540,6 +540,8 @@ static int ksz9031_read_status(struct phy_device *phydev) if ((regval & 0xFF) == 0xFF) { phy_init_hw(phydev); phydev->link = 0; + if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev)) + phydev->drv->config_intr(phydev); } return 0; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index a8259c4337d4..edb5ff487121 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -541,6 +541,9 @@ void phy_stop_machine(struct phy_device *phydev) if (phydev->state > PHY_UP && phydev->state != PHY_HALTED) phydev->state = PHY_UP; mutex_unlock(&phydev->lock); + + /* Now we can run the state machine synchronously */ + phy_state_machine(&phydev->state_queue.work); } /** @@ -918,6 +921,15 @@ void phy_state_machine(struct work_struct *work) if (old_link != phydev->link) phydev->state = PHY_CHANGELINK; } + /* + * Failsafe: check that nobody set phydev->link=0 between two + * poll cycles, otherwise we won't leave RUNNING state as long + * as link remains down. + */ + if (!phydev->link && phydev->state == PHY_RUNNING) { + phydev->state = PHY_CHANGELINK; + dev_err(&phydev->dev, "no link in PHY_RUNNING\n"); + } break; case PHY_CHANGELINK: err = phy_read_status(phydev); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0bfbabad4431..8179727d3423 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1368,6 +1368,8 @@ static int phy_remove(struct device *dev) { struct phy_device *phydev = to_phy_device(dev); + cancel_delayed_work_sync(&phydev->state_queue); + mutex_lock(&phydev->lock); phydev->state = PHY_DOWN; mutex_unlock(&phydev->lock); @@ -1442,7 +1444,7 @@ static struct phy_driver genphy_driver[] = { .phy_id = 0xffffffff, .phy_id_mask = 0xffffffff, .name = "Generic PHY", - .soft_reset = genphy_soft_reset, + .soft_reset = genphy_no_soft_reset, .config_init = genphy_config_init, .features = PHY_GBIT_FEATURES | SUPPORTED_MII | SUPPORTED_AUI | SUPPORTED_FIBRE | diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index f64b25c221e8..cd93220c9b45 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -1009,6 +1009,7 @@ static int kaweth_probe( struct net_device *netdev; const eth_addr_t bcast_addr = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; int result = 0; + int rv = -EIO; dev_dbg(dev, "Kawasaki Device Probe (Device number:%d): 0x%4.4x:0x%4.4x:0x%4.4x\n", @@ -1029,6 +1030,7 @@ static int kaweth_probe( kaweth = netdev_priv(netdev); kaweth->dev = udev; kaweth->net = netdev; + kaweth->intf = intf; spin_lock_init(&kaweth->device_lock); init_waitqueue_head(&kaweth->term_wait); @@ -1048,6 +1050,10 @@ static int kaweth_probe( /* Download the firmware */ dev_info(dev, "Downloading firmware...\n"); kaweth->firmware_buf = (__u8 *)__get_free_page(GFP_KERNEL); + if (!kaweth->firmware_buf) { + rv = -ENOMEM; + goto err_free_netdev; + } if ((result = kaweth_download_firmware(kaweth, "kaweth/new_code.bin", 100, @@ -1139,8 +1145,6 @@ err_fw: dev_dbg(dev, "Initializing net device.\n"); - kaweth->intf = intf; - kaweth->tx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!kaweth->tx_urb) goto err_free_netdev; @@ -1204,7 +1208,7 @@ err_only_tx: err_free_netdev: free_netdev(netdev); - return -EIO; + return rv; } /**************************************************************** diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 349aecbc210a..ac945f8781ac 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -733,15 +733,15 @@ static int vrf_del_slave(struct net_device *dev, struct net_device *port_dev) static void vrf_dev_uninit(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); - struct slave_queue *queue = &vrf->queue; - struct list_head *head = &queue->all_slaves; - struct slave *slave, *next; +// struct slave_queue *queue = &vrf->queue; +// struct list_head *head = &queue->all_slaves; +// struct slave *slave, *next; vrf_rtable_destroy(vrf); vrf_rt6_destroy(vrf); - list_for_each_entry_safe(slave, next, head, list) - vrf_del_slave(dev, slave->dev); +// list_for_each_entry_safe(slave, next, head, list) +// vrf_del_slave(dev, slave->dev); free_percpu(dev->dstats); dev->dstats = NULL; @@ -914,6 +914,14 @@ static int vrf_validate(struct nlattr *tb[], struct nlattr *data[]) static void vrf_dellink(struct net_device *dev, struct list_head *head) { + struct net_vrf *vrf = netdev_priv(dev); + struct slave_queue *queue = &vrf->queue; + struct list_head *all_slaves = &queue->all_slaves; + struct slave *slave, *next; + + list_for_each_entry_safe(slave, next, all_slaves, list) + vrf_del_slave(dev, slave->dev); + unregister_netdevice_queue(dev, head); } diff --git a/drivers/net/wireless/ath/ath10k/wmi-ops.h b/drivers/net/wireless/ath/ath10k/wmi-ops.h index 8f4f6a892581..cfed5808bc4e 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-ops.h +++ b/drivers/net/wireless/ath/ath10k/wmi-ops.h @@ -639,6 +639,9 @@ ath10k_wmi_vdev_spectral_conf(struct ath10k *ar, struct sk_buff *skb; u32 cmd_id; + if (!ar->wmi.ops->gen_vdev_spectral_conf) + return -EOPNOTSUPP; + skb = ar->wmi.ops->gen_vdev_spectral_conf(ar, arg); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -654,6 +657,9 @@ ath10k_wmi_vdev_spectral_enable(struct ath10k *ar, u32 vdev_id, u32 trigger, struct sk_buff *skb; u32 cmd_id; + if (!ar->wmi.ops->gen_vdev_spectral_enable) + return -EOPNOTSUPP; + skb = ar->wmi.ops->gen_vdev_spectral_enable(ar, vdev_id, trigger, enable); if (IS_ERR(skb)) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c index 201425e7f9cb..fbc8c9a9014b 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c @@ -1815,8 +1815,6 @@ static void ar9003_hw_spectral_scan_wait(struct ath_hw *ah) static void ar9003_hw_tx99_start(struct ath_hw *ah, u32 qnum) { REG_SET_BIT(ah, AR_PHY_TEST, PHY_AGC_CLR); - REG_SET_BIT(ah, 0x9864, 0x7f000); - REG_SET_BIT(ah, 0x9924, 0x7f00fe); REG_CLR_BIT(ah, AR_DIAG_SW, AR_DIAG_RX_DIS); REG_WRITE(ah, AR_CR, AR_CR_RXD); REG_WRITE(ah, AR_DLCL_IFS(qnum), 0); diff --git a/drivers/net/wireless/ath/ath9k/tx99.c b/drivers/net/wireless/ath/ath9k/tx99.c index ac4781f37e78..b4e6304afd40 100644 --- a/drivers/net/wireless/ath/ath9k/tx99.c +++ b/drivers/net/wireless/ath/ath9k/tx99.c @@ -190,22 +190,27 @@ static ssize_t write_file_tx99(struct file *file, const char __user *user_buf, if (strtobool(buf, &start)) return -EINVAL; + mutex_lock(&sc->mutex); + if (start == sc->tx99_state) { if (!start) - return count; + goto out; ath_dbg(common, XMIT, "Resetting TX99\n"); ath9k_tx99_deinit(sc); } if (!start) { ath9k_tx99_deinit(sc); - return count; + goto out; } r = ath9k_tx99_init(sc); - if (r) + if (r) { + mutex_unlock(&sc->mutex); return r; - + } +out: + mutex_unlock(&sc->mutex); return count; } diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index bb69a5949aea..85bca557a339 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -330,18 +330,19 @@ static void wil_fw_error_worker(struct work_struct *work) wil->last_fw_recovery = jiffies; + wil_info(wil, "fw error recovery requested (try %d)...\n", + wil->recovery_count); + if (!no_fw_recovery) + wil->recovery_state = fw_recovery_running; + if (wil_wait_for_recovery(wil) != 0) + return; + mutex_lock(&wil->mutex); switch (wdev->iftype) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_MONITOR: - wil_info(wil, "fw error recovery requested (try %d)...\n", - wil->recovery_count); - if (!no_fw_recovery) - wil->recovery_state = fw_recovery_running; - if (0 != wil_wait_for_recovery(wil)) - break; - + /* silent recovery, upper layers will see disconnect */ __wil_down(wil); __wil_up(wil); break; diff --git a/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c index 70a6985334d5..da5826d788d6 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c @@ -4472,6 +4472,11 @@ brcmf_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, cfg80211_mgmt_tx_status(wdev, *cookie, buf, len, true, GFP_KERNEL); } else if (ieee80211_is_action(mgmt->frame_control)) { + if (len > BRCMF_FIL_ACTION_FRAME_SIZE + DOT11_MGMT_HDR_LEN) { + brcmf_err("invalid action frame length\n"); + err = -EINVAL; + goto exit; + } af_params = kzalloc(sizeof(*af_params), GFP_KERNEL); if (af_params == NULL) { brcmf_err("unable to allocate frame\n"); diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index 44f059f7f34e..9ebe00ea8f81 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -71,7 +71,7 @@ * only support SPI for 12xx - this code should be reworked when 18xx * support is introduced */ -#define SPI_AGGR_BUFFER_SIZE (4 * PAGE_SIZE) +#define SPI_AGGR_BUFFER_SIZE (4 * SZ_4K) /* Maximum number of SPI write chunks */ #define WSPI_MAX_NUM_OF_CHUNKS \ diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 0333ab0fd926..34173b5e886f 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -201,6 +201,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ unsigned long remaining_credit; struct timer_list credit_timeout; u64 credit_window_start; + bool rate_limited; /* Statistics */ struct xenvif_stats stats; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index e7bd63eb2876..60b26f32d31d 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -105,7 +105,11 @@ static int xenvif_poll(struct napi_struct *napi, int budget) if (work_done < budget) { napi_complete(napi); - xenvif_napi_schedule_or_enable_events(queue); + /* If the queue is rate-limited, it shall be + * rescheduled in the timer callback. + */ + if (likely(!queue->rate_limited)) + xenvif_napi_schedule_or_enable_events(queue); } return work_done; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 1049c34e7d43..72ee1c305cc4 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -687,6 +687,7 @@ static void tx_add_credit(struct xenvif_queue *queue) max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */ queue->remaining_credit = min(max_credit, max_burst); + queue->rate_limited = false; } void xenvif_tx_credit_callback(unsigned long data) @@ -1184,8 +1185,10 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size) msecs_to_jiffies(queue->credit_usec / 1000); /* Timer could already be pending in rare cases. */ - if (timer_pending(&queue->credit_timeout)) + if (timer_pending(&queue->credit_timeout)) { + queue->rate_limited = true; return true; + } /* Passed the point where we can replenish credit? */ if (time_after_eq64(now, next_credit)) { @@ -1200,6 +1203,7 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size) mod_timer(&queue->credit_timeout, next_credit); queue->credit_window_start = next_credit; + queue->rate_limited = true; return true; } diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c index 532db28145c7..a5d7332dfce5 100644 --- a/drivers/nfc/fdp/i2c.c +++ b/drivers/nfc/fdp/i2c.c @@ -210,14 +210,14 @@ static irqreturn_t fdp_nci_i2c_irq_thread_fn(int irq, void *phy_id) struct sk_buff *skb; int r; - client = phy->i2c_dev; - dev_dbg(&client->dev, "%s\n", __func__); - if (!phy || irq != phy->i2c_dev->irq) { WARN_ON_ONCE(1); return IRQ_NONE; } + client = phy->i2c_dev; + dev_dbg(&client->dev, "%s\n", __func__); + r = fdp_nci_i2c_read(phy, &skb); if (r == -EREMOTEIO) diff --git a/drivers/nfc/nfcmrvl/fw_dnld.c b/drivers/nfc/nfcmrvl/fw_dnld.c index f8dcdf4b24f6..af62c4c854f3 100644 --- a/drivers/nfc/nfcmrvl/fw_dnld.c +++ b/drivers/nfc/nfcmrvl/fw_dnld.c @@ -459,7 +459,7 @@ int nfcmrvl_fw_dnld_init(struct nfcmrvl_private *priv) INIT_WORK(&priv->fw_dnld.rx_work, fw_dnld_rx_work); snprintf(name, sizeof(name), "%s_nfcmrvl_fw_dnld_rx_wq", - dev_name(priv->dev)); + dev_name(&priv->ndev->nfc_dev->dev)); priv->fw_dnld.rx_wq = create_singlethread_workqueue(name); if (!priv->fw_dnld.rx_wq) return -ENOMEM; @@ -496,6 +496,7 @@ int nfcmrvl_fw_dnld_start(struct nci_dev *ndev, const char *firmware_name) { struct nfcmrvl_private *priv = nci_get_drvdata(ndev); struct nfcmrvl_fw_dnld *fw_dnld = &priv->fw_dnld; + int res; if (!priv->support_fw_dnld) return -ENOTSUPP; @@ -511,7 +512,9 @@ int nfcmrvl_fw_dnld_start(struct nci_dev *ndev, const char *firmware_name) */ /* Retrieve FW binary */ - if (request_firmware(&fw_dnld->fw, firmware_name, priv->dev) < 0) { + res = request_firmware(&fw_dnld->fw, firmware_name, + &ndev->nfc_dev->dev); + if (res < 0) { nfc_err(priv->dev, "failed to retrieve FW %s", firmware_name); return -ENOENT; } diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c index 51c8240a1672..a446590a71ca 100644 --- a/drivers/nfc/nfcmrvl/main.c +++ b/drivers/nfc/nfcmrvl/main.c @@ -124,12 +124,13 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy, memcpy(&priv->config, pdata, sizeof(*pdata)); if (priv->config.reset_n_io) { - rc = devm_gpio_request_one(dev, - priv->config.reset_n_io, - GPIOF_OUT_INIT_LOW, - "nfcmrvl_reset_n"); - if (rc < 0) + rc = gpio_request_one(priv->config.reset_n_io, + GPIOF_OUT_INIT_LOW, + "nfcmrvl_reset_n"); + if (rc < 0) { + priv->config.reset_n_io = 0; nfc_err(dev, "failed to request reset_n io\n"); + } } if (phy == NFCMRVL_PHY_SPI) { @@ -154,7 +155,13 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy, if (!priv->ndev) { nfc_err(dev, "nci_allocate_device failed\n"); rc = -ENOMEM; - goto error; + goto error_free_gpio; + } + + rc = nfcmrvl_fw_dnld_init(priv); + if (rc) { + nfc_err(dev, "failed to initialize FW download %d\n", rc); + goto error_free_dev; } nci_set_drvdata(priv->ndev, priv); @@ -162,24 +169,22 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy, rc = nci_register_device(priv->ndev); if (rc) { nfc_err(dev, "nci_register_device failed %d\n", rc); - goto error_free_dev; + goto error_fw_dnld_deinit; } /* Ensure that controller is powered off */ nfcmrvl_chip_halt(priv); - rc = nfcmrvl_fw_dnld_init(priv); - if (rc) { - nfc_err(dev, "failed to initialize FW download %d\n", rc); - goto error_free_dev; - } - nfc_info(dev, "registered with nci successfully\n"); return priv; +error_fw_dnld_deinit: + nfcmrvl_fw_dnld_deinit(priv); error_free_dev: nci_free_device(priv->ndev); -error: +error_free_gpio: + if (priv->config.reset_n_io) + gpio_free(priv->config.reset_n_io); kfree(priv); return ERR_PTR(rc); } @@ -195,7 +200,7 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv) nfcmrvl_fw_dnld_deinit(priv); if (priv->config.reset_n_io) - devm_gpio_free(priv->dev, priv->config.reset_n_io); + gpio_free(priv->config.reset_n_io); nci_unregister_device(ndev); nci_free_device(ndev); diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c index 83a99e38e7bd..6c0c301611c4 100644 --- a/drivers/nfc/nfcmrvl/uart.c +++ b/drivers/nfc/nfcmrvl/uart.c @@ -109,6 +109,7 @@ static int nfcmrvl_nci_uart_open(struct nci_uart *nu) struct nfcmrvl_private *priv; struct nfcmrvl_platform_data *pdata = NULL; struct nfcmrvl_platform_data config; + struct device *dev = nu->tty->dev; /* * Platform data cannot be used here since usually it is already used @@ -116,9 +117,8 @@ static int nfcmrvl_nci_uart_open(struct nci_uart *nu) * and check if DT entries were added. */ - if (nu->tty->dev->parent && nu->tty->dev->parent->of_node) - if (nfcmrvl_uart_parse_dt(nu->tty->dev->parent->of_node, - &config) == 0) + if (dev && dev->parent && dev->parent->of_node) + if (nfcmrvl_uart_parse_dt(dev->parent->of_node, &config) == 0) pdata = &config; if (!pdata) { @@ -131,7 +131,7 @@ static int nfcmrvl_nci_uart_open(struct nci_uart *nu) } priv = nfcmrvl_nci_register_dev(NFCMRVL_PHY_UART, nu, &uart_ops, - nu->tty->dev, pdata); + dev, pdata); if (IS_ERR(priv)) return PTR_ERR(priv); diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index efb2c1ceef98..957234272ef7 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1205,10 +1205,13 @@ static int btt_rw_page(struct block_device *bdev, sector_t sector, struct page *page, int rw) { struct btt *btt = bdev->bd_disk->private_data; + int rc; - btt_do_bvec(btt, NULL, page, PAGE_CACHE_SIZE, 0, rw, sector); - page_endio(page, rw & WRITE, 0); - return 0; + rc = btt_do_bvec(btt, NULL, page, PAGE_CACHE_SIZE, 0, rw, sector); + if (rc == 0) + page_endio(page, rw & WRITE, 0); + + return rc; } diff --git a/drivers/nvmem/imx-ocotp.c b/drivers/nvmem/imx-ocotp.c index b7971d410b60..74e5360c53f0 100644 --- a/drivers/nvmem/imx-ocotp.c +++ b/drivers/nvmem/imx-ocotp.c @@ -88,7 +88,7 @@ static struct nvmem_config imx_ocotp_nvmem_config = { static const struct of_device_id imx_ocotp_dt_ids[] = { { .compatible = "fsl,imx6q-ocotp", (void *)128 }, - { .compatible = "fsl,imx6sl-ocotp", (void *)32 }, + { .compatible = "fsl,imx6sl-ocotp", (void *)64 }, { .compatible = "fsl,imx6sx-ocotp", (void *)128 }, { }, }; diff --git a/drivers/of/device.c b/drivers/of/device.c index e5f47cec75f3..97a280d50d6d 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -225,6 +225,7 @@ ssize_t of_device_get_modalias(struct device *dev, char *str, ssize_t len) return tsize; } +EXPORT_SYMBOL_GPL(of_device_get_modalias); /** * of_device_uevent - Display OF related uevent information @@ -287,3 +288,4 @@ int of_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env) return 0; } +EXPORT_SYMBOL_GPL(of_device_uevent_modalias); diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 8e11fb2831cd..34f1d6b41fb9 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -741,6 +741,8 @@ ccio_map_single(struct device *dev, void *addr, size_t size, BUG_ON(!dev); ioc = GET_IOC(dev); + if (!ioc) + return DMA_ERROR_CODE; BUG_ON(size <= 0); @@ -805,6 +807,10 @@ ccio_unmap_single(struct device *dev, dma_addr_t iova, size_t size, BUG_ON(!dev); ioc = GET_IOC(dev); + if (!ioc) { + WARN_ON(!ioc); + return; + } DBG_RUN("%s() iovp 0x%lx/%x\n", __func__, (long)iova, size); @@ -908,6 +914,8 @@ ccio_map_sg(struct device *dev, struct scatterlist *sglist, int nents, BUG_ON(!dev); ioc = GET_IOC(dev); + if (!ioc) + return 0; DBG_RUN_SG("%s() START %d entries\n", __func__, nents); @@ -980,6 +988,10 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, BUG_ON(!dev); ioc = GET_IOC(dev); + if (!ioc) { + WARN_ON(!ioc); + return; + } DBG_RUN_SG("%s() START %d entries, %p,%x\n", __func__, nents, sg_virt(sglist), sglist->length); diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index a0580afe1713..7b0ca1551d7b 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c @@ -154,7 +154,10 @@ struct dino_device }; /* Looks nice and keeps the compiler happy */ -#define DINO_DEV(d) ((struct dino_device *) d) +#define DINO_DEV(d) ({ \ + void *__pdata = d; \ + BUG_ON(!__pdata); \ + (struct dino_device *)__pdata; }) /* diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c index 42844c2bc065..d0c2759076a2 100644 --- a/drivers/parisc/lba_pci.c +++ b/drivers/parisc/lba_pci.c @@ -111,8 +111,10 @@ static u32 lba_t32; /* Looks nice and keeps the compiler happy */ -#define LBA_DEV(d) ((struct lba_device *) (d)) - +#define LBA_DEV(d) ({ \ + void *__pdata = d; \ + BUG_ON(!__pdata); \ + (struct lba_device *)__pdata; }) /* ** Only allow 8 subsidiary busses per LBA diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index 225049b492e5..d6326144ce01 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -691,6 +691,8 @@ static int sba_dma_supported( struct device *dev, u64 mask) return 0; ioc = GET_IOC(dev); + if (!ioc) + return 0; /* * check if mask is >= than the current max IO Virt Address @@ -722,6 +724,8 @@ sba_map_single(struct device *dev, void *addr, size_t size, int pide; ioc = GET_IOC(dev); + if (!ioc) + return DMA_ERROR_CODE; /* save offset bits */ offset = ((dma_addr_t) (long) addr) & ~IOVP_MASK; @@ -803,6 +807,10 @@ sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, DBG_RUN("%s() iovp 0x%lx/%x\n", __func__, (long) iova, size); ioc = GET_IOC(dev); + if (!ioc) { + WARN_ON(!ioc); + return; + } offset = iova & ~IOVP_MASK; iova ^= offset; /* clear offset bits */ size += offset; @@ -942,6 +950,8 @@ sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, DBG_RUN_SG("%s() START %d entries\n", __func__, nents); ioc = GET_IOC(dev); + if (!ioc) + return 0; /* Fast path single entry scatterlists. */ if (nents == 1) { @@ -1027,6 +1037,10 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, __func__, nents, sg_virt(sglist), sglist->length); ioc = GET_IOC(dev); + if (!ioc) { + WARN_ON(!ioc); + return; + } #ifdef SBA_COLLECT_STATS ioc->usg_calls++; diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index d7ffd66814bb..fca925543fae 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -945,6 +945,7 @@ static int pci_pm_thaw_noirq(struct device *dev) return pci_legacy_resume_early(dev); pci_update_current_state(pci_dev, PCI_D0); + pci_restore_state(pci_dev); if (drv && drv->pm && drv->pm->thaw_noirq) error = drv->pm->thaw_noirq(dev); diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c index 71ccf6a90b22..2551e4adb33f 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos.c @@ -194,8 +194,6 @@ static int exynos_irq_request_resources(struct irq_data *irqd) spin_unlock_irqrestore(&bank->slock, flags); - exynos_irq_unmask(irqd); - return 0; } @@ -216,8 +214,6 @@ static void exynos_irq_release_resources(struct irq_data *irqd) shift = irqd->hwirq * bank_type->fld_width[PINCFG_TYPE_FUNC]; mask = (1 << bank_type->fld_width[PINCFG_TYPE_FUNC]) - 1; - exynos_irq_mask(irqd); - spin_lock_irqsave(&bank->slock, flags); con = readl(d->virt_base + reg_con); diff --git a/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c b/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c index 862a096c5dba..be5c71df148d 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c @@ -811,6 +811,7 @@ static const struct sunxi_desc_pin sun4i_a10_pins[] = { SUNXI_FUNCTION(0x2, "lcd1"), /* D16 */ SUNXI_FUNCTION(0x3, "pata"), /* ATAD12 */ SUNXI_FUNCTION(0x4, "keypad"), /* IN6 */ + SUNXI_FUNCTION(0x5, "sim"), /* DET */ SUNXI_FUNCTION_IRQ(0x6, 16), /* EINT16 */ SUNXI_FUNCTION(0x7, "csi1")), /* D16 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(H, 17), diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h index ce129e595b55..5c935847599c 100644 --- a/drivers/scsi/fnic/fnic.h +++ b/drivers/scsi/fnic/fnic.h @@ -248,6 +248,7 @@ struct fnic { struct completion *remove_wait; /* device remove thread blocks */ atomic_t in_flight; /* io counter */ + bool internal_reset_inprogress; u32 _reserved; /* fill hole */ unsigned long state_flags; /* protected by host lock */ enum fnic_state state; diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 266b909fe854..82e4bc8c11c5 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -2533,6 +2533,19 @@ int fnic_host_reset(struct scsi_cmnd *sc) unsigned long wait_host_tmo; struct Scsi_Host *shost = sc->device->host; struct fc_lport *lp = shost_priv(shost); + struct fnic *fnic = lport_priv(lp); + unsigned long flags; + + spin_lock_irqsave(&fnic->fnic_lock, flags); + if (fnic->internal_reset_inprogress == 0) { + fnic->internal_reset_inprogress = 1; + } else { + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "host reset in progress skipping another host reset\n"); + return SUCCESS; + } + spin_unlock_irqrestore(&fnic->fnic_lock, flags); /* * If fnic_reset is successful, wait for fabric login to complete @@ -2553,6 +2566,9 @@ int fnic_host_reset(struct scsi_cmnd *sc) } } + spin_lock_irqsave(&fnic->fnic_lock, flags); + fnic->internal_reset_inprogress = 0; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); return ret; } diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 5b2c37f1e908..9b5367294116 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -4981,15 +4981,14 @@ _base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, int sleep_flag, static int _base_make_ioc_operational(struct MPT3SAS_ADAPTER *ioc, int sleep_flag) { - int r, i; + int r, i, index; unsigned long flags; u32 reply_address; u16 smid; struct _tr_list *delayed_tr, *delayed_tr_next; u8 hide_flag; struct adapter_reply_queue *reply_q; - long reply_post_free; - u32 reply_post_free_sz, index = 0; + Mpi2ReplyDescriptorsUnion_t *reply_post_free_contig; dinitprintk(ioc, pr_info(MPT3SAS_FMT "%s\n", ioc->name, __func__)); @@ -5061,27 +5060,27 @@ _base_make_ioc_operational(struct MPT3SAS_ADAPTER *ioc, int sleep_flag) _base_assign_reply_queues(ioc); /* initialize Reply Post Free Queue */ - reply_post_free_sz = ioc->reply_post_queue_depth * - sizeof(Mpi2DefaultReplyDescriptor_t); - reply_post_free = (long)ioc->reply_post[index].reply_post_free; + index = 0; + reply_post_free_contig = ioc->reply_post[0].reply_post_free; list_for_each_entry(reply_q, &ioc->reply_queue_list, list) { + /* + * If RDPQ is enabled, switch to the next allocation. + * Otherwise advance within the contiguous region. + */ + if (ioc->rdpq_array_enable) { + reply_q->reply_post_free = + ioc->reply_post[index++].reply_post_free; + } else { + reply_q->reply_post_free = reply_post_free_contig; + reply_post_free_contig += ioc->reply_post_queue_depth; + } + reply_q->reply_post_host_index = 0; - reply_q->reply_post_free = (Mpi2ReplyDescriptorsUnion_t *) - reply_post_free; for (i = 0; i < ioc->reply_post_queue_depth; i++) reply_q->reply_post_free[i].Words = cpu_to_le64(ULLONG_MAX); if (!_base_is_controller_msix_enabled(ioc)) goto skip_init_reply_post_free_queue; - /* - * If RDPQ is enabled, switch to the next allocation. - * Otherwise advance within the contiguous region. - */ - if (ioc->rdpq_array_enable) - reply_post_free = (long) - ioc->reply_post[++index].reply_post_free; - else - reply_post_free += reply_post_free_sz; } skip_init_reply_post_free_queue: diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 6b942d9e5b74..1ed85dfc008d 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -329,12 +329,15 @@ qla2x00_sysfs_read_optrom(struct file *filp, struct kobject *kobj, struct qla_hw_data *ha = vha->hw; ssize_t rval = 0; + mutex_lock(&ha->optrom_mutex); + if (ha->optrom_state != QLA_SREADING) - return 0; + goto out; - mutex_lock(&ha->optrom_mutex); rval = memory_read_from_buffer(buf, count, &off, ha->optrom_buffer, ha->optrom_region_size); + +out: mutex_unlock(&ha->optrom_mutex); return rval; @@ -349,14 +352,19 @@ qla2x00_sysfs_write_optrom(struct file *filp, struct kobject *kobj, struct device, kobj))); struct qla_hw_data *ha = vha->hw; - if (ha->optrom_state != QLA_SWRITING) + mutex_lock(&ha->optrom_mutex); + + if (ha->optrom_state != QLA_SWRITING) { + mutex_unlock(&ha->optrom_mutex); return -EINVAL; - if (off > ha->optrom_region_size) + } + if (off > ha->optrom_region_size) { + mutex_unlock(&ha->optrom_mutex); return -ERANGE; + } if (off + count > ha->optrom_region_size) count = ha->optrom_region_size - off; - mutex_lock(&ha->optrom_mutex); memcpy(&ha->optrom_buffer[off], buf, count); mutex_unlock(&ha->optrom_mutex); diff --git a/drivers/scsi/snic/snic_main.c b/drivers/scsi/snic/snic_main.c index 2b3c25371d76..8175f997e82c 100644 --- a/drivers/scsi/snic/snic_main.c +++ b/drivers/scsi/snic/snic_main.c @@ -584,6 +584,7 @@ snic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!pool) { SNIC_HOST_ERR(shost, "dflt sgl pool creation failed\n"); + ret = -ENOMEM; goto err_free_res; } @@ -594,6 +595,7 @@ snic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!pool) { SNIC_HOST_ERR(shost, "max sgl pool creation failed\n"); + ret = -ENOMEM; goto err_free_dflt_sgl_pool; } @@ -604,6 +606,7 @@ snic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!pool) { SNIC_HOST_ERR(shost, "snic tmreq info pool creation failed.\n"); + ret = -ENOMEM; goto err_free_max_sgl_pool; } diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c index 882cd6618cd5..87a0e47eeae6 100644 --- a/drivers/spi/spi-dw.c +++ b/drivers/spi/spi-dw.c @@ -107,7 +107,10 @@ static const struct file_operations dw_spi_regs_ops = { static int dw_spi_debugfs_init(struct dw_spi *dws) { - dws->debugfs = debugfs_create_dir("dw_spi", NULL); + char name[128]; + + snprintf(name, 128, "dw_spi-%s", dev_name(&dws->master->dev)); + dws->debugfs = debugfs_create_dir(name, NULL); if (!dws->debugfs) return -ENOMEM; diff --git a/drivers/spmi/spmi.c b/drivers/spmi/spmi.c index 6b3da1bb0d63..2db681722d2c 100644 --- a/drivers/spmi/spmi.c +++ b/drivers/spmi/spmi.c @@ -364,11 +364,23 @@ static int spmi_drv_remove(struct device *dev) return 0; } +static int spmi_drv_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + int ret; + + ret = of_device_uevent_modalias(dev, env); + if (ret != -ENODEV) + return ret; + + return 0; +} + static struct bus_type spmi_bus_type = { .name = "spmi", .match = spmi_device_match, .probe = spmi_drv_probe, .remove = spmi_drv_remove, + .uevent = spmi_drv_uevent, }; /** diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index c3b0faa57863..6a8d07915c25 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -178,11 +178,11 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) mark_oom_victim(selected); task_unlock(selected); trace_lowmemory_kill(selected, cache_size, cache_limit, free); - lowmem_print(1, "Killing '%s' (%d), adj %hd,\n" \ + lowmem_print(1, "Killing '%s' (%d) (tgid %d), adj %hd,\n" \ " to free %ldkB on behalf of '%s' (%d) because\n" \ " cache %ldkB is below limit %ldkB for oom_score_adj %hd\n" \ " Free memory is %ldkB above reserved\n", - selected->comm, selected->pid, + selected->comm, selected->pid, selected->tgid, selected_oom_score_adj, selected_tasksize * (long)(PAGE_SIZE / 1024), current->comm, current->pid, diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index 8fed55342b0f..b831f08e2769 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -2901,9 +2901,6 @@ static int __init comedi_init(void) comedi_class->dev_groups = comedi_dev_groups; - /* XXX requires /proc interface */ - comedi_proc_init(); - /* create devices files for legacy/manual use */ for (i = 0; i < comedi_num_legacy_minors; i++) { struct comedi_device *dev; @@ -2921,6 +2918,9 @@ static int __init comedi_init(void) mutex_unlock(&dev->mutex); } + /* XXX requires /proc interface */ + comedi_proc_init(); + return 0; } module_init(comedi_init); diff --git a/drivers/staging/iio/resolver/ad2s1210.c b/drivers/staging/iio/resolver/ad2s1210.c index d97aa2827412..8eb7179da342 100644 --- a/drivers/staging/iio/resolver/ad2s1210.c +++ b/drivers/staging/iio/resolver/ad2s1210.c @@ -468,7 +468,7 @@ static int ad2s1210_read_raw(struct iio_dev *indio_dev, long m) { struct ad2s1210_state *st = iio_priv(indio_dev); - bool negative; + u16 negative; int ret = 0; u16 pos; s16 vel; diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index 82a7c27c517f..02c3feef4e36 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -47,6 +47,7 @@ static struct usb_device_id rtw_usb_id_tbl[] = { {USB_DEVICE(0x2001, 0x330F)}, /* DLink DWA-125 REV D1 */ {USB_DEVICE(0x2001, 0x3310)}, /* Dlink DWA-123 REV D1 */ {USB_DEVICE(0x2001, 0x3311)}, /* DLink GO-USB-N150 REV B1 */ + {USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */ {USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */ {} /* Terminating entry */ }; diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index a180c000e246..1ff1c83e2df5 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -418,6 +418,7 @@ int iscsit_reset_np_thread( return 0; } np->np_thread_state = ISCSI_NP_THREAD_RESET; + atomic_inc(&np->np_reset_count); if (np->np_thread) { spin_unlock_bh(&np->np_thread_lock); @@ -1996,6 +1997,7 @@ iscsit_setup_text_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, cmd->cmd_sn = be32_to_cpu(hdr->cmdsn); cmd->exp_stat_sn = be32_to_cpu(hdr->exp_statsn); cmd->data_direction = DMA_NONE; + kfree(cmd->text_in_ptr); cmd->text_in_ptr = NULL; return 0; @@ -3965,6 +3967,8 @@ int iscsi_target_tx_thread(void *arg) { int ret = 0; struct iscsi_conn *conn = arg; + bool conn_freed = false; + /* * Allow ourselves to be interrupted by SIGINT so that a * connection recovery / failure event can be triggered externally. @@ -3990,12 +3994,14 @@ get_immediate: goto transport_err; ret = iscsit_handle_response_queue(conn); - if (ret == 1) + if (ret == 1) { goto get_immediate; - else if (ret == -ECONNRESET) + } else if (ret == -ECONNRESET) { + conn_freed = true; goto out; - else if (ret < 0) + } else if (ret < 0) { goto transport_err; + } } transport_err: @@ -4005,8 +4011,13 @@ transport_err: * responsible for cleaning up the early connection failure. */ if (conn->conn_state != TARG_CONN_STATE_IN_LOGIN) - iscsit_take_action_for_connection_exit(conn); + iscsit_take_action_for_connection_exit(conn, &conn_freed); out: + if (!conn_freed) { + while (!kthread_should_stop()) { + msleep(100); + } + } return 0; } @@ -4105,6 +4116,7 @@ int iscsi_target_rx_thread(void *arg) u32 checksum = 0, digest = 0; struct iscsi_conn *conn = arg; struct kvec iov; + bool conn_freed = false; /* * Allow ourselves to be interrupted by SIGINT so that a * connection recovery / failure event can be triggered externally. @@ -4116,7 +4128,7 @@ int iscsi_target_rx_thread(void *arg) */ rc = wait_for_completion_interruptible(&conn->rx_login_comp); if (rc < 0 || iscsi_target_check_conn_state(conn)) - return 0; + goto out; if (conn->conn_transport->transport_type == ISCSI_INFINIBAND) { struct completion comp; @@ -4201,7 +4213,13 @@ int iscsi_target_rx_thread(void *arg) transport_err: if (!signal_pending(current)) atomic_set(&conn->transport_failed, 1); - iscsit_take_action_for_connection_exit(conn); + iscsit_take_action_for_connection_exit(conn, &conn_freed); +out: + if (!conn_freed) { + while (!kthread_should_stop()) { + msleep(100); + } + } return 0; } @@ -4575,8 +4593,11 @@ static void iscsit_logout_post_handler_closesession( * always sleep waiting for RX/TX thread shutdown to complete * within iscsit_close_connection(). */ - if (conn->conn_transport->transport_type == ISCSI_TCP) + if (conn->conn_transport->transport_type == ISCSI_TCP) { sleep = cmpxchg(&conn->tx_thread_active, true, false); + if (!sleep) + return; + } atomic_set(&conn->conn_logout_remove, 0); complete(&conn->conn_logout_comp); @@ -4592,8 +4613,11 @@ static void iscsit_logout_post_handler_samecid( { int sleep = 1; - if (conn->conn_transport->transport_type == ISCSI_TCP) + if (conn->conn_transport->transport_type == ISCSI_TCP) { sleep = cmpxchg(&conn->tx_thread_active, true, false); + if (!sleep) + return; + } atomic_set(&conn->conn_logout_remove, 0); complete(&conn->conn_logout_comp); diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index dc1bd1f1bdfe..634ad3662ed6 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -868,6 +868,7 @@ DEF_TPG_ATTRIB(default_erl); DEF_TPG_ATTRIB(t10_pi); DEF_TPG_ATTRIB(fabric_prot_type); DEF_TPG_ATTRIB(tpg_enabled_sendtargets); +DEF_TPG_ATTRIB(login_keys_workaround); static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = { &iscsi_tpg_attrib_attr_authentication, @@ -883,6 +884,7 @@ static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = { &iscsi_tpg_attrib_attr_t10_pi, &iscsi_tpg_attrib_attr_fabric_prot_type, &iscsi_tpg_attrib_attr_tpg_enabled_sendtargets, + &iscsi_tpg_attrib_attr_login_keys_workaround, NULL, }; diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index 210f6e4830e3..6c88fb021444 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -930,8 +930,10 @@ static void iscsit_handle_connection_cleanup(struct iscsi_conn *conn) } } -void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn) +void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn, bool *conn_freed) { + *conn_freed = false; + spin_lock_bh(&conn->state_lock); if (atomic_read(&conn->connection_exit)) { spin_unlock_bh(&conn->state_lock); @@ -942,6 +944,7 @@ void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn) if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) { spin_unlock_bh(&conn->state_lock); iscsit_close_connection(conn); + *conn_freed = true; return; } @@ -955,4 +958,5 @@ void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn) spin_unlock_bh(&conn->state_lock); iscsit_handle_connection_cleanup(conn); + *conn_freed = true; } diff --git a/drivers/target/iscsi/iscsi_target_erl0.h b/drivers/target/iscsi/iscsi_target_erl0.h index a9e2f9497fb2..fbc1d84a63c3 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.h +++ b/drivers/target/iscsi/iscsi_target_erl0.h @@ -9,6 +9,6 @@ extern int iscsit_stop_time2retain_timer(struct iscsi_session *); extern void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *); extern void iscsit_cause_connection_reinstatement(struct iscsi_conn *, int); extern void iscsit_fall_back_to_erl0(struct iscsi_session *); -extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *); +extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *, bool *); #endif /*** ISCSI_TARGET_ERL0_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 4a137b0ae3dc..bc2cbffec27e 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1219,9 +1219,11 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) flush_signals(current); spin_lock_bh(&np->np_thread_lock); - if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { + if (atomic_dec_if_positive(&np->np_reset_count) >= 0) { np->np_thread_state = ISCSI_NP_THREAD_ACTIVE; + spin_unlock_bh(&np->np_thread_lock); complete(&np->np_restart_comp); + return 1; } else if (np->np_thread_state == ISCSI_NP_THREAD_SHUTDOWN) { spin_unlock_bh(&np->np_thread_lock); goto exit; @@ -1254,7 +1256,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) goto exit; } else if (rc < 0) { spin_lock_bh(&np->np_thread_lock); - if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { + if (atomic_dec_if_positive(&np->np_reset_count) >= 0) { + np->np_thread_state = ISCSI_NP_THREAD_ACTIVE; spin_unlock_bh(&np->np_thread_lock); complete(&np->np_restart_comp); iscsit_put_transport(conn->conn_transport); @@ -1436,5 +1439,9 @@ int iscsi_target_login_thread(void *arg) break; } + while (!kthread_should_stop()) { + msleep(100); + } + return 0; } diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 9fc9117d0f22..58c629aec73c 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -489,14 +489,60 @@ static void iscsi_target_restore_sock_callbacks(struct iscsi_conn *conn) static int iscsi_target_do_login(struct iscsi_conn *, struct iscsi_login *); -static bool iscsi_target_sk_state_check(struct sock *sk) +static bool __iscsi_target_sk_check_close(struct sock *sk) { if (sk->sk_state == TCP_CLOSE_WAIT || sk->sk_state == TCP_CLOSE) { - pr_debug("iscsi_target_sk_state_check: TCP_CLOSE_WAIT|TCP_CLOSE," + pr_debug("__iscsi_target_sk_check_close: TCP_CLOSE_WAIT|TCP_CLOSE," "returning FALSE\n"); - return false; + return true; } - return true; + return false; +} + +static bool iscsi_target_sk_check_close(struct iscsi_conn *conn) +{ + bool state = false; + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + read_lock_bh(&sk->sk_callback_lock); + state = (__iscsi_target_sk_check_close(sk) || + test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)); + read_unlock_bh(&sk->sk_callback_lock); + } + return state; +} + +static bool iscsi_target_sk_check_flag(struct iscsi_conn *conn, unsigned int flag) +{ + bool state = false; + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + read_lock_bh(&sk->sk_callback_lock); + state = test_bit(flag, &conn->login_flags); + read_unlock_bh(&sk->sk_callback_lock); + } + return state; +} + +static bool iscsi_target_sk_check_and_clear(struct iscsi_conn *conn, unsigned int flag) +{ + bool state = false; + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + state = (__iscsi_target_sk_check_close(sk) || + test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)); + if (!state) + clear_bit(flag, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); + } + return state; } static void iscsi_target_login_drop(struct iscsi_conn *conn, struct iscsi_login *login) @@ -536,6 +582,20 @@ static void iscsi_target_do_login_rx(struct work_struct *work) pr_debug("entering iscsi_target_do_login_rx, conn: %p, %s:%d\n", conn, current->comm, current->pid); + /* + * If iscsi_target_do_login_rx() has been invoked by ->sk_data_ready() + * before initial PDU processing in iscsi_target_start_negotiation() + * has completed, go ahead and retry until it's cleared. + * + * Otherwise if the TCP connection drops while this is occuring, + * iscsi_target_start_negotiation() will detect the failure, call + * cancel_delayed_work_sync(&conn->login_work), and cleanup the + * remaining iscsi connection resources from iscsi_np process context. + */ + if (iscsi_target_sk_check_flag(conn, LOGIN_FLAGS_INITIAL_PDU)) { + schedule_delayed_work(&conn->login_work, msecs_to_jiffies(10)); + return; + } spin_lock(&tpg->tpg_state_lock); state = (tpg->tpg_state == TPG_STATE_ACTIVE); @@ -543,26 +603,12 @@ static void iscsi_target_do_login_rx(struct work_struct *work) if (!state) { pr_debug("iscsi_target_do_login_rx: tpg_state != TPG_STATE_ACTIVE\n"); - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); - return; + goto err; } - if (conn->sock) { - struct sock *sk = conn->sock->sk; - - read_lock_bh(&sk->sk_callback_lock); - state = iscsi_target_sk_state_check(sk); - read_unlock_bh(&sk->sk_callback_lock); - - if (!state) { - pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n"); - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); - return; - } + if (iscsi_target_sk_check_close(conn)) { + pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n"); + goto err; } conn->login_kworker = current; @@ -580,34 +626,29 @@ static void iscsi_target_do_login_rx(struct work_struct *work) flush_signals(current); conn->login_kworker = NULL; - if (rc < 0) { - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); - return; - } + if (rc < 0) + goto err; pr_debug("iscsi_target_do_login_rx after rx_login_io, %p, %s:%d\n", conn, current->comm, current->pid); rc = iscsi_target_do_login(conn, login); if (rc < 0) { - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); + goto err; } else if (!rc) { - if (conn->sock) { - struct sock *sk = conn->sock->sk; - - write_lock_bh(&sk->sk_callback_lock); - clear_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags); - write_unlock_bh(&sk->sk_callback_lock); - } + if (iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_READ_ACTIVE)) + goto err; } else if (rc == 1) { iscsi_target_nego_release(conn); iscsi_post_login_handler(np, conn, zero_tsih); iscsit_deaccess_np(np, tpg, tpg_np); } + return; + +err: + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); } static void iscsi_target_do_cleanup(struct work_struct *work) @@ -655,31 +696,54 @@ static void iscsi_target_sk_state_change(struct sock *sk) orig_state_change(sk); return; } + state = __iscsi_target_sk_check_close(sk); + pr_debug("__iscsi_target_sk_close_change: state: %d\n", state); + if (test_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags)) { pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1 sk_state_change" " conn: %p\n", conn); + if (state) + set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags); write_unlock_bh(&sk->sk_callback_lock); orig_state_change(sk); return; } - if (test_and_set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) { + if (test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) { pr_debug("Got LOGIN_FLAGS_CLOSED=1 sk_state_change conn: %p\n", conn); write_unlock_bh(&sk->sk_callback_lock); orig_state_change(sk); return; } + /* + * If the TCP connection has dropped, go ahead and set LOGIN_FLAGS_CLOSED, + * but only queue conn->login_work -> iscsi_target_do_login_rx() + * processing if LOGIN_FLAGS_INITIAL_PDU has already been cleared. + * + * When iscsi_target_do_login_rx() runs, iscsi_target_sk_check_close() + * will detect the dropped TCP connection from delayed workqueue context. + * + * If LOGIN_FLAGS_INITIAL_PDU is still set, which means the initial + * iscsi_target_start_negotiation() is running, iscsi_target_do_login() + * via iscsi_target_sk_check_close() or iscsi_target_start_negotiation() + * via iscsi_target_sk_check_and_clear() is responsible for detecting the + * dropped TCP connection in iscsi_np process context, and cleaning up + * the remaining iscsi connection resources. + */ + if (state) { + pr_debug("iscsi_target_sk_state_change got failed state\n"); + set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags); + state = test_bit(LOGIN_FLAGS_INITIAL_PDU, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); - state = iscsi_target_sk_state_check(sk); - write_unlock_bh(&sk->sk_callback_lock); - - pr_debug("iscsi_target_sk_state_change: state: %d\n", state); + orig_state_change(sk); - if (!state) { - pr_debug("iscsi_target_sk_state_change got failed state\n"); - schedule_delayed_work(&conn->login_cleanup_work, 0); + if (!state) + schedule_delayed_work(&conn->login_work, 0); return; } + write_unlock_bh(&sk->sk_callback_lock); + orig_state_change(sk); } @@ -818,7 +882,8 @@ static int iscsi_target_handle_csg_zero( SENDER_TARGET, login->rsp_buf, &login->rsp_length, - conn->param_list); + conn->param_list, + conn->tpg->tpg_attrib.login_keys_workaround); if (ret < 0) return -1; @@ -888,7 +953,8 @@ static int iscsi_target_handle_csg_one(struct iscsi_conn *conn, struct iscsi_log SENDER_TARGET, login->rsp_buf, &login->rsp_length, - conn->param_list); + conn->param_list, + conn->tpg->tpg_attrib.login_keys_workaround); if (ret < 0) { iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, ISCSI_LOGIN_STATUS_INIT_ERR); @@ -942,6 +1008,15 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo if (iscsi_target_handle_csg_one(conn, login) < 0) return -1; if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) { + /* + * Check to make sure the TCP connection has not + * dropped asynchronously while session reinstatement + * was occuring in this kthread context, before + * transitioning to full feature phase operation. + */ + if (iscsi_target_sk_check_close(conn)) + return -1; + login->tsih = conn->sess->tsih; login->login_complete = 1; iscsi_target_restore_sock_callbacks(conn); @@ -968,21 +1043,6 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo break; } - if (conn->sock) { - struct sock *sk = conn->sock->sk; - bool state; - - read_lock_bh(&sk->sk_callback_lock); - state = iscsi_target_sk_state_check(sk); - read_unlock_bh(&sk->sk_callback_lock); - - if (!state) { - pr_debug("iscsi_target_do_login() failed state for" - " conn: %p\n", conn); - return -1; - } - } - return 0; } @@ -1246,16 +1306,28 @@ int iscsi_target_start_negotiation( { int ret; + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + set_bit(LOGIN_FLAGS_READY, &conn->login_flags); + set_bit(LOGIN_FLAGS_INITIAL_PDU, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); + } + /* + * If iscsi_target_do_login returns zero to signal more PDU + * exchanges are required to complete the login, go ahead and + * clear LOGIN_FLAGS_INITIAL_PDU but only if the TCP connection + * is still active. + * + * Otherwise if TCP connection dropped asynchronously, go ahead + * and perform connection cleanup now. + */ ret = iscsi_target_do_login(conn, login); - if (!ret) { - if (conn->sock) { - struct sock *sk = conn->sock->sk; + if (!ret && iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_INITIAL_PDU)) + ret = -1; - write_lock_bh(&sk->sk_callback_lock); - set_bit(LOGIN_FLAGS_READY, &conn->login_flags); - write_unlock_bh(&sk->sk_callback_lock); - } - } else if (ret < 0) { + if (ret < 0) { cancel_delayed_work_sync(&conn->login_work); cancel_delayed_work_sync(&conn->login_cleanup_work); iscsi_target_restore_sock_callbacks(conn); diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index 6d1b0acbc5b3..76bde76edad1 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -764,7 +764,8 @@ static int iscsi_check_for_auth_key(char *key) return 0; } -static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param) +static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param, + bool keys_workaround) { if (IS_TYPE_BOOL_AND(param)) { if (!strcmp(param->value, NO)) @@ -772,19 +773,31 @@ static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param) } else if (IS_TYPE_BOOL_OR(param)) { if (!strcmp(param->value, YES)) SET_PSTATE_REPLY_OPTIONAL(param); - /* - * Required for gPXE iSCSI boot client - */ - if (!strcmp(param->name, IMMEDIATEDATA)) - SET_PSTATE_REPLY_OPTIONAL(param); + + if (keys_workaround) { + /* + * Required for gPXE iSCSI boot client + */ + if (!strcmp(param->name, IMMEDIATEDATA)) + SET_PSTATE_REPLY_OPTIONAL(param); + } } else if (IS_TYPE_NUMBER(param)) { if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) SET_PSTATE_REPLY_OPTIONAL(param); - /* - * Required for gPXE iSCSI boot client - */ - if (!strcmp(param->name, MAXCONNECTIONS)) - SET_PSTATE_REPLY_OPTIONAL(param); + + if (keys_workaround) { + /* + * Required for Mellanox Flexboot PXE boot ROM + */ + if (!strcmp(param->name, FIRSTBURSTLENGTH)) + SET_PSTATE_REPLY_OPTIONAL(param); + + /* + * Required for gPXE iSCSI boot client + */ + if (!strcmp(param->name, MAXCONNECTIONS)) + SET_PSTATE_REPLY_OPTIONAL(param); + } } else if (IS_PHASE_DECLARATIVE(param)) SET_PSTATE_REPLY_OPTIONAL(param); } @@ -1421,7 +1434,8 @@ int iscsi_encode_text_output( u8 sender, char *textbuf, u32 *length, - struct iscsi_param_list *param_list) + struct iscsi_param_list *param_list, + bool keys_workaround) { char *output_buf = NULL; struct iscsi_extra_response *er; @@ -1457,7 +1471,8 @@ int iscsi_encode_text_output( *length += 1; output_buf = textbuf + *length; SET_PSTATE_PROPOSER(param); - iscsi_check_proposer_for_optional_reply(param); + iscsi_check_proposer_for_optional_reply(param, + keys_workaround); pr_debug("Sending key: %s=%s\n", param->name, param->value); } diff --git a/drivers/target/iscsi/iscsi_target_parameters.h b/drivers/target/iscsi/iscsi_target_parameters.h index a0751e3f0813..17a58c2913f2 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.h +++ b/drivers/target/iscsi/iscsi_target_parameters.h @@ -40,7 +40,7 @@ extern int iscsi_extract_key_value(char *, char **, char **); extern int iscsi_update_param_value(struct iscsi_param *, char *); extern int iscsi_decode_text_input(u8, u8, char *, u32, struct iscsi_conn *); extern int iscsi_encode_text_output(u8, u8, char *, u32 *, - struct iscsi_param_list *); + struct iscsi_param_list *, bool); extern int iscsi_check_negotiated_keys(struct iscsi_param_list *); extern void iscsi_set_connection_parameters(struct iscsi_conn_ops *, struct iscsi_param_list *); diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 205a509b0dfb..63e1dcc5914d 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -227,6 +227,7 @@ static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *tpg) a->t10_pi = TA_DEFAULT_T10_PI; a->fabric_prot_type = TA_DEFAULT_FABRIC_PROT_TYPE; a->tpg_enabled_sendtargets = TA_DEFAULT_TPG_ENABLED_SENDTARGETS; + a->login_keys_workaround = TA_DEFAULT_LOGIN_KEYS_WORKAROUND; } int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_group *tpg) @@ -899,3 +900,21 @@ int iscsit_ta_tpg_enabled_sendtargets( return 0; } + +int iscsit_ta_login_keys_workaround( + struct iscsi_portal_group *tpg, + u32 flag) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if ((flag != 0) && (flag != 1)) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + + a->login_keys_workaround = flag; + pr_debug("iSCSI_TPG[%hu] - TPG enabled bit for login keys workaround: %s ", + tpg->tpgt, (a->login_keys_workaround) ? "ON" : "OFF"); + + return 0; +} diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h index 2da211920c18..901a712180f0 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.h +++ b/drivers/target/iscsi/iscsi_target_tpg.h @@ -39,5 +39,6 @@ extern int iscsit_ta_default_erl(struct iscsi_portal_group *, u32); extern int iscsit_ta_t10_pi(struct iscsi_portal_group *, u32); extern int iscsit_ta_fabric_prot_type(struct iscsi_portal_group *, u32); extern int iscsit_ta_tpg_enabled_sendtargets(struct iscsi_portal_group *, u32); +extern int iscsit_ta_login_keys_workaround(struct iscsi_portal_group *, u32); #endif /* ISCSI_TARGET_TPG_H */ diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index f916d18ccb48..b070ddf1dc37 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -92,6 +92,11 @@ static int target_fabric_mappedlun_link( pr_err("Source se_lun->lun_se_dev does not exist\n"); return -EINVAL; } + if (lun->lun_shutdown) { + pr_err("Unable to create mappedlun symlink because" + " lun->lun_shutdown=true\n"); + return -EINVAL; + } se_tpg = lun->lun_tpg; nacl_ci = &lun_acl_ci->ci_parent->ci_group->cg_item; diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 899c33b3c734..f69f4902dc07 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -673,6 +673,8 @@ void core_tpg_remove_lun( */ struct se_device *dev = rcu_dereference_raw(lun->lun_se_dev); + lun->lun_shutdown = true; + core_clear_lun_from_tpg(lun, tpg); /* * Wait for any active I/O references to percpu se_lun->lun_ref to @@ -694,6 +696,8 @@ void core_tpg_remove_lun( } if (!(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) hlist_del_rcu(&lun->link); + + lun->lun_shutdown = false; mutex_unlock(&tpg->tpg_lun_mutex); percpu_ref_exit(&lun->lun_ref); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 37c77db6e737..f71bedea973a 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -728,6 +728,15 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) if (cmd->transport_state & CMD_T_ABORTED || cmd->transport_state & CMD_T_STOP) { spin_unlock_irqrestore(&cmd->t_state_lock, flags); + /* + * If COMPARE_AND_WRITE was stopped by __transport_wait_for_tasks(), + * release se_device->caw_sem obtained by sbc_compare_and_write() + * since target_complete_ok_work() or target_complete_failure_work() + * won't be called to invoke the normal CAW completion callbacks. + */ + if (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) { + up(&dev->caw_sem); + } complete_all(&cmd->t_transport_stop_comp); return; } else if (!success) { diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c index 8814eca06021..8c1ab49c83c3 100644 --- a/drivers/tee/optee/rpc.c +++ b/drivers/tee/optee/rpc.c @@ -16,6 +16,7 @@ #include <linux/delay.h> #include <linux/device.h> +#include <linux/sched.h> #include <linux/slab.h> #include <linux/tee_drv.h> #include "optee_private.h" diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 5b4b47ed948b..87d87ac1c8a0 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -191,8 +191,10 @@ unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq) mutex_lock(&cooling_list_lock); list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) { if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) { + unsigned long level = get_level(cpufreq_dev, freq); + mutex_unlock(&cooling_list_lock); - return get_level(cpufreq_dev, freq); + return level; } } mutex_unlock(&cooling_list_lock); diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 5ab54ef4f304..e4f69bddcfb1 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2708,13 +2708,13 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) * related to the kernel should not use this. */ data = vt_get_shift_state(); - ret = __put_user(data, p); + ret = put_user(data, p); break; case TIOCL_GETMOUSEREPORTING: console_lock(); /* May be overkill */ data = mouse_reporting(); console_unlock(); - ret = __put_user(data, p); + ret = put_user(data, p); break; case TIOCL_SETVESABLANK: console_lock(); @@ -2723,7 +2723,7 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) break; case TIOCL_GETKMSGREDIRECT: data = vt_get_kmsg_redirect(); - ret = __put_user(data, p); + ret = put_user(data, p); break; case TIOCL_SETKMSGREDIRECT: if (!capable(CAP_SYS_ADMIN)) { diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 0b7194086c5a..df96f5f88c15 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1759,6 +1759,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x1576, 0x03b1), /* Maretron USB100 */ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, + { USB_DEVICE(0xfff0, 0x0100), /* DATECS FP-2000 */ + .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ + }, { USB_DEVICE(0x2912, 0x0001), /* ATOL FPrint */ .driver_info = CLEAR_HALT_CONDITIONS, diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index a43ade50b9fa..87a83d925eea 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1851,7 +1851,7 @@ void usb_hcd_flush_endpoint(struct usb_device *udev, /* No more submits can occur */ spin_lock_irq(&hcd_urb_list_lock); rescan: - list_for_each_entry (urb, &ep->urb_list, urb_list) { + list_for_each_entry_reverse(urb, &ep->urb_list, urb_list) { int is_in; if (urb->unlinked) @@ -2448,6 +2448,8 @@ void usb_hc_died (struct usb_hcd *hcd) } if (usb_hcd_is_primary_hcd(hcd) && hcd->shared_hcd) { hcd = hcd->shared_hcd; + clear_bit(HCD_FLAG_RH_RUNNING, &hcd->flags); + set_bit(HCD_FLAG_DEAD, &hcd->flags); if (hcd->rh_registered) { clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 34817fa65e19..fb9223c8cbae 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4662,7 +4662,8 @@ hub_power_remaining(struct usb_hub *hub) static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, u16 portchange) { - int status, i; + int status = -ENODEV; + int i; unsigned unit_load; struct usb_device *hdev = hub->hdev; struct usb_hcd *hcd = bus_to_hcd(hdev->bus); @@ -4866,9 +4867,10 @@ loop: done: hub_port_disable(hub, port1, 1); - if (hcd->driver->relinquish_port && !hub->hdev->parent) - hcd->driver->relinquish_port(hcd, port1); - + if (hcd->driver->relinquish_port && !hub->hdev->parent) { + if (status != -ENOTCONN && status != -ENODEV) + hcd->driver->relinquish_port(hcd, port1); + } } /* Handle physical or logical connection change events. diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 3116edfcdc18..574da2b4529c 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -150,6 +150,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* appletouch */ { USB_DEVICE(0x05ac, 0x021a), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */ + { USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM }, + /* Avision AV600U */ { USB_DEVICE(0x0638, 0x0a13), .driver_info = USB_QUIRK_STRING_FETCH_255 }, @@ -249,6 +252,7 @@ static const struct usb_device_id usb_amd_resume_quirk_list[] = { { USB_DEVICE(0x093a, 0x2500), .driver_info = USB_QUIRK_RESET_RESUME }, { USB_DEVICE(0x093a, 0x2510), .driver_info = USB_QUIRK_RESET_RESUME }, { USB_DEVICE(0x093a, 0x2521), .driver_info = USB_QUIRK_RESET_RESUME }, + { USB_DEVICE(0x03f0, 0x2b4a), .driver_info = USB_QUIRK_RESET_RESUME }, /* Logitech Optical Mouse M90/M100 */ { USB_DEVICE(0x046d, 0xc05a), .driver_info = USB_QUIRK_RESET_RESUME }, diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 99285b416308..ee579ba2b59e 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -539,7 +539,7 @@ static int hidg_set_alt(struct usb_function *f, unsigned intf, unsigned alt) } status = usb_ep_enable(hidg->out_ep); if (status < 0) { - ERROR(cdev, "Enable IN endpoint FAILED!\n"); + ERROR(cdev, "Enable OUT endpoint FAILED!\n"); goto fail; } hidg->out_ep->driver_data = hidg; diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index f9400564cb72..03b9a372636f 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -89,6 +89,7 @@ enum amd_chipset_gen { AMD_CHIPSET_HUDSON2, AMD_CHIPSET_BOLTON, AMD_CHIPSET_YANGTZE, + AMD_CHIPSET_TAISHAN, AMD_CHIPSET_UNKNOWN, }; @@ -132,6 +133,11 @@ static int amd_chipset_sb_type_init(struct amd_chipset_info *pinfo) pinfo->sb_type.gen = AMD_CHIPSET_SB700; else if (rev >= 0x40 && rev <= 0x4f) pinfo->sb_type.gen = AMD_CHIPSET_SB800; + } + pinfo->smbus_dev = pci_get_device(PCI_VENDOR_ID_AMD, + 0x145c, NULL); + if (pinfo->smbus_dev) { + pinfo->sb_type.gen = AMD_CHIPSET_TAISHAN; } else { pinfo->smbus_dev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_HUDSON2_SMBUS, NULL); @@ -251,11 +257,12 @@ int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *pdev) { /* Make sure amd chipset type has already been initialized */ usb_amd_find_chipset_info(); - if (amd_chipset.sb_type.gen != AMD_CHIPSET_YANGTZE) - return 0; - - dev_dbg(&pdev->dev, "QUIRK: Enable AMD remote wakeup fix\n"); - return 1; + if (amd_chipset.sb_type.gen == AMD_CHIPSET_YANGTZE || + amd_chipset.sb_type.gen == AMD_CHIPSET_TAISHAN) { + dev_dbg(&pdev->dev, "QUIRK: Enable AMD remote wakeup fix\n"); + return 1; + } + return 0; } EXPORT_SYMBOL_GPL(usb_hcd_amd_remote_wakeup_quirk); diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 52997fbfc07c..1721a128999a 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -768,6 +768,9 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, clear_bit(wIndex, &bus_state->resuming_ports); set_bit(wIndex, &bus_state->rexit_ports); + + xhci_test_and_clear_bit(xhci, port_array, wIndex, + PORT_PLC); xhci_set_link_state(xhci, port_array, wIndex, XDEV_U0); diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 408eccdd2ca9..abf58fe1d4f5 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -856,13 +856,16 @@ static void xhci_kill_endpoint_urbs(struct xhci_hcd *xhci, (ep->ep_state & EP_GETTING_NO_STREAMS)) { int stream_id; - for (stream_id = 0; stream_id < ep->stream_info->num_streams; + for (stream_id = 1; stream_id < ep->stream_info->num_streams; stream_id++) { + ring = ep->stream_info->stream_rings[stream_id]; + if (!ring) + continue; + xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, "Killing URBs for slot ID %u, ep index %u, stream %u", - slot_id, ep_index, stream_id + 1); - xhci_kill_ring_urbs(xhci, - ep->stream_info->stream_rings[stream_id]); + slot_id, ep_index, stream_id); + xhci_kill_ring_urbs(xhci, ring); } } else { ring = ep->ring; diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 13d5614f37f1..0d843e0f8055 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -138,6 +138,7 @@ static void musb_h_tx_flush_fifo(struct musb_hw_ep *ep) "Could not flush host TX%d fifo: csr: %04x\n", ep->epnum, csr)) return; + mdelay(1); } } diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index d82fa36c3465..005da0866836 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -733,8 +733,10 @@ static int usbhsc_resume(struct device *dev) struct usbhs_priv *priv = dev_get_drvdata(dev); struct platform_device *pdev = usbhs_priv_to_pdev(priv); - if (!usbhsc_flags_has(priv, USBHSF_RUNTIME_PWCTRL)) + if (!usbhsc_flags_has(priv, USBHSF_RUNTIME_PWCTRL)) { usbhsc_power_ctrl(priv, 1); + usbhs_mod_autonomy_mode(priv); + } usbhs_platform_call(priv, phy_reset, pdev); diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index efc4fae123a4..8647d2c2a8c4 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -37,6 +37,7 @@ struct usbhsg_gpriv; struct usbhsg_uep { struct usb_ep ep; struct usbhs_pipe *pipe; + spinlock_t lock; /* protect the pipe */ char ep_name[EP_NAME_SIZE]; @@ -638,10 +639,16 @@ usbhsg_ep_enable_end: static int usbhsg_ep_disable(struct usb_ep *ep) { struct usbhsg_uep *uep = usbhsg_ep_to_uep(ep); - struct usbhs_pipe *pipe = usbhsg_uep_to_pipe(uep); + struct usbhs_pipe *pipe; + unsigned long flags; + int ret = 0; - if (!pipe) - return -EINVAL; + spin_lock_irqsave(&uep->lock, flags); + pipe = usbhsg_uep_to_pipe(uep); + if (!pipe) { + ret = -EINVAL; + goto out; + } usbhsg_pipe_disable(uep); usbhs_pipe_free(pipe); @@ -649,6 +656,9 @@ static int usbhsg_ep_disable(struct usb_ep *ep) uep->pipe->mod_private = NULL; uep->pipe = NULL; +out: + spin_unlock_irqrestore(&uep->lock, flags); + return 0; } @@ -698,8 +708,11 @@ static int usbhsg_ep_dequeue(struct usb_ep *ep, struct usb_request *req) { struct usbhsg_uep *uep = usbhsg_ep_to_uep(ep); struct usbhsg_request *ureq = usbhsg_req_to_ureq(req); - struct usbhs_pipe *pipe = usbhsg_uep_to_pipe(uep); + struct usbhs_pipe *pipe; + unsigned long flags; + spin_lock_irqsave(&uep->lock, flags); + pipe = usbhsg_uep_to_pipe(uep); if (pipe) usbhs_pkt_pop(pipe, usbhsg_ureq_to_pkt(ureq)); @@ -708,6 +721,7 @@ static int usbhsg_ep_dequeue(struct usb_ep *ep, struct usb_request *req) * even if the pipe is NULL. */ usbhsg_queue_pop(uep, ureq, -ECONNRESET); + spin_unlock_irqrestore(&uep->lock, flags); return 0; } @@ -854,10 +868,10 @@ static int usbhsg_try_stop(struct usbhs_priv *priv, u32 status) { struct usbhsg_gpriv *gpriv = usbhsg_priv_to_gpriv(priv); struct usbhs_mod *mod = usbhs_mod_get_current(priv); - struct usbhsg_uep *dcp = usbhsg_gpriv_to_dcp(gpriv); + struct usbhsg_uep *uep; struct device *dev = usbhs_priv_to_dev(priv); unsigned long flags; - int ret = 0; + int ret = 0, i; /******************** spin lock ********************/ usbhs_lock(priv, flags); @@ -889,7 +903,9 @@ static int usbhsg_try_stop(struct usbhs_priv *priv, u32 status) usbhs_sys_set_test_mode(priv, 0); usbhs_sys_function_ctrl(priv, 0); - usbhsg_ep_disable(&dcp->ep); + /* disable all eps */ + usbhsg_for_each_uep_with_dcp(uep, gpriv, i) + usbhsg_ep_disable(&uep->ep); dev_dbg(dev, "stop gadget\n"); @@ -1072,6 +1088,7 @@ int usbhs_mod_gadget_probe(struct usbhs_priv *priv) ret = -ENOMEM; goto usbhs_mod_gadget_probe_err_gpriv; } + spin_lock_init(&uep->lock); gpriv->transceiver = usb_get_phy(USB_PHY_TYPE_UNDEFINED); dev_info(dev, "%stransceiver found\n", diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index b0dc6da3d970..41a6513646de 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -135,6 +135,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */ { USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */ { USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */ + { USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */ diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index ebe51f11105d..fe123153b1a5 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2025,6 +2025,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d04, 0xff) }, /* D-Link DWM-158 */ { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e19, 0xff), /* D-Link DWM-221 B1 */ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e35, 0xff), /* D-Link DWM-222 */ + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 1db4b61bdf7b..a51b28379850 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -49,6 +49,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID_RSAQ5) }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID) }, + { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_UC485) }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID2) }, { USB_DEVICE(ATEN_VENDOR_ID2, ATEN_PRODUCT_ID) }, { USB_DEVICE(ELCOM_VENDOR_ID, ELCOM_PRODUCT_ID) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index 09d9be88209e..3b5a15d1dc0d 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -27,6 +27,7 @@ #define ATEN_VENDOR_ID 0x0557 #define ATEN_VENDOR_ID2 0x0547 #define ATEN_PRODUCT_ID 0x2008 +#define ATEN_PRODUCT_UC485 0x2021 #define ATEN_PRODUCT_ID2 0x2118 #define IODATA_VENDOR_ID 0x04bb diff --git a/drivers/usb/storage/isd200.c b/drivers/usb/storage/isd200.c index 39afd7045c43..7bb5f8da5357 100644 --- a/drivers/usb/storage/isd200.c +++ b/drivers/usb/storage/isd200.c @@ -1520,8 +1520,11 @@ static void isd200_ata_command(struct scsi_cmnd *srb, struct us_data *us) /* Make sure driver was initialized */ - if (us->extra == NULL) + if (us->extra == NULL) { usb_stor_dbg(us, "ERROR Driver not initialized\n"); + srb->result = DID_ERROR << 16; + return; + } scsi_set_resid(srb, 0); /* scsi_bufflen might change in protocol translation to ata */ diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index 53341a77d89f..a37ed1e59e99 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -123,9 +123,9 @@ UNUSUAL_DEV(0x0bc2, 0xab2a, 0x0000, 0x9999, /* Reported-by: Benjamin Tissoires <benjamin.tissoires@redhat.com> */ UNUSUAL_DEV(0x13fd, 0x3940, 0x0000, 0x9999, "Initio Corporation", - "", + "INIC-3069", USB_SC_DEVICE, USB_PR_DEVICE, NULL, - US_FL_NO_ATA_1X), + US_FL_NO_ATA_1X | US_FL_IGNORE_RESIDUE), /* Reported-by: Tom Arild Naess <tanaess@gmail.com> */ UNUSUAL_DEV(0x152d, 0x0539, 0x0000, 0x9999, diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 830e2fd47642..b31b84f56e8f 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -902,6 +902,10 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) return ret; vdev->barmap[index] = pci_iomap(pdev, index, 0); + if (!vdev->barmap[index]) { + pci_release_selected_regions(pdev, 1 << index); + return -ENOMEM; + } } vma->vm_private_data = vdev; diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 210db24d2204..4d39f7959adf 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -190,7 +190,10 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, if (!vdev->has_vga) return -EINVAL; - switch (pos) { + if (pos > 0xbfffful) + return -EINVAL; + + switch ((u32)pos) { case 0xa0000 ... 0xbffff: count = min(count, (size_t)(0xc0000 - pos)); iomem = ioremap_nocache(0xa0000, 0xbffff - 0xa0000 + 1); diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 6070b793cbcb..1e01e28f40f3 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -296,6 +296,34 @@ static void vfio_group_put(struct vfio_group *group) kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock); } +struct vfio_group_put_work { + struct work_struct work; + struct vfio_group *group; +}; + +static void vfio_group_put_bg(struct work_struct *work) +{ + struct vfio_group_put_work *do_work; + + do_work = container_of(work, struct vfio_group_put_work, work); + + vfio_group_put(do_work->group); + kfree(do_work); +} + +static void vfio_group_schedule_put(struct vfio_group *group) +{ + struct vfio_group_put_work *do_work; + + do_work = kmalloc(sizeof(*do_work), GFP_KERNEL); + if (WARN_ON(!do_work)) + return; + + INIT_WORK(&do_work->work, vfio_group_put_bg); + do_work->group = group; + schedule_work(&do_work->work); +} + /* Assume group_lock or group reference is held */ static void vfio_group_get(struct vfio_group *group) { @@ -620,7 +648,14 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb, break; } - vfio_group_put(group); + /* + * If we're the last reference to the group, the group will be + * released, which includes unregistering the iommu group notifier. + * We hold a read-lock on that notifier list, unregistering needs + * a write-lock... deadlock. Release our reference asynchronously + * to avoid that situation. + */ + vfio_group_schedule_put(group); return NOTIFY_OK; } @@ -1552,6 +1587,15 @@ void vfio_group_put_external_user(struct vfio_group *group) } EXPORT_SYMBOL_GPL(vfio_group_put_external_user); +bool vfio_external_group_match_file(struct vfio_group *test_group, + struct file *filep) +{ + struct vfio_group *group = filep->private_data; + + return (filep->f_op == &vfio_group_fops) && (group == test_group); +} +EXPORT_SYMBOL_GPL(vfio_external_group_match_file); + int vfio_external_user_iommu_id(struct vfio_group *group) { return iommu_group_id(group->iommu_group); diff --git a/drivers/video/fbdev/cobalt_lcdfb.c b/drivers/video/fbdev/cobalt_lcdfb.c index 07675d6f323e..d4530b54479c 100644 --- a/drivers/video/fbdev/cobalt_lcdfb.c +++ b/drivers/video/fbdev/cobalt_lcdfb.c @@ -350,6 +350,11 @@ static int cobalt_lcdfb_probe(struct platform_device *dev) info->screen_size = resource_size(res); info->screen_base = devm_ioremap(&dev->dev, res->start, info->screen_size); + if (!info->screen_base) { + framebuffer_release(info); + return -ENOMEM; + } + info->fbops = &cobalt_lcd_fbops; info->fix = cobalt_lcdfb_fix; info->fix.smem_start = res->start; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 6c031dd1bc4e..8a0243efd359 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -905,17 +905,60 @@ static int load_elf_binary(struct linux_binprm *bprm) elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE; vaddr = elf_ppnt->p_vaddr; + /* + * If we are loading ET_EXEC or we have already performed + * the ET_DYN load_addr calculations, proceed normally. + */ if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) { elf_flags |= MAP_FIXED; } else if (loc->elf_ex.e_type == ET_DYN) { - /* Try and get dynamic programs out of the way of the - * default mmap base, as well as whatever program they - * might try to exec. This is because the brk will - * follow the loader, and is not movable. */ - load_bias = ELF_ET_DYN_BASE - vaddr; - if (current->flags & PF_RANDOMIZE) - load_bias += arch_mmap_rnd(); - load_bias = ELF_PAGESTART(load_bias); + /* + * This logic is run once for the first LOAD Program + * Header for ET_DYN binaries to calculate the + * randomization (load_bias) for all the LOAD + * Program Headers, and to calculate the entire + * size of the ELF mapping (total_size). (Note that + * load_addr_set is set to true later once the + * initial mapping is performed.) + * + * There are effectively two types of ET_DYN + * binaries: programs (i.e. PIE: ET_DYN with INTERP) + * and loaders (ET_DYN without INTERP, since they + * _are_ the ELF interpreter). The loaders must + * be loaded away from programs since the program + * may otherwise collide with the loader (especially + * for ET_EXEC which does not have a randomized + * position). For example to handle invocations of + * "./ld.so someprog" to test out a new version of + * the loader, the subsequent program that the + * loader loads must avoid the loader itself, so + * they cannot share the same load range. Sufficient + * room for the brk must be allocated with the + * loader as well, since brk must be available with + * the loader. + * + * Therefore, programs are loaded offset from + * ELF_ET_DYN_BASE and loaders are loaded into the + * independently randomized mmap region (0 load_bias + * without MAP_FIXED). + */ + if (elf_interpreter) { + load_bias = ELF_ET_DYN_BASE; + if (current->flags & PF_RANDOMIZE) + load_bias += arch_mmap_rnd(); + elf_flags |= MAP_FIXED; + } else + load_bias = 0; + + /* + * Since load_bias is used for all subsequent loading + * calculations, we must lower it by the first vaddr + * so that the remaining calculations based on the + * ELF vaddrs will be correctly offset. The result + * is then page aligned. + */ + load_bias = ELF_PAGESTART(load_bias - vaddr); + total_size = total_mapping_size(elf_phdata, loc->elf_ex.e_phnum); if (!total_size) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a61926cb01c0..bebd6517355d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7521,11 +7521,18 @@ static void adjust_dio_outstanding_extents(struct inode *inode, * within our reservation, otherwise we need to adjust our inode * counter appropriately. */ - if (dio_data->outstanding_extents) { + if (dio_data->outstanding_extents >= num_extents) { dio_data->outstanding_extents -= num_extents; } else { + /* + * If dio write length has been split due to no large enough + * contiguous space, we need to compensate our inode counter + * appropriately. + */ + u64 num_needed = num_extents - dio_data->outstanding_extents; + spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents += num_extents; + BTRFS_I(inode)->outstanding_extents += num_needed; spin_unlock(&BTRFS_I(inode)->lock); } } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 9314b4ea2375..be7d187d53fd 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -247,6 +247,11 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, if (ret < 0) err = ret; dput(last); + /* last_name no longer match cache index */ + if (fi->readdir_cache_idx >= 0) { + fi->readdir_cache_idx = -1; + fi->dir_release_count = 0; + } } return err; } diff --git a/fs/dcache.c b/fs/dcache.c index 7b8feb6d60c8..5bf7b4a188e9 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -269,6 +269,33 @@ static inline int dname_external(const struct dentry *dentry) return dentry->d_name.name != dentry->d_iname; } +void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + if (unlikely(dname_external(dentry))) { + struct external_name *p = external_name(dentry); + atomic_inc(&p->u.count); + spin_unlock(&dentry->d_lock); + name->name = p->name; + } else { + memcpy(name->inline_name, dentry->d_iname, DNAME_INLINE_LEN); + spin_unlock(&dentry->d_lock); + name->name = name->inline_name; + } +} +EXPORT_SYMBOL(take_dentry_name_snapshot); + +void release_dentry_name_snapshot(struct name_snapshot *name) +{ + if (unlikely(name->name != name->inline_name)) { + struct external_name *p; + p = container_of(name->name, struct external_name, name[0]); + if (unlikely(atomic_dec_and_test(&p->u.count))) + kfree_rcu(p, u.head); + } +} +EXPORT_SYMBOL(release_dentry_name_snapshot); + static inline void __d_set_inode_and_type(struct dentry *dentry, struct inode *inode, unsigned type_flags) @@ -1128,11 +1155,12 @@ void shrink_dcache_sb(struct super_block *sb) LIST_HEAD(dispose); freed = list_lru_walk(&sb->s_dentry_lru, - dentry_lru_isolate_shrink, &dispose, UINT_MAX); + dentry_lru_isolate_shrink, &dispose, 1024); this_cpu_sub(nr_dentry_unused, freed); shrink_dentry_list(&dispose); - } while (freed > 0); + cond_resched(); + } while (list_lru_count(&sb->s_dentry_lru) > 0); } EXPORT_SYMBOL(shrink_dcache_sb); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 31b7265f2e7d..6c2eaf690abf 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -705,7 +705,7 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, { int error; struct dentry *dentry = NULL, *trap; - const char *old_name; + struct name_snapshot old_name; trap = lock_rename(new_dir, old_dir); /* Source or destination directories don't exist? */ @@ -720,19 +720,19 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, if (IS_ERR(dentry) || dentry == trap || d_really_is_positive(dentry)) goto exit; - old_name = fsnotify_oldname_init(old_dentry->d_name.name); + take_dentry_name_snapshot(&old_name, old_dentry); error = simple_rename(d_inode(old_dir), old_dentry, d_inode(new_dir), dentry); if (error) { - fsnotify_oldname_free(old_name); + release_dentry_name_snapshot(&old_name); goto exit; } d_move(old_dentry, dentry); - fsnotify_move(d_inode(old_dir), d_inode(new_dir), old_name, + fsnotify_move(d_inode(old_dir), d_inode(new_dir), old_name.name, d_is_dir(old_dentry), NULL, old_dentry); - fsnotify_oldname_free(old_name); + release_dentry_name_snapshot(&old_name); unlock_rename(new_dir, old_dir); dput(dentry); return old_dentry; diff --git a/fs/exec.c b/fs/exec.c index fcd94ef3a0c2..dd3a59420506 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -206,8 +206,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, if (write) { unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; - unsigned long ptr_size; - struct rlimit *rlim; + unsigned long ptr_size, limit; /* * Since the stack will hold pointers to the strings, we @@ -236,14 +235,16 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, return page; /* - * Limit to 1/4-th the stack size for the argv+env strings. + * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM + * (whichever is smaller) for the argv+env strings. * This ensures that: * - the remaining binfmt code will not run out of stack space, * - the program will have a reasonable amount of stack left * to work from. */ - rlim = current->signal->rlim; - if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) + limit = _STK_LIM / 4 * 3; + limit = min(limit, rlimit(RLIMIT_STACK) / 4); + if (size > limit) goto fail; } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 8772bfc3415b..45ef9975caec 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -500,6 +500,8 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, lastoff = page_offset(page); bh = head = page_buffers(page); do { + if (lastoff + bh->b_size <= startoff) + goto next; if (buffer_uptodate(bh) || buffer_unwritten(bh)) { if (whence == SEEK_DATA) @@ -514,6 +516,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, unlock_page(page); goto out; } +next: lastoff += bh->b_size; bh = bh->b_this_page; } while (bh != head); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 34038e3598d5..74516efd874c 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1926,7 +1926,8 @@ retry: n_desc_blocks = o_desc_blocks + le16_to_cpu(es->s_reserved_gdt_blocks); n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb); - n_blocks_count = n_group * EXT4_BLOCKS_PER_GROUP(sb); + n_blocks_count = (ext4_fsblk_t)n_group * + EXT4_BLOCKS_PER_GROUP(sb); n_group--; /* set to last group number */ } diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index e9a8d676c6bc..83dcf7bfd7b8 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -213,7 +213,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, switch (type) { case ACL_TYPE_ACCESS: name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { + if (acl && !ipage) { error = posix_acl_update_mode(inode, &inode->i_mode, &acl); if (error) return error; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2ac3417d9412..4f666368aa85 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1091,20 +1091,18 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi) if (unlikely(fsmeta >= total)) return 1; - main_segs = le32_to_cpu(sbi->raw_super->segment_count_main); + main_segs = le32_to_cpu(raw_super->segment_count_main); blocks_per_seg = sbi->blocks_per_seg; for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs || - le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg) { + le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg) return 1; - } } for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs || - le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg) { + le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg) return 1; - } } if (unlikely(f2fs_cp_error(sbi))) { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 11538a8be9f0..1a063cbfe503 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -46,7 +46,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) { struct fuse_file *ff; - ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); + ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL); if (unlikely(!ff)) return NULL; diff --git a/fs/mount.h b/fs/mount.h index 13a4ebbbaa74..37c64bbe840c 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -57,6 +57,7 @@ struct mount { struct mnt_namespace *mnt_ns; /* containing namespace */ struct mountpoint *mnt_mp; /* where is it mounted */ struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */ + struct list_head mnt_umounting; /* list entry for umount propagation */ #ifdef CONFIG_FSNOTIFY struct hlist_head mnt_fsnotify_marks; __u32 mnt_fsnotify_mask; diff --git a/fs/namei.c b/fs/namei.c index d185869dae93..f8eeea956503 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4248,11 +4248,11 @@ int vfs_rename2(struct vfsmount *mnt, { int error; bool is_dir = d_is_dir(old_dentry); - const unsigned char *old_name; struct inode *source = old_dentry->d_inode; struct inode *target = new_dentry->d_inode; bool new_is_dir = false; unsigned max_links = new_dir->i_sb->s_max_links; + struct name_snapshot old_name; /* * Check source == target. @@ -4306,7 +4306,7 @@ int vfs_rename2(struct vfsmount *mnt, if (error) return error; - old_name = fsnotify_oldname_init(old_dentry->d_name.name); + take_dentry_name_snapshot(&old_name, old_dentry); dget(new_dentry); if (!is_dir || (flags & RENAME_EXCHANGE)) lock_two_nondirectories(source, target); @@ -4367,14 +4367,14 @@ out: mutex_unlock(&target->i_mutex); dput(new_dentry); if (!error) { - fsnotify_move(old_dir, new_dir, old_name, is_dir, + fsnotify_move(old_dir, new_dir, old_name.name, is_dir, !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry); if (flags & RENAME_EXCHANGE) { fsnotify_move(new_dir, old_dir, old_dentry->d_name.name, new_is_dir, NULL, new_dentry); } } - fsnotify_oldname_free(old_name); + release_dentry_name_snapshot(&old_name); return error; } diff --git a/fs/namespace.c b/fs/namespace.c index a22959c97384..15b91b36ecab 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -237,6 +237,7 @@ static struct mount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); INIT_HLIST_NODE(&mnt->mnt_mp_list); + INIT_LIST_HEAD(&mnt->mnt_umounting); #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); #endif diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index f31fd0dd92c6..b1daeafbea92 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -121,6 +121,7 @@ config PNFS_FILE_LAYOUT config PNFS_BLOCK tristate depends on NFS_V4_1 && BLK_DEV_DM + depends on 64BIT || LBDAF default NFS_V4 config PNFS_OBJLAYOUT diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5b21b1ca2341..348e0a05bd18 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1135,11 +1135,13 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) /* Force a full look up iff the parent directory has changed */ if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) { - - if (nfs_lookup_verify_inode(inode, flags)) { + error = nfs_lookup_verify_inode(inode, flags); + if (error) { if (flags & LOOKUP_RCU) return -ECHILD; - goto out_zap_parent; + if (error == -ESTALE) + goto out_zap_parent; + goto out_error; } goto out_valid; } @@ -1163,8 +1165,10 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) trace_nfs_lookup_revalidate_enter(dir, dentry, flags); error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error); - if (error) + if (error == -ESTALE || error == -ENOENT) goto out_bad; + if (error) + goto out_error; if (nfs_compare_fh(NFS_FH(inode), fhandle)) goto out_bad; if ((error = nfs_refresh_inode(inode, fattr)) != 0) diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index e125e55de86d..2603d7589946 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -30,6 +30,7 @@ void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds) { nfs4_print_deviceid(&mirror_ds->id_node.deviceid); nfs4_pnfs_ds_put(mirror_ds->ds); + kfree(mirror_ds->ds_versions); kfree_rcu(mirror_ds, id_node.rcu); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f714b98cfd74..668ac19af58f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1241,9 +1241,9 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat return 0; /* Has the inode gone and changed behind our back? */ if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) - return -EIO; + return -ESTALE; if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) - return -EIO; + return -ESTALE; if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && inode->i_version != fattr->change_attr) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index db39de2dd4cb..a64adc2fced9 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -104,16 +104,20 @@ int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) if (unlikely(!fsnotify_inode_watches_children(p_inode))) __fsnotify_update_child_dentry_flags(p_inode); else if (p_inode->i_fsnotify_mask & mask) { + struct name_snapshot name; + /* we are notifying a parent so come up with the new mask which * specifies these are events which came from a child. */ mask |= FS_EVENT_ON_CHILD; + take_dentry_name_snapshot(&name, dentry); if (path) ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH, - dentry->d_name.name, 0); + name.name, 0); else ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, - dentry->d_name.name, 0); + name.name, 0); + release_dentry_name_snapshot(&name); } dput(parent); diff --git a/fs/pnode.c b/fs/pnode.c index e4e428d621e9..ddb846f878b8 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -24,6 +24,11 @@ static inline struct mount *first_slave(struct mount *p) return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave); } +static inline struct mount *last_slave(struct mount *p) +{ + return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave); +} + static inline struct mount *next_slave(struct mount *p) { return list_entry(p->mnt_slave.next, struct mount, mnt_slave); @@ -164,6 +169,19 @@ static struct mount *propagation_next(struct mount *m, } } +static struct mount *skip_propagation_subtree(struct mount *m, + struct mount *origin) +{ + /* + * Advance m such that propagation_next will not return + * the slaves of m. + */ + if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) + m = last_slave(m); + + return m; +} + static struct mount *next_group(struct mount *m, struct mount *origin) { while (1) { @@ -415,65 +433,104 @@ void propagate_mount_unlock(struct mount *mnt) } } -/* - * Mark all mounts that the MNT_LOCKED logic will allow to be unmounted. - */ -static void mark_umount_candidates(struct mount *mnt) +static void umount_one(struct mount *mnt, struct list_head *to_umount) { - struct mount *parent = mnt->mnt_parent; - struct mount *m; - - BUG_ON(parent == mnt); - - for (m = propagation_next(parent, parent); m; - m = propagation_next(m, parent)) { - struct mount *child = __lookup_mnt(&m->mnt, - mnt->mnt_mountpoint); - if (!child || (child->mnt.mnt_flags & MNT_UMOUNT)) - continue; - if (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m)) { - SET_MNT_MARK(child); - } - } + CLEAR_MNT_MARK(mnt); + mnt->mnt.mnt_flags |= MNT_UMOUNT; + list_del_init(&mnt->mnt_child); + list_del_init(&mnt->mnt_umounting); + list_move_tail(&mnt->mnt_list, to_umount); } /* * NOTE: unmounting 'mnt' naturally propagates to all other mounts its * parent propagates to. */ -static void __propagate_umount(struct mount *mnt) +static bool __propagate_umount(struct mount *mnt, + struct list_head *to_umount, + struct list_head *to_restore) { - struct mount *parent = mnt->mnt_parent; - struct mount *m; + bool progress = false; + struct mount *child; - BUG_ON(parent == mnt); + /* + * The state of the parent won't change if this mount is + * already unmounted or marked as without children. + */ + if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED)) + goto out; - for (m = propagation_next(parent, parent); m; - m = propagation_next(m, parent)) { - struct mount *topper; - struct mount *child = __lookup_mnt(&m->mnt, - mnt->mnt_mountpoint); - /* - * umount the child only if the child has no children - * and the child is marked safe to unmount. - */ - if (!child || !IS_MNT_MARKED(child)) + /* Verify topper is the only grandchild that has not been + * speculatively unmounted. + */ + list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { + if (child->mnt_mountpoint == mnt->mnt.mnt_root) continue; - CLEAR_MNT_MARK(child); + if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child)) + continue; + /* Found a mounted child */ + goto children; + } - /* If there is exactly one mount covering all of child - * replace child with that mount. - */ - topper = find_topper(child); - if (topper) - mnt_change_mountpoint(child->mnt_parent, child->mnt_mp, - topper); + /* Mark mounts that can be unmounted if not locked */ + SET_MNT_MARK(mnt); + progress = true; + + /* If a mount is without children and not locked umount it. */ + if (!IS_MNT_LOCKED(mnt)) { + umount_one(mnt, to_umount); + } else { +children: + list_move_tail(&mnt->mnt_umounting, to_restore); + } +out: + return progress; +} + +static void umount_list(struct list_head *to_umount, + struct list_head *to_restore) +{ + struct mount *mnt, *child, *tmp; + list_for_each_entry(mnt, to_umount, mnt_list) { + list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) { + /* topper? */ + if (child->mnt_mountpoint == mnt->mnt.mnt_root) + list_move_tail(&child->mnt_umounting, to_restore); + else + umount_one(child, to_umount); + } + } +} - if (list_empty(&child->mnt_mounts)) { - list_del_init(&child->mnt_child); - child->mnt.mnt_flags |= MNT_UMOUNT; - list_move_tail(&child->mnt_list, &mnt->mnt_list); +static void restore_mounts(struct list_head *to_restore) +{ + /* Restore mounts to a clean working state */ + while (!list_empty(to_restore)) { + struct mount *mnt, *parent; + struct mountpoint *mp; + + mnt = list_first_entry(to_restore, struct mount, mnt_umounting); + CLEAR_MNT_MARK(mnt); + list_del_init(&mnt->mnt_umounting); + + /* Should this mount be reparented? */ + mp = mnt->mnt_mp; + parent = mnt->mnt_parent; + while (parent->mnt.mnt_flags & MNT_UMOUNT) { + mp = parent->mnt_mp; + parent = parent->mnt_parent; } + if (parent != mnt->mnt_parent) + mnt_change_mountpoint(parent, mp, mnt); + } +} + +static void cleanup_umount_visitations(struct list_head *visited) +{ + while (!list_empty(visited)) { + struct mount *mnt = + list_first_entry(visited, struct mount, mnt_umounting); + list_del_init(&mnt->mnt_umounting); } } @@ -487,12 +544,69 @@ static void __propagate_umount(struct mount *mnt) int propagate_umount(struct list_head *list) { struct mount *mnt; + LIST_HEAD(to_restore); + LIST_HEAD(to_umount); + LIST_HEAD(visited); + + /* Find candidates for unmounting */ + list_for_each_entry_reverse(mnt, list, mnt_list) { + struct mount *parent = mnt->mnt_parent; + struct mount *m; + + /* + * If this mount has already been visited it is known that it's + * entire peer group and all of their slaves in the propagation + * tree for the mountpoint has already been visited and there is + * no need to visit them again. + */ + if (!list_empty(&mnt->mnt_umounting)) + continue; + + list_add_tail(&mnt->mnt_umounting, &visited); + for (m = propagation_next(parent, parent); m; + m = propagation_next(m, parent)) { + struct mount *child = __lookup_mnt(&m->mnt, + mnt->mnt_mountpoint); + if (!child) + continue; + + if (!list_empty(&child->mnt_umounting)) { + /* + * If the child has already been visited it is + * know that it's entire peer group and all of + * their slaves in the propgation tree for the + * mountpoint has already been visited and there + * is no need to visit this subtree again. + */ + m = skip_propagation_subtree(m, parent); + continue; + } else if (child->mnt.mnt_flags & MNT_UMOUNT) { + /* + * We have come accross an partially unmounted + * mount in list that has not been visited yet. + * Remember it has been visited and continue + * about our merry way. + */ + list_add_tail(&child->mnt_umounting, &visited); + continue; + } + + /* Check the child and parents while progress is made */ + while (__propagate_umount(child, + &to_umount, &to_restore)) { + /* Is the parent a umount candidate? */ + child = child->mnt_parent; + if (list_empty(&child->mnt_umounting)) + break; + } + } + } - list_for_each_entry_reverse(mnt, list, mnt_list) - mark_umount_candidates(mnt); + umount_list(&to_umount, &to_restore); + restore_mounts(&to_restore); + cleanup_umount_visitations(&visited); + list_splice_tail(&to_umount, list); - list_for_each_entry(mnt, list, mnt_list) - __propagate_umount(mnt); return 0; } diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 8d1e5e2db6a1..c9e4bc47c79d 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -434,7 +434,7 @@ static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt, for (i = 0; i < cxt->max_dump_cnt; i++) { cxt->przs[i] = persistent_ram_new(*paddr, cxt->record_size, 0, &cxt->ecc_info, - cxt->memtype); + cxt->memtype, 0); if (IS_ERR(cxt->przs[i])) { err = PTR_ERR(cxt->przs[i]); dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n", @@ -471,7 +471,8 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt, return -ENOMEM; } - *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, cxt->memtype); + *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, + cxt->memtype, 0); if (IS_ERR(*prz)) { int err = PTR_ERR(*prz); diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 3975deec02f8..e11672aa4575 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -48,16 +48,15 @@ static inline size_t buffer_start(struct persistent_ram_zone *prz) return atomic_read(&prz->buffer->start); } -static DEFINE_RAW_SPINLOCK(buffer_lock); - /* increase and wrap the start pointer, returning the old value */ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) { int old; int new; - unsigned long flags; + unsigned long flags = 0; - raw_spin_lock_irqsave(&buffer_lock, flags); + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_lock_irqsave(&prz->buffer_lock, flags); old = atomic_read(&prz->buffer->start); new = old + a; @@ -65,7 +64,8 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) new -= prz->buffer_size; atomic_set(&prz->buffer->start, new); - raw_spin_unlock_irqrestore(&buffer_lock, flags); + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_unlock_irqrestore(&prz->buffer_lock, flags); return old; } @@ -75,9 +75,10 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a) { size_t old; size_t new; - unsigned long flags; + unsigned long flags = 0; - raw_spin_lock_irqsave(&buffer_lock, flags); + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_lock_irqsave(&prz->buffer_lock, flags); old = atomic_read(&prz->buffer->size); if (old == prz->buffer_size) @@ -89,7 +90,8 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a) atomic_set(&prz->buffer->size, new); exit: - raw_spin_unlock_irqrestore(&buffer_lock, flags); + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_unlock_irqrestore(&prz->buffer_lock, flags); } static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz, @@ -491,6 +493,7 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, prz->buffer->sig); } + /* Rewind missing or invalid memory area. */ prz->buffer->sig = sig; persistent_ram_zap(prz); @@ -517,7 +520,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz) struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, u32 sig, struct persistent_ram_ecc_info *ecc_info, - unsigned int memtype) + unsigned int memtype, u32 flags) { struct persistent_ram_zone *prz; int ret = -ENOMEM; @@ -528,6 +531,10 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, goto err; } + /* Initialize general buffer state. */ + raw_spin_lock_init(&prz->buffer_lock); + prz->flags = flags; + ret = persistent_ram_buffer_map(start, size, prz, memtype); if (ret) goto err; diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c index 6076c342dae6..5ac0b0bbb0ec 100644 --- a/fs/sdcardfs/file.c +++ b/fs/sdcardfs/file.c @@ -104,12 +104,19 @@ static long sdcardfs_unlocked_ioctl(struct file *file, unsigned int cmd, { long err = -ENOTTY; struct file *lower_file; + const struct cred *saved_cred = NULL; + struct dentry *dentry = file->f_path.dentry; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); lower_file = sdcardfs_lower_file(file); /* XXX: use vfs_ioctl if/when VFS exports it */ if (!lower_file || !lower_file->f_op) goto out; + + /* save current_cred and override it */ + OVERRIDE_CRED(sbi, saved_cred, SDCARDFS_I(file_inode(file))); + if (lower_file->f_op->unlocked_ioctl) err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); @@ -117,6 +124,7 @@ static long sdcardfs_unlocked_ioctl(struct file *file, unsigned int cmd, if (!err) sdcardfs_copy_and_fix_attrs(file_inode(file), file_inode(lower_file)); + REVERT_CRED(saved_cred); out: return err; } @@ -127,15 +135,23 @@ static long sdcardfs_compat_ioctl(struct file *file, unsigned int cmd, { long err = -ENOTTY; struct file *lower_file; + const struct cred *saved_cred = NULL; + struct dentry *dentry = file->f_path.dentry; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); lower_file = sdcardfs_lower_file(file); /* XXX: use vfs_ioctl if/when VFS exports it */ if (!lower_file || !lower_file->f_op) goto out; + + /* save current_cred and override it */ + OVERRIDE_CRED(sbi, saved_cred, SDCARDFS_I(file_inode(file))); + if (lower_file->f_op->compat_ioctl) err = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); + REVERT_CRED(saved_cred); out: return err; } diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 60fea424835f..103dc45a131f 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -766,13 +766,9 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct * afterwards in the other cases: we fsstack_copy_inode_size from * the lower level. */ - if (current->mm) - down_write(¤t->mm->mmap_sem); if (ia->ia_valid & ATTR_SIZE) { err = inode_newsize_ok(&tmp, ia->ia_size); if (err) { - if (current->mm) - up_write(¤t->mm->mmap_sem); goto out; } truncate_setsize(inode, ia->ia_size); @@ -795,8 +791,6 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct err = notify_change2(lower_mnt, lower_dentry, &lower_ia, /* note: lower_ia */ NULL); mutex_unlock(&d_inode(lower_dentry)->i_mutex); - if (current->mm) - up_write(¤t->mm->mmap_sem); if (err) goto out; diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 3c5b51d49d21..80825b287836 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -364,41 +364,34 @@ out: return err; } -/* A feature which supports mount_nodev() with options */ -static struct dentry *mount_nodev_with_options(struct vfsmount *mnt, - struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, - int (*fill_super)(struct vfsmount *, struct super_block *, - const char *, void *, int)) +struct sdcardfs_mount_private { + struct vfsmount *mnt; + const char *dev_name; + void *raw_data; +}; +static int __sdcardfs_fill_super( + struct super_block *sb, + void *_priv, int silent) { - int error; - struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL); - - if (IS_ERR(s)) - return ERR_CAST(s); - - s->s_flags = flags; + struct sdcardfs_mount_private *priv = _priv; - error = fill_super(mnt, s, dev_name, data, flags & MS_SILENT ? 1 : 0); - if (error) { - deactivate_locked_super(s); - return ERR_PTR(error); - } - s->s_flags |= MS_ACTIVE; - return dget(s->s_root); + return sdcardfs_read_super(priv->mnt, + sb, priv->dev_name, priv->raw_data, silent); } static struct dentry *sdcardfs_mount(struct vfsmount *mnt, struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { - /* - * dev_name is a lower_path_name, - * raw_data is a option string. - */ - return mount_nodev_with_options(mnt, fs_type, flags, dev_name, - raw_data, sdcardfs_read_super); + struct sdcardfs_mount_private priv = { + .mnt = mnt, + .dev_name = dev_name, + .raw_data = raw_data + }; + + return mount_nodev(fs_type, flags, + &priv, __sdcardfs_fill_super); } static struct dentry *sdcardfs_mount_wrn(struct file_system_type *fs_type, @@ -423,7 +416,7 @@ void sdcardfs_kill_sb(struct super_block *sb) list_del(&sbi->list); mutex_unlock(&sdcardfs_super_list_lock); } - generic_shutdown_super(sb); + kill_anon_super(sb); } static struct file_system_type sdcardfs_fs_type = { diff --git a/fs/seq_file.c b/fs/seq_file.c index d672e2fec459..6dc4296eed62 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -72,9 +72,10 @@ int seq_open(struct file *file, const struct seq_operations *op) mutex_init(&p->lock); p->op = op; -#ifdef CONFIG_USER_NS - p->user_ns = file->f_cred->user_ns; -#endif + + // No refcounting: the lifetime of 'p' is constrained + // to the lifetime of the file. + p->file = file; /* * Wrappers around seq_open(e.g. swaps_open) need to be diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 7be3166ba553..0e659d9c69a1 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1235,8 +1235,8 @@ int udf_setsize(struct inode *inode, loff_t newsize) return err; } set_size: - truncate_setsize(inode, newsize); up_write(&iinfo->i_data_sem); + truncate_setsize(inode, newsize); } else { if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { down_write(&iinfo->i_data_sem); @@ -1253,9 +1253,9 @@ set_size: udf_get_block); if (err) return err; + truncate_setsize(inode, newsize); down_write(&iinfo->i_data_sem); udf_clear_extent_cache(inode); - truncate_setsize(inode, newsize); udf_truncate_extents(inode); up_write(&iinfo->i_data_sem); } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 187b80267ff9..a9063ac50c4e 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1426,6 +1426,26 @@ __xfs_get_blocks( if (error) goto out_unlock; + /* + * The only time we can ever safely find delalloc blocks on direct I/O + * is a dio write to post-eof speculative preallocation. All other + * scenarios are indicative of a problem or misuse (such as mixing + * direct and mapped I/O). + * + * The file may be unmapped by the time we get here so we cannot + * reliably fail the I/O based on mapping. Instead, fail the I/O if this + * is a read or a write within eof. Otherwise, carry on but warn as a + * precuation if the file happens to be mapped. + */ + if (direct && imap.br_startblock == DELAYSTARTBLOCK) { + if (!create || offset < i_size_read(VFS_I(ip))) { + WARN_ON_ONCE(1); + error = -EIO; + goto out_unlock; + } + WARN_ON_ONCE(mapping_mapped(VFS_I(ip)->i_mapping)); + } + /* for DAX, we convert unwritten extents directly */ if (create && (!nimaps || @@ -1525,7 +1545,6 @@ __xfs_get_blocks( set_buffer_new(bh_result); if (imap.br_startblock == DELAYSTARTBLOCK) { - BUG_ON(direct); if (create) { set_buffer_uptodate(bh_result); set_buffer_mapped(bh_result); diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 85a868ccb493..8397dc235e84 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -16,6 +16,7 @@ #ifdef CONFIG_CPUSETS +extern struct static_key cpusets_pre_enable_key; extern struct static_key cpusets_enabled_key; static inline bool cpusets_enabled(void) { @@ -30,12 +31,14 @@ static inline int nr_cpusets(void) static inline void cpuset_inc(void) { + static_key_slow_inc(&cpusets_pre_enable_key); static_key_slow_inc(&cpusets_enabled_key); } static inline void cpuset_dec(void) { static_key_slow_dec(&cpusets_enabled_key); + static_key_slow_dec(&cpusets_pre_enable_key); } extern int cpuset_init(void); @@ -104,7 +107,7 @@ extern void cpuset_print_current_mems_allowed(void); */ static inline unsigned int read_mems_allowed_begin(void) { - if (!cpusets_enabled()) + if (!static_key_false(&cpusets_pre_enable_key)) return 0; return read_seqcount_begin(¤t->mems_allowed_seq); @@ -118,7 +121,7 @@ static inline unsigned int read_mems_allowed_begin(void) */ static inline bool read_mems_allowed_retry(unsigned int seq) { - if (!cpusets_enabled()) + if (!static_key_false(&cpusets_enabled_key)) return false; return read_seqcount_retry(¤t->mems_allowed_seq, seq); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 702b6c53c12f..0acbb85ff9ff 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -616,5 +616,11 @@ static inline struct inode *d_real_inode(struct dentry *dentry) return d_backing_inode(d_real(dentry)); } +struct name_snapshot { + const char *name; + char inline_name[DNAME_INLINE_LEN]; +}; +void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *); +void release_dentry_name_snapshot(struct name_snapshot *); #endif /* __LINUX_DCACHE_H */ diff --git a/include/linux/device.h b/include/linux/device.h index b8f411b57dcb..7075a2485ed3 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -368,6 +368,7 @@ int subsys_virtual_register(struct bus_type *subsys, * @suspend: Used to put the device to sleep mode, usually to a low power * state. * @resume: Used to bring the device from the sleep mode. + * @shutdown: Called at shut-down time to quiesce the device. * @ns_type: Callbacks so sysfs can detemine namespaces. * @namespace: Namespace of the device belongs to this class. * @pm: The default device power management operations of this class. @@ -396,6 +397,7 @@ struct class { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); + int (*shutdown)(struct device *dev); const struct kobj_ns_type_operations *ns_type; const void *(*namespace)(struct device *dev); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 7ee1774edee5..a7789559078b 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -310,35 +310,4 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) } } -#if defined(CONFIG_FSNOTIFY) /* notify helpers */ - -/* - * fsnotify_oldname_init - save off the old filename before we change it - */ -static inline const unsigned char *fsnotify_oldname_init(const unsigned char *name) -{ - return kstrdup(name, GFP_KERNEL); -} - -/* - * fsnotify_oldname_free - free the name we got from fsnotify_oldname_init - */ -static inline void fsnotify_oldname_free(const unsigned char *old_name) -{ - kfree(old_name); -} - -#else /* CONFIG_FSNOTIFY */ - -static inline const char *fsnotify_oldname_init(const unsigned char *name) -{ - return NULL; -} - -static inline void fsnotify_oldname_free(const unsigned char *old_name) -{ -} - -#endif /* CONFIG_FSNOTIFY */ - #endif /* _LINUX_FS_NOTIFY_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 1c1ff7e4faa4..021b1e9ff6cd 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -15,6 +15,8 @@ #include <net/net_namespace.h> #include <linux/sched/rt.h> +#include <asm/thread_info.h> + #ifdef CONFIG_SMP # define INIT_PUSHABLE_TASKS(tsk) \ .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), @@ -183,14 +185,21 @@ extern struct task_group root_task_group; # define INIT_KASAN(tsk) #endif +#ifdef CONFIG_THREAD_INFO_IN_TASK +# define INIT_TASK_TI(tsk) .thread_info = INIT_THREAD_INFO(tsk), +#else +# define INIT_TASK_TI(tsk) +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) */ #define INIT_TASK(tsk) \ { \ + INIT_TASK_TI(tsk) \ .state = 0, \ - .stack = &init_thread_info, \ + .stack = init_stack, \ .usage = ATOMIC_INIT(2), \ .flags = PF_KTHREAD, \ .prio = MAX_PRIO-20, \ diff --git a/include/linux/kdb.h b/include/linux/kdb.h index a19bcf9e762e..410decacff8f 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -177,7 +177,7 @@ extern int kdb_get_kbd_char(void); static inline int kdb_process_cpu(const struct task_struct *p) { - unsigned int cpu = task_thread_info(p)->cpu; + unsigned int cpu = task_cpu(p); if (cpu > num_possible_cpus()) cpu = 0; return cpu; diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index 2a6b9947aaa3..743b34f56f2b 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -44,6 +44,7 @@ struct list_lru_node { /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */ struct list_lru_memcg *memcg_lrus; #endif + long nr_items; } ____cacheline_aligned_in_smp; struct list_lru { diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index af25bc12472b..b32cb7add09c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -511,6 +511,10 @@ struct mm_struct { */ bool tlb_flush_pending; #endif +#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH + /* See flush_tlb_batched_pending() */ + bool tlb_flush_batched; +#endif struct uprobes_state uprobes_state; #ifdef CONFIG_X86_INTEL_MPX /* address of the bounds directory */ diff --git a/include/linux/module.h b/include/linux/module.h index b229a9961d02..2bb0c3085706 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -302,6 +302,28 @@ struct mod_tree_node { struct latch_tree_node node; }; +struct module_layout { + /* The actual code + data. */ + void *base; + /* Total size. */ + unsigned int size; + /* The size of the executable code. */ + unsigned int text_size; + /* Size of RO section of the module (text+rodata) */ + unsigned int ro_size; + +#ifdef CONFIG_MODULES_TREE_LOOKUP + struct mod_tree_node mtn; +#endif +}; + +#ifdef CONFIG_MODULES_TREE_LOOKUP +/* Only touch one cacheline for common rbtree-for-core-layout case. */ +#define __module_layout_align ____cacheline_aligned +#else +#define __module_layout_align +#endif + struct mod_kallsyms { Elf_Sym *symtab; unsigned int num_symtab; @@ -372,37 +394,9 @@ struct module { /* Startup function. */ int (*init)(void); - /* - * If this is non-NULL, vfree() after init() returns. - * - * Cacheline align here, such that: - * module_init, module_core, init_size, core_size, - * init_text_size, core_text_size and mtn_core::{mod,node[0]} - * are on the same cacheline. - */ - void *module_init ____cacheline_aligned; - - /* Here is the actual code + data, vfree'd on unload. */ - void *module_core; - - /* Here are the sizes of the init and core sections */ - unsigned int init_size, core_size; - - /* The size of the executable code in each section. */ - unsigned int init_text_size, core_text_size; - -#ifdef CONFIG_MODULES_TREE_LOOKUP - /* - * We want mtn_core::{mod,node[0]} to be in the same cacheline as the - * above entries such that a regular lookup will only touch one - * cacheline. - */ - struct mod_tree_node mtn_core; - struct mod_tree_node mtn_init; -#endif - - /* Size of RO sections of the module (text+rodata) */ - unsigned int init_ro_size, core_ro_size; + /* Core layout: rbtree is accessed frequently, so keep together. */ + struct module_layout core_layout __module_layout_align; + struct module_layout init_layout; /* Arch-specific module values */ struct mod_arch_specific arch; @@ -420,7 +414,7 @@ struct module { /* Protected by RCU and/or module_mutex: use rcu_dereference() */ struct mod_kallsyms *kallsyms; struct mod_kallsyms core_kallsyms; - + /* Section attributes */ struct module_sect_attrs *sect_attrs; @@ -506,15 +500,15 @@ bool is_module_text_address(unsigned long addr); static inline bool within_module_core(unsigned long addr, const struct module *mod) { - return (unsigned long)mod->module_core <= addr && - addr < (unsigned long)mod->module_core + mod->core_size; + return (unsigned long)mod->core_layout.base <= addr && + addr < (unsigned long)mod->core_layout.base + mod->core_layout.size; } static inline bool within_module_init(unsigned long addr, const struct module *mod) { - return (unsigned long)mod->module_init <= addr && - addr < (unsigned long)mod->module_init + mod->init_size; + return (unsigned long)mod->init_layout.base <= addr && + addr < (unsigned long)mod->init_layout.base + mod->init_layout.size; } static inline bool within_module(unsigned long addr, const struct module *mod) @@ -769,9 +763,13 @@ extern int module_sysfs_initialized; #ifdef CONFIG_DEBUG_SET_MODULE_RONX extern void set_all_modules_text_rw(void); extern void set_all_modules_text_ro(void); +extern void module_enable_ro(const struct module *mod); +extern void module_disable_ro(const struct module *mod); #else static inline void set_all_modules_text_rw(void) { } static inline void set_all_modules_text_ro(void) { } +static inline void module_enable_ro(const struct module *mod) { } +static inline void module_disable_ro(const struct module *mod) { } #endif #ifdef CONFIG_GENERIC_BUG diff --git a/include/linux/phy.h b/include/linux/phy.h index f618083d8877..6f273dd5cb4b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -788,6 +788,10 @@ int genphy_read_status(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); int genphy_soft_reset(struct phy_device *phydev); +static inline int genphy_no_soft_reset(struct phy_device *phydev) +{ + return 0; +} void phy_driver_unregister(struct phy_driver *drv); void phy_drivers_unregister(struct phy_driver *drv, int n); int phy_driver_register(struct phy_driver *new_driver); diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 45ac5a0d29ee..7097a45dbc25 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -24,6 +24,13 @@ #include <linux/list.h> #include <linux/types.h> +/* + * Choose whether access to the RAM zone requires locking or not. If a zone + * can be written to from different CPUs like with ftrace for example, then + * PRZ_FLAG_NO_LOCK is used. For all other cases, locking is required. + */ +#define PRZ_FLAG_NO_LOCK BIT(0) + struct persistent_ram_buffer; struct rs_control; @@ -40,6 +47,8 @@ struct persistent_ram_zone { void *vaddr; struct persistent_ram_buffer *buffer; size_t buffer_size; + u32 flags; + raw_spinlock_t buffer_lock; /* ECC correction */ char *par_buffer; @@ -55,7 +64,7 @@ struct persistent_ram_zone { struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, u32 sig, struct persistent_ram_ecc_info *ecc_info, - unsigned int memtype); + unsigned int memtype, u32 flags); void persistent_ram_free(struct persistent_ram_zone *prz); void persistent_ram_zap(struct persistent_ram_zone *prz); diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h new file mode 100644 index 000000000000..0d905d8ec553 --- /dev/null +++ b/include/linux/restart_block.h @@ -0,0 +1,51 @@ +/* + * Common syscall restarting data + */ +#ifndef __LINUX_RESTART_BLOCK_H +#define __LINUX_RESTART_BLOCK_H + +#include <linux/compiler.h> +#include <linux/types.h> + +struct timespec; +struct compat_timespec; +struct pollfd; + +/* + * System call restart block. + */ +struct restart_block { + long (*fn)(struct restart_block *); + union { + /* For futex_wait and futex_wait_requeue_pi */ + struct { + u32 __user *uaddr; + u32 val; + u32 flags; + u32 bitset; + u64 time; + u32 __user *uaddr2; + } futex; + /* For nanosleep */ + struct { + clockid_t clockid; + struct timespec __user *rmtp; +#ifdef CONFIG_COMPAT + struct compat_timespec __user *compat_rmtp; +#endif + u64 expires; + } nanosleep; + /* For poll */ + struct { + struct pollfd __user *ufds; + int nfds; + int has_timeout; + unsigned long tv_sec; + unsigned long tv_nsec; + } poll; + }; +}; + +extern long do_no_restart_syscall(struct restart_block *parm); + +#endif /* __LINUX_RESTART_BLOCK_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index ad2c304b29b8..436c308bb1d5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -813,6 +813,16 @@ struct signal_struct { #define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ +#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ + SIGNAL_STOP_CONTINUED) + +static inline void signal_set_stop_flags(struct signal_struct *sig, + unsigned int flags) +{ + WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); + sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; +} + /* If true, all threads except ->group_exit_task have pending SIGKILL */ static inline int signal_group_exit(const struct signal_struct *sig) { @@ -1518,6 +1528,13 @@ struct tlbflush_unmap_batch { }; struct task_struct { +#ifdef CONFIG_THREAD_INFO_IN_TASK + /* + * For reasons of header soup (see current_thread_info()), this + * must be the first element of task_struct. + */ + struct thread_info thread_info; +#endif volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; atomic_t usage; @@ -1527,6 +1544,9 @@ struct task_struct { #ifdef CONFIG_SMP struct llist_node wake_entry; int on_cpu; +#ifdef CONFIG_THREAD_INFO_IN_TASK + unsigned int cpu; /* current CPU */ +#endif unsigned int wakee_flips; unsigned long wakee_flip_decay_ts; struct task_struct *last_wakee; @@ -2556,7 +2576,9 @@ extern void set_curr_task(int cpu, struct task_struct *p); void yield(void); union thread_union { +#ifndef CONFIG_THREAD_INFO_IN_TASK struct thread_info thread_info; +#endif unsigned long stack[THREAD_SIZE/sizeof(long)]; }; @@ -2946,10 +2968,34 @@ static inline void threadgroup_change_end(struct task_struct *tsk) cgroup_threadgroup_change_end(tsk); } -#ifndef __HAVE_THREAD_FUNCTIONS +#ifdef CONFIG_THREAD_INFO_IN_TASK + +static inline struct thread_info *task_thread_info(struct task_struct *task) +{ + return &task->thread_info; +} + +/* + * When accessing the stack of a non-current task that might exit, use + * try_get_task_stack() instead. task_stack_page will return a pointer + * that could get freed out from under you. + */ +static inline void *task_stack_page(const struct task_struct *task) +{ + return task->stack; +} + +#define setup_thread_stack(new,old) do { } while(0) + +static inline unsigned long *end_of_stack(const struct task_struct *task) +{ + return task->stack; +} + +#elif !defined(__HAVE_THREAD_FUNCTIONS) #define task_thread_info(task) ((struct thread_info *)(task)->stack) -#define task_stack_page(task) ((task)->stack) +#define task_stack_page(task) ((void *)(task)->stack) static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) { @@ -2976,6 +3022,14 @@ static inline unsigned long *end_of_stack(struct task_struct *p) } #endif + +static inline void *try_get_task_stack(struct task_struct *tsk) +{ + return task_stack_page(tsk); +} + +static inline void put_task_stack(struct task_struct *tsk) {} + #define task_stack_end_corrupted(task) \ (*(end_of_stack(task)) != STACK_END_MAGIC) @@ -2986,7 +3040,7 @@ static inline int object_is_on_stack(void *obj) return (obj >= stack) && (obj < (stack + THREAD_SIZE)); } -extern void thread_info_cache_init(void); +extern void thread_stack_cache_init(void); #ifdef CONFIG_DEBUG_STACK_USAGE static inline unsigned long stack_not_used(struct task_struct *p) @@ -3241,7 +3295,11 @@ static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) static inline unsigned int task_cpu(const struct task_struct *p) { +#ifdef CONFIG_THREAD_INFO_IN_TASK + return p->cpu; +#else return task_thread_info(p)->cpu; +#endif } static inline int task_node(const struct task_struct *p) diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index dde00defbaa5..f3d45dd42695 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -7,13 +7,10 @@ #include <linux/mutex.h> #include <linux/cpumask.h> #include <linux/nodemask.h> +#include <linux/fs.h> +#include <linux/cred.h> struct seq_operations; -struct file; -struct path; -struct inode; -struct dentry; -struct user_namespace; struct seq_file { char *buf; @@ -27,9 +24,7 @@ struct seq_file { struct mutex lock; const struct seq_operations *op; int poll_event; -#ifdef CONFIG_USER_NS - struct user_namespace *user_ns; -#endif + const struct file *file; void *private; }; @@ -147,7 +142,7 @@ int seq_release_private(struct inode *, struct file *); static inline struct user_namespace *seq_user_ns(struct seq_file *seq) { #ifdef CONFIG_USER_NS - return seq->user_ns; + return seq->file->f_cred->user_ns; #else extern struct user_namespace init_user_ns; return &init_user_ns; diff --git a/include/linux/slab.h b/include/linux/slab.h index 4ef384b172e0..b4e739f04ee6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -215,7 +215,7 @@ static inline const char *__check_heap_object(const void *ptr, * (PAGE_SIZE*2). Larger requests are passed to the page allocator. */ #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) -#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT) +#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1) #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 3 #endif @@ -228,7 +228,7 @@ static inline const char *__check_heap_object(const void *ptr, * be allocated from the same page. */ #define KMALLOC_SHIFT_HIGH PAGE_SHIFT -#define KMALLOC_SHIFT_MAX 30 +#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1) #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 3 #endif diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 4cf89517783a..8933ecc2bc9f 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -9,46 +9,17 @@ #include <linux/types.h> #include <linux/bug.h> +#include <linux/restart_block.h> -struct timespec; -struct compat_timespec; - +#ifdef CONFIG_THREAD_INFO_IN_TASK /* - * System call restart block. + * For CONFIG_THREAD_INFO_IN_TASK kernels we need <asm/current.h> for the + * definition of current, but for !CONFIG_THREAD_INFO_IN_TASK kernels, + * including <asm/current.h> can cause a circular dependency on some platforms. */ -struct restart_block { - long (*fn)(struct restart_block *); - union { - /* For futex_wait and futex_wait_requeue_pi */ - struct { - u32 __user *uaddr; - u32 val; - u32 flags; - u32 bitset; - u64 time; - u32 __user *uaddr2; - } futex; - /* For nanosleep */ - struct { - clockid_t clockid; - struct timespec __user *rmtp; -#ifdef CONFIG_COMPAT - struct compat_timespec __user *compat_rmtp; +#include <asm/current.h> +#define current_thread_info() ((struct thread_info *)current) #endif - u64 expires; - } nanosleep; - /* For poll */ - struct { - struct pollfd __user *ufds; - int nfds; - int has_timeout; - unsigned long tv_sec; - unsigned long tv_nsec; - } poll; - }; -}; - -extern long do_no_restart_syscall(struct restart_block *parm); #include <linux/bitops.h> #include <asm/thread_info.h> diff --git a/include/linux/tick.h b/include/linux/tick.h index e312219ff823..af5ac7f91a3b 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -103,6 +103,7 @@ extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); +extern unsigned long tick_nohz_get_idle_calls(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); #else /* !CONFIG_NO_HZ_COMMON */ diff --git a/include/linux/vfio.h b/include/linux/vfio.h index ddb440975382..34851bf2e2c8 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -85,6 +85,8 @@ extern void vfio_unregister_iommu_driver( */ extern struct vfio_group *vfio_group_get_external_user(struct file *filep); extern void vfio_group_put_external_user(struct vfio_group *group); +extern bool vfio_external_group_match_file(struct vfio_group *group, + struct file *filep); extern int vfio_external_user_iommu_id(struct vfio_group *group); extern long vfio_external_check_extension(struct vfio_group *group, unsigned long arg); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 0197358f1e81..262d5c95dfc8 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -311,6 +311,7 @@ enum { __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ + __WQ_ORDERED_EXPLICIT = 1 << 18, /* internal: alloc_ordered_workqueue() */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ @@ -408,7 +409,8 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue(fmt, flags, args...) \ - alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) + alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | \ + __WQ_ORDERED_EXPLICIT | (flags), 1, ##args) #define create_workqueue(name) \ alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, (name)) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 814a13d22df6..f9bdfb096579 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -21,6 +21,7 @@ struct route_info { #include <net/flow.h> #include <net/ip6_fib.h> #include <net/sock.h> +#include <net/lwtunnel.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/route.h> @@ -209,4 +210,11 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, return daddr; } +static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b) +{ + return a->dst.dev == b->dst.dev && + a->rt6i_idev == b->rt6i_idev && + ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) && + !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate); +} #endif diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index e0f4109e64c6..c2aa73e5e6bb 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -556,7 +556,8 @@ iwe_stream_add_point(struct iw_request_info *info, char *stream, char *ends, memcpy(stream + lcp_len, ((char *) &iwe->u) + IW_EV_POINT_OFF, IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN); - memcpy(stream + point_len, extra, iwe->u.data.length); + if (iwe->u.data.length && extra) + memcpy(stream + point_len, extra, iwe->u.data.length); stream += event_len; } return stream; diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index ce13cf20f625..d33b17ba51d2 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -444,6 +444,8 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) #define _sctp_walk_params(pos, chunk, end, member)\ for (pos.v = chunk->member;\ + (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <=\ + (void *)chunk + end) &&\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ pos.v += WORD_ROUND(ntohs(pos.p->length))) @@ -454,6 +456,8 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) #define _sctp_walk_errors(err, chunk_hdr, end)\ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ sizeof(sctp_chunkhdr_t));\ + ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\ + (void *)chunk_hdr + end) &&\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ err = (sctp_errhdr_t *)((void *)err + WORD_ROUND(ntohs(err->length)))) diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 373d3342002b..22f442ab85f9 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -64,6 +64,14 @@ #define TA_DEFAULT_FABRIC_PROT_TYPE 0 /* TPG status needs to be enabled to return sendtargets discovery endpoint info */ #define TA_DEFAULT_TPG_ENABLED_SENDTARGETS 1 +/* + * Used to control the sending of keys with optional to respond state bit, + * as a workaround for non RFC compliant initiators,that do not propose, + * nor respond to specific keys required for login to complete. + * + * See iscsi_check_proposer_for_optional_reply() for more details. + */ +#define TA_DEFAULT_LOGIN_KEYS_WORKAROUND 1 #define ISCSI_IOV_DATA_BUFFER 5 @@ -554,6 +562,7 @@ struct iscsi_conn { #define LOGIN_FLAGS_READ_ACTIVE 1 #define LOGIN_FLAGS_CLOSED 2 #define LOGIN_FLAGS_READY 4 +#define LOGIN_FLAGS_INITIAL_PDU 8 unsigned long login_flags; struct delayed_work login_work; struct delayed_work login_cleanup_work; @@ -765,6 +774,7 @@ struct iscsi_tpg_attrib { u8 t10_pi; u32 fabric_prot_type; u32 tpg_enabled_sendtargets; + u32 login_keys_workaround; struct iscsi_portal_group *tpg; }; @@ -774,6 +784,7 @@ struct iscsi_np { int np_sock_type; enum np_thread_state_table np_thread_state; bool enabled; + atomic_t np_reset_count; enum iscsi_timer_flags_table np_login_timer_flags; u32 np_exports; enum np_flags_table np_flags; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index ed66414b91f0..1adf8739980c 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -714,6 +714,7 @@ struct se_lun { #define SE_LUN_LINK_MAGIC 0xffff7771 u32 lun_link_magic; u32 lun_access; + bool lun_shutdown; u32 lun_index; /* RELATIVE TARGET PORT IDENTIFER */ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 433d3919ba00..8b97464803f1 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -878,6 +878,48 @@ TRACE_EVENT(sched_boost_task, ); /* + * Tracepoint for find_best_target + */ +TRACE_EVENT(sched_find_best_target, + + TP_PROTO(struct task_struct *tsk, bool prefer_idle, + unsigned long min_util, int start_cpu, + int best_idle, int best_active, int target), + + TP_ARGS(tsk, prefer_idle, min_util, start_cpu, + best_idle, best_active, target), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( unsigned long, min_util ) + __field( bool, prefer_idle ) + __field( int, start_cpu ) + __field( int, best_idle ) + __field( int, best_active ) + __field( int, target ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->min_util = min_util; + __entry->prefer_idle = prefer_idle; + __entry->start_cpu = start_cpu; + __entry->best_idle = best_idle; + __entry->best_active = best_active; + __entry->target = target; + ), + + TP_printk("pid=%d comm=%s prefer_idle=%d start_cpu=%d " + "best_idle=%d best_active=%d target=%d", + __entry->pid, __entry->comm, + __entry->prefer_idle, __entry->start_cpu, + __entry->best_idle, __entry->best_active, + __entry->target) +); + +/* * Tracepoint for accounting sched group energy */ TRACE_EVENT(sched_energy_diff, diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 7668b5791c91..5539933b3491 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -37,9 +37,56 @@ enum { BINDER_TYPE_PTR = B_PACK_CHARS('p', 't', '*', B_TYPE_LARGE), }; -enum { +/** + * enum flat_binder_object_shifts: shift values for flat_binder_object_flags + * @FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT: shift for getting scheduler policy. + * + */ +enum flat_binder_object_shifts { + FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT = 9, +}; + +/** + * enum flat_binder_object_flags - flags for use in flat_binder_object.flags + */ +enum flat_binder_object_flags { + /** + * @FLAT_BINDER_FLAG_PRIORITY_MASK: bit-mask for min scheduler priority + * + * These bits can be used to set the minimum scheduler priority + * at which transactions into this node should run. Valid values + * in these bits depend on the scheduler policy encoded in + * @FLAT_BINDER_FLAG_SCHED_POLICY_MASK. + * + * For SCHED_NORMAL/SCHED_BATCH, the valid range is between [-20..19] + * For SCHED_FIFO/SCHED_RR, the value can run between [1..99] + */ FLAT_BINDER_FLAG_PRIORITY_MASK = 0xff, + /** + * @FLAT_BINDER_FLAG_ACCEPTS_FDS: whether the node accepts fds. + */ FLAT_BINDER_FLAG_ACCEPTS_FDS = 0x100, + /** + * @FLAT_BINDER_FLAG_SCHED_POLICY_MASK: bit-mask for scheduling policy + * + * These two bits can be used to set the min scheduling policy at which + * transactions on this node should run. These match the UAPI + * scheduler policy values, eg: + * 00b: SCHED_NORMAL + * 01b: SCHED_FIFO + * 10b: SCHED_RR + * 11b: SCHED_BATCH + */ + FLAT_BINDER_FLAG_SCHED_POLICY_MASK = + 3U << FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT, + + /** + * @FLAT_BINDER_FLAG_INHERIT_RT: whether the node inherits RT policy + * + * Only when set, calls into this node will inherit a real-time + * scheduling policy from the caller (for synchronous transactions). + */ + FLAT_BINDER_FLAG_INHERIT_RT = 0x800, }; #ifdef BINDER_IPC_32BIT @@ -186,6 +233,19 @@ struct binder_version { #define BINDER_CURRENT_PROTOCOL_VERSION 8 #endif +/* + * Use with BINDER_GET_NODE_DEBUG_INFO, driver reads ptr, writes to all fields. + * Set ptr to NULL for the first call to get the info for the first node, and + * then repeat the call passing the previously returned value to get the next + * nodes. ptr will be 0 when there are no more nodes. + */ +struct binder_node_debug_info { + binder_uintptr_t ptr; + binder_uintptr_t cookie; + __u32 has_strong_ref; + __u32 has_weak_ref; +}; + #define BINDER_WRITE_READ _IOWR('b', 1, struct binder_write_read) #define BINDER_SET_IDLE_TIMEOUT _IOW('b', 3, __s64) #define BINDER_SET_MAX_THREADS _IOW('b', 5, __u32) @@ -193,6 +253,7 @@ struct binder_version { #define BINDER_SET_CONTEXT_MGR _IOW('b', 7, __s32) #define BINDER_THREAD_EXIT _IOW('b', 8, __s32) #define BINDER_VERSION _IOWR('b', 9, struct binder_version) +#define BINDER_GET_NODE_DEBUG_INFO _IOWR('b', 11, struct binder_node_debug_info) /* * NOTE: Two special error codes you should check for when calling diff --git a/init/Kconfig b/init/Kconfig index 445af1262134..f5500e552254 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -26,6 +26,16 @@ config IRQ_WORK config BUILDTIME_EXTABLE_SORT bool +config THREAD_INFO_IN_TASK + bool + help + Select this to move thread_info off the stack into task_struct. To + make this work, an arch will need to remove all thread_info fields + except flags and fix any runtime bugs. + + One subtle change that will be needed is to use try_get_task_stack() + and put_task_stack() in save_thread_stack_tsk() and get_wchan(). + menu "General setup" config BROKEN diff --git a/init/init_task.c b/init/init_task.c index ba0a7f362d9e..11f83be1fa79 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -22,5 +22,8 @@ EXPORT_SYMBOL(init_task); * Initial thread structure. Alignment of this is handled by a special * linker map entry. */ -union thread_union init_thread_union __init_task_data = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = { +#ifndef CONFIG_THREAD_INFO_IN_TASK + INIT_THREAD_INFO(init_task) +#endif +}; diff --git a/init/main.c b/init/main.c index fbafa271531c..86f5ce9ede86 100644 --- a/init/main.c +++ b/init/main.c @@ -468,7 +468,7 @@ void __init __weak smp_setup_processor_id(void) } # if THREAD_SIZE >= PAGE_SIZE -void __init __weak thread_info_cache_init(void) +void __init __weak thread_stack_cache_init(void) { } #endif @@ -645,7 +645,7 @@ asmlinkage __visible void __init start_kernel(void) /* Should be run before the first non-init thread is created */ init_espfix_bsp(); #endif - thread_info_cache_init(); + thread_stack_cache_init(); cred_init(); fork_init(); proc_caches_init(); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 85de5094b936..c97bce6a0e0e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -765,6 +765,11 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) if (err) return err; + if (is_pointer_value(env, insn->src_reg)) { + verbose("R%d leaks addr into mem\n", insn->src_reg); + return -EACCES; + } + /* check whether atomic_add can read the memory */ err = check_mem_access(env, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 146292b32673..f93a9f9b4b97 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -60,6 +60,7 @@ #include <linux/cgroup.h> #include <linux/wait.h> +struct static_key cpusets_pre_enable_key __read_mostly = STATIC_KEY_INIT_FALSE; struct static_key cpusets_enabled_key __read_mostly = STATIC_KEY_INIT_FALSE; /* See "Frequency meter" comments, below. */ diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 4121345498e0..2a20c0dfdafc 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2021,7 +2021,7 @@ static int kdb_lsmod(int argc, const char **argv) continue; kdb_printf("%-20s%8u 0x%p ", mod->name, - mod->core_size, (void *)mod); + mod->core_layout.size, (void *)mod); #ifdef CONFIG_MODULE_UNLOAD kdb_printf("%4d ", module_refcount(mod)); #endif @@ -2031,7 +2031,7 @@ static int kdb_lsmod(int argc, const char **argv) kdb_printf(" (Loading)"); else kdb_printf(" (Live)"); - kdb_printf(" 0x%p", mod->module_core); + kdb_printf(" 0x%p", mod->core_layout.base); #ifdef CONFIG_MODULE_UNLOAD { diff --git a/kernel/events/core.c b/kernel/events/core.c index 062564729284..3421b1271970 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6540,21 +6540,6 @@ static void perf_log_itrace_start(struct perf_event *event) perf_output_end(&handle); } -static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs) -{ - /* - * Due to interrupt latency (AKA "skid"), we may enter the - * kernel before taking an overflow, even if the PMU is only - * counting user events. - * To avoid leaking information to userspace, we must always - * reject kernel samples when exclude_kernel is set. - */ - if (event->attr.exclude_kernel && !user_mode(regs)) - return false; - - return true; -} - /* * Generic event overflow handling, sampling. */ @@ -6602,12 +6587,6 @@ static int __perf_event_overflow(struct perf_event *event, } /* - * For security, drop the skid kernel samples if necessary. - */ - if (!sample_is_allowed(event, regs)) - return ret; - - /* * XXX event_limit might not quite work as expected on inherited * events */ diff --git a/kernel/extable.c b/kernel/extable.c index e820ccee9846..4f06fc34313f 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -66,7 +66,7 @@ static inline int init_kernel_text(unsigned long addr) return 0; } -int core_kernel_text(unsigned long addr) +int notrace core_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_stext && addr < (unsigned long)_etext) diff --git a/kernel/fork.c b/kernel/fork.c index 968917653c2c..5ee818516a1c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -147,18 +147,18 @@ static inline void free_task_struct(struct task_struct *tsk) } #endif -void __weak arch_release_thread_info(struct thread_info *ti) +void __weak arch_release_thread_stack(unsigned long *stack) { } -#ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR +#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR /* * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a * kmemcache based allocator. */ # if THREAD_SIZE >= PAGE_SIZE -static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, +static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) { struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP, @@ -167,29 +167,31 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, return page ? page_address(page) : NULL; } -static inline void free_thread_info(struct thread_info *ti) +static inline void free_thread_stack(unsigned long *stack) { - free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER); + struct page *page = virt_to_page(stack); + + __free_kmem_pages(page, THREAD_SIZE_ORDER); } # else -static struct kmem_cache *thread_info_cache; +static struct kmem_cache *thread_stack_cache; -static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, +static struct thread_info *alloc_thread_stack_node(struct task_struct *tsk, int node) { - return kmem_cache_alloc_node(thread_info_cache, THREADINFO_GFP, node); + return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); } -static void free_thread_info(struct thread_info *ti) +static void free_stack(unsigned long *stack) { - kmem_cache_free(thread_info_cache, ti); + kmem_cache_free(thread_stack_cache, stack); } -void thread_info_cache_init(void) +void thread_stack_cache_init(void) { - thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE, + thread_stack_cache = kmem_cache_create("thread_stack", THREAD_SIZE, THREAD_SIZE, 0, NULL); - BUG_ON(thread_info_cache == NULL); + BUG_ON(thread_stack_cache == NULL); } # endif #endif @@ -212,9 +214,9 @@ struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; -static void account_kernel_stack(struct thread_info *ti, int account) +static void account_kernel_stack(unsigned long *stack, int account) { - struct zone *zone = page_zone(virt_to_page(ti)); + struct zone *zone = page_zone(virt_to_page(stack)); mod_zone_page_state(zone, NR_KERNEL_STACK, account); } @@ -222,8 +224,8 @@ static void account_kernel_stack(struct thread_info *ti, int account) void free_task(struct task_struct *tsk) { account_kernel_stack(tsk->stack, -1); - arch_release_thread_info(tsk->stack); - free_thread_info(tsk->stack); + arch_release_thread_stack(tsk->stack); + free_thread_stack(tsk->stack); rt_mutex_debug_task_free(tsk); ftrace_graph_exit_task(tsk); put_seccomp_filter(tsk); @@ -334,7 +336,7 @@ void set_task_stack_end_magic(struct task_struct *tsk) static struct task_struct *dup_task_struct(struct task_struct *orig, int node) { struct task_struct *tsk; - struct thread_info *ti; + unsigned long *stack; int err; if (node == NUMA_NO_NODE) @@ -343,15 +345,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) if (!tsk) return NULL; - ti = alloc_thread_info_node(tsk, node); - if (!ti) + stack = alloc_thread_stack_node(tsk, node); + if (!stack) goto free_tsk; err = arch_dup_task_struct(tsk, orig); if (err) - goto free_ti; + goto free_stack; - tsk->stack = ti; + tsk->stack = stack; #ifdef CONFIG_SECCOMP /* * We must handle setting up seccomp filters once we're under @@ -383,12 +385,12 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->task_frag.page = NULL; tsk->wake_q.next = NULL; - account_kernel_stack(ti, 1); + account_kernel_stack(stack, 1); return tsk; -free_ti: - free_thread_info(ti); +free_stack: + free_thread_stack(stack); free_tsk: free_task_struct(tsk); return NULL; @@ -827,8 +829,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) mm = get_task_mm(task); if (mm && mm != current->mm && - !ptrace_may_access(task, mode) && - !capable(CAP_SYS_RESOURCE)) { + !ptrace_may_access(task, mode)) { mmput(mm); mm = ERR_PTR(-EACCES); } diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c index 7080ae1eb6c1..2f9df37940a0 100644 --- a/kernel/gcov/base.c +++ b/kernel/gcov/base.c @@ -123,11 +123,6 @@ void gcov_enable_events(void) } #ifdef CONFIG_MODULES -static inline int within(void *addr, void *start, unsigned long size) -{ - return ((addr >= start) && (addr < start + size)); -} - /* Update list and generate events when modules are unloaded. */ static int gcov_module_notifier(struct notifier_block *nb, unsigned long event, void *data) @@ -142,7 +137,7 @@ static int gcov_module_notifier(struct notifier_block *nb, unsigned long event, /* Remove entries located in module from linked list. */ while ((info = gcov_info_next(info))) { - if (within(info, mod->module_core, mod->core_size)) { + if (within_module((unsigned long)info, mod)) { gcov_info_unlink(prev, info); if (gcov_events_enabled) gcov_event(GCOV_REMOVE, info); diff --git a/kernel/kthread.c b/kernel/kthread.c index 698b8dec3074..d9b0be5c6a5f 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -65,7 +65,7 @@ static inline struct kthread *to_kthread(struct task_struct *k) static struct kthread *to_live_kthread(struct task_struct *k) { struct completion *vfork = ACCESS_ONCE(k->vfork_done); - if (likely(vfork)) + if (likely(vfork) && try_get_task_stack(k)) return __to_kthread(vfork); return NULL; } @@ -427,8 +427,10 @@ void kthread_unpark(struct task_struct *k) { struct kthread *kthread = to_live_kthread(k); - if (kthread) + if (kthread) { __kthread_unpark(k, kthread); + put_task_stack(k); + } } EXPORT_SYMBOL_GPL(kthread_unpark); @@ -457,6 +459,7 @@ int kthread_park(struct task_struct *k) wait_for_completion(&kthread->parked); } } + put_task_stack(k); ret = 0; } return ret; @@ -492,6 +495,7 @@ int kthread_stop(struct task_struct *k) __kthread_unpark(k, kthread); wake_up_process(k); wait_for_completion(&kthread->exited); + put_task_stack(k); } ret = k->exit_code; put_task_struct(k); diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 3ef3736002d8..9c951fade415 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -49,21 +49,21 @@ void debug_mutex_free_waiter(struct mutex_waiter *waiter) } void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, - struct thread_info *ti) + struct task_struct *task) { SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); /* Mark the current thread as blocked on the lock: */ - ti->task->blocked_on = waiter; + task->blocked_on = waiter; } void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, - struct thread_info *ti) + struct task_struct *task) { DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); - DEBUG_LOCKS_WARN_ON(waiter->task != ti->task); - DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter); - ti->task->blocked_on = NULL; + DEBUG_LOCKS_WARN_ON(waiter->task != task); + DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter); + task->blocked_on = NULL; list_del_init(&waiter->list); waiter->task = NULL; diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h index 0799fd3e4cfa..d06ae3bb46c5 100644 --- a/kernel/locking/mutex-debug.h +++ b/kernel/locking/mutex-debug.h @@ -20,9 +20,9 @@ extern void debug_mutex_wake_waiter(struct mutex *lock, extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); extern void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, - struct thread_info *ti); + struct task_struct *task); extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, - struct thread_info *ti); + struct task_struct *task); extern void debug_mutex_unlock(struct mutex *lock); extern void debug_mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key); diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 89350f924c85..f42f83a36506 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -537,7 +537,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, goto skip_wait; debug_mutex_lock_common(lock, &waiter); - debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); + debug_mutex_add_waiter(lock, &waiter, task); /* add waiting tasks to the end of the waitqueue (FIFO): */ list_add_tail(&waiter.list, &lock->wait_list); @@ -584,7 +584,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } __set_task_state(task, TASK_RUNNING); - mutex_remove_waiter(lock, &waiter, current_thread_info()); + mutex_remove_waiter(lock, &waiter, task); /* set it to 0 if there are no waiters left: */ if (likely(list_empty(&lock->wait_list))) atomic_set(&lock->count, 0); @@ -605,7 +605,7 @@ skip_wait: return 0; err: - mutex_remove_waiter(lock, &waiter, task_thread_info(task)); + mutex_remove_waiter(lock, &waiter, task); spin_unlock_mutex(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); mutex_release(&lock->dep_map, 1, ip); diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 5cda397607f2..a68bae5e852a 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -13,7 +13,7 @@ do { spin_lock(lock); (void)(flags); } while (0) #define spin_unlock_mutex(lock, flags) \ do { spin_unlock(lock); (void)(flags); } while (0) -#define mutex_remove_waiter(lock, waiter, ti) \ +#define mutex_remove_waiter(lock, waiter, task) \ __list_del((waiter)->list.prev, (waiter)->list.next) #ifdef CONFIG_MUTEX_SPIN_ON_OWNER diff --git a/kernel/module.c b/kernel/module.c index b14a4f31221f..63c54d644bd4 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -80,15 +80,6 @@ # define debug_align(X) (X) #endif -/* - * Given BASE and SIZE this macro calculates the number of pages the - * memory regions occupies - */ -#define MOD_NUMBER_OF_PAGES(BASE, SIZE) (((SIZE) > 0) ? \ - (PFN_DOWN((unsigned long)(BASE) + (SIZE) - 1) - \ - PFN_DOWN((unsigned long)BASE) + 1) \ - : (0UL)) - /* If this is set, the section belongs in the init part of the module */ #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) @@ -108,13 +99,6 @@ static LIST_HEAD(modules); * Use a latched RB-tree for __module_address(); this allows us to use * RCU-sched lookups of the address from any context. * - * Because modules have two address ranges: init and core, we need two - * latch_tree_nodes entries. Therefore we need the back-pointer from - * mod_tree_node. - * - * Because init ranges are short lived we mark them unlikely and have placed - * them outside the critical cacheline in struct module. - * * This is conditional on PERF_EVENTS || TRACING because those can really hit * __module_address() hard by doing a lot of stack unwinding; potentially from * NMI context. @@ -122,24 +106,16 @@ static LIST_HEAD(modules); static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n) { - struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node); - struct module *mod = mtn->mod; + struct module_layout *layout = container_of(n, struct module_layout, mtn.node); - if (unlikely(mtn == &mod->mtn_init)) - return (unsigned long)mod->module_init; - - return (unsigned long)mod->module_core; + return (unsigned long)layout->base; } static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n) { - struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node); - struct module *mod = mtn->mod; - - if (unlikely(mtn == &mod->mtn_init)) - return (unsigned long)mod->init_size; + struct module_layout *layout = container_of(n, struct module_layout, mtn.node); - return (unsigned long)mod->core_size; + return (unsigned long)layout->size; } static __always_inline bool @@ -197,23 +173,23 @@ static void __mod_tree_remove(struct mod_tree_node *node) */ static void mod_tree_insert(struct module *mod) { - mod->mtn_core.mod = mod; - mod->mtn_init.mod = mod; + mod->core_layout.mtn.mod = mod; + mod->init_layout.mtn.mod = mod; - __mod_tree_insert(&mod->mtn_core); - if (mod->init_size) - __mod_tree_insert(&mod->mtn_init); + __mod_tree_insert(&mod->core_layout.mtn); + if (mod->init_layout.size) + __mod_tree_insert(&mod->init_layout.mtn); } static void mod_tree_remove_init(struct module *mod) { - if (mod->init_size) - __mod_tree_remove(&mod->mtn_init); + if (mod->init_layout.size) + __mod_tree_remove(&mod->init_layout.mtn); } static void mod_tree_remove(struct module *mod) { - __mod_tree_remove(&mod->mtn_core); + __mod_tree_remove(&mod->core_layout.mtn); mod_tree_remove_init(mod); } @@ -267,9 +243,9 @@ static void __mod_update_bounds(void *base, unsigned int size) static void mod_update_bounds(struct module *mod) { - __mod_update_bounds(mod->module_core, mod->core_size); - if (mod->init_size) - __mod_update_bounds(mod->module_init, mod->init_size); + __mod_update_bounds(mod->core_layout.base, mod->core_layout.size); + if (mod->init_layout.size) + __mod_update_bounds(mod->init_layout.base, mod->init_layout.size); } #ifdef CONFIG_KGDB_KDB @@ -1217,7 +1193,7 @@ struct module_attribute module_uevent = static ssize_t show_coresize(struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { - return sprintf(buffer, "%u\n", mk->mod->core_size); + return sprintf(buffer, "%u\n", mk->mod->core_layout.size); } static struct module_attribute modinfo_coresize = @@ -1226,7 +1202,7 @@ static struct module_attribute modinfo_coresize = static ssize_t show_initsize(struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { - return sprintf(buffer, "%u\n", mk->mod->init_size); + return sprintf(buffer, "%u\n", mk->mod->init_layout.size); } static struct module_attribute modinfo_initsize = @@ -1876,64 +1852,75 @@ static void mod_sysfs_teardown(struct module *mod) /* * LKM RO/NX protection: protect module's text/ro-data * from modification and any data from execution. + * + * General layout of module is: + * [text] [read-only-data] [writable data] + * text_size -----^ ^ ^ + * ro_size ------------------------| | + * size -------------------------------------------| + * + * These values are always page-aligned (as is base) */ -void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, int num_pages)) +static void frob_text(const struct module_layout *layout, + int (*set_memory)(unsigned long start, int num_pages)) { - unsigned long begin_pfn = PFN_DOWN((unsigned long)start); - unsigned long end_pfn = PFN_DOWN((unsigned long)end); + BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1)); + BUG_ON((unsigned long)layout->text_size & (PAGE_SIZE-1)); + set_memory((unsigned long)layout->base, + layout->text_size >> PAGE_SHIFT); +} - if (end_pfn > begin_pfn) - set(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); +static void frob_rodata(const struct module_layout *layout, + int (*set_memory)(unsigned long start, int num_pages)) +{ + BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1)); + BUG_ON((unsigned long)layout->text_size & (PAGE_SIZE-1)); + BUG_ON((unsigned long)layout->ro_size & (PAGE_SIZE-1)); + set_memory((unsigned long)layout->base + layout->text_size, + (layout->ro_size - layout->text_size) >> PAGE_SHIFT); } -static void set_section_ro_nx(void *base, - unsigned long text_size, - unsigned long ro_size, - unsigned long total_size) +static void frob_writable_data(const struct module_layout *layout, + int (*set_memory)(unsigned long start, int num_pages)) { - /* begin and end PFNs of the current subsection */ - unsigned long begin_pfn; - unsigned long end_pfn; + BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1)); + BUG_ON((unsigned long)layout->ro_size & (PAGE_SIZE-1)); + BUG_ON((unsigned long)layout->size & (PAGE_SIZE-1)); + set_memory((unsigned long)layout->base + layout->ro_size, + (layout->size - layout->ro_size) >> PAGE_SHIFT); +} - /* - * Set RO for module text and RO-data: - * - Always protect first page. - * - Do not protect last partial page. - */ - if (ro_size > 0) - set_page_attributes(base, base + ro_size, set_memory_ro); +/* livepatching wants to disable read-only so it can frob module. */ +void module_disable_ro(const struct module *mod) +{ + frob_text(&mod->core_layout, set_memory_rw); + frob_rodata(&mod->core_layout, set_memory_rw); + frob_text(&mod->init_layout, set_memory_rw); + frob_rodata(&mod->init_layout, set_memory_rw); +} - /* - * Set NX permissions for module data: - * - Do not protect first partial page. - * - Always protect last page. - */ - if (total_size > text_size) { - begin_pfn = PFN_UP((unsigned long)base + text_size); - end_pfn = PFN_UP((unsigned long)base + total_size); - if (end_pfn > begin_pfn) - set_memory_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); - } +void module_enable_ro(const struct module *mod) +{ + frob_text(&mod->core_layout, set_memory_ro); + frob_rodata(&mod->core_layout, set_memory_ro); + frob_text(&mod->init_layout, set_memory_ro); + frob_rodata(&mod->init_layout, set_memory_ro); } -static void unset_module_core_ro_nx(struct module *mod) +static void module_enable_nx(const struct module *mod) { - set_page_attributes(mod->module_core + mod->core_text_size, - mod->module_core + mod->core_size, - set_memory_x); - set_page_attributes(mod->module_core, - mod->module_core + mod->core_ro_size, - set_memory_rw); + frob_rodata(&mod->core_layout, set_memory_nx); + frob_writable_data(&mod->core_layout, set_memory_nx); + frob_rodata(&mod->init_layout, set_memory_nx); + frob_writable_data(&mod->init_layout, set_memory_nx); } -static void unset_module_init_ro_nx(struct module *mod) +static void module_disable_nx(const struct module *mod) { - set_page_attributes(mod->module_init + mod->init_text_size, - mod->module_init + mod->init_size, - set_memory_x); - set_page_attributes(mod->module_init, - mod->module_init + mod->init_ro_size, - set_memory_rw); + frob_rodata(&mod->core_layout, set_memory_x); + frob_writable_data(&mod->core_layout, set_memory_x); + frob_rodata(&mod->init_layout, set_memory_x); + frob_writable_data(&mod->init_layout, set_memory_x); } /* Iterate through all modules and set each module's text as RW */ @@ -1945,16 +1932,9 @@ void set_all_modules_text_rw(void) list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; - if ((mod->module_core) && (mod->core_text_size)) { - set_page_attributes(mod->module_core, - mod->module_core + mod->core_text_size, - set_memory_rw); - } - if ((mod->module_init) && (mod->init_text_size)) { - set_page_attributes(mod->module_init, - mod->module_init + mod->init_text_size, - set_memory_rw); - } + + frob_text(&mod->core_layout, set_memory_rw); + frob_text(&mod->init_layout, set_memory_rw); } mutex_unlock(&module_mutex); } @@ -1968,23 +1948,25 @@ void set_all_modules_text_ro(void) list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; - if ((mod->module_core) && (mod->core_text_size)) { - set_page_attributes(mod->module_core, - mod->module_core + mod->core_text_size, - set_memory_ro); - } - if ((mod->module_init) && (mod->init_text_size)) { - set_page_attributes(mod->module_init, - mod->module_init + mod->init_text_size, - set_memory_ro); - } + + frob_text(&mod->core_layout, set_memory_ro); + frob_text(&mod->init_layout, set_memory_ro); } mutex_unlock(&module_mutex); } + +static void disable_ro_nx(const struct module_layout *layout) +{ + frob_text(layout, set_memory_rw); + frob_rodata(layout, set_memory_rw); + frob_rodata(layout, set_memory_x); + frob_writable_data(layout, set_memory_x); +} + #else -static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { } -static void unset_module_core_ro_nx(struct module *mod) { } -static void unset_module_init_ro_nx(struct module *mod) { } +static void disable_ro_nx(const struct module_layout *layout) { } +static void module_enable_nx(const struct module *mod) { } +static void module_disable_nx(const struct module *mod) { } #endif void __weak module_memfree(void *module_region) @@ -2036,19 +2018,19 @@ static void free_module(struct module *mod) synchronize_sched(); mutex_unlock(&module_mutex); - /* This may be NULL, but that's OK */ - unset_module_init_ro_nx(mod); + /* This may be empty, but that's OK */ + disable_ro_nx(&mod->init_layout); module_arch_freeing_init(mod); - module_memfree(mod->module_init); + module_memfree(mod->init_layout.base); kfree(mod->args); percpu_modfree(mod); /* Free lock-classes; relies on the preceding sync_rcu(). */ - lockdep_free_key_range(mod->module_core, mod->core_size); + lockdep_free_key_range(mod->core_layout.base, mod->core_layout.size); /* Finally, free the core (containing the module structure) */ - unset_module_core_ro_nx(mod); - module_memfree(mod->module_core); + disable_ro_nx(&mod->core_layout); + module_memfree(mod->core_layout.base); #ifdef CONFIG_MPU update_protections(current->mm); @@ -2251,20 +2233,20 @@ static void layout_sections(struct module *mod, struct load_info *info) || s->sh_entsize != ~0UL || strstarts(sname, ".init")) continue; - s->sh_entsize = get_offset(mod, &mod->core_size, s, i); + s->sh_entsize = get_offset(mod, &mod->core_layout.size, s, i); pr_debug("\t%s\n", sname); } switch (m) { case 0: /* executable */ - mod->core_size = debug_align(mod->core_size); - mod->core_text_size = mod->core_size; + mod->core_layout.size = debug_align(mod->core_layout.size); + mod->core_layout.text_size = mod->core_layout.size; break; case 1: /* RO: text and ro-data */ - mod->core_size = debug_align(mod->core_size); - mod->core_ro_size = mod->core_size; + mod->core_layout.size = debug_align(mod->core_layout.size); + mod->core_layout.ro_size = mod->core_layout.size; break; case 3: /* whole core */ - mod->core_size = debug_align(mod->core_size); + mod->core_layout.size = debug_align(mod->core_layout.size); break; } } @@ -2280,21 +2262,21 @@ static void layout_sections(struct module *mod, struct load_info *info) || s->sh_entsize != ~0UL || !strstarts(sname, ".init")) continue; - s->sh_entsize = (get_offset(mod, &mod->init_size, s, i) + s->sh_entsize = (get_offset(mod, &mod->init_layout.size, s, i) | INIT_OFFSET_MASK); pr_debug("\t%s\n", sname); } switch (m) { case 0: /* executable */ - mod->init_size = debug_align(mod->init_size); - mod->init_text_size = mod->init_size; + mod->init_layout.size = debug_align(mod->init_layout.size); + mod->init_layout.text_size = mod->init_layout.size; break; case 1: /* RO: text and ro-data */ - mod->init_size = debug_align(mod->init_size); - mod->init_ro_size = mod->init_size; + mod->init_layout.size = debug_align(mod->init_layout.size); + mod->init_layout.ro_size = mod->init_layout.size; break; case 3: /* whole init */ - mod->init_size = debug_align(mod->init_size); + mod->init_layout.size = debug_align(mod->init_layout.size); break; } } @@ -2404,7 +2386,7 @@ static char elf_type(const Elf_Sym *sym, const struct load_info *info) } if (sym->st_shndx == SHN_UNDEF) return 'U'; - if (sym->st_shndx == SHN_ABS) + if (sym->st_shndx == SHN_ABS || sym->st_shndx == info->index.pcpu) return 'a'; if (sym->st_shndx >= SHN_LORESERVE) return '?'; @@ -2433,7 +2415,7 @@ static char elf_type(const Elf_Sym *sym, const struct load_info *info) } static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, - unsigned int shnum) + unsigned int shnum, unsigned int pcpundx) { const Elf_Shdr *sec; @@ -2442,6 +2424,11 @@ static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, || !src->st_name) return false; +#ifdef CONFIG_KALLSYMS_ALL + if (src->st_shndx == pcpundx) + return true; +#endif + sec = sechdrs + src->st_shndx; if (!(sec->sh_flags & SHF_ALLOC) #ifndef CONFIG_KALLSYMS_ALL @@ -2469,7 +2456,7 @@ static void layout_symtab(struct module *mod, struct load_info *info) /* Put symbol section at end of init part of module. */ symsect->sh_flags |= SHF_ALLOC; - symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect, + symsect->sh_entsize = get_offset(mod, &mod->init_layout.size, symsect, info->index.sym) | INIT_OFFSET_MASK; pr_debug("\t%s\n", info->secstrings + symsect->sh_name); @@ -2479,30 +2466,31 @@ static void layout_symtab(struct module *mod, struct load_info *info) /* Compute total space required for the core symbols' strtab. */ for (ndst = i = 0; i < nsrc; i++) { if (i == 0 || - is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { + is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum, + info->index.pcpu)) { strtab_size += strlen(&info->strtab[src[i].st_name])+1; ndst++; } } /* Append room for core symbols at end of core part. */ - info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); - info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym); - mod->core_size += strtab_size; - mod->core_size = debug_align(mod->core_size); + info->symoffs = ALIGN(mod->core_layout.size, symsect->sh_addralign ?: 1); + info->stroffs = mod->core_layout.size = info->symoffs + ndst * sizeof(Elf_Sym); + mod->core_layout.size += strtab_size; + mod->core_layout.size = debug_align(mod->core_layout.size); /* Put string table section at end of init part of module. */ strsect->sh_flags |= SHF_ALLOC; - strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, + strsect->sh_entsize = get_offset(mod, &mod->init_layout.size, strsect, info->index.str) | INIT_OFFSET_MASK; pr_debug("\t%s\n", info->secstrings + strsect->sh_name); /* We'll tack temporary mod_kallsyms on the end. */ - mod->init_size = ALIGN(mod->init_size, - __alignof__(struct mod_kallsyms)); - info->mod_kallsyms_init_off = mod->init_size; - mod->init_size += sizeof(struct mod_kallsyms); - mod->init_size = debug_align(mod->init_size); + mod->init_layout.size = ALIGN(mod->init_layout.size, + __alignof__(struct mod_kallsyms)); + info->mod_kallsyms_init_off = mod->init_layout.size; + mod->init_layout.size += sizeof(struct mod_kallsyms); + mod->init_layout.size = debug_align(mod->init_layout.size); } /* @@ -2519,7 +2507,7 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) Elf_Shdr *symsec = &info->sechdrs[info->index.sym]; /* Set up to point into init section. */ - mod->kallsyms = mod->module_init + info->mod_kallsyms_init_off; + mod->kallsyms = mod->init_layout.base + info->mod_kallsyms_init_off; mod->kallsyms->symtab = (void *)symsec->sh_addr; mod->kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym); @@ -2532,12 +2520,13 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) = elf_type(&mod->kallsyms->symtab[i], info); /* Now populate the cut down core kallsyms for after init. */ - mod->core_kallsyms.symtab = dst = mod->module_core + info->symoffs; - mod->core_kallsyms.strtab = s = mod->module_core + info->stroffs; + mod->core_kallsyms.symtab = dst = mod->core_layout.base + info->symoffs; + mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs; src = mod->kallsyms->symtab; for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) { if (i == 0 || - is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { + is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum, + info->index.pcpu)) { dst[ndst] = src[i]; dst[ndst++].st_name = s - mod->core_kallsyms.strtab; s += strlcpy(s, &mod->kallsyms->strtab[src[i].st_name], @@ -2988,7 +2977,7 @@ static int move_module(struct module *mod, struct load_info *info) void *ptr; /* Do the allocs. */ - ptr = module_alloc(mod->core_size); + ptr = module_alloc(mod->core_layout.size); /* * The pointer to this block is stored in the module structure * which is inside the block. Just mark it as not being a @@ -2998,11 +2987,11 @@ static int move_module(struct module *mod, struct load_info *info) if (!ptr) return -ENOMEM; - memset(ptr, 0, mod->core_size); - mod->module_core = ptr; + memset(ptr, 0, mod->core_layout.size); + mod->core_layout.base = ptr; - if (mod->init_size) { - ptr = module_alloc(mod->init_size); + if (mod->init_layout.size) { + ptr = module_alloc(mod->init_layout.size); /* * The pointer to this block is stored in the module structure * which is inside the block. This block doesn't need to be @@ -3011,13 +3000,13 @@ static int move_module(struct module *mod, struct load_info *info) */ kmemleak_ignore(ptr); if (!ptr) { - module_memfree(mod->module_core); + module_memfree(mod->core_layout.base); return -ENOMEM; } - memset(ptr, 0, mod->init_size); - mod->module_init = ptr; + memset(ptr, 0, mod->init_layout.size); + mod->init_layout.base = ptr; } else - mod->module_init = NULL; + mod->init_layout.base = NULL; /* Transfer each section which specifies SHF_ALLOC */ pr_debug("final section addresses:\n"); @@ -3029,10 +3018,10 @@ static int move_module(struct module *mod, struct load_info *info) continue; if (shdr->sh_entsize & INIT_OFFSET_MASK) - dest = mod->module_init + dest = mod->init_layout.base + (shdr->sh_entsize & ~INIT_OFFSET_MASK); else - dest = mod->module_core + shdr->sh_entsize; + dest = mod->core_layout.base + shdr->sh_entsize; if (shdr->sh_type != SHT_NOBITS) memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size); @@ -3094,12 +3083,12 @@ static void flush_module_icache(const struct module *mod) * Do it before processing of module parameters, so the module * can provide parameter accessor functions of its own. */ - if (mod->module_init) - flush_icache_range((unsigned long)mod->module_init, - (unsigned long)mod->module_init - + mod->init_size); - flush_icache_range((unsigned long)mod->module_core, - (unsigned long)mod->module_core + mod->core_size); + if (mod->init_layout.base) + flush_icache_range((unsigned long)mod->init_layout.base, + (unsigned long)mod->init_layout.base + + mod->init_layout.size); + flush_icache_range((unsigned long)mod->core_layout.base, + (unsigned long)mod->core_layout.base + mod->core_layout.size); set_fs(old_fs); } @@ -3157,8 +3146,8 @@ static void module_deallocate(struct module *mod, struct load_info *info) { percpu_modfree(mod); module_arch_freeing_init(mod); - module_memfree(mod->module_init); - module_memfree(mod->module_core); + module_memfree(mod->init_layout.base); + module_memfree(mod->core_layout.base); } int __weak module_finalize(const Elf_Ehdr *hdr, @@ -3245,7 +3234,7 @@ static noinline int do_init_module(struct module *mod) ret = -ENOMEM; goto fail; } - freeinit->module_init = mod->module_init; + freeinit->module_init = mod->init_layout.base; /* * We want to find out whether @mod uses async during init. Clear @@ -3302,12 +3291,12 @@ static noinline int do_init_module(struct module *mod) rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms); #endif mod_tree_remove_init(mod); - unset_module_init_ro_nx(mod); + disable_ro_nx(&mod->init_layout); module_arch_freeing_init(mod); - mod->module_init = NULL; - mod->init_size = 0; - mod->init_ro_size = 0; - mod->init_text_size = 0; + mod->init_layout.base = NULL; + mod->init_layout.size = 0; + mod->init_layout.ro_size = 0; + mod->init_layout.text_size = 0; /* * We want to free module_init, but be aware that kallsyms may be * walking this with preempt disabled. In all the failure paths, we @@ -3396,17 +3385,9 @@ static int complete_formation(struct module *mod, struct load_info *info) /* This relies on module_mutex for list integrity. */ module_bug_finalize(info->hdr, info->sechdrs, mod); - /* Set RO and NX regions for core */ - set_section_ro_nx(mod->module_core, - mod->core_text_size, - mod->core_ro_size, - mod->core_size); - - /* Set RO and NX regions for init */ - set_section_ro_nx(mod->module_init, - mod->init_text_size, - mod->init_ro_size, - mod->init_size); + /* Set RO and NX regions */ + module_enable_ro(mod); + module_enable_nx(mod); /* Mark state as coming so strong_try_module_get() ignores us, * but kallsyms etc. can see us. */ @@ -3571,8 +3552,8 @@ static int load_module(struct load_info *info, const char __user *uargs, MODULE_STATE_GOING, mod); /* we can't deallocate the module until we clear memory protection */ - unset_module_init_ro_nx(mod); - unset_module_core_ro_nx(mod); + module_disable_ro(mod); + module_disable_nx(mod); ddebug_cleanup: dynamic_debug_remove(info->debug); @@ -3601,7 +3582,7 @@ static int load_module(struct load_info *info, const char __user *uargs, */ ftrace_release_mod(mod); /* Free lock-classes; relies on the preceding sync_rcu() */ - lockdep_free_key_range(mod->module_core, mod->core_size); + lockdep_free_key_range(mod->core_layout.base, mod->core_layout.size); module_deallocate(mod, info); free_copy: @@ -3685,9 +3666,9 @@ static const char *get_ksymbol(struct module *mod, /* At worse, next value is at end of module */ if (within_module_init(addr, mod)) - nextval = (unsigned long)mod->module_init+mod->init_text_size; + nextval = (unsigned long)mod->init_layout.base+mod->init_layout.text_size; else - nextval = (unsigned long)mod->module_core+mod->core_text_size; + nextval = (unsigned long)mod->core_layout.base+mod->core_layout.text_size; /* Scan for closest preceding symbol, and next symbol. (ELF starts real symbols at 1). */ @@ -3940,7 +3921,7 @@ static int m_show(struct seq_file *m, void *p) return 0; seq_printf(m, "%s %u", - mod->name, mod->init_size + mod->core_size); + mod->name, mod->init_layout.size + mod->core_layout.size); print_unload_info(m, mod); /* Informative for users. */ @@ -3949,7 +3930,7 @@ static int m_show(struct seq_file *m, void *p) mod->state == MODULE_STATE_COMING ? "Loading" : "Live"); /* Used by oprofile and other similar tools. */ - seq_printf(m, " 0x%pK", mod->module_core); + seq_printf(m, " 0x%pK", mod->core_layout.base); /* Taints info */ if (mod->taints) @@ -4092,8 +4073,8 @@ struct module *__module_text_address(unsigned long addr) struct module *mod = __module_address(addr); if (mod) { /* Make sure it's within the text section. */ - if (!within(addr, mod->module_init, mod->init_text_size) - && !within(addr, mod->module_core, mod->core_text_size)) + if (!within(addr, mod->init_layout.base, mod->init_layout.text_size) + && !within(addr, mod->core_layout.base, mod->core_layout.text_size)) mod = NULL; } return mod; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 1a698158face..b4573b55b435 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3176,9 +3176,8 @@ void show_regs_print_info(const char *log_lvl) { dump_stack_print_info(log_lvl); - printk("%stask: %p ti: %p task.ti: %p\n", - log_lvl, current, current_thread_info(), - task_thread_info(current)); + printk("%stask: %p task.stack: %p\n", + log_lvl, current, task_stack_page(current)); } #endif diff --git a/kernel/resource.c b/kernel/resource.c index 249b1eb1e6e1..a4a94e700fb9 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -105,16 +105,25 @@ static int r_show(struct seq_file *m, void *v) { struct resource *root = m->private; struct resource *r = v, *p; + unsigned long long start, end; int width = root->end < 0x10000 ? 4 : 8; int depth; for (depth = 0, p = r; depth < MAX_IORES_LEVEL; depth++, p = p->parent) if (p->parent == root) break; + + if (file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN)) { + start = r->start; + end = r->end; + } else { + start = end = 0; + } + seq_printf(m, "%*s%0*llx-%0*llx : %s\n", depth * 2, "", - width, (unsigned long long) r->start, - width, (unsigned long long) r->end, + width, start, + width, end, r->name ? r->name : "<BAD>"); return 0; } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e353de860bfd..9307827cc7b1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5788,7 +5788,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) walt_set_window_start(rq); raw_spin_unlock_irqrestore(&rq->lock, flags); rq->calc_load_update = calc_load_update; - account_reset_rq(rq); break; case CPU_ONLINE: @@ -6386,6 +6385,9 @@ enum s_alloc { * Build an iteration mask that can exclude certain CPUs from the upwards * domain traversal. * + * Only CPUs that can arrive at this group should be considered to continue + * balancing. + * * Asymmetric node setups can result in situations where the domain tree is of * unequal depth, make sure to skip domains that already cover the entire * range. @@ -6397,18 +6399,31 @@ enum s_alloc { */ static void build_group_mask(struct sched_domain *sd, struct sched_group *sg) { - const struct cpumask *span = sched_domain_span(sd); + const struct cpumask *sg_span = sched_group_cpus(sg); struct sd_data *sdd = sd->private; struct sched_domain *sibling; int i; - for_each_cpu(i, span) { + for_each_cpu(i, sg_span) { sibling = *per_cpu_ptr(sdd->sd, i); - if (!cpumask_test_cpu(i, sched_domain_span(sibling))) + + /* + * Can happen in the asymmetric case, where these siblings are + * unused. The mask will not be empty because those CPUs that + * do have the top domain _should_ span the domain. + */ + if (!sibling->child) + continue; + + /* If we would not end up here, we can't continue from here */ + if (!cpumask_equal(sg_span, sched_domain_span(sibling->child))) continue; cpumask_set_cpu(i, sched_group_mask(sg)); } + + /* We must not have empty masks here */ + WARN_ON_ONCE(cpumask_empty(sched_group_mask(sg))); } /* @@ -8609,11 +8624,20 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) if (IS_ERR(tg)) return ERR_PTR(-ENOMEM); - sched_online_group(tg, parent); - return &tg->css; } +/* Expose task group only after completing cgroup initialization */ +static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) +{ + struct task_group *tg = css_tg(css); + struct task_group *parent = css_tg(css->parent); + + if (parent) + sched_online_group(tg, parent); + return 0; +} + static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) { struct task_group *tg = css_tg(css); @@ -8988,6 +9012,7 @@ static struct cftype cpu_files[] = { struct cgroup_subsys cpu_cgrp_subsys = { .css_alloc = cpu_cgroup_css_alloc, + .css_online = cpu_cgroup_css_online, .css_released = cpu_cgroup_css_released, .css_free = cpu_cgroup_css_free, .fork = cpu_cgroup_fork, diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 75bfbb336722..e12309c1b07b 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -47,6 +47,7 @@ struct sugov_policy { s64 up_rate_delay_ns; s64 down_rate_delay_ns; unsigned int next_freq; + unsigned int cached_raw_freq; /* The next fields are only needed if fast switch cannot be used. */ struct irq_work irq_work; @@ -63,7 +64,6 @@ struct sugov_cpu { struct update_util_data update_util; struct sugov_policy *sg_policy; - unsigned int cached_raw_freq; unsigned long iowait_boost; unsigned long iowait_boost_max; u64 last_update; @@ -72,6 +72,11 @@ struct sugov_cpu { unsigned long util; unsigned long max; unsigned int flags; + + /* The field below is for single-CPU policies only. */ +#ifdef CONFIG_NO_HZ_COMMON + unsigned long saved_idle_calls; +#endif }; static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu); @@ -127,22 +132,20 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, if (sugov_up_down_rate_limit(sg_policy, time, next_freq)) return; + if (sg_policy->next_freq == next_freq) + return; + + sg_policy->next_freq = next_freq; + sg_policy->last_freq_update_time = time; + if (policy->fast_switch_enabled) { - if (sg_policy->next_freq == next_freq) { - trace_cpu_frequency(policy->cur, smp_processor_id()); - return; - } - sg_policy->next_freq = next_freq; - sg_policy->last_freq_update_time = time; next_freq = cpufreq_driver_fast_switch(policy, next_freq); if (next_freq == CPUFREQ_ENTRY_INVALID) return; policy->cur = next_freq; trace_cpu_frequency(next_freq, smp_processor_id()); - } else if (sg_policy->next_freq != next_freq) { - sg_policy->next_freq = next_freq; - sg_policy->last_freq_update_time = time; + } else { sg_policy->work_in_progress = true; irq_work_queue(&sg_policy->irq_work); } @@ -150,7 +153,7 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, /** * get_next_freq - Compute a new frequency for a given cpufreq policy. - * @sg_cpu: schedutil cpu object to compute the new frequency for. + * @sg_policy: schedutil policy object to compute the new frequency for. * @util: Current CPU utilization. * @max: CPU capacity. * @@ -170,19 +173,18 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, * next_freq (as calculated above) is returned, subject to policy min/max and * cpufreq driver limitations. */ -static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, - unsigned long max) +static unsigned int get_next_freq(struct sugov_policy *sg_policy, + unsigned long util, unsigned long max) { - struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; unsigned int freq = arch_scale_freq_invariant() ? policy->cpuinfo.max_freq : policy->cur; freq = (freq + (freq >> 2)) * util / max; - if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX) + if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX) return sg_policy->next_freq; - sg_cpu->cached_raw_freq = freq; + sg_policy->cached_raw_freq = freq; return cpufreq_driver_resolve_freq(policy, freq); } @@ -248,6 +250,19 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, sg_cpu->iowait_boost >>= 1; } +#ifdef CONFIG_NO_HZ_COMMON +static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) +{ + unsigned long idle_calls = tick_nohz_get_idle_calls(); + bool ret = idle_calls == sg_cpu->saved_idle_calls; + + sg_cpu->saved_idle_calls = idle_calls; + return ret; +} +#else +static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } +#endif /* CONFIG_NO_HZ_COMMON */ + static void sugov_update_single(struct update_util_data *hook, u64 time, unsigned int flags) { @@ -256,6 +271,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, struct cpufreq_policy *policy = sg_policy->policy; unsigned long util, max; unsigned int next_f; + bool busy; sugov_set_iowait_boost(sg_cpu, time, flags); sg_cpu->last_update = time; @@ -263,40 +279,37 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, if (!sugov_should_update_freq(sg_policy, time)) return; + busy = sugov_cpu_is_busy(sg_cpu); + if (flags & SCHED_CPUFREQ_DL) { next_f = policy->cpuinfo.max_freq; } else { sugov_get_util(&util, &max, time); sugov_iowait_boost(sg_cpu, &util, &max); - next_f = get_next_freq(sg_cpu, util, max); + next_f = get_next_freq(sg_policy, util, max); + /* + * Do not reduce the frequency if the CPU has not been idle + * recently, as the reduction is likely to be premature then. + */ + if (busy && next_f < sg_policy->next_freq) + next_f = sg_policy->next_freq; } sugov_update_commit(sg_policy, time, next_f); } -static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, - unsigned long util, unsigned long max, - unsigned int flags) +static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; - unsigned int max_f = policy->cpuinfo.max_freq; u64 last_freq_update_time = sg_policy->last_freq_update_time; + unsigned long util = 0, max = 1; unsigned int j; - if (flags & SCHED_CPUFREQ_DL) - return max_f; - - sugov_iowait_boost(sg_cpu, &util, &max); - for_each_cpu(j, policy->cpus) { - struct sugov_cpu *j_sg_cpu; + struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); unsigned long j_util, j_max; s64 delta_ns; - if (j == smp_processor_id()) - continue; - - j_sg_cpu = &per_cpu(sugov_cpu, j); /* * If the CPU utilization was last updated before the previous * frequency update and the time elapsed between the last update @@ -310,7 +323,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, continue; } if (j_sg_cpu->flags & SCHED_CPUFREQ_DL) - return max_f; + return policy->cpuinfo.max_freq; j_util = j_sg_cpu->util; j_max = j_sg_cpu->max; @@ -322,7 +335,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, sugov_iowait_boost(j_sg_cpu, &util, &max); } - return get_next_freq(sg_cpu, util, max); + return get_next_freq(sg_policy, util, max); } static void sugov_update_shared(struct update_util_data *hook, u64 time, @@ -345,7 +358,11 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, sg_cpu->last_update = time; if (sugov_should_update_freq(sg_policy, time)) { - next_f = sugov_next_freq_shared(sg_cpu, util, max, flags); + if (flags & SCHED_CPUFREQ_DL) + next_f = sg_policy->policy->cpuinfo.max_freq; + else + next_f = sugov_next_freq_shared(sg_cpu); + sugov_update_commit(sg_policy, time, next_f); } @@ -371,15 +388,15 @@ static void sugov_irq_work(struct irq_work *irq_work) sg_policy = container_of(irq_work, struct sugov_policy, irq_work); /* - * For Real Time and Deadline tasks, schedutil governor shoots the - * frequency to maximum. And special care must be taken to ensure that - * this kthread doesn't result in that. + * For RT and deadline tasks, the schedutil governor shoots the + * frequency to maximum. Special care must be taken to ensure that this + * kthread doesn't result in the same behavior. * * This is (mostly) guaranteed by the work_in_progress flag. The flag is - * updated only at the end of the sugov_work() and before that schedutil - * rejects all other frequency scaling requests. + * updated only at the end of the sugov_work() function and before that + * the schedutil governor rejects all other frequency scaling requests. * - * Though there is a very rare case where the RT thread yields right + * There is a very rare case though, where the RT thread yields right * after the work_in_progress flag is cleared. The effects of that are * neglected for now. */ @@ -489,15 +506,12 @@ static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy) return NULL; sg_policy->policy = policy; - init_irq_work(&sg_policy->irq_work, sugov_irq_work); - mutex_init(&sg_policy->work_lock); raw_spin_lock_init(&sg_policy->update_lock); return sg_policy; } static void sugov_policy_free(struct sugov_policy *sg_policy) { - mutex_destroy(&sg_policy->work_lock); kfree(sg_policy); } @@ -531,6 +545,9 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) sg_policy->thread = thread; kthread_bind_mask(thread, policy->related_cpus); + init_irq_work(&sg_policy->irq_work, sugov_irq_work); + mutex_init(&sg_policy->work_lock); + wake_up_process(thread); return 0; @@ -544,6 +561,7 @@ static void sugov_kthread_stop(struct sugov_policy *sg_policy) flush_kthread_worker(&sg_policy->worker); kthread_stop(sg_policy->thread); + mutex_destroy(&sg_policy->work_lock); } static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy) @@ -578,9 +596,13 @@ static int sugov_init(struct cpufreq_policy *policy) if (policy->governor_data) return -EBUSY; + cpufreq_enable_fast_switch(policy); + sg_policy = sugov_policy_alloc(policy); - if (!sg_policy) - return -ENOMEM; + if (!sg_policy) { + ret = -ENOMEM; + goto disable_fast_switch; + } ret = sugov_kthread_create(sg_policy); if (ret) @@ -623,13 +645,11 @@ static int sugov_init(struct cpufreq_policy *policy) if (ret) goto fail; - out: +out: mutex_unlock(&global_tunables_lock); - - cpufreq_enable_fast_switch(policy); return 0; - fail: +fail: policy->governor_data = NULL; sugov_tunables_free(tunables); @@ -640,6 +660,10 @@ free_sg_policy: mutex_unlock(&global_tunables_lock); sugov_policy_free(sg_policy); + +disable_fast_switch: + cpufreq_disable_fast_switch(policy); + pr_err("initialization failed (error %d)\n", ret); return ret; } @@ -650,8 +674,6 @@ static int sugov_exit(struct cpufreq_policy *policy) struct sugov_tunables *tunables = sg_policy->tunables; unsigned int count; - cpufreq_disable_fast_switch(policy); - mutex_lock(&global_tunables_lock); count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook); @@ -664,6 +686,7 @@ static int sugov_exit(struct cpufreq_policy *policy) sugov_kthread_stop(sg_policy); sugov_policy_free(sg_policy); + cpufreq_disable_fast_switch(policy); return 0; } @@ -681,25 +704,19 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->next_freq = UINT_MAX; sg_policy->work_in_progress = false; sg_policy->need_freq_update = false; + sg_policy->cached_raw_freq = 0; for_each_cpu(cpu, policy->cpus) { struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); + memset(sg_cpu, 0, sizeof(*sg_cpu)); sg_cpu->sg_policy = sg_policy; - if (policy_is_shared(policy)) { - sg_cpu->util = 0; - sg_cpu->max = 0; - sg_cpu->flags = SCHED_CPUFREQ_DL; - sg_cpu->last_update = 0; - sg_cpu->cached_raw_freq = 0; - sg_cpu->iowait_boost = 0; - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; - cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, - sugov_update_shared); - } else { - cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, - sugov_update_single); - } + sg_cpu->flags = SCHED_CPUFREQ_DL; + sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; + cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, + policy_is_shared(policy) ? + sugov_update_shared : + sugov_update_single); } return 0; } @@ -714,9 +731,10 @@ static int sugov_stop(struct cpufreq_policy *policy) synchronize_sched(); - irq_work_sync(&sg_policy->irq_work); - kthread_cancel_work_sync(&sg_policy->work); - + if (!policy->fast_switch_enabled) { + irq_work_sync(&sg_policy->irq_work); + kthread_cancel_work_sync(&sg_policy->work); + } return 0; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 07864e77856c..4b7bbd32b486 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5396,9 +5396,11 @@ static int find_new_capacity(struct energy_env *eenv, return idx; } -static int group_idle_state(struct sched_group *sg) +static int group_idle_state(struct energy_env *eenv, struct sched_group *sg) { int i, state = INT_MAX; + int src_in_grp, dst_in_grp; + long grp_util = 0; /* Find the shallowest idle state in the sched group. */ for_each_cpu(i, sched_group_cpus(sg)) @@ -5407,6 +5409,54 @@ static int group_idle_state(struct sched_group *sg) /* Take non-cpuidle idling into account (active idle/arch_cpu_idle()) */ state++; + /* + * Try to estimate if a deeper idle state is + * achievable when we move the task. + */ + for_each_cpu(i, sched_group_cpus(sg)) + grp_util += cpu_util(i); + + src_in_grp = cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg)); + dst_in_grp = cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg)); + if (src_in_grp == dst_in_grp) { + /* both CPUs under consideration are in the same group or not in + * either group, migration should leave idle state the same. + */ + goto end; + } + /* add or remove util as appropriate to indicate what group util + * will be (worst case - no concurrent execution) after moving the task + */ + grp_util += src_in_grp ? -eenv->util_delta : eenv->util_delta; + + if (grp_util <= + ((long)sg->sgc->max_capacity * (int)sg->group_weight)) { + /* after moving, this group is at most partly + * occupied, so it should have some idle time. + */ + int max_idle_state_idx = sg->sge->nr_idle_states - 2; + int new_state = grp_util * max_idle_state_idx; + if (grp_util <= 0) + /* group will have no util, use lowest state */ + new_state = max_idle_state_idx + 1; + else { + /* for partially idle, linearly map util to idle + * states, excluding the lowest one. This does not + * correspond to the state we expect to enter in + * reality, but an indication of what might happen. + */ + new_state = min(max_idle_state_idx, (int) + (new_state / sg->sgc->max_capacity)); + new_state = max_idle_state_idx - new_state; + } + state = new_state; + } else { + /* After moving, the group will be fully occupied + * so assume it will not be idle at all. + */ + state = 0; + } +end: return state; } @@ -5479,8 +5529,9 @@ static int sched_group_energy(struct energy_env *eenv) } } - idle_idx = group_idle_state(sg); + idle_idx = group_idle_state(eenv, sg); group_util = group_norm_util(eenv, sg); + sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power) >> SCHED_CAPACITY_SHIFT; sg_idle_energy = ((SCHED_LOAD_SCALE-group_util) @@ -6191,48 +6242,59 @@ static int start_cpu(bool boosted) return boosted ? rd->max_cap_orig_cpu : rd->min_cap_orig_cpu; } -static inline int find_best_target(struct task_struct *p, bool boosted, bool prefer_idle) +static inline int find_best_target(struct task_struct *p, int *backup_cpu, + bool boosted, bool prefer_idle) { - int target_cpu = -1; - unsigned long target_util = prefer_idle ? ULONG_MAX : 0; - unsigned long backup_capacity = ULONG_MAX; - int best_idle_cpu = -1; - int best_idle_cstate = INT_MAX; - int backup_cpu = -1; + unsigned long best_idle_min_cap_orig = ULONG_MAX; unsigned long min_util = boosted_task_util(p); + unsigned long target_capacity = ULONG_MAX; + unsigned long min_wake_util = ULONG_MAX; + unsigned long target_max_spare_cap = 0; + unsigned long target_util = ULONG_MAX; + unsigned long best_active_util = ULONG_MAX; + int best_idle_cstate = INT_MAX; struct sched_domain *sd; struct sched_group *sg; - int cpu = start_cpu(boosted); + int best_active_cpu = -1; + int best_idle_cpu = -1; + int target_cpu = -1; + int cpu, i; + + *backup_cpu = -1; schedstat_inc(p, se.statistics.nr_wakeups_fbt_attempts); schedstat_inc(this_rq(), eas_stats.fbt_attempts); + /* Find start CPU based on boost value */ + cpu = start_cpu(boosted); if (cpu < 0) { schedstat_inc(p, se.statistics.nr_wakeups_fbt_no_cpu); schedstat_inc(this_rq(), eas_stats.fbt_no_cpu); - return target_cpu; + return -1; } + /* Find SD for the start CPU */ sd = rcu_dereference(per_cpu(sd_ea, cpu)); - if (!sd) { schedstat_inc(p, se.statistics.nr_wakeups_fbt_no_sd); schedstat_inc(this_rq(), eas_stats.fbt_no_sd); - return target_cpu; + return -1; } + /* Scan CPUs in all SDs */ sg = sd->groups; - do { - int i; - for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) { - unsigned long cur_capacity, new_util, wake_util; - unsigned long min_wake_util = ULONG_MAX; + unsigned long capacity_curr = capacity_curr_of(i); + unsigned long capacity_orig = capacity_orig_of(i); + unsigned long wake_util, new_util; if (!cpu_online(i)) continue; + if (walt_cpu_high_irqload(i)) + continue; + /* * p's blocked utilization is still accounted for on prev_cpu * so prev_cpu will receive a negative bias due to the double @@ -6247,70 +6309,204 @@ static inline int find_best_target(struct task_struct *p, bool boosted, bool pre * than the one required to boost the task. */ new_util = max(min_util, new_util); - - if (new_util > capacity_orig_of(i)) + if (new_util > capacity_orig) continue; -#ifdef CONFIG_SCHED_WALT - if (walt_cpu_high_irqload(i)) - continue; -#endif - /* - * Unconditionally favoring tasks that prefer idle cpus to + * Case A) Latency sensitive tasks + * + * Unconditionally favoring tasks that prefer idle CPU to * improve latency. + * + * Looking for: + * - an idle CPU, whatever its idle_state is, since + * the first CPUs we explore are more likely to be + * reserved for latency sensitive tasks. + * - a non idle CPU where the task fits in its current + * capacity and has the maximum spare capacity. + * - a non idle CPU with lower contention from other + * tasks and running at the lowest possible OPP. + * + * The last two goals tries to favor a non idle CPU + * where the task can run as if it is "almost alone". + * A maximum spare capacity CPU is favoured since + * the task already fits into that CPU's capacity + * without waiting for an OPP chance. + * + * The following code path is the only one in the CPUs + * exploration loop which is always used by + * prefer_idle tasks. It exits the loop with wither a + * best_active_cpu or a target_cpu which should + * represent an optimal choice for latency sensitive + * tasks. */ - if (idle_cpu(i) && prefer_idle) { - schedstat_inc(p, se.statistics.nr_wakeups_fbt_pref_idle); - schedstat_inc(this_rq(), eas_stats.fbt_pref_idle); - return i; - } + if (prefer_idle) { - cur_capacity = capacity_curr_of(i); - - if (new_util < cur_capacity) { - if (cpu_rq(i)->nr_running) { - /* - * Find a target cpu with the lowest/highest - * utilization if prefer_idle/!prefer_idle. - */ - if (prefer_idle) { - /* Favor the CPU that last ran the task */ - if (new_util > target_util || - wake_util > min_wake_util) - continue; - min_wake_util = wake_util; - target_util = new_util; - target_cpu = i; - } else if (target_util < new_util) { - target_util = new_util; - target_cpu = i; - } - } else if (!prefer_idle) { - int idle_idx = idle_get_state_idx(cpu_rq(i)); + /* + * Case A.1: IDLE CPU + * Return the first IDLE CPU we find. + */ + if (idle_cpu(i)) { + schedstat_inc(p, se.statistics.nr_wakeups_fbt_pref_idle); + schedstat_inc(this_rq(), eas_stats.fbt_pref_idle); - if (best_idle_cpu < 0 || - (sysctl_sched_cstate_aware && - best_idle_cstate > idle_idx)) { - best_idle_cstate = idle_idx; - best_idle_cpu = i; - } + trace_sched_find_best_target(p, + prefer_idle, min_util, + cpu, best_idle_cpu, + best_active_cpu, i); + + return i; } - } else if (backup_capacity > cur_capacity) { - /* Find a backup cpu with least capacity. */ - backup_capacity = cur_capacity; - backup_cpu = i; + + /* + * Case A.2: Target ACTIVE CPU + * Favor CPUs with max spare capacity. + */ + if ((capacity_curr > new_util) && + (capacity_orig - new_util > target_max_spare_cap)) { + target_max_spare_cap = capacity_orig - new_util; + target_cpu = i; + continue; + } + if (target_cpu != -1) + continue; + + + /* + * Case A.3: Backup ACTIVE CPU + * Favor CPUs with: + * - lower utilization due to other tasks + * - lower utilization with the task in + */ + if (wake_util > min_wake_util) + continue; + if (new_util > best_active_util) + continue; + min_wake_util = wake_util; + best_active_util = new_util; + best_active_cpu = i; + continue; } + + /* + * Case B) Non latency sensitive tasks on IDLE CPUs. + * + * Find an optimal backup IDLE CPU for non latency + * sensitive tasks. + * + * Looking for: + * - minimizing the capacity_orig, + * i.e. preferring LITTLE CPUs + * - favoring shallowest idle states + * i.e. avoid to wakeup deep-idle CPUs + * + * The following code path is used by non latency + * sensitive tasks if IDLE CPUs are available. If at + * least one of such CPUs are available it sets the + * best_idle_cpu to the most suitable idle CPU to be + * selected. + * + * If idle CPUs are available, favour these CPUs to + * improve performances by spreading tasks. + * Indeed, the energy_diff() computed by the caller + * will take care to ensure the minimization of energy + * consumptions without affecting performance. + */ + if (idle_cpu(i)) { + int idle_idx = idle_get_state_idx(cpu_rq(i)); + + /* Select idle CPU with lower cap_orig */ + if (capacity_orig > best_idle_min_cap_orig) + continue; + + /* + * Skip CPUs in deeper idle state, but only + * if they are also less energy efficient. + * IOW, prefer a deep IDLE LITTLE CPU vs a + * shallow idle big CPU. + */ + if (sysctl_sched_cstate_aware && + best_idle_cstate <= idle_idx) + continue; + + /* Keep track of best idle CPU */ + best_idle_min_cap_orig = capacity_orig; + best_idle_cstate = idle_idx; + best_idle_cpu = i; + continue; + } + + /* + * Case C) Non latency sensitive tasks on ACTIVE CPUs. + * + * Pack tasks in the most energy efficient capacities. + * + * This task packing strategy prefers more energy + * efficient CPUs (i.e. pack on smaller maximum + * capacity CPUs) while also trying to spread tasks to + * run them all at the lower OPP. + * + * This assumes for example that it's more energy + * efficient to run two tasks on two CPUs at a lower + * OPP than packing both on a single CPU but running + * that CPU at an higher OPP. + * + * Thus, this case keep track of the CPU with the + * smallest maximum capacity and highest spare maximum + * capacity. + */ + + /* Favor CPUs with smaller capacity */ + if (capacity_orig > target_capacity) + continue; + + /* Favor CPUs with maximum spare capacity */ + if ((capacity_orig - new_util) < target_max_spare_cap) + continue; + + target_max_spare_cap = capacity_orig - new_util; + target_capacity = capacity_orig; + target_util = new_util; + target_cpu = i; } + } while (sg = sg->next, sg != sd->groups); - if (target_cpu < 0) - target_cpu = best_idle_cpu >= 0 ? best_idle_cpu : backup_cpu; + /* + * For non latency sensitive tasks, cases B and C in the previous loop, + * we pick the best IDLE CPU only if we was not able to find a target + * ACTIVE CPU. + * + * Policies priorities: + * + * - prefer_idle tasks: + * + * a) IDLE CPU available, we return immediately + * b) ACTIVE CPU where task fits and has the bigger maximum spare + * capacity (i.e. target_cpu) + * c) ACTIVE CPU with less contention due to other tasks + * (i.e. best_active_cpu) + * + * - NON prefer_idle tasks: + * + * a) ACTIVE CPU: target_cpu + * b) IDLE CPU: best_idle_cpu + */ + if (target_cpu == -1) + target_cpu = prefer_idle + ? best_active_cpu + : best_idle_cpu; + else + *backup_cpu = prefer_idle + ? best_active_cpu + : best_idle_cpu; - if (target_cpu >= 0) { - schedstat_inc(p, se.statistics.nr_wakeups_fbt_count); - schedstat_inc(this_rq(), eas_stats.fbt_count); - } + trace_sched_find_best_target(p, prefer_idle, min_util, cpu, + best_idle_cpu, best_active_cpu, + target_cpu); + + schedstat_inc(p, se.statistics.nr_wakeups_fbt_count); + schedstat_inc(this_rq(), eas_stats.fbt_count); return target_cpu; } @@ -6342,7 +6538,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu) static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync) { struct sched_domain *sd; - int target_cpu = prev_cpu, tmp_target; + int target_cpu = prev_cpu, tmp_target, tmp_backup; bool boosted, prefer_idle; schedstat_inc(p, se.statistics.nr_wakeups_secb_attempts); @@ -6367,9 +6563,11 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync prefer_idle = 0; #endif + sync_entity_load_avg(&p->se); + sd = rcu_dereference(per_cpu(sd_ea, prev_cpu)); /* Find a cpu with sufficient capacity */ - tmp_target = find_best_target(p, boosted, prefer_idle); + tmp_target = find_best_target(p, &tmp_backup, boosted, prefer_idle); if (!sd) goto unlock; @@ -6398,10 +6596,15 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync } if (energy_diff(&eenv) >= 0) { - schedstat_inc(p, se.statistics.nr_wakeups_secb_no_nrg_sav); - schedstat_inc(this_rq(), eas_stats.secb_no_nrg_sav); - target_cpu = prev_cpu; - goto unlock; + /* No energy saving for target_cpu, try backup */ + target_cpu = tmp_backup; + eenv.dst_cpu = target_cpu; + if (tmp_backup < 0 || energy_diff(&eenv) >= 0) { + schedstat_inc(p, se.statistics.nr_wakeups_secb_no_nrg_sav); + schedstat_inc(this_rq(), eas_stats.secb_no_nrg_sav); + target_cpu = prev_cpu; + goto unlock; + } } schedstat_inc(p, se.statistics.nr_wakeups_secb_nrg_sav); @@ -6439,16 +6642,9 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f int want_affine = 0; int sync = wake_flags & WF_SYNC; - if (sd_flag & SD_BALANCE_WAKE) { - /* - * do wake_cap unconditionally as it causes task and cpu - * utilization to be synced, and we need that for energy - * aware wakeups - */ - int _wake_cap = wake_cap(p, cpu, prev_cpu); - want_affine = !wake_wide(p) && !_wake_cap + if (sd_flag & SD_BALANCE_WAKE) + want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) && cpumask_test_cpu(cpu, tsk_cpus_allowed(p)); - } if (energy_aware() && !(cpu_rq(prev_cpu)->rd->overutilized)) return select_energy_cpu_brute(p, prev_cpu, sync); @@ -7919,6 +8115,38 @@ group_type group_classify(struct sched_group *group, return group_other; } +#ifdef CONFIG_NO_HZ_COMMON +/* + * idle load balancing data + * - used by the nohz balance, but we want it available here + * so that we can see which CPUs have no tick. + */ +static struct { + cpumask_var_t idle_cpus_mask; + atomic_t nr_cpus; + unsigned long next_balance; /* in jiffy units */ +} nohz ____cacheline_aligned; + +static inline void update_cpu_stats_if_tickless(struct rq *rq) +{ + /* only called from update_sg_lb_stats when irqs are disabled */ + if (cpumask_test_cpu(rq->cpu, nohz.idle_cpus_mask)) { + /* rate limit updates to once-per-jiffie at most */ + if (READ_ONCE(jiffies) <= rq->last_load_update_tick) + return; + + raw_spin_lock(&rq->lock); + update_rq_clock(rq); + update_idle_cpu_load(rq); + update_cfs_rq_load_avg(rq->clock_task, &rq->cfs, false); + raw_spin_unlock(&rq->lock); + } +} + +#else +static inline void update_cpu_stats_if_tickless(struct rq *rq) { } +#endif + /** * update_sg_lb_stats - Update sched_group's statistics for load balancing. * @env: The load balancing environment. @@ -7942,6 +8170,12 @@ static inline void update_sg_lb_stats(struct lb_env *env, for_each_cpu_and(i, sched_group_cpus(group), env->cpus) { struct rq *rq = cpu_rq(i); + /* if we are entering idle and there are CPUs with + * their tick stopped, do an update for them + */ + if (env->idle == CPU_NEWLY_IDLE) + update_cpu_stats_if_tickless(rq); + /* Bias balancing toward cpus of our domain */ if (local_group) load = target_load(i, load_idx); @@ -9182,12 +9416,6 @@ static inline int on_null_domain(struct rq *rq) * needed, they will kick the idle load balancer, which then does idle * load balancing for all the idle CPUs. */ -static struct { - cpumask_var_t idle_cpus_mask; - atomic_t nr_cpus; - unsigned long next_balance; /* in jiffy units */ -} nohz ____cacheline_aligned; - static inline int find_new_ilb(void) { int ilb = cpumask_first(nohz.idle_cpus_mask); @@ -9537,6 +9765,10 @@ static inline bool nohz_kick_needed(struct rq *rq) (!energy_aware() || cpu_overutilized(cpu))) return true; + /* Do idle load balance if there have misfit task */ + if (energy_aware() && rq->misfit_task) + return true; + rcu_read_lock(); sd = rcu_dereference(per_cpu(sd_busy, cpu)); if (sd && !energy_aware()) { diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ce364ddbb72c..029cf2bbeda2 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1028,7 +1028,11 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) * per-task data have been completed by this moment. */ smp_wmb(); +#ifdef CONFIG_THREAD_INFO_IN_TASK + p->cpu = cpu; +#else task_thread_info(p)->cpu = cpu; +#endif p->wake_cpu = cpu; #endif } @@ -2036,19 +2040,6 @@ static inline u64 irq_time_read(int cpu) #endif /* CONFIG_64BIT */ #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -static inline void account_reset_rq(struct rq *rq) -{ -#ifdef CONFIG_IRQ_TIME_ACCOUNTING - rq->prev_irq_time = 0; -#endif -#ifdef CONFIG_PARAVIRT - rq->prev_steal_time = 0; -#endif -#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING - rq->prev_steal_time_rq = 0; -#endif -} - #ifdef CONFIG_CPU_FREQ DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index 6e053bd9830c..92c3aae8e056 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -72,7 +72,15 @@ static cpumask_t mpc_mask = CPU_MASK_ALL; __read_mostly unsigned int walt_ravg_window = 20000000; /* Min window size (in ns) = 10ms */ +#ifdef CONFIG_HZ_300 +/* + * Tick interval becomes to 3333333 due to + * rounding error when HZ=300. + */ +#define MIN_SCHED_RAVG_WINDOW (3333333 * 6) +#else #define MIN_SCHED_RAVG_WINDOW 10000000 +#endif /* Max window size (in ns) = 1s */ #define MAX_SCHED_RAVG_WINDOW 1000000000 diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h index e181c87a928d..f56c4da16d0b 100644 --- a/kernel/sched/walt.h +++ b/kernel/sched/walt.h @@ -55,6 +55,8 @@ static inline void walt_migrate_sync_cpu(int cpu) { } static inline void walt_init_cpu_efficiency(void) { } static inline u64 walt_ktime_clock(void) { return 0; } +#define walt_cpu_high_irqload(cpu) false + #endif /* CONFIG_SCHED_WALT */ extern unsigned int walt_disabled; diff --git a/kernel/signal.c b/kernel/signal.c index 9793fee761e2..4671da7e2222 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -346,7 +346,7 @@ static bool task_participate_group_stop(struct task_struct *task) * fresh group stop. Read comment in do_signal_stop() for details. */ if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) { - sig->flags = SIGNAL_STOP_STOPPED; + signal_set_stop_flags(sig, SIGNAL_STOP_STOPPED); return true; } return false; @@ -845,7 +845,7 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force) * will take ->siglock, notice SIGNAL_CLD_MASK, and * notify its parent. See get_signal_to_deliver(). */ - signal->flags = why | SIGNAL_STOP_CONTINUED; + signal_set_stop_flags(signal, why | SIGNAL_STOP_CONTINUED); signal->group_stop_count = 0; signal->group_exit_code = 0; } diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 2c3a23d77704..6fcc367ad531 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -616,7 +616,8 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, * Rate limit to the tick as a hot fix to prevent DOS. Will be * mopped up later. */ - if (ktime_to_ns(timr->it.alarm.interval) < TICK_NSEC) + if (timr->it.alarm.interval.tv64 && + ktime_to_ns(timr->it.alarm.interval) < TICK_NSEC) timr->it.alarm.interval = ktime_set(0, TICK_NSEC); exp = timespec_to_ktime(new_setting->it_value); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 22c57e191a23..34f9a9c417d9 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -870,6 +870,18 @@ ktime_t tick_nohz_get_sleep_length(void) return ts->sleep_length; } +/** + * tick_nohz_get_idle_calls - return the current idle calls counter value + * + * Called from the schedutil frequency scaling governor in scheduler context. + */ +unsigned long tick_nohz_get_idle_calls(void) +{ + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); + + return ts->idle_calls; +} + static void tick_nohz_account_idle_ticks(struct tick_sched *ts) { #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 34b2a0d5cf1a..eba904bae48c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3535,7 +3535,7 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod) int exclude_mod = 0; int found = 0; int ret; - int clear_filter; + int clear_filter = 0; if (func) { func_g.type = filter_parse_regex(func, len, &func_g.search, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6300318ebf79..70f519e8489e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1681,7 +1681,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, TRACE_FLAG_IRQS_NOSUPPORT | #endif ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | - ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | + ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); } @@ -6827,6 +6827,7 @@ static int instance_rmdir(const char *name) } kfree(tr->topts); + free_cpumask_var(tr->tracing_cpumask); kfree(tr->name); kfree(tr); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2c2f971f3e75..23231237f2e2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3647,8 +3647,12 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq, return -EINVAL; /* creating multiple pwqs breaks ordering guarantee */ - if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) - return -EINVAL; + if (!list_empty(&wq->pwqs)) { + if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) + return -EINVAL; + + wq->flags &= ~__WQ_ORDERED; + } ctx = apply_wqattrs_prepare(wq, attrs); @@ -3834,6 +3838,16 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, struct workqueue_struct *wq; struct pool_workqueue *pwq; + /* + * Unbound && max_active == 1 used to imply ordered, which is no + * longer the case on NUMA machines due to per-node pools. While + * alloc_ordered_workqueue() is the right way to create an ordered + * workqueue, keep the previous behavior to avoid subtle breakages + * on NUMA. + */ + if ((flags & WQ_UNBOUND) && max_active == 1) + flags |= __WQ_ORDERED; + /* see the comment above the definition of WQ_POWER_EFFICIENT */ if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient) flags |= WQ_UNBOUND; @@ -4022,13 +4036,14 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) struct pool_workqueue *pwq; /* disallow meddling with max_active for ordered workqueues */ - if (WARN_ON(wq->flags & __WQ_ORDERED)) + if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) return; max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); mutex_lock(&wq->mutex); + wq->flags &= ~__WQ_ORDERED; wq->saved_max_active = max_active; for_each_pwq(pwq, wq) @@ -5154,7 +5169,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq) * attributes breaks ordering guarantee. Disallow exposing ordered * workqueues. */ - if (WARN_ON(wq->flags & __WQ_ORDERED)) + if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) return -EINVAL; wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 63d14d9b51d8..c879d72bf9f7 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -145,7 +145,7 @@ config DEBUG_INFO_REDUCED config DEBUG_INFO_SPLIT bool "Produce split debuginfo in .dwo files" - depends on DEBUG_INFO + depends on DEBUG_INFO && !FRV help Generate debug info into separate .dwo files. This significantly reduces the build directory size for builds with DEBUG_INFO, diff --git a/mm/internal.h b/mm/internal.h index 6979b2bd3227..f63f4393d633 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -453,6 +453,7 @@ struct tlbflush_unmap_batch; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH void try_to_unmap_flush(void); void try_to_unmap_flush_dirty(void); +void flush_tlb_batched_pending(struct mm_struct *mm); #else static inline void try_to_unmap_flush(void) { @@ -460,6 +461,8 @@ static inline void try_to_unmap_flush(void) static inline void try_to_unmap_flush_dirty(void) { } - +static inline void flush_tlb_batched_pending(struct mm_struct *mm) +{ +} #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ #endif /* __MM_INTERNAL_H */ diff --git a/mm/list_lru.c b/mm/list_lru.c index 5d8dffd5b57c..786176b1a0ee 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -117,6 +117,7 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item) l = list_lru_from_kmem(nlru, item); list_add_tail(item, &l->list); l->nr_items++; + nlru->nr_items++; spin_unlock(&nlru->lock); return true; } @@ -136,6 +137,7 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item) l = list_lru_from_kmem(nlru, item); list_del_init(item); l->nr_items--; + nlru->nr_items--; spin_unlock(&nlru->lock); return true; } @@ -183,15 +185,10 @@ EXPORT_SYMBOL_GPL(list_lru_count_one); unsigned long list_lru_count_node(struct list_lru *lru, int nid) { - long count = 0; - int memcg_idx; + struct list_lru_node *nlru; - count += __list_lru_count_one(lru, nid, -1); - if (list_lru_memcg_aware(lru)) { - for_each_memcg_cache_index(memcg_idx) - count += __list_lru_count_one(lru, nid, memcg_idx); - } - return count; + nlru = &lru->node[nid]; + return nlru->nr_items; } EXPORT_SYMBOL_GPL(list_lru_count_node); @@ -226,6 +223,7 @@ restart: assert_spin_locked(&nlru->lock); case LRU_REMOVED: isolated++; + nlru->nr_items--; /* * If the lru lock has been dropped, our list * traversal is now invalid and so we have to diff --git a/mm/memory.c b/mm/memory.c index e6fa13484447..9ac55172aa7b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1127,6 +1127,7 @@ again: init_rss_vec(rss); start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); pte = start_pte; + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); do { pte_t ptent = *pte; diff --git a/mm/mempool.c b/mm/mempool.c index 004d42b1dfaf..7924f4f58a6d 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -135,8 +135,8 @@ static void *remove_element(mempool_t *pool) void *element = pool->elements[--pool->curr_nr]; BUG_ON(pool->curr_nr < 0); - check_element(pool, element); kasan_unpoison_element(pool, element); + check_element(pool, element); return element; } diff --git a/mm/mmap.c b/mm/mmap.c index 86c03d3fb9f5..19823fc3dcfa 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2197,7 +2197,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) /* Guard against exceeding limits of the address space. */ address &= PAGE_MASK; - if (address >= TASK_SIZE) + if (address >= (TASK_SIZE & PAGE_MASK)) return -ENOMEM; address += PAGE_SIZE; diff --git a/mm/mprotect.c b/mm/mprotect.c index bddb2c75492d..b8849a3930a0 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -72,6 +72,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, if (!pte) return 0; + flush_tlb_batched_pending(vma->vm_mm); arch_enter_lazy_mmu_mode(); do { oldpte = *pte; diff --git a/mm/mremap.c b/mm/mremap.c index c25bc6268e46..fe7b7f65f4f4 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -135,6 +135,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, new_ptl = pte_lockptr(mm, new_pmd); if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); + flush_tlb_batched_pending(vma->vm_mm); arch_enter_lazy_mmu_mode(); for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9f143d9b0f20..4ae77db917f2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1532,14 +1532,14 @@ int move_freepages(struct zone *zone, #endif for (page = start_page; page <= end_page;) { - /* Make sure we are not inadvertently changing nodes */ - VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page); - if (!pfn_valid_within(page_to_pfn(page))) { page++; continue; } + /* Make sure we are not inadvertently changing nodes */ + VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page); + if (!PageBuddy(page)) { page++; continue; @@ -5852,8 +5852,8 @@ unsigned long free_reserved_area(void *start, void *end, int poison, char *s) } if (pages && s) - pr_info("Freeing %s memory: %ldK (%p - %p)\n", - s, pages << (PAGE_SHIFT - 10), start, end); + pr_info("Freeing %s memory: %ldK\n", + s, pages << (PAGE_SHIFT - 10)); return pages; } @@ -6807,7 +6807,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, /* Make sure the range is really isolated. */ if (test_pages_isolated(outer_start, end, false)) { - pr_info("%s: [%lx, %lx) PFNs busy\n", + pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n", __func__, outer_start, end); ret = -EBUSY; goto done; diff --git a/mm/rmap.c b/mm/rmap.c index b577fbb98d4b..ede183c32f45 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -649,6 +649,13 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, tlb_ubc->flush_required = true; /* + * Ensure compiler does not re-order the setting of tlb_flush_batched + * before the PTE is cleared. + */ + barrier(); + mm->tlb_flush_batched = true; + + /* * If the PTE was dirty then it's best to assume it's writable. The * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush() * before the page is queued for IO. @@ -675,6 +682,35 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) return should_defer; } + +/* + * Reclaim unmaps pages under the PTL but do not flush the TLB prior to + * releasing the PTL if TLB flushes are batched. It's possible for a parallel + * operation such as mprotect or munmap to race between reclaim unmapping + * the page and flushing the page. If this race occurs, it potentially allows + * access to data via a stale TLB entry. Tracking all mm's that have TLB + * batching in flight would be expensive during reclaim so instead track + * whether TLB batching occurred in the past and if so then do a flush here + * if required. This will cost one additional flush per reclaim cycle paid + * by the first operation at risk such as mprotect and mumap. + * + * This must be called under the PTL so that an access to tlb_flush_batched + * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise + * via the PTL. + */ +void flush_tlb_batched_pending(struct mm_struct *mm) +{ + if (mm->tlb_flush_batched) { + flush_tlb_mm(mm); + + /* + * Do not allow the compiler to re-order the clearing of + * tlb_flush_batched before the tlb is flushed. + */ + barrier(); + mm->tlb_flush_batched = false; + } +} #else static void set_tlb_ubc_flush_pending(struct mm_struct *mm, struct page *page, bool writable) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index e20ae2d3c498..5e4199d5a388 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -292,6 +292,10 @@ static void vlan_sync_address(struct net_device *dev, if (ether_addr_equal(vlan->real_dev_addr, dev->dev_addr)) return; + /* vlan continues to inherit address of lower device */ + if (vlan_dev_inherit_address(vlandev, dev)) + goto out; + /* vlan address was different from the old address and is equal to * the new address */ if (!ether_addr_equal(vlandev->dev_addr, vlan->real_dev_addr) && @@ -304,6 +308,7 @@ static void vlan_sync_address(struct net_device *dev, !ether_addr_equal(vlandev->dev_addr, dev->dev_addr)) dev_uc_add(dev, vlandev->dev_addr); +out: ether_addr_copy(vlan->real_dev_addr, dev->dev_addr); } diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 9d010a09ab98..cc1557978066 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -109,6 +109,8 @@ int vlan_check_real_dev(struct net_device *real_dev, void vlan_setup(struct net_device *dev); int register_vlan_dev(struct net_device *dev); void unregister_vlan_dev(struct net_device *dev, struct list_head *head); +bool vlan_dev_inherit_address(struct net_device *dev, + struct net_device *real_dev); static inline u32 vlan_get_ingress_priority(struct net_device *dev, u16 vlan_tci) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index fded86508117..ca4dc9031073 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -244,6 +244,17 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result) strncpy(result, vlan_dev_priv(dev)->real_dev->name, 23); } +bool vlan_dev_inherit_address(struct net_device *dev, + struct net_device *real_dev) +{ + if (dev->addr_assign_type != NET_ADDR_STOLEN) + return false; + + ether_addr_copy(dev->dev_addr, real_dev->dev_addr); + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return true; +} + static int vlan_dev_open(struct net_device *dev) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); @@ -254,7 +265,8 @@ static int vlan_dev_open(struct net_device *dev) !(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) return -ENETDOWN; - if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr)) { + if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr) && + !vlan_dev_inherit_address(dev, real_dev)) { err = dev_uc_add(real_dev, dev->dev_addr); if (err < 0) goto out; @@ -558,8 +570,10 @@ static int vlan_dev_init(struct net_device *dev) /* ipv6 shared card related stuff */ dev->dev_id = real_dev->dev_id; - if (is_zero_ether_addr(dev->dev_addr)) - eth_hw_addr_inherit(dev, real_dev); + if (is_zero_ether_addr(dev->dev_addr)) { + ether_addr_copy(dev->dev_addr, real_dev->dev_addr); + dev->addr_assign_type = NET_ADDR_STOLEN; + } if (is_zero_ether_addr(dev->broadcast)) memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 4b175df35184..906f88550cd8 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -23,6 +23,7 @@ #include <linux/debugfs.h> #include <linux/crypto.h> #include <linux/scatterlist.h> +#include <crypto/algapi.h> #include <crypto/b128ops.h> #include <net/bluetooth/bluetooth.h> @@ -524,7 +525,7 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16], if (err) return false; - return !memcmp(bdaddr->b, hash, 3); + return !crypto_memneq(bdaddr->b, hash, 3); } int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa) @@ -577,7 +578,7 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16]) /* This is unlikely, but we need to check that * we didn't accidentially generate a debug key. */ - if (memcmp(smp->local_sk, debug_sk, 32)) + if (crypto_memneq(smp->local_sk, debug_sk, 32)) break; } smp->debug_key = false; @@ -991,7 +992,7 @@ static u8 smp_random(struct smp_chan *smp) if (ret) return SMP_UNSPECIFIED; - if (memcmp(smp->pcnf, confirm, sizeof(smp->pcnf)) != 0) { + if (crypto_memneq(smp->pcnf, confirm, sizeof(smp->pcnf))) { BT_ERR("Pairing failed (confirmation values mismatch)"); return SMP_CONFIRM_FAILED; } @@ -1491,7 +1492,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) smp->rrnd, r, cfm)) return SMP_UNSPECIFIED; - if (memcmp(smp->pcnf, cfm, 16)) + if (crypto_memneq(smp->pcnf, cfm, 16)) return SMP_CONFIRM_FAILED; smp->passkey_round++; @@ -1875,7 +1876,7 @@ static u8 sc_send_public_key(struct smp_chan *smp) /* This is unlikely, but we need to check that * we didn't accidentially generate a debug key. */ - if (memcmp(smp->local_sk, debug_sk, 32)) + if (crypto_memneq(smp->local_sk, debug_sk, 32)) break; } } @@ -2140,7 +2141,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) if (err) return SMP_UNSPECIFIED; - if (memcmp(smp->pcnf, cfm, 16)) + if (crypto_memneq(smp->pcnf, cfm, 16)) return SMP_CONFIRM_FAILED; } else { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), @@ -2621,7 +2622,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) if (err) return SMP_UNSPECIFIED; - if (memcmp(cfm.confirm_val, smp->pcnf, 16)) + if (crypto_memneq(cfm.confirm_val, smp->pcnf, 16)) return SMP_CONFIRM_FAILED; } @@ -2654,7 +2655,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) else hcon->pending_sec_level = BT_SECURITY_FIPS; - if (!memcmp(debug_pk, smp->remote_pk, 64)) + if (!crypto_memneq(debug_pk, smp->remote_pk, 64)) set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags); if (smp->method == DSP_PASSKEY) { @@ -2753,7 +2754,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) if (err) return SMP_UNSPECIFIED; - if (memcmp(check->e, e, 16)) + if (crypto_memneq(check->e, e, 16)) return SMP_DHKEY_CHECK_FAILED; if (!hcon->out) { @@ -3463,7 +3464,7 @@ static int __init test_ah(struct crypto_blkcipher *tfm_aes) if (err) return err; - if (memcmp(res, exp, 3)) + if (crypto_memneq(res, exp, 3)) return -EINVAL; return 0; @@ -3493,7 +3494,7 @@ static int __init test_c1(struct crypto_blkcipher *tfm_aes) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; @@ -3518,7 +3519,7 @@ static int __init test_s1(struct crypto_blkcipher *tfm_aes) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; @@ -3550,7 +3551,7 @@ static int __init test_f4(struct crypto_hash *tfm_cmac) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; @@ -3584,10 +3585,10 @@ static int __init test_f5(struct crypto_hash *tfm_cmac) if (err) return err; - if (memcmp(mackey, exp_mackey, 16)) + if (crypto_memneq(mackey, exp_mackey, 16)) return -EINVAL; - if (memcmp(ltk, exp_ltk, 16)) + if (crypto_memneq(ltk, exp_ltk, 16)) return -EINVAL; return 0; @@ -3620,7 +3621,7 @@ static int __init test_f6(struct crypto_hash *tfm_cmac) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; @@ -3674,7 +3675,7 @@ static int __init test_h6(struct crypto_hash *tfm_cmac) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; diff --git a/net/core/dev.c b/net/core/dev.c index 524d8b28e690..24d243084aab 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2550,9 +2550,10 @@ EXPORT_SYMBOL(skb_mac_gso_segment); static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) { if (tx_path) - return skb->ip_summed != CHECKSUM_PARTIAL; - else - return skb->ip_summed == CHECKSUM_NONE; + return skb->ip_summed != CHECKSUM_PARTIAL && + skb->ip_summed != CHECKSUM_UNNECESSARY; + + return skb->ip_summed == CHECKSUM_NONE; } /** @@ -2571,11 +2572,12 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path) { + struct sk_buff *segs; + if (unlikely(skb_needs_check(skb, tx_path))) { int err; - skb_warn_bad_offload(skb); - + /* We're going to init ->check field in TCP or UDP header */ err = skb_cow_head(skb, 0); if (err < 0) return ERR_PTR(err); @@ -2590,7 +2592,12 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, skb_reset_mac_header(skb); skb_reset_mac_len(skb); - return skb_mac_gso_segment(skb, features); + segs = skb_mac_gso_segment(skb, features); + + if (unlikely(skb_needs_check(skb, tx_path))) + skb_warn_bad_offload(skb); + + return segs; } EXPORT_SYMBOL(__skb_gso_segment); @@ -4375,6 +4382,12 @@ struct packet_offload *gro_find_complete_by_type(__be16 type) } EXPORT_SYMBOL(gro_find_complete_by_type); +static void napi_skb_free_stolen_head(struct sk_buff *skb) +{ + skb_dst_drop(skb); + kmem_cache_free(skbuff_head_cache, skb); +} + static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { switch (ret) { @@ -4388,12 +4401,10 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) break; case GRO_MERGED_FREE: - if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) { - skb_dst_drop(skb); - kmem_cache_free(skbuff_head_cache, skb); - } else { + if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) + napi_skb_free_stolen_head(skb); + else __kfree_skb(skb); - } break; case GRO_HELD: @@ -4459,10 +4470,16 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, break; case GRO_DROP: - case GRO_MERGED_FREE: napi_reuse_skb(napi, skb); break; + case GRO_MERGED_FREE: + if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) + napi_skb_free_stolen_head(skb); + else + napi_reuse_skb(napi, skb); + break; + case GRO_MERGED: break; } @@ -7052,8 +7069,8 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, } else { netdev_stats_to_stats64(storage, &dev->stats); } - storage->rx_dropped += atomic_long_read(&dev->rx_dropped); - storage->tx_dropped += atomic_long_read(&dev->tx_dropped); + storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped); + storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped); return storage; } EXPORT_SYMBOL(dev_get_stats); diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index b94b1d293506..151e047ce072 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -28,6 +28,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; + ifr.ifr_name[IFNAMSIZ-1] = 0; error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex); if (error) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2ec5324a7ff7..5b3d611d8b5f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1742,7 +1742,8 @@ static int do_setlink(const struct sk_buff *skb, struct sockaddr *sa; int len; - len = sizeof(sa_family_t) + dev->addr_len; + len = sizeof(sa_family_t) + max_t(size_t, dev->addr_len, + sizeof(*sa)); sa = kmalloc(len, GFP_KERNEL); if (!sa) { err = -ENOMEM; diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 1704948e6a12..f227f002c73d 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -1471,9 +1471,12 @@ int dccp_feat_init(struct sock *sk) * singleton values (which always leads to failure). * These settings can still (later) be overridden via sockopts. */ - if (ccid_get_builtin_ccids(&tx.val, &tx.len) || - ccid_get_builtin_ccids(&rx.val, &rx.len)) + if (ccid_get_builtin_ccids(&tx.val, &tx.len)) return -ENOBUFS; + if (ccid_get_builtin_ccids(&rx.val, &rx.len)) { + kfree(tx.val); + return -ENOBUFS; + } if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) || !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len)) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 6467bf392e1b..e217f17997a4 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -635,6 +635,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); + reqsk_put(req); return 0; drop_and_free: diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 3470ad1843bb..09a9ab65f4e1 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -376,6 +376,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); + reqsk_put(req); return 0; drop_and_free: diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 7c4c881a7187..ee94bd32d6dc 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1320,13 +1320,14 @@ static struct pernet_operations fib_net_ops = { void __init ip_fib_init(void) { - rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); - rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); - rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); + fib_trie_init(); register_pernet_subsys(&fib_net_ops); + register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); - fib_trie_init(); + rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); + rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); + rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 661bda968594..62e41d38da78 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -922,10 +922,12 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if (((length > mtu) || (skb && skb_is_gso(skb))) && + if ((skb && skb_is_gso(skb)) || + (((length + (skb ? skb->len : fragheaderlen)) > mtu) && + (skb_queue_len(queue) <= 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && - (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { + (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, maxfraglen, flags); @@ -1241,6 +1243,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, return -EINVAL; if ((size + skb->len > mtu) && + (skb_queue_len(&sk->sk_write_queue) == 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { if (skb->ip_summed != CHECKSUM_PARTIAL) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 2dc982b15df8..a2e1142145df 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -337,6 +337,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq = tcp_rsk(req); treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; + treq->txhash = net_tx_rndhash(); req->mss = mss; ireq->ir_num = ntohs(th->dest); ireq->ir_rmt_port = th->source; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c31eff75732c..48e6509426b0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2263,6 +2263,8 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_init_send_head(sk); memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); + dst_release(sk->sk_rx_dst); + sk->sk_rx_dst = NULL; tcp_saved_syn_free(tp); WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 197a3d2a6242..b6d99c308bef 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2504,8 +2504,8 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ - if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || - (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { + if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH && + (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) { tp->snd_cwnd = tp->snd_ssthresh; tp->snd_cwnd_stamp = tcp_time_stamp; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2ca323b68efd..4e88f93f71c8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3256,6 +3256,9 @@ int tcp_connect(struct sock *sk) struct sk_buff *buff; int err; + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) + return -EHOSTUNREACH; /* Routing failure or similar. */ + tcp_connect_init(sk); if (unlikely(tp->repair)) { diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index ebb34d0c5e80..1ec12a4f327e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -606,7 +606,8 @@ static void tcp_keepalive_timer (unsigned long data) goto death; } - if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) + if (!sock_flag(sk, SOCK_KEEPOPEN) || + ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT))) goto out; elapsed = keepalive_time_when(tp); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ad3d1534c524..9ee5087b9b5e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -819,7 +819,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) if (is_udplite) /* UDP-Lite */ csum = udplite_csum(skb); - else if (sk->sk_no_check_tx) { /* UDP csum disabled */ + else if (sk->sk_no_check_tx && !skb_is_gso(skb)) { /* UDP csum off */ skb->ip_summed = CHECKSUM_NONE; goto send; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 6396f1c80ae9..6dfc3daf7c21 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -231,7 +231,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (uh->check == 0) uh->check = CSUM_MANGLED_0; - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = CHECKSUM_UNNECESSARY; /* Fragment the skb. IP headers of the fragments are updated in * inet_gso_segment() diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 77a5c327cbed..2d2241006d35 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1799,17 +1799,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) { - if (ifp->flags&IFA_F_PERMANENT) { - spin_lock_bh(&ifp->lock); - addrconf_del_dad_work(ifp); - ifp->flags |= IFA_F_TENTATIVE; - if (dad_failed) - ifp->flags |= IFA_F_DADFAILED; - spin_unlock_bh(&ifp->lock); - if (dad_failed) - ipv6_ifa_notify(0, ifp); - in6_ifa_put(ifp); - } else if (ifp->flags&IFA_F_TEMPORARY) { + if (ifp->flags&IFA_F_TEMPORARY) { struct inet6_ifaddr *ifpub; spin_lock_bh(&ifp->lock); ifpub = ifp->ifpub; @@ -1822,6 +1812,16 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) spin_unlock_bh(&ifp->lock); } ipv6_del_addr(ifp); + } else if (ifp->flags&IFA_F_PERMANENT || !dad_failed) { + spin_lock_bh(&ifp->lock); + addrconf_del_dad_work(ifp); + ifp->flags |= IFA_F_TENTATIVE; + if (dad_failed) + ifp->flags |= IFA_F_DADFAILED; + spin_unlock_bh(&ifp->lock); + if (dad_failed) + ipv6_ifa_notify(0, ifp); + in6_ifa_put(ifp); } else { ipv6_del_addr(ifp); } @@ -3195,6 +3195,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct inet6_dev *idev = __in6_dev_get(dev); + struct net *net = dev_net(dev); int run_pending = 0; int err; @@ -3210,7 +3211,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, case NETDEV_CHANGEMTU: /* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */ if (dev->mtu < IPV6_MIN_MTU) { - addrconf_ifdown(dev, 1); + addrconf_ifdown(dev, dev != net->loopback_dev); break; } @@ -3323,7 +3324,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, * IPV6_MIN_MTU stop IPv6 on this interface. */ if (dev->mtu < IPV6_MIN_MTU) - addrconf_ifdown(dev, 1); + addrconf_ifdown(dev, dev != net->loopback_dev); } break; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1ac06723f0d7..f60e8caea767 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -767,10 +767,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, goto next_iter; } - if (iter->dst.dev == rt->dst.dev && - iter->rt6i_idev == rt->rt6i_idev && - ipv6_addr_equal(&iter->rt6i_gateway, - &rt->rt6i_gateway)) { + if (rt6_duplicate_nexthop(iter, rt)) { if (rt->rt6i_nsiblings) rt->rt6i_nsiblings = 0; if (!(iter->rt6i_flags & RTF_EXPIRES)) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7d339fc1057f..e22339fad10b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -647,8 +647,6 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, *prevhdr = NEXTHDR_FRAGMENT; tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); if (!tmp_hdr) { - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; } @@ -767,8 +765,6 @@ slow_path: frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + hroom + troom, GFP_ATOMIC); if (!frag) { - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; } @@ -1361,11 +1357,12 @@ emsgsize: */ cork->length += length; - if (((length > mtu) || - (skb && skb_is_gso(skb))) && + if ((skb && skb_is_gso(skb)) || + (((length + (skb ? skb->len : headersize)) > mtu) && + (skb_queue_len(queue) <= 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && - (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { + (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk))) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, exthdrlen, transhdrlen, mtu, flags, fl6); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 8b56c5240429..f9f02581c4ca 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -78,7 +78,7 @@ EXPORT_SYMBOL(ipv6_select_ident); int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { - u16 offset = sizeof(struct ipv6hdr); + unsigned int offset = sizeof(struct ipv6hdr); unsigned int packet_len = skb_tail_pointer(skb) - skb_network_header(skb); int found_rhdr = 0; @@ -86,6 +86,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) while (offset <= packet_len) { struct ipv6_opt_hdr *exthdr; + unsigned int len; switch (**nexthdr) { @@ -111,7 +112,10 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + offset); - offset += ipv6_optlen(exthdr); + len = ipv6_optlen(exthdr); + if (len + offset >= IPV6_MAXPLEN) + return -EINVAL; + offset += len; *nexthdr = &exthdr->nexthdr; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1d0595be4dcd..d3f87ceb3408 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2825,17 +2825,11 @@ static int ip6_route_info_append(struct list_head *rt6_nh_list, struct rt6_info *rt, struct fib6_config *r_cfg) { struct rt6_nh *nh; - struct rt6_info *rtnh; int err = -EEXIST; list_for_each_entry(nh, rt6_nh_list, next) { /* check if rt6_info already exists */ - rtnh = nh->rt6_info; - - if (rtnh->dst.dev == rt->dst.dev && - rtnh->rt6i_idev == rt->rt6i_idev && - ipv6_addr_equal(&rtnh->rt6i_gateway, - &rt->rt6i_gateway)) + if (rt6_duplicate_nexthop(nh->rt6_info, rt)) return err; } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 336843ca4e6b..7f3667635431 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -210,6 +210,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq->snt_synack.v64 = 0; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; + treq->txhash = net_tx_rndhash(); /* * We need to lookup the dst_entry to get the correct window size. diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 01582966ffa0..2e3c12eeca07 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -86,7 +86,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (uh->check == 0) uh->check = CSUM_MANGLED_0; - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = CHECKSUM_UNNECESSARY; /* Check if there is enough headroom to insert fragment header. */ tnl_hlen = skb_tnl_header_len(skb); diff --git a/net/key/af_key.c b/net/key/af_key.c index e67c28e614b9..2e1050ec2cf0 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -63,8 +63,13 @@ struct pfkey_sock { } u; struct sk_buff *skb; } dump; + struct mutex dump_lock; }; +static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, + xfrm_address_t *saddr, xfrm_address_t *daddr, + u16 *family); + static inline struct pfkey_sock *pfkey_sk(struct sock *sk) { return (struct pfkey_sock *)sk; @@ -139,6 +144,7 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol, { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; + struct pfkey_sock *pfk; int err; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) @@ -153,6 +159,9 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol, if (sk == NULL) goto out; + pfk = pfkey_sk(sk); + mutex_init(&pfk->dump_lock); + sock->ops = &pfkey_ops; sock_init_data(sock, sk); @@ -281,13 +290,23 @@ static int pfkey_do_dump(struct pfkey_sock *pfk) struct sadb_msg *hdr; int rc; + mutex_lock(&pfk->dump_lock); + if (!pfk->dump.dump) { + rc = 0; + goto out; + } + rc = pfk->dump.dump(pfk); - if (rc == -ENOBUFS) - return 0; + if (rc == -ENOBUFS) { + rc = 0; + goto out; + } if (pfk->dump.skb) { - if (!pfkey_can_dump(&pfk->sk)) - return 0; + if (!pfkey_can_dump(&pfk->sk)) { + rc = 0; + goto out; + } hdr = (struct sadb_msg *) pfk->dump.skb->data; hdr->sadb_msg_seq = 0; @@ -298,6 +317,9 @@ static int pfkey_do_dump(struct pfkey_sock *pfk) } pfkey_terminate_dump(pfk); + +out: + mutex_unlock(&pfk->dump_lock); return rc; } @@ -1802,19 +1824,26 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms struct xfrm_address_filter *filter = NULL; struct pfkey_sock *pfk = pfkey_sk(sk); - if (pfk->dump.dump != NULL) + mutex_lock(&pfk->dump_lock); + if (pfk->dump.dump != NULL) { + mutex_unlock(&pfk->dump_lock); return -EBUSY; + } proto = pfkey_satype2proto(hdr->sadb_msg_satype); - if (proto == 0) + if (proto == 0) { + mutex_unlock(&pfk->dump_lock); return -EINVAL; + } if (ext_hdrs[SADB_X_EXT_FILTER - 1]) { struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1]; filter = kmalloc(sizeof(*filter), GFP_KERNEL); - if (filter == NULL) + if (filter == NULL) { + mutex_unlock(&pfk->dump_lock); return -ENOMEM; + } memcpy(&filter->saddr, &xfilter->sadb_x_filter_saddr, sizeof(xfrm_address_t)); @@ -1830,6 +1859,7 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms pfk->dump.dump = pfkey_dump_sa; pfk->dump.done = pfkey_dump_sa_done; xfrm_state_walk_init(&pfk->dump.u.state, proto, filter); + mutex_unlock(&pfk->dump_lock); return pfkey_do_dump(pfk); } @@ -1922,19 +1952,14 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) /* addresses present only in tunnel mode */ if (t->mode == XFRM_MODE_TUNNEL) { - u8 *sa = (u8 *) (rq + 1); - int family, socklen; + int err; - family = pfkey_sockaddr_extract((struct sockaddr *)sa, - &t->saddr); - if (!family) - return -EINVAL; - - socklen = pfkey_sockaddr_len(family); - if (pfkey_sockaddr_extract((struct sockaddr *)(sa + socklen), - &t->id.daddr) != family) - return -EINVAL; - t->encap_family = family; + err = parse_sockaddr_pair( + (struct sockaddr *)(rq + 1), + rq->sadb_x_ipsecrequest_len - sizeof(*rq), + &t->saddr, &t->id.daddr, &t->encap_family); + if (err) + return err; } else t->encap_family = xp->family; @@ -1954,7 +1979,11 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol) if (pol->sadb_x_policy_len * 8 < sizeof(struct sadb_x_policy)) return -EINVAL; - while (len >= sizeof(struct sadb_x_ipsecrequest)) { + while (len >= sizeof(*rq)) { + if (len < rq->sadb_x_ipsecrequest_len || + rq->sadb_x_ipsecrequest_len < sizeof(*rq)) + return -EINVAL; + if ((err = parse_ipsecrequest(xp, rq)) < 0) return err; len -= rq->sadb_x_ipsecrequest_len; @@ -2417,7 +2446,6 @@ out: return err; } -#ifdef CONFIG_NET_KEY_MIGRATE static int pfkey_sockaddr_pair_size(sa_family_t family) { return PFKEY_ALIGN8(pfkey_sockaddr_len(family) * 2); @@ -2429,7 +2457,7 @@ static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, { int af, socklen; - if (ext_len < pfkey_sockaddr_pair_size(sa->sa_family)) + if (ext_len < 2 || ext_len < pfkey_sockaddr_pair_size(sa->sa_family)) return -EINVAL; af = pfkey_sockaddr_extract(sa, saddr); @@ -2445,6 +2473,7 @@ static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, return 0; } +#ifdef CONFIG_NET_KEY_MIGRATE static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, struct xfrm_migrate *m) { @@ -2452,13 +2481,14 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, struct sadb_x_ipsecrequest *rq2; int mode; - if (len <= sizeof(struct sadb_x_ipsecrequest) || - len < rq1->sadb_x_ipsecrequest_len) + if (len < sizeof(*rq1) || + len < rq1->sadb_x_ipsecrequest_len || + rq1->sadb_x_ipsecrequest_len < sizeof(*rq1)) return -EINVAL; /* old endoints */ err = parse_sockaddr_pair((struct sockaddr *)(rq1 + 1), - rq1->sadb_x_ipsecrequest_len, + rq1->sadb_x_ipsecrequest_len - sizeof(*rq1), &m->old_saddr, &m->old_daddr, &m->old_family); if (err) @@ -2467,13 +2497,14 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, rq2 = (struct sadb_x_ipsecrequest *)((u8 *)rq1 + rq1->sadb_x_ipsecrequest_len); len -= rq1->sadb_x_ipsecrequest_len; - if (len <= sizeof(struct sadb_x_ipsecrequest) || - len < rq2->sadb_x_ipsecrequest_len) + if (len <= sizeof(*rq2) || + len < rq2->sadb_x_ipsecrequest_len || + rq2->sadb_x_ipsecrequest_len < sizeof(*rq2)) return -EINVAL; /* new endpoints */ err = parse_sockaddr_pair((struct sockaddr *)(rq2 + 1), - rq2->sadb_x_ipsecrequest_len, + rq2->sadb_x_ipsecrequest_len - sizeof(*rq2), &m->new_saddr, &m->new_daddr, &m->new_family); if (err) @@ -2688,14 +2719,18 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb { struct pfkey_sock *pfk = pfkey_sk(sk); - if (pfk->dump.dump != NULL) + mutex_lock(&pfk->dump_lock); + if (pfk->dump.dump != NULL) { + mutex_unlock(&pfk->dump_lock); return -EBUSY; + } pfk->dump.msg_version = hdr->sadb_msg_version; pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sp; pfk->dump.done = pfkey_dump_sp_done; xfrm_policy_walk_init(&pfk->dump.u.policy, XFRM_POLICY_TYPE_MAIN); + mutex_unlock(&pfk->dump_lock); return pfkey_do_dump(pfk); } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4da560005b0e..dd1649caa2b2 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -845,10 +845,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, { unsigned int verdict = NF_DROP; - if (IP_VS_FWD_METHOD(cp) != 0) { - pr_err("shouldn't reach here, because the box is on the " - "half connection in the tun/dr module.\n"); - } + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto ignore_cp; /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { @@ -882,6 +880,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 0); + +ignore_cp: verdict = NF_ACCEPT; out: @@ -1242,8 +1242,11 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in */ cp = pp->conn_out_get(ipvs, af, skb, &iph); - if (likely(cp)) + if (likely(cp)) { + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto ignore_cp; return handle_response(af, skb, pd, cp, &iph, hooknum); + } if (sysctl_nat_icmp_send(ipvs) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || @@ -1285,9 +1288,15 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in } } } + +out: IP_VS_DBG_PKT(12, af, pp, skb, iph.off, "ip_vs_out: packet continues traversal as normal"); return NF_ACCEPT; + +ignore_cp: + __ip_vs_conn_put(cp); + goto out; } /* diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index f95aba44e649..fbe4e1ecce6a 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1717,18 +1717,9 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) } MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n", par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par)); - if (sk != NULL) { - set_sk_callback_lock = true; - read_lock_bh(&sk->sk_callback_lock); - MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n", - par->hooknum, sk, sk->sk_socket, - sk->sk_socket ? sk->sk_socket->file : (void *)-1LL); - filp = sk->sk_socket ? sk->sk_socket->file : NULL; - MT_DEBUG("qtaguid[%d]: filp...uid=%u\n", - par->hooknum, filp ? from_kuid(&init_user_ns, filp->f_cred->fsuid) : -1); - } - if (sk == NULL || sk->sk_socket == NULL) { + + if (sk == NULL) { /* * Here, the qtaguid_find_sk() using connection tracking * couldn't find the owner, so for now we just count them @@ -1741,9 +1732,7 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) */ if (!(info->match & XT_QTAGUID_UID)) account_for_uid(skb, sk, 0, par); - MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n", - par->hooknum, - sk ? sk->sk_socket : NULL); + MT_DEBUG("qtaguid[%d]: leaving (sk=NULL)\n", par->hooknum); res = (info->match ^ info->invert) == 0; atomic64_inc(&qtu_events.match_no_sk); goto put_sock_ret_res; @@ -1751,16 +1740,7 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) res = false; goto put_sock_ret_res; } - filp = sk->sk_socket->file; - if (filp == NULL) { - MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum); - account_for_uid(skb, sk, 0, par); - res = ((info->match ^ info->invert) & - (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0; - atomic64_inc(&qtu_events.match_no_sk_file); - goto put_sock_ret_res; - } - sock_uid = filp->f_cred->fsuid; + sock_uid = sk->sk_uid; /* * TODO: unhack how to force just accounting. * For now we only do iface stats when the uid-owner is not requested @@ -1778,8 +1758,8 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min); kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max); - if ((uid_gte(filp->f_cred->fsuid, uid_min) && - uid_lte(filp->f_cred->fsuid, uid_max)) ^ + if ((uid_gte(sk->sk_uid, uid_min) && + uid_lte(sk->sk_uid, uid_max)) ^ !(info->invert & XT_QTAGUID_UID)) { MT_DEBUG("qtaguid[%d]: leaving uid not matching\n", par->hooknum); @@ -1790,7 +1770,19 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) if (info->match & XT_QTAGUID_GID) { kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min); kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max); - + set_sk_callback_lock = true; + read_lock_bh(&sk->sk_callback_lock); + MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n", + par->hooknum, sk, sk->sk_socket, + sk->sk_socket ? sk->sk_socket->file : (void *)-1LL); + filp = sk->sk_socket ? sk->sk_socket->file : NULL; + if (!filp) { + res = ((info->match ^ info->invert) & XT_QTAGUID_GID) == 0; + atomic64_inc(&qtu_events.match_no_sk_gid); + goto put_sock_ret_res; + } + MT_DEBUG("qtaguid[%d]: filp...uid=%u\n", + par->hooknum, filp ? from_kuid(&init_user_ns, filp->f_cred->fsuid) : -1); if ((gid_gte(filp->f_cred->fsgid, gid_min) && gid_lte(filp->f_cred->fsgid, gid_max)) ^ !(info->invert & XT_QTAGUID_GID)) { @@ -1962,7 +1954,7 @@ static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v) "match_found_sk_in_ct=%llu " "match_found_no_sk_in_ct=%llu " "match_no_sk=%llu " - "match_no_sk_file=%llu\n", + "match_no_sk_gid=%llu\n", (u64)atomic64_read(&qtu_events.sockets_tagged), (u64)atomic64_read(&qtu_events.sockets_untagged), (u64)atomic64_read(&qtu_events.counter_set_changes), @@ -1974,7 +1966,7 @@ static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v) (u64)atomic64_read(&qtu_events.match_found_sk_in_ct), (u64)atomic64_read(&qtu_events.match_found_no_sk_in_ct), (u64)atomic64_read(&qtu_events.match_no_sk), - (u64)atomic64_read(&qtu_events.match_no_sk_file)); + (u64)atomic64_read(&qtu_events.match_no_sk_gid)); /* Count the following as part of the last item_index. No need * to lock the sock_tag_list here since it is already locked when diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h index 8178fbdfb036..c7052707a6a4 100644 --- a/net/netfilter/xt_qtaguid_internal.h +++ b/net/netfilter/xt_qtaguid_internal.h @@ -289,10 +289,10 @@ struct qtaguid_event_counts { */ atomic64_t match_no_sk; /* - * The file ptr in the sk_socket wasn't there. + * The file ptr in the sk_socket wasn't there and we couldn't get GID. * This might happen for traffic while the socket is being closed. */ - atomic64_t match_no_sk_file; + atomic64_t match_no_sk_gid; }; /* Track the set active_set for the given tag. */ diff --git a/net/nfc/core.c b/net/nfc/core.c index 1fe3d3b362c0..c5a2c7e733b3 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -969,6 +969,8 @@ static void nfc_release(struct device *d) kfree(se); } + ida_simple_remove(&nfc_index_ida, dev->idx); + kfree(dev); } @@ -1043,6 +1045,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, int tx_headroom, int tx_tailroom) { struct nfc_dev *dev; + int rc; if (!ops->start_poll || !ops->stop_poll || !ops->activate_target || !ops->deactivate_target || !ops->im_transceive) @@ -1055,6 +1058,15 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, if (!dev) return NULL; + rc = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL); + if (rc < 0) + goto err_free_dev; + dev->idx = rc; + + dev->dev.class = &nfc_class; + dev_set_name(&dev->dev, "nfc%d", dev->idx); + device_initialize(&dev->dev); + dev->ops = ops; dev->supported_protocols = supported_protocols; dev->tx_headroom = tx_headroom; @@ -1077,6 +1089,11 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, } return dev; + +err_free_dev: + kfree(dev); + + return ERR_PTR(rc); } EXPORT_SYMBOL(nfc_allocate_device); @@ -1091,14 +1108,6 @@ int nfc_register_device(struct nfc_dev *dev) pr_debug("dev_name=%s\n", dev_name(&dev->dev)); - dev->idx = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL); - if (dev->idx < 0) - return dev->idx; - - dev->dev.class = &nfc_class; - dev_set_name(&dev->dev, "nfc%d", dev->idx); - device_initialize(&dev->dev); - mutex_lock(&nfc_devlist_mutex); nfc_devlist_generation++; rc = device_add(&dev->dev); @@ -1136,12 +1145,10 @@ EXPORT_SYMBOL(nfc_register_device); */ void nfc_unregister_device(struct nfc_dev *dev) { - int rc, id; + int rc; pr_debug("dev_name=%s\n", dev_name(&dev->dev)); - id = dev->idx; - if (dev->rfkill) { rfkill_unregister(dev->rfkill); rfkill_destroy(dev->rfkill); @@ -1166,8 +1173,6 @@ void nfc_unregister_device(struct nfc_dev *dev) nfc_devlist_generation++; device_del(&dev->dev); mutex_unlock(&nfc_devlist_mutex); - - ida_simple_remove(&nfc_index_ida, id); } EXPORT_SYMBOL(nfc_unregister_device); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index ecf0a0196f18..9c222a106c7f 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -76,7 +76,8 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) struct sockaddr_nfc_llcp llcp_addr; int len, ret = 0; - if (!addr || addr->sa_family != AF_NFC) + if (!addr || alen < offsetofend(struct sockaddr, sa_family) || + addr->sa_family != AF_NFC) return -EINVAL; pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family); @@ -150,7 +151,8 @@ static int llcp_raw_sock_bind(struct socket *sock, struct sockaddr *addr, struct sockaddr_nfc_llcp llcp_addr; int len, ret = 0; - if (!addr || addr->sa_family != AF_NFC) + if (!addr || alen < offsetofend(struct sockaddr, sa_family) || + addr->sa_family != AF_NFC) return -EINVAL; pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family); @@ -655,8 +657,7 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, pr_debug("sock %p sk %p flags 0x%x\n", sock, sk, flags); - if (!addr || len < sizeof(struct sockaddr_nfc) || - addr->sa_family != AF_NFC) + if (!addr || len < sizeof(*addr) || addr->sa_family != AF_NFC) return -EINVAL; if (addr->service_name_len == 0 && addr->dsap == 0) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 10c99a578421..67583ad7f610 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1084,8 +1084,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, return ndev; free_nfc: - kfree(ndev->nfc_dev); - + nfc_free_device(ndev->nfc_dev); free_nci: kfree(ndev); return NULL; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index f58c1fba1026..12dfb457275d 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -873,7 +873,9 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info) u32 device_idx, target_idx, protocol; int rc; - if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_TARGET_INDEX] || + !info->attrs[NFC_ATTR_PROTOCOLS]) return -EINVAL; device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index ad58d2a6284e..6a2507f24b0f 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -577,8 +577,8 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, nla_for_each_nested(a, attr, rem) { int type = nla_type(a); - int maxlen = ovs_ct_attr_lens[type].maxlen; - int minlen = ovs_ct_attr_lens[type].minlen; + int maxlen; + int minlen; if (type > OVS_CT_ATTR_MAX) { OVS_NLERR(log, @@ -586,6 +586,9 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, type, OVS_CT_ATTR_MAX); return -EINVAL; } + + maxlen = ovs_ct_attr_lens[type].maxlen; + minlen = ovs_ct_attr_lens[type].minlen; if (nla_len(a) < minlen || nla_len(a) > maxlen) { OVS_NLERR(log, "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)", diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f8d6a0ca9c03..148ec130d99d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3622,14 +3622,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; if (val > INT_MAX) return -EINVAL; - po->tp_reserve = val; - return 0; + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_reserve = val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_LOSS: { @@ -4225,7 +4230,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, register_prot_hook(sk); } spin_unlock(&po->bind_lock); - if (closing && (po->tp_version > TPACKET_V2)) { + if (pg_vec && (po->tp_version > TPACKET_V2)) { /* Because we don't support block-based V3 on tx-ring */ if (!tx_ring) prb_shutdown_retire_blk_timer(po, rb_queue); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 0936a4a32b47..e353e3255206 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -78,7 +78,7 @@ int rds_tcp_accept_one(struct socket *sock) struct inet_sock *inet; struct rds_tcp_connection *rs_tcp; - ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family, + ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type, sock->sk->sk_protocol, &new_sock); if (ret) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index d05869646515..0915d448ba23 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -42,8 +42,8 @@ static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int return PTR_ERR(target); t->u.kernel.target = target; + memset(&par, 0, sizeof(par)); par.table = table; - par.entryinfo = NULL; par.target = target; par.targinfo = t->data; par.hook_mask = hook; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 95b560f0b253..6d340cd6e2a7 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1004,6 +1004,9 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, return sch; } + /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */ + if (ops->destroy) + ops->destroy(sch); err_out3: dev_put(dev); kfree((char *) sch - sch->padded); diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 13d6f83ec491..45d4b2f22f62 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -636,7 +636,9 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) q->hhf_arrays[i] = hhf_zalloc(HHF_ARRAYS_LEN * sizeof(u32)); if (!q->hhf_arrays[i]) { - hhf_destroy(sch); + /* Note: hhf_destroy() will be called + * by our caller. + */ return -ENOMEM; } } @@ -647,7 +649,9 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) q->hhf_valid_bits[i] = hhf_zalloc(HHF_ARRAYS_LEN / BITS_PER_BYTE); if (!q->hhf_valid_bits[i]) { - hhf_destroy(sch); + /* Note: hhf_destroy() will be called + * by our caller. + */ return -ENOMEM; } } diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 3e82f047caaf..d9c84328e7eb 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -52,7 +52,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) /* pre-allocate qdiscs, attachment can't fail */ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), GFP_KERNEL); - if (priv->qdiscs == NULL) + if (!priv->qdiscs) return -ENOMEM; for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { @@ -60,18 +60,14 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops, TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(ntx + 1))); - if (qdisc == NULL) - goto err; + if (!qdisc) + return -ENOMEM; priv->qdiscs[ntx] = qdisc; qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } sch->flags |= TCQ_F_MQROOT; return 0; - -err: - mq_destroy(sch); - return -ENOMEM; } static void mq_attach(struct Qdisc *sch) diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index ad70ecf57ce7..66bccc5ff4ea 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -117,20 +117,17 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) /* pre-allocate qdisc, attachment can't fail */ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), GFP_KERNEL); - if (priv->qdiscs == NULL) { - err = -ENOMEM; - goto err; - } + if (!priv->qdiscs) + return -ENOMEM; for (i = 0; i < dev->num_tx_queues; i++) { dev_queue = netdev_get_tx_queue(dev, i); qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops, TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(i + 1))); - if (qdisc == NULL) { - err = -ENOMEM; - goto err; - } + if (!qdisc) + return -ENOMEM; + priv->qdiscs[i] = qdisc; qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } @@ -143,7 +140,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) priv->hw_owned = 1; err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc); if (err) - goto err; + return err; } else { netdev_set_num_tc(dev, qopt->num_tc); for (i = 0; i < qopt->num_tc; i++) @@ -157,10 +154,6 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) sch->flags |= TCQ_F_MQROOT; return 0; - -err: - mqprio_destroy(sch); - return err; } static void mqprio_attach(struct Qdisc *sch) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 498f0a2cb47f..4431e2833e45 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -742,9 +742,10 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor); q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows); if (!q->ht || !q->slots) { - sfq_destroy(sch); + /* Note: sfq_destroy() will be called by our caller */ return -ENOMEM; } + for (i = 0; i < q->divisor; i++) q->ht[i] = SFQ_EMPTY_SLOT; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9d0953e5734f..de10e3c0e2a4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -302,8 +302,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, [NL80211_ATTR_PID] = { .type = NLA_U32 }, [NL80211_ATTR_4ADDR] = { .type = NLA_U8 }, - [NL80211_ATTR_PMKID] = { .type = NLA_BINARY, - .len = WLAN_PMKID_LEN }, + [NL80211_ATTR_PMKID] = { .len = WLAN_PMKID_LEN }, [NL80211_ATTR_DURATION] = { .type = NLA_U32 }, [NL80211_ATTR_COOKIE] = { .type = NLA_U64 }, [NL80211_ATTR_TX_RATES] = { .type = NLA_NESTED }, @@ -359,6 +358,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 }, [NL80211_ATTR_P2P_OPPPS] = { .type = NLA_U8 }, + [NL80211_ATTR_LOCAL_MESH_POWER_MODE] = {. type = NLA_U32 }, [NL80211_ATTR_ACL_POLICY] = {. type = NLA_U32 }, [NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 }, @@ -5705,6 +5705,10 @@ static int validate_scan_freqs(struct nlattr *freqs) struct nlattr *attr1, *attr2; int n_channels = 0, tmp1, tmp2; + nla_for_each_nested(attr1, freqs, tmp1) + if (nla_len(attr1) != sizeof(u32)) + return 0; + nla_for_each_nested(attr1, freqs, tmp1) { n_channels++; /* diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8a0fdd870395..77055a362041 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1216,7 +1216,7 @@ static inline int policy_to_flow_dir(int dir) } static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, - const struct flowi *fl) + const struct flowi *fl, u16 family) { struct xfrm_policy *pol; struct net *net = sock_net(sk); @@ -1225,8 +1225,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, read_lock_bh(&net->xfrm.xfrm_policy_lock); pol = rcu_dereference(sk->sk_policy[dir]); if (pol != NULL) { - bool match = xfrm_selector_match(&pol->selector, fl, - sk->sk_family); + bool match = xfrm_selector_match(&pol->selector, fl, family); int err = 0; if (match) { @@ -2174,7 +2173,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, sk = sk_const_to_full_sk(sk); if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { num_pols = 1; - pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); + pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family); err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); if (err < 0) @@ -2453,7 +2452,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = NULL; sk = sk_to_full_sk(sk); if (sk && sk->sk_policy[dir]) { - pol = xfrm_sk_policy_lookup(sk, dir, &fl); + pol = xfrm_sk_policy_lookup(sk, dir, &fl, family); if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index e70147742cce..fabdf0e5c28c 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3253,7 +3253,7 @@ sub process { $fixedline =~ s/\s*=\s*$/ = {/; fix_insert_line($fixlinenr, $fixedline); $fixedline = $line; - $fixedline =~ s/^(.\s*){\s*/$1/; + $fixedline =~ s/^(.\s*)\{\s*/$1/; fix_insert_line($fixlinenr, $fixedline); } } @@ -3603,7 +3603,7 @@ sub process { my $fixedline = rtrim($prevrawline) . " {"; fix_insert_line($fixlinenr, $fixedline); $fixedline = $rawline; - $fixedline =~ s/^(.\s*){\s*/$1\t/; + $fixedline =~ s/^(.\s*)\{\s*/$1\t/; if ($fixedline !~ /^\+\s*$/) { fix_insert_line($fixlinenr, $fixedline); } @@ -4092,7 +4092,7 @@ sub process { if (ERROR("SPACING", "space required before the open brace '{'\n" . $herecurr) && $fix) { - $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\))){/$1 {/; + $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\)))\{/$1 {/; } } diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 46a34039ecdc..5cab24f52825 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2233,6 +2233,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x8691, "ASUS ROG Ranger VIII", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x104d, 0x9047, "Sony Vaio TT", ALC889_FIXUP_VAIO_TT), SND_PCI_QUIRK(0x104d, 0x905a, "Sony Vaio Z", ALC882_FIXUP_NO_PRIMARY_HP), + SND_PCI_QUIRK(0x104d, 0x9060, "Sony Vaio VPCL14M1R", ALC882_FIXUP_NO_PRIMARY_HP), SND_PCI_QUIRK(0x104d, 0x9043, "Sony Vaio VGC-LN51JGB", ALC882_FIXUP_NO_PRIMARY_HP), SND_PCI_QUIRK(0x104d, 0x9044, "Sony VAIO AiO", ALC882_FIXUP_NO_PRIMARY_HP), diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c index c1b87c5800b1..b3fddba4c084 100644 --- a/sound/soc/codecs/nau8825.c +++ b/sound/soc/codecs/nau8825.c @@ -936,7 +936,8 @@ static void nau8825_fll_apply(struct nau8825 *nau8825, NAU8825_FLL_INTEGER_MASK, fll_param->fll_int); /* FLL pre-scaler */ regmap_update_bits(nau8825->regmap, NAU8825_REG_FLL4, - NAU8825_FLL_REF_DIV_MASK, fll_param->clk_ref_div); + NAU8825_FLL_REF_DIV_MASK, + fll_param->clk_ref_div << NAU8825_FLL_REF_DIV_SFT); /* select divided VCO input */ regmap_update_bits(nau8825->regmap, NAU8825_REG_FLL5, NAU8825_FLL_FILTER_SW_MASK, 0x0000); diff --git a/sound/soc/codecs/nau8825.h b/sound/soc/codecs/nau8825.h index dff8edb83bfd..a0b220726a63 100644 --- a/sound/soc/codecs/nau8825.h +++ b/sound/soc/codecs/nau8825.h @@ -114,7 +114,8 @@ #define NAU8825_FLL_INTEGER_MASK (0x3ff << 0) /* FLL4 (0x07) */ -#define NAU8825_FLL_REF_DIV_MASK (0x3 << 10) +#define NAU8825_FLL_REF_DIV_SFT 10 +#define NAU8825_FLL_REF_DIV_MASK (0x3 << NAU8825_FLL_REF_DIV_SFT) /* FLL5 (0x08) */ #define NAU8825_FLL_FILTER_SW_MASK (0x1 << 14) diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index a564759845f9..5a3f544bb3a8 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -126,6 +126,16 @@ static const struct reg_default aic3x_reg[] = { { 108, 0x00 }, { 109, 0x00 }, }; +static bool aic3x_volatile_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case AIC3X_RESET: + return true; + default: + return false; + } +} + static const struct regmap_config aic3x_regmap = { .reg_bits = 8, .val_bits = 8, @@ -133,6 +143,9 @@ static const struct regmap_config aic3x_regmap = { .max_register = DAC_ICC_ADJ, .reg_defaults = aic3x_reg, .num_reg_defaults = ARRAY_SIZE(aic3x_reg), + + .volatile_reg = aic3x_volatile_reg, + .cache_type = REGCACHE_RBTREE, }; diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index bb82bb966000..c1addf49c4f2 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -68,7 +68,8 @@ out: static int soc_compr_open_fe(struct snd_compr_stream *cstream) { struct snd_soc_pcm_runtime *fe = cstream->private_data; - struct snd_pcm_substream *fe_substream = fe->pcm->streams[0].substream; + struct snd_pcm_substream *fe_substream = + fe->pcm->streams[cstream->direction].substream; struct snd_soc_platform *platform = fe->platform; struct snd_soc_dpcm *dpcm; struct snd_soc_dapm_widget_list *list; @@ -412,7 +413,8 @@ static int soc_compr_set_params_fe(struct snd_compr_stream *cstream, struct snd_compr_params *params) { struct snd_soc_pcm_runtime *fe = cstream->private_data; - struct snd_pcm_substream *fe_substream = fe->pcm->streams[0].substream; + struct snd_pcm_substream *fe_substream = + fe->pcm->streams[cstream->direction].substream; struct snd_soc_platform *platform = fe->platform; int ret = 0, stream; diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 65b936e251ea..977066ba1769 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -181,6 +181,10 @@ int dpcm_dapm_stream_event(struct snd_soc_pcm_runtime *fe, int dir, dev_dbg(be->dev, "ASoC: BE %s event %d dir %d\n", be->dai_link->name, event, dir); + if ((event == SND_SOC_DAPM_STREAM_STOP) && + (be->dpcm[dir].users >= 1)) + continue; + snd_soc_dapm_stream_event(be, dir, event); } @@ -2073,9 +2077,11 @@ static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd) break; case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: - case SNDRV_PCM_TRIGGER_PAUSE_PUSH: fe->dpcm[stream].state = SND_SOC_DPCM_STATE_STOP; break; + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + fe->dpcm[stream].state = SND_SOC_DPCM_STATE_PAUSED; + break; } out: diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index be1f511e4f54..ae2981460cd8 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -384,6 +384,9 @@ static void snd_complete_urb(struct urb *urb) if (unlikely(atomic_read(&ep->chip->shutdown))) goto exit_clear; + if (unlikely(!test_bit(EP_FLAG_RUNNING, &ep->flags))) + goto exit_clear; + if (usb_pipeout(ep->pipe)) { retire_outbound_urb(ep, ctx); /* can be stopped during retire callback */ diff --git a/tools/lib/lockdep/uinclude/linux/lockdep.h b/tools/lib/lockdep/uinclude/linux/lockdep.h index c808c7d02d21..e69118b2077e 100644 --- a/tools/lib/lockdep/uinclude/linux/lockdep.h +++ b/tools/lib/lockdep/uinclude/linux/lockdep.h @@ -8,7 +8,7 @@ #include <linux/utsname.h> #include <linux/compiler.h> -#define MAX_LOCK_DEPTH 2000UL +#define MAX_LOCK_DEPTH 255UL #define asmlinkage #define __visible diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c index f1ce60065258..ec30c2fcbac0 100644 --- a/tools/lib/traceevent/plugin_sched_switch.c +++ b/tools/lib/traceevent/plugin_sched_switch.c @@ -111,7 +111,7 @@ static int sched_switch_handler(struct trace_seq *s, trace_seq_printf(s, "%lld ", val); if (pevent_get_field_val(s, event, "prev_prio", record, &val, 0) == 0) - trace_seq_printf(s, "[%lld] ", val); + trace_seq_printf(s, "[%d] ", (int) val); if (pevent_get_field_val(s, event, "prev_state", record, &val, 0) == 0) write_state(s, val); @@ -129,7 +129,7 @@ static int sched_switch_handler(struct trace_seq *s, trace_seq_printf(s, "%lld", val); if (pevent_get_field_val(s, event, "next_prio", record, &val, 0) == 0) - trace_seq_printf(s, " [%lld]", val); + trace_seq_printf(s, " [%d]", (int) val); return 0; } diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 74c265e0ffa0..fb1c9ddc3478 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -566,9 +566,9 @@ install-tests: all install-gtk $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' -install-bin: install-tools install-tests +install-bin: install-tools install-tests install-traceevent-plugins -install: install-bin try-install-man install-traceevent-plugins +install: install-bin try-install-man install-python_ext: $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)' diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index e9703c0829f1..07b5f5951b25 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -702,7 +702,7 @@ static void __ui_browser__line_arrow_down(struct ui_browser *browser, ui_browser__gotorc(browser, row, column + 1); SLsmg_draw_hline(2); - if (row++ == 0) + if (++row == 0) goto out; } else row = 0; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 933a509a90f8..eeeae0629ad3 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -64,6 +64,25 @@ enum intel_pt_pkt_state { INTEL_PT_STATE_FUP_NO_TIP, }; +static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state) +{ + switch (pkt_state) { + case INTEL_PT_STATE_NO_PSB: + case INTEL_PT_STATE_NO_IP: + case INTEL_PT_STATE_ERR_RESYNC: + case INTEL_PT_STATE_IN_SYNC: + case INTEL_PT_STATE_TNT: + return true; + case INTEL_PT_STATE_TIP: + case INTEL_PT_STATE_TIP_PGD: + case INTEL_PT_STATE_FUP: + case INTEL_PT_STATE_FUP_NO_TIP: + return false; + default: + return true; + }; +} + #ifdef INTEL_PT_STRICT #define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB #define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB @@ -91,6 +110,7 @@ struct intel_pt_decoder { bool have_tma; bool have_cyc; bool fixup_last_mtc; + bool have_last_ip; uint64_t pos; uint64_t last_ip; uint64_t ip; @@ -98,6 +118,7 @@ struct intel_pt_decoder { uint64_t timestamp; uint64_t tsc_timestamp; uint64_t ref_timestamp; + uint64_t sample_timestamp; uint64_t ret_addr; uint64_t ctc_timestamp; uint64_t ctc_delta; @@ -125,8 +146,6 @@ struct intel_pt_decoder { bool have_calc_cyc_to_tsc; int exec_mode; unsigned int insn_bytes; - uint64_t sign_bit; - uint64_t sign_bits; uint64_t period; enum intel_pt_period_type period_type; uint64_t tot_insn_cnt; @@ -140,6 +159,7 @@ struct intel_pt_decoder { unsigned int fup_tx_flags; unsigned int tx_flags; uint64_t timestamp_insn_cnt; + uint64_t sample_insn_cnt; uint64_t stuck_ip; int no_progress; int stuck_ip_prd; @@ -193,9 +213,6 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->data = params->data; decoder->return_compression = params->return_compression; - decoder->sign_bit = (uint64_t)1 << 47; - decoder->sign_bits = ~(((uint64_t)1 << 48) - 1); - decoder->period = params->period; decoder->period_type = params->period_type; @@ -364,21 +381,30 @@ int intel_pt__strerror(int code, char *buf, size_t buflen) return 0; } -static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder, - const struct intel_pt_pkt *packet, +static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet, uint64_t last_ip) { uint64_t ip; switch (packet->count) { - case 2: + case 1: ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) | packet->payload; break; - case 4: + case 2: ip = (last_ip & (uint64_t)0xffffffff00000000ULL) | packet->payload; break; + case 3: + ip = packet->payload; + /* Sign-extend 6-byte ip */ + if (ip & (uint64_t)0x800000000000ULL) + ip |= (uint64_t)0xffff000000000000ULL; + break; + case 4: + ip = (last_ip & (uint64_t)0xffff000000000000ULL) | + packet->payload; + break; case 6: ip = packet->payload; break; @@ -386,16 +412,13 @@ static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder, return 0; } - if (ip & decoder->sign_bit) - return ip | decoder->sign_bits; - return ip; } static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder) { - decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet, - decoder->last_ip); + decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip); + decoder->have_last_ip = true; } static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder) @@ -896,6 +919,7 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder, decoder->tot_insn_cnt += insn_cnt; decoder->timestamp_insn_cnt += insn_cnt; + decoder->sample_insn_cnt += insn_cnt; decoder->period_insn_cnt += insn_cnt; if (err) { @@ -1414,7 +1438,8 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) case INTEL_PT_FUP: decoder->pge = true; - intel_pt_set_last_ip(decoder); + if (decoder->packet.count) + intel_pt_set_last_ip(decoder); break; case INTEL_PT_MODE_TSX: @@ -1618,6 +1643,8 @@ next: break; case INTEL_PT_PSB: + decoder->last_ip = 0; + decoder->have_last_ip = true; intel_pt_clear_stack(&decoder->stack); err = intel_pt_walk_psbend(decoder); if (err == -EAGAIN) @@ -1696,6 +1723,13 @@ next: } } +static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder) +{ + return decoder->packet.count && + (decoder->have_last_ip || decoder->packet.count == 3 || + decoder->packet.count == 6); +} + /* Walk PSB+ packets to get in sync. */ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) { @@ -1717,8 +1751,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) case INTEL_PT_FUP: decoder->pge = true; - if (decoder->last_ip || decoder->packet.count == 6 || - decoder->packet.count == 0) { + if (intel_pt_have_ip(decoder)) { uint64_t current_ip = decoder->ip; intel_pt_set_ip(decoder); @@ -1810,24 +1843,17 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_TIP_PGE: case INTEL_PT_TIP: decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; - if (decoder->last_ip || decoder->packet.count == 6 || - decoder->packet.count == 0) + if (intel_pt_have_ip(decoder)) intel_pt_set_ip(decoder); if (decoder->ip) return 0; break; case INTEL_PT_FUP: - if (decoder->overflow) { - if (decoder->last_ip || - decoder->packet.count == 6 || - decoder->packet.count == 0) - intel_pt_set_ip(decoder); - if (decoder->ip) - return 0; - } - if (decoder->packet.count) - intel_pt_set_last_ip(decoder); + if (intel_pt_have_ip(decoder)) + intel_pt_set_ip(decoder); + if (decoder->ip) + return 0; break; case INTEL_PT_MTC: @@ -1876,6 +1902,9 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) break; case INTEL_PT_PSB: + decoder->last_ip = 0; + decoder->have_last_ip = true; + intel_pt_clear_stack(&decoder->stack); err = intel_pt_walk_psb(decoder); if (err) return err; @@ -1901,6 +1930,8 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) { int err; + decoder->set_fup_tx_flags = false; + intel_pt_log("Scanning for full IP\n"); err = intel_pt_walk_to_ip(decoder); if (err) @@ -2009,6 +2040,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) decoder->pge = false; decoder->continuous_period = false; + decoder->have_last_ip = false; decoder->last_ip = 0; decoder->ip = 0; intel_pt_clear_stack(&decoder->stack); @@ -2017,6 +2049,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) if (err) return err; + decoder->have_last_ip = true; decoder->pkt_state = INTEL_PT_STATE_NO_IP; err = intel_pt_walk_psb(decoder); @@ -2035,7 +2068,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder) { - uint64_t est = decoder->timestamp_insn_cnt << 1; + uint64_t est = decoder->sample_insn_cnt << 1; if (!decoder->cbr || !decoder->max_non_turbo_ratio) goto out; @@ -2043,7 +2076,7 @@ static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder) est *= decoder->max_non_turbo_ratio; est /= decoder->cbr; out: - return decoder->timestamp + est; + return decoder->sample_timestamp + est; } const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) @@ -2059,7 +2092,9 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) err = intel_pt_sync(decoder); break; case INTEL_PT_STATE_NO_IP: + decoder->have_last_ip = false; decoder->last_ip = 0; + decoder->ip = 0; /* Fall through */ case INTEL_PT_STATE_ERR_RESYNC: err = intel_pt_sync_ip(decoder); @@ -2096,15 +2131,24 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) } } while (err == -ENOLINK); - decoder->state.err = err ? intel_pt_ext_err(err) : 0; - decoder->state.timestamp = decoder->timestamp; + if (err) { + decoder->state.err = intel_pt_ext_err(err); + decoder->state.from_ip = decoder->ip; + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + } else { + decoder->state.err = 0; + if (intel_pt_sample_time(decoder->pkt_state)) { + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + } + } + + decoder->state.timestamp = decoder->sample_timestamp; decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); decoder->state.cr3 = decoder->cr3; decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; - if (err) - decoder->state.from_ip = decoder->ip; - return &decoder->state; } diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index 9b2fce25162b..7528ae4f7e28 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -293,36 +293,46 @@ static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte, const unsigned char *buf, size_t len, struct intel_pt_pkt *packet) { - switch (byte >> 5) { + int ip_len; + + packet->count = byte >> 5; + + switch (packet->count) { case 0: - packet->count = 0; + ip_len = 0; break; case 1: if (len < 3) return INTEL_PT_NEED_MORE_BYTES; - packet->count = 2; + ip_len = 2; packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1)); break; case 2: if (len < 5) return INTEL_PT_NEED_MORE_BYTES; - packet->count = 4; + ip_len = 4; packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1)); break; case 3: - case 6: + case 4: if (len < 7) return INTEL_PT_NEED_MORE_BYTES; - packet->count = 6; + ip_len = 6; memcpy_le64(&packet->payload, buf + 1, 6); break; + case 6: + if (len < 9) + return INTEL_PT_NEED_MORE_BYTES; + ip_len = 8; + packet->payload = le64_to_cpu(*(uint64_t *)(buf + 1)); + break; default: return INTEL_PT_BAD_PACKET; } packet->type = type; - return packet->count + 1; + return ip_len + 1; } static int intel_pt_get_mode(const unsigned char *buf, size_t len, diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 27ae382feb2d..7c97ecaeae48 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -488,6 +488,12 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size) break; } else { int n = namesz + descsz; + + if (n > (int)sizeof(bf)) { + n = sizeof(bf); + pr_debug("%s: truncating reading of build id in sysfs file %s: n_namesz=%u, n_descsz=%u.\n", + __func__, filename, nhdr.n_namesz, nhdr.n_descsz); + } if (read(fd, bf, n) != n) break; } diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c index 10a21a958aaf..763f37fecfb8 100644 --- a/tools/testing/selftests/capabilities/test_execve.c +++ b/tools/testing/selftests/capabilities/test_execve.c @@ -138,9 +138,6 @@ static void chdir_to_tmpfs(void) if (chdir(cwd) != 0) err(1, "chdir to private tmpfs"); - - if (umount2(".", MNT_DETACH) != 0) - err(1, "detach private tmpfs"); } static void copy_fromat_to(int fromfd, const char *fromname, const char *toname) @@ -248,7 +245,7 @@ static int do_tests(int uid, const char *our_path) err(1, "chown"); if (chmod("validate_cap_sgidnonroot", S_ISGID | 0710) != 0) err(1, "chmod"); -} + } capng_get_caps_process(); @@ -384,7 +381,7 @@ static int do_tests(int uid, const char *our_path) } else { printf("[RUN]\tNon-root +ia, sgidnonroot => i\n"); exec_other_validate_cap("./validate_cap_sgidnonroot", - false, false, true, false); + false, false, true, false); if (fork_wait()) { printf("[RUN]\tNon-root +ia, sgidroot => i\n"); diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 1dd087da6f31..111e09c3f4bf 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -47,6 +47,22 @@ static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep) return vfio_group; } +static bool kvm_vfio_external_group_match_file(struct vfio_group *group, + struct file *filep) +{ + bool ret, (*fn)(struct vfio_group *, struct file *); + + fn = symbol_get(vfio_external_group_match_file); + if (!fn) + return false; + + ret = fn(group, filep); + + symbol_put(vfio_external_group_match_file); + + return ret; +} + static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group) { void (*fn)(struct vfio_group *); @@ -171,18 +187,13 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) if (!f.file) return -EBADF; - vfio_group = kvm_vfio_group_get_external_user(f.file); - fdput(f); - - if (IS_ERR(vfio_group)) - return PTR_ERR(vfio_group); - ret = -ENOENT; mutex_lock(&kv->lock); list_for_each_entry(kvg, &kv->group_list, node) { - if (kvg->vfio_group != vfio_group) + if (!kvm_vfio_external_group_match_file(kvg->vfio_group, + f.file)) continue; list_del(&kvg->node); @@ -196,7 +207,7 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) mutex_unlock(&kv->lock); - kvm_vfio_group_put_external_user(vfio_group); + fdput(f); kvm_vfio_update_coherency(dev); |