summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/arm/armv7m_systick.txt26
-rw-r--r--Documentation/devicetree/bindings/timer/nxp,lpc3220-timer.txt26
-rw-r--r--Documentation/devicetree/bindings/timer/st,stm32-timer.txt22
-rw-r--r--Kbuild34
-rw-r--r--arch/s390/include/asm/timex.h5
-rw-r--r--arch/s390/kernel/debug.c11
-rw-r--r--arch/s390/kernel/time.c6
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_rapl.c5
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c5
-rw-r--r--drivers/clocksource/Kconfig17
-rw-r--r--drivers/clocksource/Makefile3
-rw-r--r--drivers/clocksource/armv7m_systick.c79
-rw-r--r--drivers/clocksource/asm9260_timer.c2
-rw-r--r--drivers/clocksource/exynos_mct.c22
-rw-r--r--drivers/clocksource/qcom-timer.c59
-rw-r--r--drivers/clocksource/time-lpc32xx.c272
-rw-r--r--drivers/clocksource/timer-integrator-ap.c2
-rw-r--r--drivers/clocksource/timer-stm32.c184
-rw-r--r--drivers/clocksource/timer-sun5i.c2
-rw-r--r--drivers/power/reset/ltc2952-poweroff.c1
-rw-r--r--fs/dcache.c16
-rw-r--r--include/linux/alarmtimer.h4
-rw-r--r--include/linux/clockchips.h37
-rw-r--r--include/linux/clocksource.h1
-rw-r--r--include/linux/hrtimer.h167
-rw-r--r--include/linux/interrupt.h9
-rw-r--r--include/linux/jiffies.h130
-rw-r--r--include/linux/perf_event.h4
-rw-r--r--include/linux/rcupdate.h6
-rw-r--r--include/linux/rcutree.h2
-rw-r--r--include/linux/sched.h6
-rw-r--r--include/linux/sched/sysctl.h12
-rw-r--r--include/linux/seqlock.h47
-rw-r--r--include/linux/time64.h2
-rw-r--r--include/linux/timekeeper_internal.h19
-rw-r--r--include/linux/timekeeping.h2
-rw-r--r--include/linux/timer.h63
-rw-r--r--include/linux/timerqueue.h8
-rw-r--r--include/trace/events/timer.h12
-rw-r--r--kernel/events/core.c117
-rw-r--r--kernel/futex.c5
-rw-r--r--kernel/locking/rtmutex.c5
-rw-r--r--kernel/rcu/tree_plugin.h18
-rw-r--r--kernel/sched/core.c44
-rw-r--r--kernel/sched/deadline.c12
-rw-r--r--kernel/sched/debug.c2
-rw-r--r--kernel/sched/fair.c76
-rw-r--r--kernel/sched/rt.c22
-rw-r--r--kernel/sched/sched.h7
-rw-r--r--kernel/sysctl.c18
-rw-r--r--kernel/time/Makefile17
-rw-r--r--kernel/time/alarmtimer.c17
-rw-r--r--kernel/time/clockevents.c67
-rw-r--r--kernel/time/clocksource.c24
-rw-r--r--kernel/time/hrtimer.c699
-rw-r--r--kernel/time/ntp.c61
-rw-r--r--kernel/time/ntp_internal.h1
-rw-r--r--kernel/time/posix-timers.c17
-rw-r--r--kernel/time/tick-broadcast-hrtimer.c18
-rw-r--r--kernel/time/tick-broadcast.c97
-rw-r--r--kernel/time/tick-common.c18
-rw-r--r--kernel/time/tick-internal.h31
-rw-r--r--kernel/time/tick-oneshot.c22
-rw-r--r--kernel/time/tick-sched.c320
-rw-r--r--kernel/time/tick-sched.h2
-rw-r--r--kernel/time/time.c78
-rw-r--r--kernel/time/timeconst.bc3
-rw-r--r--kernel/time/timekeeping.c153
-rw-r--r--kernel/time/timekeeping.h11
-rw-r--r--kernel/time/timer.c362
-rw-r--r--kernel/time/timer_list.c51
-rw-r--r--kernel/time/timer_stats.c10
-rw-r--r--lib/timerqueue.c10
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/sched/sch_api.c5
-rw-r--r--sound/core/hrtimer.c9
-rw-r--r--sound/drivers/pcsp/pcsp.c17
-rw-r--r--tools/testing/selftests/timers/leap-a-day.c77
78 files changed, 2292 insertions, 1563 deletions
diff --git a/Documentation/devicetree/bindings/arm/armv7m_systick.txt b/Documentation/devicetree/bindings/arm/armv7m_systick.txt
new file mode 100644
index 000000000000..7cf4a24601eb
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/armv7m_systick.txt
@@ -0,0 +1,26 @@
+* ARMv7M System Timer
+
+ARMv7-M includes a system timer, known as SysTick. Current driver only
+implements the clocksource feature.
+
+Required properties:
+- compatible : Should be "arm,armv7m-systick"
+- reg : The address range of the timer
+
+Required clocking property, have to be one of:
+- clocks : The input clock of the timer
+- clock-frequency : The rate in HZ in input of the ARM SysTick
+
+Examples:
+
+systick: timer@e000e010 {
+ compatible = "arm,armv7m-systick";
+ reg = <0xe000e010 0x10>;
+ clocks = <&clk_systick>;
+};
+
+systick: timer@e000e010 {
+ compatible = "arm,armv7m-systick";
+ reg = <0xe000e010 0x10>;
+ clock-frequency = <90000000>;
+};
diff --git a/Documentation/devicetree/bindings/timer/nxp,lpc3220-timer.txt b/Documentation/devicetree/bindings/timer/nxp,lpc3220-timer.txt
new file mode 100644
index 000000000000..51b05a0e70d1
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/nxp,lpc3220-timer.txt
@@ -0,0 +1,26 @@
+* NXP LPC3220 timer
+
+The NXP LPC3220 timer is used on a wide range of NXP SoCs. This
+includes LPC32xx, LPC178x, LPC18xx and LPC43xx parts.
+
+Required properties:
+- compatible:
+ Should be "nxp,lpc3220-timer".
+- reg:
+ Address and length of the register set.
+- interrupts:
+ Reference to the timer interrupt
+- clocks:
+ Should contain a reference to timer clock.
+- clock-names:
+ Should contain "timerclk".
+
+Example:
+
+timer1: timer@40085000 {
+ compatible = "nxp,lpc3220-timer";
+ reg = <0x40085000 0x1000>;
+ interrupts = <13>;
+ clocks = <&ccu1 CLK_CPU_TIMER1>;
+ clock-names = "timerclk";
+};
diff --git a/Documentation/devicetree/bindings/timer/st,stm32-timer.txt b/Documentation/devicetree/bindings/timer/st,stm32-timer.txt
new file mode 100644
index 000000000000..8ef28e70d6e8
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/st,stm32-timer.txt
@@ -0,0 +1,22 @@
+. STMicroelectronics STM32 timer
+
+The STM32 MCUs family has several general-purpose 16 and 32 bits timers.
+
+Required properties:
+- compatible : Should be "st,stm32-timer"
+- reg : Address and length of the register set
+- clocks : Reference on the timer input clock
+- interrupts : Reference to the timer interrupt
+
+Optional properties:
+- resets: Reference to a reset controller asserting the timer
+
+Example:
+
+timer5: timer@40000c00 {
+ compatible = "st,stm32-timer";
+ reg = <0x40000c00 0x400>;
+ interrupts = <50>;
+ resets = <&rrc 259>;
+ clocks = <&clk_pmtr1>;
+};
diff --git a/Kbuild b/Kbuild
index 6f0d82a9245d..df99a5f53beb 100644
--- a/Kbuild
+++ b/Kbuild
@@ -2,8 +2,9 @@
# Kbuild for top-level directory of the kernel
# This file takes care of the following:
# 1) Generate bounds.h
-# 2) Generate asm-offsets.h (may need bounds.h)
-# 3) Check for missing system calls
+# 2) Generate timeconst.h
+# 3) Generate asm-offsets.h (may need bounds.h and timeconst.h)
+# 4) Check for missing system calls
# Default sed regexp - multiline due to syntax constraints
define sed-y
@@ -47,7 +48,26 @@ $(obj)/$(bounds-file): kernel/bounds.s FORCE
$(call filechk,offsets,__LINUX_BOUNDS_H__)
#####
-# 2) Generate asm-offsets.h
+# 2) Generate timeconst.h
+
+timeconst-file := include/generated/timeconst.h
+
+#always += $(timeconst-file)
+targets += $(timeconst-file)
+
+quiet_cmd_gentimeconst = GEN $@
+define cmd_gentimeconst
+ (echo $(CONFIG_HZ) | bc -q $< ) > $@
+endef
+define filechk_gentimeconst
+ (echo $(CONFIG_HZ) | bc -q $< )
+endef
+
+$(obj)/$(timeconst-file): kernel/time/timeconst.bc FORCE
+ $(call filechk,gentimeconst)
+
+#####
+# 3) Generate asm-offsets.h
#
offsets-file := include/generated/asm-offsets.h
@@ -57,7 +77,7 @@ targets += arch/$(SRCARCH)/kernel/asm-offsets.s
# We use internal kbuild rules to avoid the "is up to date" message from make
arch/$(SRCARCH)/kernel/asm-offsets.s: arch/$(SRCARCH)/kernel/asm-offsets.c \
- $(obj)/$(bounds-file) FORCE
+ $(obj)/$(timeconst-file) $(obj)/$(bounds-file) FORCE
$(Q)mkdir -p $(dir $@)
$(call if_changed_dep,cc_s_c)
@@ -65,7 +85,7 @@ $(obj)/$(offsets-file): arch/$(SRCARCH)/kernel/asm-offsets.s FORCE
$(call filechk,offsets,__ASM_OFFSETS_H__)
#####
-# 3) Check for missing system calls
+# 4) Check for missing system calls
#
always += missing-syscalls
@@ -77,5 +97,5 @@ quiet_cmd_syscalls = CALL $<
missing-syscalls: scripts/checksyscalls.sh $(offsets-file) FORCE
$(call cmd,syscalls)
-# Keep these two files during make clean
-no-clean-files := $(bounds-file) $(offsets-file)
+# Keep these three files during make clean
+no-clean-files := $(bounds-file) $(offsets-file) $(timeconst-file)
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 98eb2a579223..dcb6312a0b91 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -10,6 +10,7 @@
#define _ASM_S390_TIMEX_H
#include <asm/lowcore.h>
+#include <linux/time64.h>
/* The value of the TOD clock for 1.1.1970. */
#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL
@@ -108,10 +109,10 @@ int get_sync_clock(unsigned long long *clock);
void init_cpu_timer(void);
unsigned long long monotonic_clock(void);
-void tod_to_timeval(__u64, struct timespec *);
+void tod_to_timeval(__u64 todval, struct timespec64 *xt);
static inline
-void stck_to_timespec(unsigned long long stck, struct timespec *ts)
+void stck_to_timespec64(unsigned long long stck, struct timespec64 *ts)
{
tod_to_timeval(stck - TOD_UNIX_EPOCH, ts);
}
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index c1f21aca76e7..6fca0e46464e 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -1457,23 +1457,24 @@ int
debug_dflt_header_fn(debug_info_t * id, struct debug_view *view,
int area, debug_entry_t * entry, char *out_buf)
{
- struct timespec time_spec;
+ struct timespec64 time_spec;
char *except_str;
unsigned long caller;
int rc = 0;
unsigned int level;
level = entry->id.fields.level;
- stck_to_timespec(entry->id.stck, &time_spec);
+ stck_to_timespec64(entry->id.stck, &time_spec);
if (entry->id.fields.exception)
except_str = "*";
else
except_str = "-";
caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN;
- rc += sprintf(out_buf, "%02i %011lu:%06lu %1u %1s %02i %p ",
- area, time_spec.tv_sec, time_spec.tv_nsec / 1000, level,
- except_str, entry->id.fields.cpuid, (void *) caller);
+ rc += sprintf(out_buf, "%02i %011lld:%06lu %1u %1s %02i %p ",
+ area, (long long)time_spec.tv_sec,
+ time_spec.tv_nsec / 1000, level, except_str,
+ entry->id.fields.cpuid, (void *)caller);
return rc;
}
EXPORT_SYMBOL(debug_dflt_header_fn);
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 170ddd2018b3..9e733d965e08 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -76,7 +76,7 @@ unsigned long long monotonic_clock(void)
}
EXPORT_SYMBOL(monotonic_clock);
-void tod_to_timeval(__u64 todval, struct timespec *xt)
+void tod_to_timeval(__u64 todval, struct timespec64 *xt)
{
unsigned long long sec;
@@ -181,12 +181,12 @@ static void timing_alert_interrupt(struct ext_code ext_code,
static void etr_reset(void);
static void stp_reset(void);
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
{
tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts);
}
-void read_boot_clock(struct timespec *ts)
+void read_boot_clock64(struct timespec64 *ts)
{
tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts);
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 358c54ad20d4..5cbd4e64feb5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -204,9 +204,8 @@ again:
static void rapl_start_hrtimer(struct rapl_pmu *pmu)
{
- __hrtimer_start_range_ns(&pmu->hrtimer,
- pmu->timer_interval, 0,
- HRTIMER_MODE_REL_PINNED, 0);
+ hrtimer_start(&pmu->hrtimer, pmu->timer_interval,
+ HRTIMER_MODE_REL_PINNED);
}
static void rapl_stop_hrtimer(struct rapl_pmu *pmu)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 7c1de1610178..21b5e38c921b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -233,9 +233,8 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
- __hrtimer_start_range_ns(&box->hrtimer,
- ns_to_ktime(box->hrtimer_duration), 0,
- HRTIMER_MODE_REL_PINNED, 0);
+ hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
+ HRTIMER_MODE_REL_PINNED);
}
void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 51d7865fdddb..32164ba3d36a 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -106,6 +106,16 @@ config CLKSRC_EFM32
Support to use the timers of EFM32 SoCs as clock source and clock
event device.
+config CLKSRC_LPC32XX
+ bool
+ select CLKSRC_MMIO
+ select CLKSRC_OF
+
+config CLKSRC_STM32
+ bool "Clocksource for STM32 SoCs" if !ARCH_STM32
+ depends on OF && ARM && (ARCH_STM32 || COMPILE_TEST)
+ select CLKSRC_MMIO
+
config ARM_ARCH_TIMER
bool
select CLKSRC_OF if OF
@@ -139,6 +149,13 @@ config CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK
help
Use ARM global timer clock source as sched_clock
+config ARMV7M_SYSTICK
+ bool
+ select CLKSRC_OF if OF
+ select CLKSRC_MMIO
+ help
+ This options enables support for the ARMv7M system timer unit
+
config ATMEL_PIT
select CLKSRC_OF if OF
def_bool SOC_AT91SAM9 || SOC_SAMA5
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 5b85f6adb258..1831a588b988 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -36,7 +36,9 @@ obj-$(CONFIG_ARCH_NSPIRE) += zevio-timer.o
obj-$(CONFIG_ARCH_BCM_MOBILE) += bcm_kona_timer.o
obj-$(CONFIG_CADENCE_TTC_TIMER) += cadence_ttc_timer.o
obj-$(CONFIG_CLKSRC_EFM32) += time-efm32.o
+obj-$(CONFIG_CLKSRC_STM32) += timer-stm32.o
obj-$(CONFIG_CLKSRC_EXYNOS_MCT) += exynos_mct.o
+obj-$(CONFIG_CLKSRC_LPC32XX) += time-lpc32xx.o
obj-$(CONFIG_CLKSRC_SAMSUNG_PWM) += samsung_pwm_timer.o
obj-$(CONFIG_FSL_FTM_TIMER) += fsl_ftm_timer.o
obj-$(CONFIG_VF_PIT_TIMER) += vf_pit_timer.o
@@ -45,6 +47,7 @@ obj-$(CONFIG_MTK_TIMER) += mtk_timer.o
obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o
obj-$(CONFIG_ARM_GLOBAL_TIMER) += arm_global_timer.o
+obj-$(CONFIG_ARMV7M_SYSTICK) += armv7m_systick.o
obj-$(CONFIG_CLKSRC_METAG_GENERIC) += metag_generic.o
obj-$(CONFIG_ARCH_HAS_TICK_BROADCAST) += dummy_timer.o
obj-$(CONFIG_ARCH_KEYSTONE) += timer-keystone.o
diff --git a/drivers/clocksource/armv7m_systick.c b/drivers/clocksource/armv7m_systick.c
new file mode 100644
index 000000000000..addfd2c64f54
--- /dev/null
+++ b/drivers/clocksource/armv7m_systick.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) Maxime Coquelin 2015
+ * Author: Maxime Coquelin <mcoquelin.stm32@gmail.com>
+ * License terms: GNU General Public License (GPL), version 2
+ */
+
+#include <linux/kernel.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/clk.h>
+#include <linux/bitops.h>
+
+#define SYST_CSR 0x00
+#define SYST_RVR 0x04
+#define SYST_CVR 0x08
+#define SYST_CALIB 0x0c
+
+#define SYST_CSR_ENABLE BIT(0)
+
+#define SYSTICK_LOAD_RELOAD_MASK 0x00FFFFFF
+
+static void __init system_timer_of_register(struct device_node *np)
+{
+ struct clk *clk = NULL;
+ void __iomem *base;
+ u32 rate;
+ int ret;
+
+ base = of_iomap(np, 0);
+ if (!base) {
+ pr_warn("system-timer: invalid base address\n");
+ return;
+ }
+
+ ret = of_property_read_u32(np, "clock-frequency", &rate);
+ if (ret) {
+ clk = of_clk_get(np, 0);
+ if (IS_ERR(clk))
+ goto out_unmap;
+
+ ret = clk_prepare_enable(clk);
+ if (ret)
+ goto out_clk_put;
+
+ rate = clk_get_rate(clk);
+ if (!rate)
+ goto out_clk_disable;
+ }
+
+ writel_relaxed(SYSTICK_LOAD_RELOAD_MASK, base + SYST_RVR);
+ writel_relaxed(SYST_CSR_ENABLE, base + SYST_CSR);
+
+ ret = clocksource_mmio_init(base + SYST_CVR, "arm_system_timer", rate,
+ 200, 24, clocksource_mmio_readl_down);
+ if (ret) {
+ pr_err("failed to init clocksource (%d)\n", ret);
+ if (clk)
+ goto out_clk_disable;
+ else
+ goto out_unmap;
+ }
+
+ pr_info("ARM System timer initialized as clocksource\n");
+
+ return;
+
+out_clk_disable:
+ clk_disable_unprepare(clk);
+out_clk_put:
+ clk_put(clk);
+out_unmap:
+ iounmap(base);
+ pr_warn("ARM System timer register failed (%d)\n", ret);
+}
+
+CLOCKSOURCE_OF_DECLARE(arm_systick, "arm,armv7m-systick",
+ system_timer_of_register);
diff --git a/drivers/clocksource/asm9260_timer.c b/drivers/clocksource/asm9260_timer.c
index 2c9c993727c8..4c2ba59897e8 100644
--- a/drivers/clocksource/asm9260_timer.c
+++ b/drivers/clocksource/asm9260_timer.c
@@ -178,7 +178,7 @@ static void __init asm9260_timer_init(struct device_node *np)
unsigned long rate;
priv.base = of_io_request_and_map(np, 0, np->name);
- if (!priv.base)
+ if (IS_ERR(priv.base))
panic("%s: unable to map resource", np->name);
clk = of_clk_get(np, 0);
diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
index 83564c9cfdbe..935b05936dbd 100644
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c
@@ -209,7 +209,7 @@ static void exynos4_frc_resume(struct clocksource *cs)
exynos4_mct_frc_start();
}
-struct clocksource mct_frc = {
+static struct clocksource mct_frc = {
.name = "mct-frc",
.rating = 400,
.read = exynos4_frc_read,
@@ -413,7 +413,7 @@ static inline void exynos4_tick_set_mode(enum clock_event_mode mode,
}
}
-static int exynos4_mct_tick_clear(struct mct_clock_event_device *mevt)
+static void exynos4_mct_tick_clear(struct mct_clock_event_device *mevt)
{
struct clock_event_device *evt = &mevt->evt;
@@ -426,12 +426,8 @@ static int exynos4_mct_tick_clear(struct mct_clock_event_device *mevt)
exynos4_mct_tick_stop(mevt);
/* Clear the MCT tick interrupt */
- if (readl_relaxed(reg_base + mevt->base + MCT_L_INT_CSTAT_OFFSET) & 1) {
+ if (readl_relaxed(reg_base + mevt->base + MCT_L_INT_CSTAT_OFFSET) & 1)
exynos4_mct_write(0x1, mevt->base + MCT_L_INT_CSTAT_OFFSET);
- return 1;
- } else {
- return 0;
- }
}
static irqreturn_t exynos4_mct_tick_isr(int irq, void *dev_id)
@@ -564,18 +560,6 @@ out_irq:
free_percpu_irq(mct_irqs[MCT_L0_IRQ], &percpu_mct_tick);
}
-void __init mct_init(void __iomem *base, int irq_g0, int irq_l0, int irq_l1)
-{
- mct_irqs[MCT_G0_IRQ] = irq_g0;
- mct_irqs[MCT_L0_IRQ] = irq_l0;
- mct_irqs[MCT_L1_IRQ] = irq_l1;
- mct_int_type = MCT_INT_SPI;
-
- exynos4_timer_resources(NULL, base);
- exynos4_clocksource_init();
- exynos4_clockevent_init();
-}
-
static void __init mct_init_dt(struct device_node *np, unsigned int int_type)
{
u32 nr_irqs, i;
diff --git a/drivers/clocksource/qcom-timer.c b/drivers/clocksource/qcom-timer.c
index 098c542e5c53..cba2d015564c 100644
--- a/drivers/clocksource/qcom-timer.c
+++ b/drivers/clocksource/qcom-timer.c
@@ -40,8 +40,6 @@
#define GPT_HZ 32768
-#define MSM_DGT_SHIFT 5
-
static void __iomem *event_base;
static void __iomem *sts_base;
@@ -232,7 +230,6 @@ err:
register_current_timer_delay(&msm_delay_timer);
}
-#ifdef CONFIG_ARCH_QCOM
static void __init msm_dt_timer_init(struct device_node *np)
{
u32 freq;
@@ -285,59 +282,3 @@ static void __init msm_dt_timer_init(struct device_node *np)
}
CLOCKSOURCE_OF_DECLARE(kpss_timer, "qcom,kpss-timer", msm_dt_timer_init);
CLOCKSOURCE_OF_DECLARE(scss_timer, "qcom,scss-timer", msm_dt_timer_init);
-#else
-
-static int __init msm_timer_map(phys_addr_t addr, u32 event, u32 source,
- u32 sts)
-{
- void __iomem *base;
-
- base = ioremap(addr, SZ_256);
- if (!base) {
- pr_err("Failed to map timer base\n");
- return -ENOMEM;
- }
- event_base = base + event;
- source_base = base + source;
- if (sts)
- sts_base = base + sts;
-
- return 0;
-}
-
-static notrace cycle_t msm_read_timer_count_shift(struct clocksource *cs)
-{
- /*
- * Shift timer count down by a constant due to unreliable lower bits
- * on some targets.
- */
- return msm_read_timer_count(cs) >> MSM_DGT_SHIFT;
-}
-
-void __init msm7x01_timer_init(void)
-{
- struct clocksource *cs = &msm_clocksource;
-
- if (msm_timer_map(0xc0100000, 0x0, 0x10, 0x0))
- return;
- cs->read = msm_read_timer_count_shift;
- cs->mask = CLOCKSOURCE_MASK((32 - MSM_DGT_SHIFT));
- /* 600 KHz */
- msm_timer_init(19200000 >> MSM_DGT_SHIFT, 32 - MSM_DGT_SHIFT, 7,
- false);
-}
-
-void __init msm7x30_timer_init(void)
-{
- if (msm_timer_map(0xc0100000, 0x4, 0x24, 0x80))
- return;
- msm_timer_init(24576000 / 4, 32, 1, false);
-}
-
-void __init qsd8x50_timer_init(void)
-{
- if (msm_timer_map(0xAC100000, 0x0, 0x10, 0x34))
- return;
- msm_timer_init(19200000 / 4, 32, 7, false);
-}
-#endif
diff --git a/drivers/clocksource/time-lpc32xx.c b/drivers/clocksource/time-lpc32xx.c
new file mode 100644
index 000000000000..a1c06a2bc77c
--- /dev/null
+++ b/drivers/clocksource/time-lpc32xx.c
@@ -0,0 +1,272 @@
+/*
+ * Clocksource driver for NXP LPC32xx/18xx/43xx timer
+ *
+ * Copyright (C) 2015 Joachim Eastwood <manabian@gmail.com>
+ *
+ * Based on:
+ * time-efm32 Copyright (C) 2013 Pengutronix
+ * mach-lpc32xx/timer.c Copyright (C) 2009 - 2010 NXP Semiconductors
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ *
+ */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include <linux/clk.h>
+#include <linux/clockchips.h>
+#include <linux/clocksource.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/sched_clock.h>
+
+#define LPC32XX_TIMER_IR 0x000
+#define LPC32XX_TIMER_IR_MR0INT BIT(0)
+#define LPC32XX_TIMER_TCR 0x004
+#define LPC32XX_TIMER_TCR_CEN BIT(0)
+#define LPC32XX_TIMER_TCR_CRST BIT(1)
+#define LPC32XX_TIMER_TC 0x008
+#define LPC32XX_TIMER_PR 0x00c
+#define LPC32XX_TIMER_MCR 0x014
+#define LPC32XX_TIMER_MCR_MR0I BIT(0)
+#define LPC32XX_TIMER_MCR_MR0R BIT(1)
+#define LPC32XX_TIMER_MCR_MR0S BIT(2)
+#define LPC32XX_TIMER_MR0 0x018
+#define LPC32XX_TIMER_CTCR 0x070
+
+struct lpc32xx_clock_event_ddata {
+ struct clock_event_device evtdev;
+ void __iomem *base;
+};
+
+/* Needed for the sched clock */
+static void __iomem *clocksource_timer_counter;
+
+static u64 notrace lpc32xx_read_sched_clock(void)
+{
+ return readl(clocksource_timer_counter);
+}
+
+static int lpc32xx_clkevt_next_event(unsigned long delta,
+ struct clock_event_device *evtdev)
+{
+ struct lpc32xx_clock_event_ddata *ddata =
+ container_of(evtdev, struct lpc32xx_clock_event_ddata, evtdev);
+
+ /*
+ * Place timer in reset and program the delta in the prescale
+ * register (PR). When the prescale counter matches the value
+ * in PR the counter register is incremented and the compare
+ * match will trigger. After setup the timer is released from
+ * reset and enabled.
+ */
+ writel_relaxed(LPC32XX_TIMER_TCR_CRST, ddata->base + LPC32XX_TIMER_TCR);
+ writel_relaxed(delta, ddata->base + LPC32XX_TIMER_PR);
+ writel_relaxed(LPC32XX_TIMER_TCR_CEN, ddata->base + LPC32XX_TIMER_TCR);
+
+ return 0;
+}
+
+static int lpc32xx_clkevt_shutdown(struct clock_event_device *evtdev)
+{
+ struct lpc32xx_clock_event_ddata *ddata =
+ container_of(evtdev, struct lpc32xx_clock_event_ddata, evtdev);
+
+ /* Disable the timer */
+ writel_relaxed(0, ddata->base + LPC32XX_TIMER_TCR);
+
+ return 0;
+}
+
+static int lpc32xx_clkevt_oneshot(struct clock_event_device *evtdev)
+{
+ /*
+ * When using oneshot, we must also disable the timer
+ * to wait for the first call to set_next_event().
+ */
+ return lpc32xx_clkevt_shutdown(evtdev);
+}
+
+static irqreturn_t lpc32xx_clock_event_handler(int irq, void *dev_id)
+{
+ struct lpc32xx_clock_event_ddata *ddata = dev_id;
+
+ /* Clear match on channel 0 */
+ writel_relaxed(LPC32XX_TIMER_IR_MR0INT, ddata->base + LPC32XX_TIMER_IR);
+
+ ddata->evtdev.event_handler(&ddata->evtdev);
+
+ return IRQ_HANDLED;
+}
+
+static struct lpc32xx_clock_event_ddata lpc32xx_clk_event_ddata = {
+ .evtdev = {
+ .name = "lpc3220 clockevent",
+ .features = CLOCK_EVT_FEAT_ONESHOT,
+ .rating = 300,
+ .set_next_event = lpc32xx_clkevt_next_event,
+ .set_state_shutdown = lpc32xx_clkevt_shutdown,
+ .set_state_oneshot = lpc32xx_clkevt_oneshot,
+ },
+};
+
+static int __init lpc32xx_clocksource_init(struct device_node *np)
+{
+ void __iomem *base;
+ unsigned long rate;
+ struct clk *clk;
+ int ret;
+
+ clk = of_clk_get_by_name(np, "timerclk");
+ if (IS_ERR(clk)) {
+ pr_err("clock get failed (%lu)\n", PTR_ERR(clk));
+ return PTR_ERR(clk);
+ }
+
+ ret = clk_prepare_enable(clk);
+ if (ret) {
+ pr_err("clock enable failed (%d)\n", ret);
+ goto err_clk_enable;
+ }
+
+ base = of_iomap(np, 0);
+ if (!base) {
+ pr_err("unable to map registers\n");
+ ret = -EADDRNOTAVAIL;
+ goto err_iomap;
+ }
+
+ /*
+ * Disable and reset timer then set it to free running timer
+ * mode (CTCR) with no prescaler (PR) or match operations (MCR).
+ * After setup the timer is released from reset and enabled.
+ */
+ writel_relaxed(LPC32XX_TIMER_TCR_CRST, base + LPC32XX_TIMER_TCR);
+ writel_relaxed(0, base + LPC32XX_TIMER_PR);
+ writel_relaxed(0, base + LPC32XX_TIMER_MCR);
+ writel_relaxed(0, base + LPC32XX_TIMER_CTCR);
+ writel_relaxed(LPC32XX_TIMER_TCR_CEN, base + LPC32XX_TIMER_TCR);
+
+ rate = clk_get_rate(clk);
+ ret = clocksource_mmio_init(base + LPC32XX_TIMER_TC, "lpc3220 timer",
+ rate, 300, 32, clocksource_mmio_readl_up);
+ if (ret) {
+ pr_err("failed to init clocksource (%d)\n", ret);
+ goto err_clocksource_init;
+ }
+
+ clocksource_timer_counter = base + LPC32XX_TIMER_TC;
+ sched_clock_register(lpc32xx_read_sched_clock, 32, rate);
+
+ return 0;
+
+err_clocksource_init:
+ iounmap(base);
+err_iomap:
+ clk_disable_unprepare(clk);
+err_clk_enable:
+ clk_put(clk);
+ return ret;
+}
+
+static int __init lpc32xx_clockevent_init(struct device_node *np)
+{
+ void __iomem *base;
+ unsigned long rate;
+ struct clk *clk;
+ int ret, irq;
+
+ clk = of_clk_get_by_name(np, "timerclk");
+ if (IS_ERR(clk)) {
+ pr_err("clock get failed (%lu)\n", PTR_ERR(clk));
+ return PTR_ERR(clk);
+ }
+
+ ret = clk_prepare_enable(clk);
+ if (ret) {
+ pr_err("clock enable failed (%d)\n", ret);
+ goto err_clk_enable;
+ }
+
+ base = of_iomap(np, 0);
+ if (!base) {
+ pr_err("unable to map registers\n");
+ ret = -EADDRNOTAVAIL;
+ goto err_iomap;
+ }
+
+ irq = irq_of_parse_and_map(np, 0);
+ if (!irq) {
+ pr_err("get irq failed\n");
+ ret = -ENOENT;
+ goto err_irq;
+ }
+
+ /*
+ * Disable timer and clear any pending interrupt (IR) on match
+ * channel 0 (MR0). Configure a compare match value of 1 on MR0
+ * and enable interrupt, reset on match and stop on match (MCR).
+ */
+ writel_relaxed(0, base + LPC32XX_TIMER_TCR);
+ writel_relaxed(0, base + LPC32XX_TIMER_CTCR);
+ writel_relaxed(LPC32XX_TIMER_IR_MR0INT, base + LPC32XX_TIMER_IR);
+ writel_relaxed(1, base + LPC32XX_TIMER_MR0);
+ writel_relaxed(LPC32XX_TIMER_MCR_MR0I | LPC32XX_TIMER_MCR_MR0R |
+ LPC32XX_TIMER_MCR_MR0S, base + LPC32XX_TIMER_MCR);
+
+ rate = clk_get_rate(clk);
+ lpc32xx_clk_event_ddata.base = base;
+ clockevents_config_and_register(&lpc32xx_clk_event_ddata.evtdev,
+ rate, 1, -1);
+
+ ret = request_irq(irq, lpc32xx_clock_event_handler,
+ IRQF_TIMER | IRQF_IRQPOLL, "lpc3220 clockevent",
+ &lpc32xx_clk_event_ddata);
+ if (ret) {
+ pr_err("request irq failed\n");
+ goto err_irq;
+ }
+
+ return 0;
+
+err_irq:
+ iounmap(base);
+err_iomap:
+ clk_disable_unprepare(clk);
+err_clk_enable:
+ clk_put(clk);
+ return ret;
+}
+
+/*
+ * This function asserts that we have exactly one clocksource and one
+ * clock_event_device in the end.
+ */
+static void __init lpc32xx_timer_init(struct device_node *np)
+{
+ static int has_clocksource, has_clockevent;
+ int ret;
+
+ if (!has_clocksource) {
+ ret = lpc32xx_clocksource_init(np);
+ if (!ret) {
+ has_clocksource = 1;
+ return;
+ }
+ }
+
+ if (!has_clockevent) {
+ ret = lpc32xx_clockevent_init(np);
+ if (!ret) {
+ has_clockevent = 1;
+ return;
+ }
+ }
+}
+CLOCKSOURCE_OF_DECLARE(lpc32xx_timer, "nxp,lpc3220-timer", lpc32xx_timer_init);
diff --git a/drivers/clocksource/timer-integrator-ap.c b/drivers/clocksource/timer-integrator-ap.c
index b9efd30513d5..c97d1980c0f8 100644
--- a/drivers/clocksource/timer-integrator-ap.c
+++ b/drivers/clocksource/timer-integrator-ap.c
@@ -166,7 +166,7 @@ static void __init integrator_ap_timer_init_of(struct device_node *node)
struct device_node *sec_node;
base = of_io_request_and_map(node, 0, "integrator-timer");
- if (!base)
+ if (IS_ERR(base))
return;
clk = of_clk_get(node, 0);
diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c
new file mode 100644
index 000000000000..a97e8b50701c
--- /dev/null
+++ b/drivers/clocksource/timer-stm32.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) Maxime Coquelin 2015
+ * Author: Maxime Coquelin <mcoquelin.stm32@gmail.com>
+ * License terms: GNU General Public License (GPL), version 2
+ *
+ * Inspired by time-efm32.c from Uwe Kleine-Koenig
+ */
+
+#include <linux/kernel.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/clk.h>
+#include <linux/reset.h>
+
+#define TIM_CR1 0x00
+#define TIM_DIER 0x0c
+#define TIM_SR 0x10
+#define TIM_EGR 0x14
+#define TIM_PSC 0x28
+#define TIM_ARR 0x2c
+
+#define TIM_CR1_CEN BIT(0)
+#define TIM_CR1_OPM BIT(3)
+#define TIM_CR1_ARPE BIT(7)
+
+#define TIM_DIER_UIE BIT(0)
+
+#define TIM_SR_UIF BIT(0)
+
+#define TIM_EGR_UG BIT(0)
+
+struct stm32_clock_event_ddata {
+ struct clock_event_device evtdev;
+ unsigned periodic_top;
+ void __iomem *base;
+};
+
+static void stm32_clock_event_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *evtdev)
+{
+ struct stm32_clock_event_ddata *data =
+ container_of(evtdev, struct stm32_clock_event_ddata, evtdev);
+ void *base = data->base;
+
+ switch (mode) {
+ case CLOCK_EVT_MODE_PERIODIC:
+ writel_relaxed(data->periodic_top, base + TIM_ARR);
+ writel_relaxed(TIM_CR1_ARPE | TIM_CR1_CEN, base + TIM_CR1);
+ break;
+
+ case CLOCK_EVT_MODE_ONESHOT:
+ default:
+ writel_relaxed(0, base + TIM_CR1);
+ break;
+ }
+}
+
+static int stm32_clock_event_set_next_event(unsigned long evt,
+ struct clock_event_device *evtdev)
+{
+ struct stm32_clock_event_ddata *data =
+ container_of(evtdev, struct stm32_clock_event_ddata, evtdev);
+
+ writel_relaxed(evt, data->base + TIM_ARR);
+ writel_relaxed(TIM_CR1_ARPE | TIM_CR1_OPM | TIM_CR1_CEN,
+ data->base + TIM_CR1);
+
+ return 0;
+}
+
+static irqreturn_t stm32_clock_event_handler(int irq, void *dev_id)
+{
+ struct stm32_clock_event_ddata *data = dev_id;
+
+ writel_relaxed(0, data->base + TIM_SR);
+
+ data->evtdev.event_handler(&data->evtdev);
+
+ return IRQ_HANDLED;
+}
+
+static struct stm32_clock_event_ddata clock_event_ddata = {
+ .evtdev = {
+ .name = "stm32 clockevent",
+ .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
+ .set_mode = stm32_clock_event_set_mode,
+ .set_next_event = stm32_clock_event_set_next_event,
+ .rating = 200,
+ },
+};
+
+static void __init stm32_clockevent_init(struct device_node *np)
+{
+ struct stm32_clock_event_ddata *data = &clock_event_ddata;
+ struct clk *clk;
+ struct reset_control *rstc;
+ unsigned long rate, max_delta;
+ int irq, ret, bits, prescaler = 1;
+
+ clk = of_clk_get(np, 0);
+ if (IS_ERR(clk)) {
+ ret = PTR_ERR(clk);
+ pr_err("failed to get clock for clockevent (%d)\n", ret);
+ goto err_clk_get;
+ }
+
+ ret = clk_prepare_enable(clk);
+ if (ret) {
+ pr_err("failed to enable timer clock for clockevent (%d)\n",
+ ret);
+ goto err_clk_enable;
+ }
+
+ rate = clk_get_rate(clk);
+
+ rstc = of_reset_control_get(np, NULL);
+ if (!IS_ERR(rstc)) {
+ reset_control_assert(rstc);
+ reset_control_deassert(rstc);
+ }
+
+ data->base = of_iomap(np, 0);
+ if (!data->base) {
+ pr_err("failed to map registers for clockevent\n");
+ goto err_iomap;
+ }
+
+ irq = irq_of_parse_and_map(np, 0);
+ if (!irq) {
+ pr_err("%s: failed to get irq.\n", np->full_name);
+ goto err_get_irq;
+ }
+
+ /* Detect whether the timer is 16 or 32 bits */
+ writel_relaxed(~0U, data->base + TIM_ARR);
+ max_delta = readl_relaxed(data->base + TIM_ARR);
+ if (max_delta == ~0U) {
+ prescaler = 1;
+ bits = 32;
+ } else {
+ prescaler = 1024;
+ bits = 16;
+ }
+ writel_relaxed(0, data->base + TIM_ARR);
+
+ writel_relaxed(prescaler - 1, data->base + TIM_PSC);
+ writel_relaxed(TIM_EGR_UG, data->base + TIM_EGR);
+ writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER);
+ writel_relaxed(0, data->base + TIM_SR);
+
+ data->periodic_top = DIV_ROUND_CLOSEST(rate, prescaler * HZ);
+
+ clockevents_config_and_register(&data->evtdev,
+ DIV_ROUND_CLOSEST(rate, prescaler),
+ 0x1, max_delta);
+
+ ret = request_irq(irq, stm32_clock_event_handler, IRQF_TIMER,
+ "stm32 clockevent", data);
+ if (ret) {
+ pr_err("%s: failed to request irq.\n", np->full_name);
+ goto err_get_irq;
+ }
+
+ pr_info("%s: STM32 clockevent driver initialized (%d bits)\n",
+ np->full_name, bits);
+
+ return;
+
+err_get_irq:
+ iounmap(data->base);
+err_iomap:
+ clk_disable_unprepare(clk);
+err_clk_enable:
+ clk_put(clk);
+err_clk_get:
+ return;
+}
+
+CLOCKSOURCE_OF_DECLARE(stm32, "st,stm32-timer", stm32_clockevent_init);
diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
index 28aa4b7bb602..0ffb4ea7c925 100644
--- a/drivers/clocksource/timer-sun5i.c
+++ b/drivers/clocksource/timer-sun5i.c
@@ -324,7 +324,7 @@ static void __init sun5i_timer_init(struct device_node *node)
int irq;
timer_base = of_io_request_and_map(node, 0, of_node_full_name(node));
- if (!timer_base)
+ if (IS_ERR(timer_base))
panic("Can't map registers");
irq = irq_of_parse_and_map(node, 0);
diff --git a/drivers/power/reset/ltc2952-poweroff.c b/drivers/power/reset/ltc2952-poweroff.c
index 1e08195551fe..5f855f99bdfc 100644
--- a/drivers/power/reset/ltc2952-poweroff.c
+++ b/drivers/power/reset/ltc2952-poweroff.c
@@ -158,7 +158,6 @@ static irqreturn_t ltc2952_poweroff_handler(int irq, void *dev_id)
HRTIMER_MODE_REL);
} else {
hrtimer_cancel(&data->timer_trigger);
- /* omitting return value check, timer should have been valid */
}
return IRQ_HANDLED;
}
diff --git a/fs/dcache.c b/fs/dcache.c
index 37b5afdaf698..592c4b582495 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -322,17 +322,17 @@ static void dentry_free(struct dentry *dentry)
}
/**
- * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
+ * dentry_rcuwalk_invalidate - invalidate in-progress rcu-walk lookups
* @dentry: the target dentry
* After this call, in-progress rcu-walk path lookup will fail. This
* should be called after unhashing, and after changing d_inode (if
* the dentry has not already been unhashed).
*/
-static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
+static inline void dentry_rcuwalk_invalidate(struct dentry *dentry)
{
- assert_spin_locked(&dentry->d_lock);
- /* Go through a barrier */
- write_seqcount_barrier(&dentry->d_seq);
+ lockdep_assert_held(&dentry->d_lock);
+ /* Go through am invalidation barrier */
+ write_seqcount_invalidate(&dentry->d_seq);
}
/*
@@ -372,7 +372,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
struct inode *inode = dentry->d_inode;
__d_clear_type_and_inode(dentry);
hlist_del_init(&dentry->d_u.d_alias);
- dentry_rcuwalk_barrier(dentry);
+ dentry_rcuwalk_invalidate(dentry);
spin_unlock(&dentry->d_lock);
spin_unlock(&inode->i_lock);
if (!inode->i_nlink)
@@ -494,7 +494,7 @@ void __d_drop(struct dentry *dentry)
__hlist_bl_del(&dentry->d_hash);
dentry->d_hash.pprev = NULL;
hlist_bl_unlock(b);
- dentry_rcuwalk_barrier(dentry);
+ dentry_rcuwalk_invalidate(dentry);
}
}
EXPORT_SYMBOL(__d_drop);
@@ -1752,7 +1752,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
if (inode)
hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
__d_set_inode_and_type(dentry, inode, add_flags);
- dentry_rcuwalk_barrier(dentry);
+ dentry_rcuwalk_invalidate(dentry);
spin_unlock(&dentry->d_lock);
fsnotify_d_instantiate(dentry, inode);
}
diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h
index a899402a5a0e..52f3b7da4f2d 100644
--- a/include/linux/alarmtimer.h
+++ b/include/linux/alarmtimer.h
@@ -43,8 +43,8 @@ struct alarm {
void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
enum alarmtimer_restart (*function)(struct alarm *, ktime_t));
-int alarm_start(struct alarm *alarm, ktime_t start);
-int alarm_start_relative(struct alarm *alarm, ktime_t start);
+void alarm_start(struct alarm *alarm, ktime_t start);
+void alarm_start_relative(struct alarm *alarm, ktime_t start);
void alarm_restart(struct alarm *alarm);
int alarm_try_to_cancel(struct alarm *alarm);
int alarm_cancel(struct alarm *alarm);
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 96c280b2c263..597a1e836f22 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -37,12 +37,15 @@ enum clock_event_mode {
* reached from DETACHED or SHUTDOWN.
* ONESHOT: Device is programmed to generate event only once. Can be reached
* from DETACHED or SHUTDOWN.
+ * ONESHOT_STOPPED: Device was programmed in ONESHOT mode and is temporarily
+ * stopped.
*/
enum clock_event_state {
CLOCK_EVT_STATE_DETACHED,
CLOCK_EVT_STATE_SHUTDOWN,
CLOCK_EVT_STATE_PERIODIC,
CLOCK_EVT_STATE_ONESHOT,
+ CLOCK_EVT_STATE_ONESHOT_STOPPED,
};
/*
@@ -84,12 +87,13 @@ enum clock_event_state {
* @mult: nanosecond to cycles multiplier
* @shift: nanoseconds to cycles divisor (power of two)
* @mode: operating mode, relevant only to ->set_mode(), OBSOLETE
- * @state: current state of the device, assigned by the core code
+ * @state_use_accessors:current state of the device, assigned by the core code
* @features: features
* @retries: number of forced programming retries
* @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME.
* @set_state_periodic: switch state to periodic, if !set_mode
* @set_state_oneshot: switch state to oneshot, if !set_mode
+ * @set_state_oneshot_stopped: switch state to oneshot_stopped, if !set_mode
* @set_state_shutdown: switch state to shutdown, if !set_mode
* @tick_resume: resume clkevt device, if !set_mode
* @broadcast: function to broadcast events
@@ -113,7 +117,7 @@ struct clock_event_device {
u32 mult;
u32 shift;
enum clock_event_mode mode;
- enum clock_event_state state;
+ enum clock_event_state state_use_accessors;
unsigned int features;
unsigned long retries;
@@ -121,11 +125,12 @@ struct clock_event_device {
* State transition callback(s): Only one of the two groups should be
* defined:
* - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME.
- * - set_state_{shutdown|periodic|oneshot}(), tick_resume().
+ * - set_state_{shutdown|periodic|oneshot|oneshot_stopped}(), tick_resume().
*/
void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *);
int (*set_state_periodic)(struct clock_event_device *);
int (*set_state_oneshot)(struct clock_event_device *);
+ int (*set_state_oneshot_stopped)(struct clock_event_device *);
int (*set_state_shutdown)(struct clock_event_device *);
int (*tick_resume)(struct clock_event_device *);
@@ -144,6 +149,32 @@ struct clock_event_device {
struct module *owner;
} ____cacheline_aligned;
+/* Helpers to verify state of a clockevent device */
+static inline bool clockevent_state_detached(struct clock_event_device *dev)
+{
+ return dev->state_use_accessors == CLOCK_EVT_STATE_DETACHED;
+}
+
+static inline bool clockevent_state_shutdown(struct clock_event_device *dev)
+{
+ return dev->state_use_accessors == CLOCK_EVT_STATE_SHUTDOWN;
+}
+
+static inline bool clockevent_state_periodic(struct clock_event_device *dev)
+{
+ return dev->state_use_accessors == CLOCK_EVT_STATE_PERIODIC;
+}
+
+static inline bool clockevent_state_oneshot(struct clock_event_device *dev)
+{
+ return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT;
+}
+
+static inline bool clockevent_state_oneshot_stopped(struct clock_event_device *dev)
+{
+ return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT_STOPPED;
+}
+
/*
* Calculate a multiplication factor for scaled math, which is used to convert
* nanoseconds based values to clock ticks:
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index d27d0152271f..278dd279a7a8 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -181,7 +181,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
extern int clocksource_unregister(struct clocksource*);
extern void clocksource_touch_watchdog(void);
-extern struct clocksource* clocksource_get_next(void);
extern void clocksource_change_rating(struct clocksource *cs, int rating);
extern void clocksource_suspend(void);
extern void clocksource_resume(void);
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 05f6df1fdf5b..76dd4f0da5ca 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -53,34 +53,25 @@ enum hrtimer_restart {
*
* 0x00 inactive
* 0x01 enqueued into rbtree
- * 0x02 callback function running
- * 0x04 timer is migrated to another cpu
*
- * Special cases:
- * 0x03 callback function running and enqueued
- * (was requeued on another CPU)
- * 0x05 timer was migrated on CPU hotunplug
+ * The callback state is not part of the timer->state because clearing it would
+ * mean touching the timer after the callback, this makes it impossible to free
+ * the timer from the callback function.
*
- * The "callback function running and enqueued" status is only possible on
- * SMP. It happens for example when a posix timer expired and the callback
+ * Therefore we track the callback state in:
+ *
+ * timer->base->cpu_base->running == timer
+ *
+ * On SMP it is possible to have a "callback function running and enqueued"
+ * status. It happens for example when a posix timer expired and the callback
* queued a signal. Between dropping the lock which protects the posix timer
* and reacquiring the base lock of the hrtimer, another CPU can deliver the
- * signal and rearm the timer. We have to preserve the callback running state,
- * as otherwise the timer could be removed before the softirq code finishes the
- * the handling of the timer.
- *
- * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state
- * to preserve the HRTIMER_STATE_CALLBACK in the above scenario. This
- * also affects HRTIMER_STATE_MIGRATE where the preservation is not
- * necessary. HRTIMER_STATE_MIGRATE is cleared after the timer is
- * enqueued on the new cpu.
+ * signal and rearm the timer.
*
* All state transitions are protected by cpu_base->lock.
*/
#define HRTIMER_STATE_INACTIVE 0x00
#define HRTIMER_STATE_ENQUEUED 0x01
-#define HRTIMER_STATE_CALLBACK 0x02
-#define HRTIMER_STATE_MIGRATE 0x04
/**
* struct hrtimer - the basic hrtimer structure
@@ -130,6 +121,12 @@ struct hrtimer_sleeper {
struct task_struct *task;
};
+#ifdef CONFIG_64BIT
+# define HRTIMER_CLOCK_BASE_ALIGN 64
+#else
+# define HRTIMER_CLOCK_BASE_ALIGN 32
+#endif
+
/**
* struct hrtimer_clock_base - the timer base for a specific clock
* @cpu_base: per cpu clock base
@@ -137,9 +134,7 @@ struct hrtimer_sleeper {
* timer to a base on another cpu.
* @clockid: clock id for per_cpu support
* @active: red black tree root node for the active timers
- * @resolution: the resolution of the clock, in nanoseconds
* @get_time: function to retrieve the current time of the clock
- * @softirq_time: the time when running the hrtimer queue in the softirq
* @offset: offset of this clock to the monotonic base
*/
struct hrtimer_clock_base {
@@ -147,11 +142,9 @@ struct hrtimer_clock_base {
int index;
clockid_t clockid;
struct timerqueue_head active;
- ktime_t resolution;
ktime_t (*get_time)(void);
- ktime_t softirq_time;
ktime_t offset;
-};
+} __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN)));
enum hrtimer_base_type {
HRTIMER_BASE_MONOTONIC,
@@ -165,11 +158,16 @@ enum hrtimer_base_type {
* struct hrtimer_cpu_base - the per cpu clock bases
* @lock: lock protecting the base and associated clock bases
* and timers
+ * @seq: seqcount around __run_hrtimer
+ * @running: pointer to the currently running hrtimer
* @cpu: cpu number
* @active_bases: Bitfield to mark bases with active timers
- * @clock_was_set: Indicates that clock was set from irq context.
+ * @clock_was_set_seq: Sequence counter of clock was set events
+ * @migration_enabled: The migration of hrtimers to other cpus is enabled
+ * @nohz_active: The nohz functionality is enabled
* @expires_next: absolute time of the next event which was scheduled
* via clock_set_next_event()
+ * @next_timer: Pointer to the first expiring timer
* @in_hrtirq: hrtimer_interrupt() is currently executing
* @hres_active: State of high resolution mode
* @hang_detected: The last hrtimer interrupt detected a hang
@@ -178,27 +176,38 @@ enum hrtimer_base_type {
* @nr_hangs: Total number of hrtimer interrupt hangs
* @max_hang_time: Maximum time spent in hrtimer_interrupt
* @clock_base: array of clock bases for this cpu
+ *
+ * Note: next_timer is just an optimization for __remove_hrtimer().
+ * Do not dereference the pointer because it is not reliable on
+ * cross cpu removals.
*/
struct hrtimer_cpu_base {
raw_spinlock_t lock;
+ seqcount_t seq;
+ struct hrtimer *running;
unsigned int cpu;
unsigned int active_bases;
- unsigned int clock_was_set;
+ unsigned int clock_was_set_seq;
+ bool migration_enabled;
+ bool nohz_active;
#ifdef CONFIG_HIGH_RES_TIMERS
+ unsigned int in_hrtirq : 1,
+ hres_active : 1,
+ hang_detected : 1;
ktime_t expires_next;
- int in_hrtirq;
- int hres_active;
- int hang_detected;
- unsigned long nr_events;
- unsigned long nr_retries;
- unsigned long nr_hangs;
- ktime_t max_hang_time;
+ struct hrtimer *next_timer;
+ unsigned int nr_events;
+ unsigned int nr_retries;
+ unsigned int nr_hangs;
+ unsigned int max_hang_time;
#endif
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
-};
+} ____cacheline_aligned;
static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
{
+ BUILD_BUG_ON(sizeof(struct hrtimer_clock_base) > HRTIMER_CLOCK_BASE_ALIGN);
+
timer->node.expires = time;
timer->_softexpires = time;
}
@@ -262,19 +271,16 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
return ktime_sub(timer->node.expires, timer->base->get_time());
}
-#ifdef CONFIG_HIGH_RES_TIMERS
-struct clock_event_device;
-
-extern void hrtimer_interrupt(struct clock_event_device *dev);
-
-/*
- * In high resolution mode the time reference must be read accurate
- */
static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer)
{
return timer->base->get_time();
}
+#ifdef CONFIG_HIGH_RES_TIMERS
+struct clock_event_device;
+
+extern void hrtimer_interrupt(struct clock_event_device *dev);
+
static inline int hrtimer_is_hres_active(struct hrtimer *timer)
{
return timer->base->cpu_base->hres_active;
@@ -295,21 +301,16 @@ extern void hrtimer_peek_ahead_timers(void);
extern void clock_was_set_delayed(void);
+extern unsigned int hrtimer_resolution;
+
#else
# define MONOTONIC_RES_NSEC LOW_RES_NSEC
# define KTIME_MONOTONIC_RES KTIME_LOW_RES
-static inline void hrtimer_peek_ahead_timers(void) { }
+#define hrtimer_resolution (unsigned int)LOW_RES_NSEC
-/*
- * In non high resolution mode the time reference is taken from
- * the base softirq time variable.
- */
-static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer)
-{
- return timer->base->softirq_time;
-}
+static inline void hrtimer_peek_ahead_timers(void) { }
static inline int hrtimer_is_hres_active(struct hrtimer *timer)
{
@@ -353,49 +354,47 @@ static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { }
#endif
/* Basic timer operations: */
-extern int hrtimer_start(struct hrtimer *timer, ktime_t tim,
- const enum hrtimer_mode mode);
-extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
+extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
unsigned long range_ns, const enum hrtimer_mode mode);
-extern int
-__hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
- unsigned long delta_ns,
- const enum hrtimer_mode mode, int wakeup);
+
+/**
+ * hrtimer_start - (re)start an hrtimer on the current CPU
+ * @timer: the timer to be added
+ * @tim: expiry time
+ * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or
+ * relative (HRTIMER_MODE_REL)
+ */
+static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim,
+ const enum hrtimer_mode mode)
+{
+ hrtimer_start_range_ns(timer, tim, 0, mode);
+}
extern int hrtimer_cancel(struct hrtimer *timer);
extern int hrtimer_try_to_cancel(struct hrtimer *timer);
-static inline int hrtimer_start_expires(struct hrtimer *timer,
- enum hrtimer_mode mode)
+static inline void hrtimer_start_expires(struct hrtimer *timer,
+ enum hrtimer_mode mode)
{
unsigned long delta;
ktime_t soft, hard;
soft = hrtimer_get_softexpires(timer);
hard = hrtimer_get_expires(timer);
delta = ktime_to_ns(ktime_sub(hard, soft));
- return hrtimer_start_range_ns(timer, soft, delta, mode);
+ hrtimer_start_range_ns(timer, soft, delta, mode);
}
-static inline int hrtimer_restart(struct hrtimer *timer)
+static inline void hrtimer_restart(struct hrtimer *timer)
{
- return hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
/* Query timers: */
extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
-extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
-extern ktime_t hrtimer_get_next_event(void);
+extern u64 hrtimer_get_next_event(void);
-/*
- * A timer is active, when it is enqueued into the rbtree or the
- * callback function is running or it's in the state of being migrated
- * to another cpu.
- */
-static inline int hrtimer_active(const struct hrtimer *timer)
-{
- return timer->state != HRTIMER_STATE_INACTIVE;
-}
+extern bool hrtimer_active(const struct hrtimer *timer);
/*
* Helper function to check, whether the timer is on one of the queues
@@ -411,14 +410,29 @@ static inline int hrtimer_is_queued(struct hrtimer *timer)
*/
static inline int hrtimer_callback_running(struct hrtimer *timer)
{
- return timer->state & HRTIMER_STATE_CALLBACK;
+ return timer->base->cpu_base->running == timer;
}
/* Forward a hrtimer so it expires after now: */
extern u64
hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval);
-/* Forward a hrtimer so it expires after the hrtimer's current now */
+/**
+ * hrtimer_forward_now - forward the timer expiry so it expires after now
+ * @timer: hrtimer to forward
+ * @interval: the interval to forward
+ *
+ * Forward the timer expiry so it will expire after the current time
+ * of the hrtimer clock base. Returns the number of overruns.
+ *
+ * Can be safely called from the callback function of @timer. If
+ * called from other contexts @timer must neither be enqueued nor
+ * running the callback and the caller needs to take care of
+ * serialization.
+ *
+ * Note: This only updates the timer expiry value and does not requeue
+ * the timer.
+ */
static inline u64 hrtimer_forward_now(struct hrtimer *timer,
ktime_t interval)
{
@@ -443,7 +457,6 @@ extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode);
/* Soft interrupt function to run the hrtimer queues: */
extern void hrtimer_run_queues(void);
-extern void hrtimer_run_pending(void);
/* Bootup initialization: */
extern void __init hrtimers_init(void);
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 950ae4501826..be7e75c945e9 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -413,7 +413,8 @@ enum
BLOCK_IOPOLL_SOFTIRQ,
TASKLET_SOFTIRQ,
SCHED_SOFTIRQ,
- HRTIMER_SOFTIRQ,
+ HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the
+ numbering. Sigh! */
RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */
NR_SOFTIRQS
@@ -592,10 +593,10 @@ tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
clockid_t which_clock, enum hrtimer_mode mode);
static inline
-int tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time,
- const enum hrtimer_mode mode)
+void tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time,
+ const enum hrtimer_mode mode)
{
- return hrtimer_start(&ttimer->timer, time, mode);
+ hrtimer_start(&ttimer->timer, time, mode);
}
static inline
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index c367cbdf73ab..535fd3bb1ba8 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -7,6 +7,7 @@
#include <linux/time.h>
#include <linux/timex.h>
#include <asm/param.h> /* for HZ */
+#include <generated/timeconst.h>
/*
* The following defines establish the engineering parameters of the PLL
@@ -288,8 +289,133 @@ static inline u64 jiffies_to_nsecs(const unsigned long j)
return (u64)jiffies_to_usecs(j) * NSEC_PER_USEC;
}
-extern unsigned long msecs_to_jiffies(const unsigned int m);
-extern unsigned long usecs_to_jiffies(const unsigned int u);
+extern unsigned long __msecs_to_jiffies(const unsigned int m);
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+/*
+ * HZ is equal to or smaller than 1000, and 1000 is a nice round
+ * multiple of HZ, divide with the factor between them, but round
+ * upwards:
+ */
+static inline unsigned long _msecs_to_jiffies(const unsigned int m)
+{
+ return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
+}
+#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
+/*
+ * HZ is larger than 1000, and HZ is a nice round multiple of 1000 -
+ * simply multiply with the factor between them.
+ *
+ * But first make sure the multiplication result cannot overflow:
+ */
+static inline unsigned long _msecs_to_jiffies(const unsigned int m)
+{
+ if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
+ return MAX_JIFFY_OFFSET;
+ return m * (HZ / MSEC_PER_SEC);
+}
+#else
+/*
+ * Generic case - multiply, round and divide. But first check that if
+ * we are doing a net multiplication, that we wouldn't overflow:
+ */
+static inline unsigned long _msecs_to_jiffies(const unsigned int m)
+{
+ if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
+ return MAX_JIFFY_OFFSET;
+
+ return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) >> MSEC_TO_HZ_SHR32;
+}
+#endif
+/**
+ * msecs_to_jiffies: - convert milliseconds to jiffies
+ * @m: time in milliseconds
+ *
+ * conversion is done as follows:
+ *
+ * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET)
+ *
+ * - 'too large' values [that would result in larger than
+ * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too.
+ *
+ * - all other values are converted to jiffies by either multiplying
+ * the input value by a factor or dividing it with a factor and
+ * handling any 32-bit overflows.
+ * for the details see __msecs_to_jiffies()
+ *
+ * msecs_to_jiffies() checks for the passed in value being a constant
+ * via __builtin_constant_p() allowing gcc to eliminate most of the
+ * code, __msecs_to_jiffies() is called if the value passed does not
+ * allow constant folding and the actual conversion must be done at
+ * runtime.
+ * the HZ range specific helpers _msecs_to_jiffies() are called both
+ * directly here and from __msecs_to_jiffies() in the case where
+ * constant folding is not possible.
+ */
+static inline unsigned long msecs_to_jiffies(const unsigned int m)
+{
+ if (__builtin_constant_p(m)) {
+ if ((int)m < 0)
+ return MAX_JIFFY_OFFSET;
+ return _msecs_to_jiffies(m);
+ } else {
+ return __msecs_to_jiffies(m);
+ }
+}
+
+extern unsigned long __usecs_to_jiffies(const unsigned int u);
+#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
+static inline unsigned long _usecs_to_jiffies(const unsigned int u)
+{
+ return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ);
+}
+#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
+static inline unsigned long _usecs_to_jiffies(const unsigned int u)
+{
+ return u * (HZ / USEC_PER_SEC);
+}
+static inline unsigned long _usecs_to_jiffies(const unsigned int u)
+{
+#else
+static inline unsigned long _usecs_to_jiffies(const unsigned int u)
+{
+ return (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32)
+ >> USEC_TO_HZ_SHR32;
+}
+#endif
+
+/**
+ * usecs_to_jiffies: - convert microseconds to jiffies
+ * @u: time in microseconds
+ *
+ * conversion is done as follows:
+ *
+ * - 'too large' values [that would result in larger than
+ * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too.
+ *
+ * - all other values are converted to jiffies by either multiplying
+ * the input value by a factor or dividing it with a factor and
+ * handling any 32-bit overflows as for msecs_to_jiffies.
+ *
+ * usecs_to_jiffies() checks for the passed in value being a constant
+ * via __builtin_constant_p() allowing gcc to eliminate most of the
+ * code, __usecs_to_jiffies() is called if the value passed does not
+ * allow constant folding and the actual conversion must be done at
+ * runtime.
+ * the HZ range specific helpers _usecs_to_jiffies() are called both
+ * directly here and from __msecs_to_jiffies() in the case where
+ * constant folding is not possible.
+ */
+static inline unsigned long usecs_to_jiffies(const unsigned int u)
+{
+ if (__builtin_constant_p(u)) {
+ if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET))
+ return MAX_JIFFY_OFFSET;
+ return _usecs_to_jiffies(u);
+ } else {
+ return __usecs_to_jiffies(u);
+ }
+}
+
extern unsigned long timespec_to_jiffies(const struct timespec *value);
extern void jiffies_to_timespec(const unsigned long jiffies,
struct timespec *value);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a204d5266f5f..1b82d44b0a02 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -562,8 +562,12 @@ struct perf_cpu_context {
struct perf_event_context *task_ctx;
int active_oncpu;
int exclusive;
+
+ raw_spinlock_t hrtimer_lock;
struct hrtimer hrtimer;
ktime_t hrtimer_interval;
+ unsigned int hrtimer_active;
+
struct pmu *unique_pmu;
struct perf_cgroup *cgrp;
};
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 03a899aabd17..33a056bb886f 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -44,6 +44,8 @@
#include <linux/debugobjects.h>
#include <linux/bug.h>
#include <linux/compiler.h>
+#include <linux/ktime.h>
+
#include <asm/barrier.h>
extern int rcu_expedited; /* for sysctl */
@@ -1100,9 +1102,9 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
#ifdef CONFIG_TINY_RCU
-static inline int rcu_needs_cpu(unsigned long *delta_jiffies)
+static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
- *delta_jiffies = ULONG_MAX;
+ *nextevt = KTIME_MAX;
return 0;
}
#endif /* #ifdef CONFIG_TINY_RCU */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 3fa4a43ab415..456879143f89 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -31,7 +31,7 @@
#define __LINUX_RCUTREE_H
void rcu_note_context_switch(void);
-int rcu_needs_cpu(unsigned long *delta_jiffies);
+int rcu_needs_cpu(u64 basem, u64 *nextevt);
void rcu_cpu_stall_reset(void);
/*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d4193d5613cf..30364cb58b1f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -345,14 +345,10 @@ extern int runqueue_is_locked(int cpu);
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
extern void nohz_balance_enter_idle(int cpu);
extern void set_cpu_sd_state_idle(void);
-extern int get_nohz_timer_target(int pinned);
+extern int get_nohz_timer_target(void);
#else
static inline void nohz_balance_enter_idle(int cpu) { }
static inline void set_cpu_sd_state_idle(void) { }
-static inline int get_nohz_timer_target(int pinned)
-{
- return smp_processor_id();
-}
#endif
/*
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 596a0e007c62..c9e4731cf10b 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -57,24 +57,12 @@ extern unsigned int sysctl_numa_balancing_scan_size;
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
extern unsigned int sysctl_sched_time_avg;
-extern unsigned int sysctl_timer_migration;
extern unsigned int sysctl_sched_shares_window;
int sched_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length,
loff_t *ppos);
#endif
-#ifdef CONFIG_SCHED_DEBUG
-static inline unsigned int get_sysctl_timer_migration(void)
-{
- return sysctl_timer_migration;
-}
-#else
-static inline unsigned int get_sysctl_timer_migration(void)
-{
- return 1;
-}
-#endif
/*
* control realtime throttling:
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 5f68d0a391ce..486e685a226a 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -233,6 +233,47 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
s->sequence++;
}
+/**
+ * raw_write_seqcount_barrier - do a seq write barrier
+ * @s: pointer to seqcount_t
+ *
+ * This can be used to provide an ordering guarantee instead of the
+ * usual consistency guarantee. It is one wmb cheaper, because we can
+ * collapse the two back-to-back wmb()s.
+ *
+ * seqcount_t seq;
+ * bool X = true, Y = false;
+ *
+ * void read(void)
+ * {
+ * bool x, y;
+ *
+ * do {
+ * int s = read_seqcount_begin(&seq);
+ *
+ * x = X; y = Y;
+ *
+ * } while (read_seqcount_retry(&seq, s));
+ *
+ * BUG_ON(!x && !y);
+ * }
+ *
+ * void write(void)
+ * {
+ * Y = true;
+ *
+ * raw_write_seqcount_barrier(seq);
+ *
+ * X = false;
+ * }
+ */
+static inline void raw_write_seqcount_barrier(seqcount_t *s)
+{
+ s->sequence++;
+ smp_wmb();
+ s->sequence++;
+}
+
/*
* raw_write_seqcount_latch - redirect readers to even/odd copy
* @s: pointer to seqcount_t
@@ -266,13 +307,13 @@ static inline void write_seqcount_end(seqcount_t *s)
}
/**
- * write_seqcount_barrier - invalidate in-progress read-side seq operations
+ * write_seqcount_invalidate - invalidate in-progress read-side seq operations
* @s: pointer to seqcount_t
*
- * After write_seqcount_barrier, no read-side seq operations will complete
+ * After write_seqcount_invalidate, no read-side seq operations will complete
* successfully and see data older than this.
*/
-static inline void write_seqcount_barrier(seqcount_t *s)
+static inline void write_seqcount_invalidate(seqcount_t *s)
{
smp_wmb();
s->sequence+=2;
diff --git a/include/linux/time64.h b/include/linux/time64.h
index a3831478d9cf..77b5df2acd2a 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -2,6 +2,7 @@
#define _LINUX_TIME64_H
#include <uapi/linux/time.h>
+#include <linux/math64.h>
typedef __s64 time64_t;
@@ -28,6 +29,7 @@ struct timespec64 {
#define FSEC_PER_SEC 1000000000000000LL
/* Located here for timespec[64]_valid_strict */
+#define TIME64_MAX ((s64)~((u64)1 << 63))
#define KTIME_MAX ((s64)~((u64)1 << 63))
#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index fb86963859c7..25247220b4b7 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -49,6 +49,8 @@ struct tk_read_base {
* @offs_boot: Offset clock monotonic -> clock boottime
* @offs_tai: Offset clock monotonic -> clock tai
* @tai_offset: The current UTC to TAI offset in seconds
+ * @clock_was_set_seq: The sequence number of clock was set events
+ * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
* @raw_time: Monotonic raw base time in timespec64 format
* @cycle_interval: Number of clock cycles in one NTP interval
* @xtime_interval: Number of clock shifted nano seconds in one NTP
@@ -60,6 +62,9 @@ struct tk_read_base {
* shifted nano seconds.
* @ntp_error_shift: Shift conversion between clock shifted nano seconds and
* ntp shifted nano seconds.
+ * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING)
+ * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING)
+ * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING)
*
* Note: For timespec(64) based interfaces wall_to_monotonic is what
* we need to add to xtime (or xtime corrected for sub jiffie times)
@@ -85,6 +90,8 @@ struct timekeeper {
ktime_t offs_boot;
ktime_t offs_tai;
s32 tai_offset;
+ unsigned int clock_was_set_seq;
+ ktime_t next_leap_ktime;
struct timespec64 raw_time;
/* The following members are for timekeeping internal use */
@@ -104,6 +111,18 @@ struct timekeeper {
s64 ntp_error;
u32 ntp_error_shift;
u32 ntp_err_mult;
+#ifdef CONFIG_DEBUG_TIMEKEEPING
+ long last_warning;
+ /*
+ * These simple flag variables are managed
+ * without locks, which is racy, but they are
+ * ok since we don't really care about being
+ * super precise about how many events were
+ * seen, just that a problem was observed.
+ */
+ int underflow_seen;
+ int overflow_seen;
+#endif
};
#ifdef CONFIG_GENERIC_TIME_VSYSCALL
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 99176af216af..3aa72e648650 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -163,6 +163,7 @@ extern ktime_t ktime_get(void);
extern ktime_t ktime_get_with_offset(enum tk_offsets offs);
extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs);
extern ktime_t ktime_get_raw(void);
+extern u32 ktime_get_resolution_ns(void);
/**
* ktime_get_real - get the real (wall-) time in ktime_t format
@@ -266,7 +267,6 @@ extern int persistent_clock_is_local;
extern void read_persistent_clock(struct timespec *ts);
extern void read_persistent_clock64(struct timespec64 *ts);
-extern void read_boot_clock(struct timespec *ts);
extern void read_boot_clock64(struct timespec64 *ts);
extern int update_persistent_clock(struct timespec now);
extern int update_persistent_clock64(struct timespec64 now);
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 8c5a197e1587..61aa61dc410c 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -14,27 +14,23 @@ struct timer_list {
* All fields that change during normal runtime grouped to the
* same cacheline
*/
- struct list_head entry;
- unsigned long expires;
- struct tvec_base *base;
-
- void (*function)(unsigned long);
- unsigned long data;
-
- int slack;
+ struct hlist_node entry;
+ unsigned long expires;
+ void (*function)(unsigned long);
+ unsigned long data;
+ u32 flags;
+ int slack;
#ifdef CONFIG_TIMER_STATS
- int start_pid;
- void *start_site;
- char start_comm[16];
+ int start_pid;
+ void *start_site;
+ char start_comm[16];
#endif
#ifdef CONFIG_LOCKDEP
- struct lockdep_map lockdep_map;
+ struct lockdep_map lockdep_map;
#endif
};
-extern struct tvec_base boot_tvec_bases;
-
#ifdef CONFIG_LOCKDEP
/*
* NB: because we have to copy the lockdep_map, setting the lockdep_map key
@@ -49,9 +45,6 @@ extern struct tvec_base boot_tvec_bases;
#endif
/*
- * Note that all tvec_bases are at least 4 byte aligned and lower two bits
- * of base in timer_list is guaranteed to be zero. Use them for flags.
- *
* A deferrable timer will work normally when the system is busy, but
* will not cause a CPU to come out of idle just to service it; instead,
* the timer will be serviced when the CPU eventually wakes up with a
@@ -65,17 +58,18 @@ extern struct tvec_base boot_tvec_bases;
* workqueue locking issues. It's not meant for executing random crap
* with interrupts disabled. Abuse is monitored!
*/
-#define TIMER_DEFERRABLE 0x1LU
-#define TIMER_IRQSAFE 0x2LU
-
-#define TIMER_FLAG_MASK 0x3LU
+#define TIMER_CPUMASK 0x0007FFFF
+#define TIMER_MIGRATING 0x00080000
+#define TIMER_BASEMASK (TIMER_CPUMASK | TIMER_MIGRATING)
+#define TIMER_DEFERRABLE 0x00100000
+#define TIMER_IRQSAFE 0x00200000
#define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \
- .entry = { .prev = TIMER_ENTRY_STATIC }, \
+ .entry = { .next = TIMER_ENTRY_STATIC }, \
.function = (_function), \
.expires = (_expires), \
.data = (_data), \
- .base = (void *)((unsigned long)&boot_tvec_bases + (_flags)), \
+ .flags = (_flags), \
.slack = -1, \
__TIMER_LOCKDEP_MAP_INITIALIZER( \
__FILE__ ":" __stringify(__LINE__)) \
@@ -168,7 +162,7 @@ static inline void init_timer_on_stack_key(struct timer_list *timer,
*/
static inline int timer_pending(const struct timer_list * timer)
{
- return timer->entry.next != NULL;
+ return timer->entry.pprev != NULL;
}
extern void add_timer_on(struct timer_list *timer, int cpu);
@@ -188,26 +182,16 @@ extern void set_timer_slack(struct timer_list *time, int slack_hz);
#define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1)
/*
- * Return when the next timer-wheel timeout occurs (in absolute jiffies),
- * locks the timer base and does the comparison against the given
- * jiffie.
- */
-extern unsigned long get_next_timer_interrupt(unsigned long now);
-
-/*
* Timer-statistics info:
*/
#ifdef CONFIG_TIMER_STATS
extern int timer_stats_active;
-#define TIMER_STATS_FLAG_DEFERRABLE 0x1
-
extern void init_timer_stats(void);
extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
- void *timerf, char *comm,
- unsigned int timer_flag);
+ void *timerf, char *comm, u32 flags);
extern void __timer_stats_timer_set_start_info(struct timer_list *timer,
void *addr);
@@ -254,6 +238,15 @@ extern void run_local_timers(void);
struct hrtimer;
extern enum hrtimer_restart it_real_fn(struct hrtimer *);
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+#include <linux/sysctl.h>
+
+extern unsigned int sysctl_timer_migration;
+int timer_migration_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
+#endif
+
unsigned long __round_jiffies(unsigned long j, int cpu);
unsigned long __round_jiffies_relative(unsigned long j, int cpu);
unsigned long round_jiffies(unsigned long j);
diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
index a520fd70a59f..7eec17ad7fa1 100644
--- a/include/linux/timerqueue.h
+++ b/include/linux/timerqueue.h
@@ -16,10 +16,10 @@ struct timerqueue_head {
};
-extern void timerqueue_add(struct timerqueue_head *head,
- struct timerqueue_node *node);
-extern void timerqueue_del(struct timerqueue_head *head,
- struct timerqueue_node *node);
+extern bool timerqueue_add(struct timerqueue_head *head,
+ struct timerqueue_node *node);
+extern bool timerqueue_del(struct timerqueue_head *head,
+ struct timerqueue_node *node);
extern struct timerqueue_node *timerqueue_iterate_next(
struct timerqueue_node *node);
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 68c2c2000f02..073b9ac245ba 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -43,15 +43,18 @@ DEFINE_EVENT(timer_class, timer_init,
*/
TRACE_EVENT(timer_start,
- TP_PROTO(struct timer_list *timer, unsigned long expires),
+ TP_PROTO(struct timer_list *timer,
+ unsigned long expires,
+ unsigned int flags),
- TP_ARGS(timer, expires),
+ TP_ARGS(timer, expires, flags),
TP_STRUCT__entry(
__field( void *, timer )
__field( void *, function )
__field( unsigned long, expires )
__field( unsigned long, now )
+ __field( unsigned int, flags )
),
TP_fast_assign(
@@ -59,11 +62,12 @@ TRACE_EVENT(timer_start,
__entry->function = timer->function;
__entry->expires = expires;
__entry->now = jiffies;
+ __entry->flags = flags;
),
- TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]",
+ TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld] flags=0x%08x",
__entry->timer, __entry->function, __entry->expires,
- (long)__entry->expires - __entry->now)
+ (long)__entry->expires - __entry->now, __entry->flags)
);
/**
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f2003b97ddc9..8e13f3e54ec3 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -51,9 +51,11 @@
static struct workqueue_struct *perf_wq;
+typedef int (*remote_function_f)(void *);
+
struct remote_function_call {
struct task_struct *p;
- int (*func)(void *info);
+ remote_function_f func;
void *info;
int ret;
};
@@ -86,7 +88,7 @@ static void remote_function(void *data)
* -EAGAIN - when the process moved away
*/
static int
-task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
+task_function_call(struct task_struct *p, remote_function_f func, void *info)
{
struct remote_function_call data = {
.p = p,
@@ -110,7 +112,7 @@ task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
*
* returns: @func return value or -ENXIO when the cpu is offline
*/
-static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
+static int cpu_function_call(int cpu, remote_function_f func, void *info)
{
struct remote_function_call data = {
.p = NULL,
@@ -747,62 +749,31 @@ perf_cgroup_mark_enabled(struct perf_event *event,
/*
* function must be called with interrupts disbled
*/
-static enum hrtimer_restart perf_cpu_hrtimer_handler(struct hrtimer *hr)
+static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr)
{
struct perf_cpu_context *cpuctx;
- enum hrtimer_restart ret = HRTIMER_NORESTART;
int rotations = 0;
WARN_ON(!irqs_disabled());
cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);
-
rotations = perf_rotate_context(cpuctx);
- /*
- * arm timer if needed
- */
- if (rotations) {
+ raw_spin_lock(&cpuctx->hrtimer_lock);
+ if (rotations)
hrtimer_forward_now(hr, cpuctx->hrtimer_interval);
- ret = HRTIMER_RESTART;
- }
-
- return ret;
-}
-
-/* CPU is going down */
-void perf_cpu_hrtimer_cancel(int cpu)
-{
- struct perf_cpu_context *cpuctx;
- struct pmu *pmu;
- unsigned long flags;
-
- if (WARN_ON(cpu != smp_processor_id()))
- return;
-
- local_irq_save(flags);
-
- rcu_read_lock();
-
- list_for_each_entry_rcu(pmu, &pmus, entry) {
- cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-
- if (pmu->task_ctx_nr == perf_sw_context)
- continue;
-
- hrtimer_cancel(&cpuctx->hrtimer);
- }
-
- rcu_read_unlock();
+ else
+ cpuctx->hrtimer_active = 0;
+ raw_spin_unlock(&cpuctx->hrtimer_lock);
- local_irq_restore(flags);
+ return rotations ? HRTIMER_RESTART : HRTIMER_NORESTART;
}
-static void __perf_cpu_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
+static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
{
- struct hrtimer *hr = &cpuctx->hrtimer;
+ struct hrtimer *timer = &cpuctx->hrtimer;
struct pmu *pmu = cpuctx->ctx.pmu;
- int timer;
+ u64 interval;
/* no multiplexing needed for SW PMU */
if (pmu->task_ctx_nr == perf_sw_context)
@@ -812,31 +783,36 @@ static void __perf_cpu_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
* check default is sane, if not set then force to
* default interval (1/tick)
*/
- timer = pmu->hrtimer_interval_ms;
- if (timer < 1)
- timer = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER;
+ interval = pmu->hrtimer_interval_ms;
+ if (interval < 1)
+ interval = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER;
- cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
+ cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
- hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
- hr->function = perf_cpu_hrtimer_handler;
+ raw_spin_lock_init(&cpuctx->hrtimer_lock);
+ hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+ timer->function = perf_mux_hrtimer_handler;
}
-static void perf_cpu_hrtimer_restart(struct perf_cpu_context *cpuctx)
+static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
{
- struct hrtimer *hr = &cpuctx->hrtimer;
+ struct hrtimer *timer = &cpuctx->hrtimer;
struct pmu *pmu = cpuctx->ctx.pmu;
+ unsigned long flags;
/* not for SW PMU */
if (pmu->task_ctx_nr == perf_sw_context)
- return;
+ return 0;
- if (hrtimer_active(hr))
- return;
+ raw_spin_lock_irqsave(&cpuctx->hrtimer_lock, flags);
+ if (!cpuctx->hrtimer_active) {
+ cpuctx->hrtimer_active = 1;
+ hrtimer_forward_now(timer, cpuctx->hrtimer_interval);
+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+ }
+ raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags);
- if (!hrtimer_callback_running(hr))
- __hrtimer_start_range_ns(hr, cpuctx->hrtimer_interval,
- 0, HRTIMER_MODE_REL_PINNED, 0);
+ return 0;
}
void perf_pmu_disable(struct pmu *pmu)
@@ -1935,7 +1911,7 @@ group_sched_in(struct perf_event *group_event,
if (event_sched_in(group_event, cpuctx, ctx)) {
pmu->cancel_txn(pmu);
- perf_cpu_hrtimer_restart(cpuctx);
+ perf_mux_hrtimer_restart(cpuctx);
return -EAGAIN;
}
@@ -1982,7 +1958,7 @@ group_error:
pmu->cancel_txn(pmu);
- perf_cpu_hrtimer_restart(cpuctx);
+ perf_mux_hrtimer_restart(cpuctx);
return -EAGAIN;
}
@@ -2255,7 +2231,7 @@ static int __perf_event_enable(void *info)
*/
if (leader != event) {
group_sched_out(leader, cpuctx, ctx);
- perf_cpu_hrtimer_restart(cpuctx);
+ perf_mux_hrtimer_restart(cpuctx);
}
if (leader->attr.pinned) {
update_group_times(leader);
@@ -6897,9 +6873,8 @@ static void perf_swevent_start_hrtimer(struct perf_event *event)
} else {
period = max_t(u64, 10000, hwc->sample_period);
}
- __hrtimer_start_range_ns(&hwc->hrtimer,
- ns_to_ktime(period), 0,
- HRTIMER_MODE_REL_PINNED, 0);
+ hrtimer_start(&hwc->hrtimer, ns_to_ktime(period),
+ HRTIMER_MODE_REL_PINNED);
}
static void perf_swevent_cancel_hrtimer(struct perf_event *event)
@@ -7200,6 +7175,8 @@ perf_event_mux_interval_ms_show(struct device *dev,
return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->hrtimer_interval_ms);
}
+static DEFINE_MUTEX(mux_interval_mutex);
+
static ssize_t
perf_event_mux_interval_ms_store(struct device *dev,
struct device_attribute *attr,
@@ -7219,17 +7196,21 @@ perf_event_mux_interval_ms_store(struct device *dev,
if (timer == pmu->hrtimer_interval_ms)
return count;
+ mutex_lock(&mux_interval_mutex);
pmu->hrtimer_interval_ms = timer;
/* update all cpuctx for this PMU */
- for_each_possible_cpu(cpu) {
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
struct perf_cpu_context *cpuctx;
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
- if (hrtimer_active(&cpuctx->hrtimer))
- hrtimer_forward_now(&cpuctx->hrtimer, cpuctx->hrtimer_interval);
+ cpu_function_call(cpu,
+ (remote_function_f)perf_mux_hrtimer_restart, cpuctx);
}
+ put_online_cpus();
+ mutex_unlock(&mux_interval_mutex);
return count;
}
@@ -7334,7 +7315,7 @@ skip_type:
lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
cpuctx->ctx.pmu = pmu;
- __perf_cpu_hrtimer_init(cpuctx, cpu);
+ __perf_mux_hrtimer_init(cpuctx, cpu);
cpuctx->unique_pmu = pmu;
}
diff --git a/kernel/futex.c b/kernel/futex.c
index aacc706f85fc..ea6ca0bca525 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2064,11 +2064,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
queue_me(q, hb);
/* Arm the timer */
- if (timeout) {
+ if (timeout)
hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
- if (!hrtimer_active(&timeout->timer))
- timeout->task = NULL;
- }
/*
* If we have been removed from the hash list, then another task
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 30ec5b46cd8c..36573e96a477 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1182,11 +1182,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
set_current_state(state);
/* Setup the timer, when timeout != NULL */
- if (unlikely(timeout)) {
+ if (unlikely(timeout))
hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
- if (!hrtimer_active(&timeout->timer))
- timeout->task = NULL;
- }
ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 32664347091a..013485fb2b06 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1375,9 +1375,9 @@ static void rcu_prepare_kthreads(int cpu)
* Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs
* any flavor of RCU.
*/
-int rcu_needs_cpu(unsigned long *delta_jiffies)
+int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
- *delta_jiffies = ULONG_MAX;
+ *nextevt = KTIME_MAX;
return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)
? 0 : rcu_cpu_has_callbacks(NULL);
}
@@ -1439,8 +1439,6 @@ module_param(rcu_idle_gp_delay, int, 0644);
static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
module_param(rcu_idle_lazy_gp_delay, int, 0644);
-extern int tick_nohz_active;
-
/*
* Try to advance callbacks for all flavors of RCU on the current CPU, but
* only if it has been awhile since the last time we did so. Afterwards,
@@ -1487,12 +1485,13 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
*
* The caller must have disabled interrupts.
*/
-int rcu_needs_cpu(unsigned long *dj)
+int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
+ unsigned long dj;
if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) {
- *dj = ULONG_MAX;
+ *nextevt = KTIME_MAX;
return 0;
}
@@ -1501,7 +1500,7 @@ int rcu_needs_cpu(unsigned long *dj)
/* If no callbacks, RCU doesn't need the CPU. */
if (!rcu_cpu_has_callbacks(&rdtp->all_lazy)) {
- *dj = ULONG_MAX;
+ *nextevt = KTIME_MAX;
return 0;
}
@@ -1515,11 +1514,12 @@ int rcu_needs_cpu(unsigned long *dj)
/* Request timer delay depending on laziness, and round. */
if (!rdtp->all_lazy) {
- *dj = round_up(rcu_idle_gp_delay + jiffies,
+ dj = round_up(rcu_idle_gp_delay + jiffies,
rcu_idle_gp_delay) - jiffies;
} else {
- *dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;
+ dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;
}
+ *nextevt = basemono + dj * TICK_NSEC;
return 0;
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f89ca9bcf42a..c9a707b59331 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -90,26 +90,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/sched.h>
-void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
-{
- unsigned long delta;
- ktime_t soft, hard, now;
-
- for (;;) {
- if (hrtimer_active(period_timer))
- break;
-
- now = hrtimer_cb_get_time(period_timer);
- hrtimer_forward(period_timer, now, period);
-
- soft = hrtimer_get_softexpires(period_timer);
- hard = hrtimer_get_expires(period_timer);
- delta = ktime_to_ns(ktime_sub(hard, soft));
- __hrtimer_start_range_ns(period_timer, soft, delta,
- HRTIMER_MODE_ABS_PINNED, 0);
- }
-}
-
DEFINE_MUTEX(sched_domains_mutex);
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -355,12 +335,11 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
#ifdef CONFIG_SMP
-static int __hrtick_restart(struct rq *rq)
+static void __hrtick_restart(struct rq *rq)
{
struct hrtimer *timer = &rq->hrtick_timer;
- ktime_t time = hrtimer_get_softexpires(timer);
- return __hrtimer_start_range_ns(timer, time, 0, HRTIMER_MODE_ABS_PINNED, 0);
+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
}
/*
@@ -440,8 +419,8 @@ void hrtick_start(struct rq *rq, u64 delay)
* doesn't make sense. Rely on vruntime for fairness.
*/
delay = max_t(u64, delay, 10000LL);
- __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
- HRTIMER_MODE_REL_PINNED, 0);
+ hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
+ HRTIMER_MODE_REL_PINNED);
}
static inline void init_hrtick(void)
@@ -639,13 +618,12 @@ void resched_cpu(int cpu)
* selecting an idle cpu will add more delays to the timers than intended
* (as that cpu's timer base may not be uptodate wrt jiffies etc).
*/
-int get_nohz_timer_target(int pinned)
+int get_nohz_timer_target(void)
{
- int cpu = smp_processor_id();
- int i;
+ int i, cpu = smp_processor_id();
struct sched_domain *sd;
- if (pinned || !get_sysctl_timer_migration() || !idle_cpu(cpu))
+ if (!idle_cpu(cpu))
return cpu;
rcu_read_lock();
@@ -7126,8 +7104,6 @@ void __init sched_init_smp(void)
}
#endif /* CONFIG_SMP */
-const_debug unsigned int sysctl_timer_migration = 1;
-
int in_sched_functions(unsigned long addr)
{
return in_lock_functions(addr) ||
@@ -8163,10 +8139,8 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
__refill_cfs_bandwidth_runtime(cfs_b);
/* restart the period timer (if active) to handle new period expiry */
- if (runtime_enabled && cfs_b->timer_active) {
- /* force a reprogram */
- __start_cfs_bandwidth(cfs_b, true);
- }
+ if (runtime_enabled)
+ start_cfs_bandwidth(cfs_b);
raw_spin_unlock_irq(&cfs_b->lock);
for_each_online_cpu(i) {
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 392e8fb94db3..eac20c557a55 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -503,8 +503,6 @@ static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
ktime_t now, act;
- ktime_t soft, hard;
- unsigned long range;
s64 delta;
if (boosted)
@@ -527,15 +525,9 @@ static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)
if (ktime_us_delta(act, now) < 0)
return 0;
- hrtimer_set_expires(&dl_se->dl_timer, act);
+ hrtimer_start(&dl_se->dl_timer, act, HRTIMER_MODE_ABS);
- soft = hrtimer_get_softexpires(&dl_se->dl_timer);
- hard = hrtimer_get_expires(&dl_se->dl_timer);
- range = ktime_to_ns(ktime_sub(hard, soft));
- __hrtimer_start_range_ns(&dl_se->dl_timer, soft,
- range, HRTIMER_MODE_ABS, 0);
-
- return hrtimer_active(&dl_se->dl_timer);
+ return 1;
}
/*
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 704683cc9042..315c68e015d9 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -232,8 +232,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
#endif
#endif
#ifdef CONFIG_CFS_BANDWIDTH
- SEQ_printf(m, " .%-30s: %d\n", "tg->cfs_bandwidth.timer_active",
- cfs_rq->tg->cfs_bandwidth.timer_active);
SEQ_printf(m, " .%-30s: %d\n", "throttled",
cfs_rq->throttled);
SEQ_printf(m, " .%-30s: %d\n", "throttle_count",
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 433061d984ea..40a7fcbf491e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3504,16 +3504,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
if (cfs_b->quota == RUNTIME_INF)
amount = min_amount;
else {
- /*
- * If the bandwidth pool has become inactive, then at least one
- * period must have elapsed since the last consumption.
- * Refresh the global state and ensure bandwidth timer becomes
- * active.
- */
- if (!cfs_b->timer_active) {
- __refill_cfs_bandwidth_runtime(cfs_b);
- __start_cfs_bandwidth(cfs_b, false);
- }
+ start_cfs_bandwidth(cfs_b);
if (cfs_b->runtime > 0) {
amount = min(cfs_b->runtime, min_amount);
@@ -3662,6 +3653,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
struct sched_entity *se;
long task_delta, dequeue = 1;
+ bool empty;
se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
@@ -3691,13 +3683,21 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
cfs_rq->throttled = 1;
cfs_rq->throttled_clock = rq_clock(rq);
raw_spin_lock(&cfs_b->lock);
+ empty = list_empty(&cfs_rq->throttled_list);
+
/*
* Add to the _head_ of the list, so that an already-started
* distribute_cfs_runtime will not see us
*/
list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
- if (!cfs_b->timer_active)
- __start_cfs_bandwidth(cfs_b, false);
+
+ /*
+ * If we're the first throttled task, make sure the bandwidth
+ * timer is running.
+ */
+ if (empty)
+ start_cfs_bandwidth(cfs_b);
+
raw_spin_unlock(&cfs_b->lock);
}
@@ -3812,13 +3812,6 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
if (cfs_b->idle && !throttled)
goto out_deactivate;
- /*
- * if we have relooped after returning idle once, we need to update our
- * status as actually running, so that other cpus doing
- * __start_cfs_bandwidth will stop trying to cancel us.
- */
- cfs_b->timer_active = 1;
-
__refill_cfs_bandwidth_runtime(cfs_b);
if (!throttled) {
@@ -3863,7 +3856,6 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
return 0;
out_deactivate:
- cfs_b->timer_active = 0;
return 1;
}
@@ -3878,7 +3870,7 @@ static const u64 cfs_bandwidth_slack_period = 5 * NSEC_PER_MSEC;
* Are we near the end of the current quota period?
*
* Requires cfs_b->lock for hrtimer_expires_remaining to be safe against the
- * hrtimer base being cleared by __hrtimer_start_range_ns. In the case of
+ * hrtimer base being cleared by hrtimer_start. In the case of
* migrate_hrtimers, base is never cleared, so we are fine.
*/
static int runtime_refresh_within(struct cfs_bandwidth *cfs_b, u64 min_expire)
@@ -3906,8 +3898,9 @@ static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
if (runtime_refresh_within(cfs_b, min_left))
return;
- start_bandwidth_timer(&cfs_b->slack_timer,
- ns_to_ktime(cfs_bandwidth_slack_period));
+ hrtimer_start(&cfs_b->slack_timer,
+ ns_to_ktime(cfs_bandwidth_slack_period),
+ HRTIMER_MODE_REL);
}
/* we know any runtime found here is valid as update_curr() precedes return */
@@ -4027,6 +4020,7 @@ static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
{
struct cfs_bandwidth *cfs_b =
container_of(timer, struct cfs_bandwidth, slack_timer);
+
do_sched_cfs_slack_timer(cfs_b);
return HRTIMER_NORESTART;
@@ -4036,20 +4030,19 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
{
struct cfs_bandwidth *cfs_b =
container_of(timer, struct cfs_bandwidth, period_timer);
- ktime_t now;
int overrun;
int idle = 0;
raw_spin_lock(&cfs_b->lock);
for (;;) {
- now = hrtimer_cb_get_time(timer);
- overrun = hrtimer_forward(timer, now, cfs_b->period);
-
+ overrun = hrtimer_forward_now(timer, cfs_b->period);
if (!overrun)
break;
idle = do_sched_cfs_period_timer(cfs_b, overrun);
}
+ if (idle)
+ cfs_b->period_active = 0;
raw_spin_unlock(&cfs_b->lock);
return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
@@ -4063,7 +4056,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
cfs_b->period = ns_to_ktime(default_cfs_period());
INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
- hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
cfs_b->period_timer.function = sched_cfs_period_timer;
hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cfs_b->slack_timer.function = sched_cfs_slack_timer;
@@ -4075,28 +4068,15 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
INIT_LIST_HEAD(&cfs_rq->throttled_list);
}
-/* requires cfs_b->lock, may release to reprogram timer */
-void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b, bool force)
+void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
{
- /*
- * The timer may be active because we're trying to set a new bandwidth
- * period or because we're racing with the tear-down path
- * (timer_active==0 becomes visible before the hrtimer call-back
- * terminates). In either case we ensure that it's re-programmed
- */
- while (unlikely(hrtimer_active(&cfs_b->period_timer)) &&
- hrtimer_try_to_cancel(&cfs_b->period_timer) < 0) {
- /* bounce the lock to allow do_sched_cfs_period_timer to run */
- raw_spin_unlock(&cfs_b->lock);
- cpu_relax();
- raw_spin_lock(&cfs_b->lock);
- /* if someone else restarted the timer then we're done */
- if (!force && cfs_b->timer_active)
- return;
- }
+ lockdep_assert_held(&cfs_b->lock);
- cfs_b->timer_active = 1;
- start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period);
+ if (!cfs_b->period_active) {
+ cfs_b->period_active = 1;
+ hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
+ hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
+ }
}
static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 560d2fa623c3..7d7093c51f8d 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -18,19 +18,22 @@ static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
{
struct rt_bandwidth *rt_b =
container_of(timer, struct rt_bandwidth, rt_period_timer);
- ktime_t now;
- int overrun;
int idle = 0;
+ int overrun;
+ raw_spin_lock(&rt_b->rt_runtime_lock);
for (;;) {
- now = hrtimer_cb_get_time(timer);
- overrun = hrtimer_forward(timer, now, rt_b->rt_period);
-
+ overrun = hrtimer_forward_now(timer, rt_b->rt_period);
if (!overrun)
break;
+ raw_spin_unlock(&rt_b->rt_runtime_lock);
idle = do_sched_rt_period_timer(rt_b, overrun);
+ raw_spin_lock(&rt_b->rt_runtime_lock);
}
+ if (idle)
+ rt_b->rt_period_active = 0;
+ raw_spin_unlock(&rt_b->rt_runtime_lock);
return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
}
@@ -52,11 +55,12 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
return;
- if (hrtimer_active(&rt_b->rt_period_timer))
- return;
-
raw_spin_lock(&rt_b->rt_runtime_lock);
- start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
+ if (!rt_b->rt_period_active) {
+ rt_b->rt_period_active = 1;
+ hrtimer_forward_now(&rt_b->rt_period_timer, rt_b->rt_period);
+ hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
+ }
raw_spin_unlock(&rt_b->rt_runtime_lock);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d62b2882232b..aea7c1f393cb 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -137,6 +137,7 @@ struct rt_bandwidth {
ktime_t rt_period;
u64 rt_runtime;
struct hrtimer rt_period_timer;
+ unsigned int rt_period_active;
};
void __dl_clear_params(struct task_struct *p);
@@ -221,7 +222,7 @@ struct cfs_bandwidth {
s64 hierarchical_quota;
u64 runtime_expires;
- int idle, timer_active;
+ int idle, period_active;
struct hrtimer period_timer, slack_timer;
struct list_head throttled_cfs_rq;
@@ -312,7 +313,7 @@ extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
-extern void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b, bool force);
+extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
extern void free_rt_sched_group(struct task_group *tg);
@@ -1409,8 +1410,6 @@ static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
static inline void sched_avg_update(struct rq *rq) { }
#endif
-extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
-
/*
* __task_rq_lock - lock the rq @p resides on.
*/
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 2082b1a88fb9..b13e9d2de302 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -349,15 +349,6 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "timer_migration",
- .data = &sysctl_timer_migration,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
- },
#endif /* CONFIG_SMP */
#ifdef CONFIG_NUMA_BALANCING
{
@@ -1132,6 +1123,15 @@ static struct ctl_table kern_table[] = {
.extra1 = &zero,
.extra2 = &one,
},
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+ {
+ .procname = "timer_migration",
+ .data = &sysctl_timer_migration,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = timer_migration_handler,
+ },
+#endif
{ }
};
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 01f0312419b3..ffc4cc3dcd47 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -13,19 +13,4 @@ obj-$(CONFIG_TIMER_STATS) += timer_stats.o
obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
obj-$(CONFIG_TEST_UDELAY) += test_udelay.o
-$(obj)/time.o: $(obj)/timeconst.h
-
-quiet_cmd_hzfile = HZFILE $@
- cmd_hzfile = echo "hz=$(CONFIG_HZ)" > $@
-
-targets += hz.bc
-$(obj)/hz.bc: $(objtree)/include/config/hz.h FORCE
- $(call if_changed,hzfile)
-
-quiet_cmd_bc = BC $@
- cmd_bc = bc -q $(filter-out FORCE,$^) > $@
-
-targets += timeconst.h
-$(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE
- $(call if_changed,bc)
-
+$(obj)/time.o: $(objtree)/include/config/
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 1b001ed1edb9..7fbba635a549 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -317,19 +317,16 @@ EXPORT_SYMBOL_GPL(alarm_init);
* @alarm: ptr to alarm to set
* @start: time to run the alarm
*/
-int alarm_start(struct alarm *alarm, ktime_t start)
+void alarm_start(struct alarm *alarm, ktime_t start)
{
struct alarm_base *base = &alarm_bases[alarm->type];
unsigned long flags;
- int ret;
spin_lock_irqsave(&base->lock, flags);
alarm->node.expires = start;
alarmtimer_enqueue(base, alarm);
- ret = hrtimer_start(&alarm->timer, alarm->node.expires,
- HRTIMER_MODE_ABS);
+ hrtimer_start(&alarm->timer, alarm->node.expires, HRTIMER_MODE_ABS);
spin_unlock_irqrestore(&base->lock, flags);
- return ret;
}
EXPORT_SYMBOL_GPL(alarm_start);
@@ -338,12 +335,12 @@ EXPORT_SYMBOL_GPL(alarm_start);
* @alarm: ptr to alarm to set
* @start: time relative to now to run the alarm
*/
-int alarm_start_relative(struct alarm *alarm, ktime_t start)
+void alarm_start_relative(struct alarm *alarm, ktime_t start)
{
struct alarm_base *base = &alarm_bases[alarm->type];
start = ktime_add(start, base->gettime());
- return alarm_start(alarm, start);
+ alarm_start(alarm, start);
}
EXPORT_SYMBOL_GPL(alarm_start_relative);
@@ -495,12 +492,12 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm,
*/
static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp)
{
- clockid_t baseid = alarm_bases[clock2alarm(which_clock)].base_clockid;
-
if (!alarmtimer_get_rtcdev())
return -EINVAL;
- return hrtimer_get_res(baseid, tp);
+ tp->tv_sec = 0;
+ tp->tv_nsec = hrtimer_resolution;
+ return 0;
}
/**
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 637a09461c1d..08ccc3da3ca0 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -94,8 +94,8 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
}
EXPORT_SYMBOL_GPL(clockevent_delta2ns);
-static int __clockevents_set_state(struct clock_event_device *dev,
- enum clock_event_state state)
+static int __clockevents_switch_state(struct clock_event_device *dev,
+ enum clock_event_state state)
{
/* Transition with legacy set_mode() callback */
if (dev->set_mode) {
@@ -134,32 +134,44 @@ static int __clockevents_set_state(struct clock_event_device *dev,
return -ENOSYS;
return dev->set_state_oneshot(dev);
+ case CLOCK_EVT_STATE_ONESHOT_STOPPED:
+ /* Core internal bug */
+ if (WARN_ONCE(!clockevent_state_oneshot(dev),
+ "Current state: %d\n",
+ clockevent_get_state(dev)))
+ return -EINVAL;
+
+ if (dev->set_state_oneshot_stopped)
+ return dev->set_state_oneshot_stopped(dev);
+ else
+ return -ENOSYS;
+
default:
return -ENOSYS;
}
}
/**
- * clockevents_set_state - set the operating state of a clock event device
+ * clockevents_switch_state - set the operating state of a clock event device
* @dev: device to modify
* @state: new state
*
* Must be called with interrupts disabled !
*/
-void clockevents_set_state(struct clock_event_device *dev,
- enum clock_event_state state)
+void clockevents_switch_state(struct clock_event_device *dev,
+ enum clock_event_state state)
{
- if (dev->state != state) {
- if (__clockevents_set_state(dev, state))
+ if (clockevent_get_state(dev) != state) {
+ if (__clockevents_switch_state(dev, state))
return;
- dev->state = state;
+ clockevent_set_state(dev, state);
/*
* A nsec2cyc multiplicator of 0 is invalid and we'd crash
* on it, so fix it up and emit a warning:
*/
- if (state == CLOCK_EVT_STATE_ONESHOT) {
+ if (clockevent_state_oneshot(dev)) {
if (unlikely(!dev->mult)) {
dev->mult = 1;
WARN_ON(1);
@@ -174,7 +186,7 @@ void clockevents_set_state(struct clock_event_device *dev,
*/
void clockevents_shutdown(struct clock_event_device *dev)
{
- clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
+ clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
dev->next_event.tv64 = KTIME_MAX;
}
@@ -248,7 +260,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev)
delta = dev->min_delta_ns;
dev->next_event = ktime_add_ns(ktime_get(), delta);
- if (dev->state == CLOCK_EVT_STATE_SHUTDOWN)
+ if (clockevent_state_shutdown(dev))
return 0;
dev->retries++;
@@ -285,7 +297,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev)
delta = dev->min_delta_ns;
dev->next_event = ktime_add_ns(ktime_get(), delta);
- if (dev->state == CLOCK_EVT_STATE_SHUTDOWN)
+ if (clockevent_state_shutdown(dev))
return 0;
dev->retries++;
@@ -317,9 +329,13 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
dev->next_event = expires;
- if (dev->state == CLOCK_EVT_STATE_SHUTDOWN)
+ if (clockevent_state_shutdown(dev))
return 0;
+ /* We must be in ONESHOT state here */
+ WARN_ONCE(!clockevent_state_oneshot(dev), "Current state: %d\n",
+ clockevent_get_state(dev));
+
/* Shortcut for clockevent devices that can deal with ktime. */
if (dev->features & CLOCK_EVT_FEAT_KTIME)
return dev->set_next_ktime(expires, dev);
@@ -362,7 +378,7 @@ static int clockevents_replace(struct clock_event_device *ced)
struct clock_event_device *dev, *newdev = NULL;
list_for_each_entry(dev, &clockevent_devices, list) {
- if (dev == ced || dev->state != CLOCK_EVT_STATE_DETACHED)
+ if (dev == ced || !clockevent_state_detached(dev))
continue;
if (!tick_check_replacement(newdev, dev))
@@ -388,7 +404,7 @@ static int clockevents_replace(struct clock_event_device *ced)
static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu)
{
/* Fast track. Device is unused */
- if (ced->state == CLOCK_EVT_STATE_DETACHED) {
+ if (clockevent_state_detached(ced)) {
list_del_init(&ced->list);
return 0;
}
@@ -445,7 +461,8 @@ static int clockevents_sanity_check(struct clock_event_device *dev)
if (dev->set_mode) {
/* We shouldn't be supporting new modes now */
WARN_ON(dev->set_state_periodic || dev->set_state_oneshot ||
- dev->set_state_shutdown || dev->tick_resume);
+ dev->set_state_shutdown || dev->tick_resume ||
+ dev->set_state_oneshot_stopped);
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
return 0;
@@ -480,7 +497,7 @@ void clockevents_register_device(struct clock_event_device *dev)
BUG_ON(clockevents_sanity_check(dev));
/* Initialize state to DETACHED */
- dev->state = CLOCK_EVT_STATE_DETACHED;
+ clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);
if (!dev->cpumask) {
WARN_ON(num_possible_cpus() > 1);
@@ -545,11 +562,11 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq)
{
clockevents_config(dev, freq);
- if (dev->state == CLOCK_EVT_STATE_ONESHOT)
+ if (clockevent_state_oneshot(dev))
return clockevents_program_event(dev, dev->next_event, false);
- if (dev->state == CLOCK_EVT_STATE_PERIODIC)
- return __clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC);
+ if (clockevent_state_periodic(dev))
+ return __clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC);
return 0;
}
@@ -603,13 +620,13 @@ void clockevents_exchange_device(struct clock_event_device *old,
*/
if (old) {
module_put(old->owner);
- clockevents_set_state(old, CLOCK_EVT_STATE_DETACHED);
+ clockevents_switch_state(old, CLOCK_EVT_STATE_DETACHED);
list_del(&old->list);
list_add(&old->list, &clockevents_released);
}
if (new) {
- BUG_ON(new->state != CLOCK_EVT_STATE_DETACHED);
+ BUG_ON(!clockevent_state_detached(new));
clockevents_shutdown(new);
}
}
@@ -622,7 +639,7 @@ void clockevents_suspend(void)
struct clock_event_device *dev;
list_for_each_entry_reverse(dev, &clockevent_devices, list)
- if (dev->suspend)
+ if (dev->suspend && !clockevent_state_detached(dev))
dev->suspend(dev);
}
@@ -634,7 +651,7 @@ void clockevents_resume(void)
struct clock_event_device *dev;
list_for_each_entry(dev, &clockevent_devices, list)
- if (dev->resume)
+ if (dev->resume && !clockevent_state_detached(dev))
dev->resume(dev);
}
@@ -665,7 +682,7 @@ void tick_cleanup_dead_cpu(int cpu)
if (cpumask_test_cpu(cpu, dev->cpumask) &&
cpumask_weight(dev->cpumask) == 1 &&
!tick_is_broadcast_device(dev)) {
- BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED);
+ BUG_ON(!clockevent_state_detached(dev));
list_del(&dev->list);
}
}
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 15facb1b9c60..841b72f720e8 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -23,6 +23,8 @@
* o Allow clocksource drivers to be unregistered
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/device.h>
#include <linux/clocksource.h>
#include <linux/init.h>
@@ -216,10 +218,11 @@ static void clocksource_watchdog(unsigned long data)
/* Check the deviation from the watchdog clocksource. */
if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
- pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name);
- pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
+ pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n",
+ cs->name);
+ pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
watchdog->name, wdnow, wdlast, watchdog->mask);
- pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
+ pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
cs->name, csnow, cslast, cs->mask);
__clocksource_unstable(cs);
continue;
@@ -567,9 +570,8 @@ static void __clocksource_select(bool skipcur)
*/
if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
/* Override clocksource cannot be used. */
- printk(KERN_WARNING "Override clocksource %s is not "
- "HRT compatible. Cannot switch while in "
- "HRT/NOHZ mode\n", cs->name);
+ pr_warn("Override clocksource %s is not HRT compatible - cannot switch while in HRT/NOHZ mode\n",
+ cs->name);
override_name[0] = 0;
} else
/* Override clocksource can be used. */
@@ -708,8 +710,8 @@ void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq
clocksource_update_max_deferment(cs);
- pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
- cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
+ pr_info("%s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
+ cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
}
EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
@@ -1008,12 +1010,10 @@ __setup("clocksource=", boot_override_clocksource);
static int __init boot_override_clock(char* str)
{
if (!strcmp(str, "pmtmr")) {
- printk("Warning: clock=pmtmr is deprecated. "
- "Use clocksource=acpi_pm.\n");
+ pr_warn("clock=pmtmr is deprecated - use clocksource=acpi_pm\n");
return boot_override_clocksource("acpi_pm");
}
- printk("Warning! clock= boot option is deprecated. "
- "Use clocksource=xyz\n");
+ pr_warn("clock= boot option is deprecated - use clocksource=xyz\n");
return boot_override_clocksource(str);
}
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 93ef7190bdea..5c7ae4b641c4 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -66,33 +66,29 @@
*/
DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
{
-
.lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
+ .seq = SEQCNT_ZERO(hrtimer_bases.seq),
.clock_base =
{
{
.index = HRTIMER_BASE_MONOTONIC,
.clockid = CLOCK_MONOTONIC,
.get_time = &ktime_get,
- .resolution = KTIME_LOW_RES,
},
{
.index = HRTIMER_BASE_REALTIME,
.clockid = CLOCK_REALTIME,
.get_time = &ktime_get_real,
- .resolution = KTIME_LOW_RES,
},
{
.index = HRTIMER_BASE_BOOTTIME,
.clockid = CLOCK_BOOTTIME,
.get_time = &ktime_get_boottime,
- .resolution = KTIME_LOW_RES,
},
{
.index = HRTIMER_BASE_TAI,
.clockid = CLOCK_TAI,
.get_time = &ktime_get_clocktai,
- .resolution = KTIME_LOW_RES,
},
}
};
@@ -109,27 +105,6 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id)
return hrtimer_clock_to_base_table[clock_id];
}
-
-/*
- * Get the coarse grained time at the softirq based on xtime and
- * wall_to_monotonic.
- */
-static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
-{
- ktime_t xtim, mono, boot, tai;
- ktime_t off_real, off_boot, off_tai;
-
- mono = ktime_get_update_offsets_tick(&off_real, &off_boot, &off_tai);
- boot = ktime_add(mono, off_boot);
- xtim = ktime_add(mono, off_real);
- tai = ktime_add(mono, off_tai);
-
- base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
- base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
- base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
- base->clock_base[HRTIMER_BASE_TAI].softirq_time = tai;
-}
-
/*
* Functions and macros which are different for UP/SMP systems are kept in a
* single place
@@ -137,6 +112,18 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
#ifdef CONFIG_SMP
/*
+ * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base()
+ * such that hrtimer_callback_running() can unconditionally dereference
+ * timer->base->cpu_base
+ */
+static struct hrtimer_cpu_base migration_cpu_base = {
+ .seq = SEQCNT_ZERO(migration_cpu_base),
+ .clock_base = { { .cpu_base = &migration_cpu_base, }, },
+};
+
+#define migration_base migration_cpu_base.clock_base[0]
+
+/*
* We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
* means that all timers which are tied to this base via timer->base are
* locked, and the base itself is locked too.
@@ -145,8 +132,8 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
* be found on the lists/queues.
*
* When the timer's base is locked, and the timer removed from list, it is
- * possible to set timer->base = NULL and drop the lock: the timer remains
- * locked.
+ * possible to set timer->base = &migration_base and drop the lock: the timer
+ * remains locked.
*/
static
struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
@@ -156,7 +143,7 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
for (;;) {
base = timer->base;
- if (likely(base != NULL)) {
+ if (likely(base != &migration_base)) {
raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
if (likely(base == timer->base))
return base;
@@ -190,6 +177,24 @@ hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
#endif
}
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+static inline
+struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
+ int pinned)
+{
+ if (pinned || !base->migration_enabled)
+ return this_cpu_ptr(&hrtimer_bases);
+ return &per_cpu(hrtimer_bases, get_nohz_timer_target());
+}
+#else
+static inline
+struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
+ int pinned)
+{
+ return this_cpu_ptr(&hrtimer_bases);
+}
+#endif
+
/*
* Switch the timer base to the current CPU when possible.
*/
@@ -197,14 +202,13 @@ static inline struct hrtimer_clock_base *
switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
int pinned)
{
+ struct hrtimer_cpu_base *new_cpu_base, *this_base;
struct hrtimer_clock_base *new_base;
- struct hrtimer_cpu_base *new_cpu_base;
- int this_cpu = smp_processor_id();
- int cpu = get_nohz_timer_target(pinned);
int basenum = base->index;
+ this_base = this_cpu_ptr(&hrtimer_bases);
+ new_cpu_base = get_target_base(this_base, pinned);
again:
- new_cpu_base = &per_cpu(hrtimer_bases, cpu);
new_base = &new_cpu_base->clock_base[basenum];
if (base != new_base) {
@@ -220,22 +224,24 @@ again:
if (unlikely(hrtimer_callback_running(timer)))
return base;
- /* See the comment in lock_timer_base() */
- timer->base = NULL;
+ /* See the comment in lock_hrtimer_base() */
+ timer->base = &migration_base;
raw_spin_unlock(&base->cpu_base->lock);
raw_spin_lock(&new_base->cpu_base->lock);
- if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
- cpu = this_cpu;
+ if (new_cpu_base != this_base &&
+ hrtimer_check_target(timer, new_base)) {
raw_spin_unlock(&new_base->cpu_base->lock);
raw_spin_lock(&base->cpu_base->lock);
+ new_cpu_base = this_base;
timer->base = base;
goto again;
}
timer->base = new_base;
} else {
- if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
- cpu = this_cpu;
+ if (new_cpu_base != this_base &&
+ hrtimer_check_target(timer, new_base)) {
+ new_cpu_base = this_base;
goto again;
}
}
@@ -443,24 +449,35 @@ static inline void debug_deactivate(struct hrtimer *timer)
}
#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
+static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base,
+ struct hrtimer *timer)
+{
+#ifdef CONFIG_HIGH_RES_TIMERS
+ cpu_base->next_timer = timer;
+#endif
+}
+
static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
{
struct hrtimer_clock_base *base = cpu_base->clock_base;
ktime_t expires, expires_next = { .tv64 = KTIME_MAX };
- int i;
+ unsigned int active = cpu_base->active_bases;
- for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
+ hrtimer_update_next_timer(cpu_base, NULL);
+ for (; active; base++, active >>= 1) {
struct timerqueue_node *next;
struct hrtimer *timer;
- next = timerqueue_getnext(&base->active);
- if (!next)
+ if (!(active & 0x01))
continue;
+ next = timerqueue_getnext(&base->active);
timer = container_of(next, struct hrtimer, node);
expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
- if (expires.tv64 < expires_next.tv64)
+ if (expires.tv64 < expires_next.tv64) {
expires_next = expires;
+ hrtimer_update_next_timer(cpu_base, timer);
+ }
}
/*
* clock_was_set() might have changed base->offset of any of
@@ -473,6 +490,16 @@ static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
}
#endif
+static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
+{
+ ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
+ ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
+ ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
+
+ return ktime_get_update_offsets_now(&base->clock_was_set_seq,
+ offs_real, offs_boot, offs_tai);
+}
+
/* High resolution timer related functions */
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -480,6 +507,8 @@ static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
* High resolution timer enabled ?
*/
static int hrtimer_hres_enabled __read_mostly = 1;
+unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC;
+EXPORT_SYMBOL_GPL(hrtimer_resolution);
/*
* Enable / Disable high resolution mode
@@ -508,9 +537,14 @@ static inline int hrtimer_is_hres_enabled(void)
/*
* Is the high resolution mode active ?
*/
+static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
+{
+ return cpu_base->hres_active;
+}
+
static inline int hrtimer_hres_active(void)
{
- return __this_cpu_read(hrtimer_bases.hres_active);
+ return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
}
/*
@@ -521,7 +555,12 @@ static inline int hrtimer_hres_active(void)
static void
hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
{
- ktime_t expires_next = __hrtimer_get_next_event(cpu_base);
+ ktime_t expires_next;
+
+ if (!cpu_base->hres_active)
+ return;
+
+ expires_next = __hrtimer_get_next_event(cpu_base);
if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64)
return;
@@ -545,63 +584,53 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
if (cpu_base->hang_detected)
return;
- if (cpu_base->expires_next.tv64 != KTIME_MAX)
- tick_program_event(cpu_base->expires_next, 1);
+ tick_program_event(cpu_base->expires_next, 1);
}
/*
- * Shared reprogramming for clock_realtime and clock_monotonic
- *
* When a timer is enqueued and expires earlier than the already enqueued
* timers, we have to check, whether it expires earlier than the timer for
* which the clock event device was armed.
*
- * Note, that in case the state has HRTIMER_STATE_CALLBACK set, no reprogramming
- * and no expiry check happens. The timer gets enqueued into the rbtree. The
- * reprogramming and expiry check is done in the hrtimer_interrupt or in the
- * softirq.
- *
* Called with interrupts disabled and base->cpu_base.lock held
*/
-static int hrtimer_reprogram(struct hrtimer *timer,
- struct hrtimer_clock_base *base)
+static void hrtimer_reprogram(struct hrtimer *timer,
+ struct hrtimer_clock_base *base)
{
struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
- int res;
WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
/*
- * When the callback is running, we do not reprogram the clock event
- * device. The timer callback is either running on a different CPU or
- * the callback is executed in the hrtimer_interrupt context. The
- * reprogramming is handled either by the softirq, which called the
- * callback or at the end of the hrtimer_interrupt.
+ * If the timer is not on the current cpu, we cannot reprogram
+ * the other cpus clock event device.
*/
- if (hrtimer_callback_running(timer))
- return 0;
+ if (base->cpu_base != cpu_base)
+ return;
+
+ /*
+ * If the hrtimer interrupt is running, then it will
+ * reevaluate the clock bases and reprogram the clock event
+ * device. The callbacks are always executed in hard interrupt
+ * context so we don't need an extra check for a running
+ * callback.
+ */
+ if (cpu_base->in_hrtirq)
+ return;
/*
* CLOCK_REALTIME timer might be requested with an absolute
- * expiry time which is less than base->offset. Nothing wrong
- * about that, just avoid to call into the tick code, which
- * has now objections against negative expiry values.
+ * expiry time which is less than base->offset. Set it to 0.
*/
if (expires.tv64 < 0)
- return -ETIME;
+ expires.tv64 = 0;
if (expires.tv64 >= cpu_base->expires_next.tv64)
- return 0;
+ return;
- /*
- * When the target cpu of the timer is currently executing
- * hrtimer_interrupt(), then we do not touch the clock event
- * device. hrtimer_interrupt() will reevaluate all clock bases
- * before reprogramming the device.
- */
- if (cpu_base->in_hrtirq)
- return 0;
+ /* Update the pointer to the next expiring timer */
+ cpu_base->next_timer = timer;
/*
* If a hang was detected in the last timer interrupt then we
@@ -610,15 +639,14 @@ static int hrtimer_reprogram(struct hrtimer *timer,
* to make progress.
*/
if (cpu_base->hang_detected)
- return 0;
+ return;
/*
- * Clockevents returns -ETIME, when the event was in the past.
+ * Program the timer hardware. We enforce the expiry for
+ * events which are already in the past.
*/
- res = tick_program_event(expires, 0);
- if (!IS_ERR_VALUE(res))
- cpu_base->expires_next = expires;
- return res;
+ cpu_base->expires_next = expires;
+ tick_program_event(expires, 1);
}
/*
@@ -630,15 +658,6 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
base->hres_active = 0;
}
-static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
-{
- ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
- ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
- ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
-
- return ktime_get_update_offsets_now(offs_real, offs_boot, offs_tai);
-}
-
/*
* Retrigger next event is called after clock was set
*
@@ -648,7 +667,7 @@ static void retrigger_next_event(void *arg)
{
struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
- if (!hrtimer_hres_active())
+ if (!base->hres_active)
return;
raw_spin_lock(&base->lock);
@@ -662,29 +681,19 @@ static void retrigger_next_event(void *arg)
*/
static int hrtimer_switch_to_hres(void)
{
- int i, cpu = smp_processor_id();
- struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
- unsigned long flags;
-
- if (base->hres_active)
- return 1;
-
- local_irq_save(flags);
+ struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
if (tick_init_highres()) {
- local_irq_restore(flags);
printk(KERN_WARNING "Could not switch to high resolution "
- "mode on CPU %d\n", cpu);
+ "mode on CPU %d\n", base->cpu);
return 0;
}
base->hres_active = 1;
- for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
- base->clock_base[i].resolution = KTIME_HIGH_RES;
+ hrtimer_resolution = HIGH_RES_NSEC;
tick_setup_sched_timer();
/* "Retrigger" the interrupt to get things going */
retrigger_next_event(NULL);
- local_irq_restore(flags);
return 1;
}
@@ -706,6 +715,7 @@ void clock_was_set_delayed(void)
#else
+static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *b) { return 0; }
static inline int hrtimer_hres_active(void) { return 0; }
static inline int hrtimer_is_hres_enabled(void) { return 0; }
static inline int hrtimer_switch_to_hres(void) { return 0; }
@@ -803,6 +813,14 @@ void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
*
* Forward the timer expiry so it will expire in the future.
* Returns the number of overruns.
+ *
+ * Can be safely called from the callback function of @timer. If
+ * called from other contexts @timer must neither be enqueued nor
+ * running the callback and the caller needs to take care of
+ * serialization.
+ *
+ * Note: This only updates the timer expiry value and does not requeue
+ * the timer.
*/
u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
{
@@ -814,8 +832,11 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
if (delta.tv64 < 0)
return 0;
- if (interval.tv64 < timer->base->resolution.tv64)
- interval.tv64 = timer->base->resolution.tv64;
+ if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED))
+ return 0;
+
+ if (interval.tv64 < hrtimer_resolution)
+ interval.tv64 = hrtimer_resolution;
if (unlikely(delta.tv64 >= interval.tv64)) {
s64 incr = ktime_to_ns(interval);
@@ -849,16 +870,11 @@ static int enqueue_hrtimer(struct hrtimer *timer,
{
debug_activate(timer);
- timerqueue_add(&base->active, &timer->node);
base->cpu_base->active_bases |= 1 << base->index;
- /*
- * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
- * state of a possibly running callback.
- */
- timer->state |= HRTIMER_STATE_ENQUEUED;
+ timer->state = HRTIMER_STATE_ENQUEUED;
- return (&timer->node == base->active.next);
+ return timerqueue_add(&base->active, &timer->node);
}
/*
@@ -875,39 +891,38 @@ static void __remove_hrtimer(struct hrtimer *timer,
struct hrtimer_clock_base *base,
unsigned long newstate, int reprogram)
{
- struct timerqueue_node *next_timer;
- if (!(timer->state & HRTIMER_STATE_ENQUEUED))
- goto out;
+ struct hrtimer_cpu_base *cpu_base = base->cpu_base;
+ unsigned int state = timer->state;
+
+ timer->state = newstate;
+ if (!(state & HRTIMER_STATE_ENQUEUED))
+ return;
+
+ if (!timerqueue_del(&base->active, &timer->node))
+ cpu_base->active_bases &= ~(1 << base->index);
- next_timer = timerqueue_getnext(&base->active);
- timerqueue_del(&base->active, &timer->node);
- if (&timer->node == next_timer) {
#ifdef CONFIG_HIGH_RES_TIMERS
- /* Reprogram the clock event device. if enabled */
- if (reprogram && hrtimer_hres_active()) {
- ktime_t expires;
-
- expires = ktime_sub(hrtimer_get_expires(timer),
- base->offset);
- if (base->cpu_base->expires_next.tv64 == expires.tv64)
- hrtimer_force_reprogram(base->cpu_base, 1);
- }
+ /*
+ * Note: If reprogram is false we do not update
+ * cpu_base->next_timer. This happens when we remove the first
+ * timer on a remote cpu. No harm as we never dereference
+ * cpu_base->next_timer. So the worst thing what can happen is
+ * an superflous call to hrtimer_force_reprogram() on the
+ * remote cpu later on if the same timer gets enqueued again.
+ */
+ if (reprogram && timer == cpu_base->next_timer)
+ hrtimer_force_reprogram(cpu_base, 1);
#endif
- }
- if (!timerqueue_getnext(&base->active))
- base->cpu_base->active_bases &= ~(1 << base->index);
-out:
- timer->state = newstate;
}
/*
* remove hrtimer, called with base lock held
*/
static inline int
-remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
+remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
{
if (hrtimer_is_queued(timer)) {
- unsigned long state;
+ unsigned long state = timer->state;
int reprogram;
/*
@@ -921,30 +936,35 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
debug_deactivate(timer);
timer_stats_hrtimer_clear_start_info(timer);
reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
- /*
- * We must preserve the CALLBACK state flag here,
- * otherwise we could move the timer base in
- * switch_hrtimer_base.
- */
- state = timer->state & HRTIMER_STATE_CALLBACK;
+
+ if (!restart)
+ state = HRTIMER_STATE_INACTIVE;
+
__remove_hrtimer(timer, base, state, reprogram);
return 1;
}
return 0;
}
-int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
- unsigned long delta_ns, const enum hrtimer_mode mode,
- int wakeup)
+/**
+ * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
+ * @timer: the timer to be added
+ * @tim: expiry time
+ * @delta_ns: "slack" range for the timer
+ * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or
+ * relative (HRTIMER_MODE_REL)
+ */
+void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
+ unsigned long delta_ns, const enum hrtimer_mode mode)
{
struct hrtimer_clock_base *base, *new_base;
unsigned long flags;
- int ret, leftmost;
+ int leftmost;
base = lock_hrtimer_base(timer, &flags);
/* Remove an active timer from the queue: */
- ret = remove_hrtimer(timer, base);
+ remove_hrtimer(timer, base, true);
if (mode & HRTIMER_MODE_REL) {
tim = ktime_add_safe(tim, base->get_time());
@@ -956,7 +976,7 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
* timeouts. This will go away with the GTOD framework.
*/
#ifdef CONFIG_TIME_LOW_RES
- tim = ktime_add_safe(tim, base->resolution);
+ tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
#endif
}
@@ -968,85 +988,25 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
timer_stats_hrtimer_set_start_info(timer);
leftmost = enqueue_hrtimer(timer, new_base);
-
- if (!leftmost) {
- unlock_hrtimer_base(timer, &flags);
- return ret;
- }
+ if (!leftmost)
+ goto unlock;
if (!hrtimer_is_hres_active(timer)) {
/*
* Kick to reschedule the next tick to handle the new timer
* on dynticks target.
*/
- wake_up_nohz_cpu(new_base->cpu_base->cpu);
- } else if (new_base->cpu_base == this_cpu_ptr(&hrtimer_bases) &&
- hrtimer_reprogram(timer, new_base)) {
- /*
- * Only allow reprogramming if the new base is on this CPU.
- * (it might still be on another CPU if the timer was pending)
- *
- * XXX send_remote_softirq() ?
- */
- if (wakeup) {
- /*
- * We need to drop cpu_base->lock to avoid a
- * lock ordering issue vs. rq->lock.
- */
- raw_spin_unlock(&new_base->cpu_base->lock);
- raise_softirq_irqoff(HRTIMER_SOFTIRQ);
- local_irq_restore(flags);
- return ret;
- } else {
- __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
- }
+ if (new_base->cpu_base->nohz_active)
+ wake_up_nohz_cpu(new_base->cpu_base->cpu);
+ } else {
+ hrtimer_reprogram(timer, new_base);
}
-
+unlock:
unlock_hrtimer_base(timer, &flags);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(__hrtimer_start_range_ns);
-
-/**
- * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
- * @timer: the timer to be added
- * @tim: expiry time
- * @delta_ns: "slack" range for the timer
- * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or
- * relative (HRTIMER_MODE_REL)
- *
- * Returns:
- * 0 on success
- * 1 when the timer was active
- */
-int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
- unsigned long delta_ns, const enum hrtimer_mode mode)
-{
- return __hrtimer_start_range_ns(timer, tim, delta_ns, mode, 1);
}
EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
/**
- * hrtimer_start - (re)start an hrtimer on the current CPU
- * @timer: the timer to be added
- * @tim: expiry time
- * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or
- * relative (HRTIMER_MODE_REL)
- *
- * Returns:
- * 0 on success
- * 1 when the timer was active
- */
-int
-hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
-{
- return __hrtimer_start_range_ns(timer, tim, 0, mode, 1);
-}
-EXPORT_SYMBOL_GPL(hrtimer_start);
-
-
-/**
* hrtimer_try_to_cancel - try to deactivate a timer
* @timer: hrtimer to stop
*
@@ -1062,10 +1022,19 @@ int hrtimer_try_to_cancel(struct hrtimer *timer)
unsigned long flags;
int ret = -1;
+ /*
+ * Check lockless first. If the timer is not active (neither
+ * enqueued nor running the callback, nothing to do here. The
+ * base lock does not serialize against a concurrent enqueue,
+ * so we can avoid taking it.
+ */
+ if (!hrtimer_active(timer))
+ return 0;
+
base = lock_hrtimer_base(timer, &flags);
if (!hrtimer_callback_running(timer))
- ret = remove_hrtimer(timer, base);
+ ret = remove_hrtimer(timer, base, false);
unlock_hrtimer_base(timer, &flags);
@@ -1115,26 +1084,22 @@ EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
/**
* hrtimer_get_next_event - get the time until next expiry event
*
- * Returns the delta to the next expiry event or KTIME_MAX if no timer
- * is pending.
+ * Returns the next expiry time or KTIME_MAX if no timer is pending.
*/
-ktime_t hrtimer_get_next_event(void)
+u64 hrtimer_get_next_event(void)
{
struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
- ktime_t mindelta = { .tv64 = KTIME_MAX };
+ u64 expires = KTIME_MAX;
unsigned long flags;
raw_spin_lock_irqsave(&cpu_base->lock, flags);
- if (!hrtimer_hres_active())
- mindelta = ktime_sub(__hrtimer_get_next_event(cpu_base),
- ktime_get());
+ if (!__hrtimer_hres_active(cpu_base))
+ expires = __hrtimer_get_next_event(cpu_base).tv64;
raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
- if (mindelta.tv64 < 0)
- mindelta.tv64 = 0;
- return mindelta;
+ return expires;
}
#endif
@@ -1176,37 +1141,73 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
}
EXPORT_SYMBOL_GPL(hrtimer_init);
-/**
- * hrtimer_get_res - get the timer resolution for a clock
- * @which_clock: which clock to query
- * @tp: pointer to timespec variable to store the resolution
+/*
+ * A timer is active, when it is enqueued into the rbtree or the
+ * callback function is running or it's in the state of being migrated
+ * to another cpu.
*
- * Store the resolution of the clock selected by @which_clock in the
- * variable pointed to by @tp.
+ * It is important for this function to not return a false negative.
*/
-int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
+bool hrtimer_active(const struct hrtimer *timer)
{
struct hrtimer_cpu_base *cpu_base;
- int base = hrtimer_clockid_to_base(which_clock);
+ unsigned int seq;
- cpu_base = raw_cpu_ptr(&hrtimer_bases);
- *tp = ktime_to_timespec(cpu_base->clock_base[base].resolution);
+ do {
+ cpu_base = READ_ONCE(timer->base->cpu_base);
+ seq = raw_read_seqcount_begin(&cpu_base->seq);
- return 0;
+ if (timer->state != HRTIMER_STATE_INACTIVE ||
+ cpu_base->running == timer)
+ return true;
+
+ } while (read_seqcount_retry(&cpu_base->seq, seq) ||
+ cpu_base != READ_ONCE(timer->base->cpu_base));
+
+ return false;
}
-EXPORT_SYMBOL_GPL(hrtimer_get_res);
+EXPORT_SYMBOL_GPL(hrtimer_active);
-static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
+/*
+ * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3
+ * distinct sections:
+ *
+ * - queued: the timer is queued
+ * - callback: the timer is being ran
+ * - post: the timer is inactive or (re)queued
+ *
+ * On the read side we ensure we observe timer->state and cpu_base->running
+ * from the same section, if anything changed while we looked at it, we retry.
+ * This includes timer->base changing because sequence numbers alone are
+ * insufficient for that.
+ *
+ * The sequence numbers are required because otherwise we could still observe
+ * a false negative if the read side got smeared over multiple consequtive
+ * __run_hrtimer() invocations.
+ */
+
+static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
+ struct hrtimer_clock_base *base,
+ struct hrtimer *timer, ktime_t *now)
{
- struct hrtimer_clock_base *base = timer->base;
- struct hrtimer_cpu_base *cpu_base = base->cpu_base;
enum hrtimer_restart (*fn)(struct hrtimer *);
int restart;
- WARN_ON(!irqs_disabled());
+ lockdep_assert_held(&cpu_base->lock);
debug_deactivate(timer);
- __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
+ cpu_base->running = timer;
+
+ /*
+ * Separate the ->running assignment from the ->state assignment.
+ *
+ * As with a regular write barrier, this ensures the read side in
+ * hrtimer_active() cannot observe cpu_base->running == NULL &&
+ * timer->state == INACTIVE.
+ */
+ raw_write_seqcount_barrier(&cpu_base->seq);
+
+ __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
timer_stats_account_hrtimer(timer);
fn = timer->function;
@@ -1222,58 +1223,43 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
raw_spin_lock(&cpu_base->lock);
/*
- * Note: We clear the CALLBACK bit after enqueue_hrtimer and
+ * Note: We clear the running state after enqueue_hrtimer and
* we do not reprogramm the event hardware. Happens either in
* hrtimer_start_range_ns() or in hrtimer_interrupt()
+ *
+ * Note: Because we dropped the cpu_base->lock above,
+ * hrtimer_start_range_ns() can have popped in and enqueued the timer
+ * for us already.
*/
- if (restart != HRTIMER_NORESTART) {
- BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
+ if (restart != HRTIMER_NORESTART &&
+ !(timer->state & HRTIMER_STATE_ENQUEUED))
enqueue_hrtimer(timer, base);
- }
- WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK));
+ /*
+ * Separate the ->running assignment from the ->state assignment.
+ *
+ * As with a regular write barrier, this ensures the read side in
+ * hrtimer_active() cannot observe cpu_base->running == NULL &&
+ * timer->state == INACTIVE.
+ */
+ raw_write_seqcount_barrier(&cpu_base->seq);
- timer->state &= ~HRTIMER_STATE_CALLBACK;
+ WARN_ON_ONCE(cpu_base->running != timer);
+ cpu_base->running = NULL;
}
-#ifdef CONFIG_HIGH_RES_TIMERS
-
-/*
- * High resolution timer interrupt
- * Called with interrupts disabled
- */
-void hrtimer_interrupt(struct clock_event_device *dev)
+static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
{
- struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
- ktime_t expires_next, now, entry_time, delta;
- int i, retries = 0;
-
- BUG_ON(!cpu_base->hres_active);
- cpu_base->nr_events++;
- dev->next_event.tv64 = KTIME_MAX;
-
- raw_spin_lock(&cpu_base->lock);
- entry_time = now = hrtimer_update_base(cpu_base);
-retry:
- cpu_base->in_hrtirq = 1;
- /*
- * We set expires_next to KTIME_MAX here with cpu_base->lock
- * held to prevent that a timer is enqueued in our queue via
- * the migration code. This does not affect enqueueing of
- * timers which run their callback and need to be requeued on
- * this CPU.
- */
- cpu_base->expires_next.tv64 = KTIME_MAX;
+ struct hrtimer_clock_base *base = cpu_base->clock_base;
+ unsigned int active = cpu_base->active_bases;
- for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
- struct hrtimer_clock_base *base;
+ for (; active; base++, active >>= 1) {
struct timerqueue_node *node;
ktime_t basenow;
- if (!(cpu_base->active_bases & (1 << i)))
+ if (!(active & 0x01))
continue;
- base = cpu_base->clock_base + i;
basenow = ktime_add(now, base->offset);
while ((node = timerqueue_getnext(&base->active))) {
@@ -1296,9 +1282,42 @@ retry:
if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
break;
- __run_hrtimer(timer, &basenow);
+ __run_hrtimer(cpu_base, base, timer, &basenow);
}
}
+}
+
+#ifdef CONFIG_HIGH_RES_TIMERS
+
+/*
+ * High resolution timer interrupt
+ * Called with interrupts disabled
+ */
+void hrtimer_interrupt(struct clock_event_device *dev)
+{
+ struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+ ktime_t expires_next, now, entry_time, delta;
+ int retries = 0;
+
+ BUG_ON(!cpu_base->hres_active);
+ cpu_base->nr_events++;
+ dev->next_event.tv64 = KTIME_MAX;
+
+ raw_spin_lock(&cpu_base->lock);
+ entry_time = now = hrtimer_update_base(cpu_base);
+retry:
+ cpu_base->in_hrtirq = 1;
+ /*
+ * We set expires_next to KTIME_MAX here with cpu_base->lock
+ * held to prevent that a timer is enqueued in our queue via
+ * the migration code. This does not affect enqueueing of
+ * timers which run their callback and need to be requeued on
+ * this CPU.
+ */
+ cpu_base->expires_next.tv64 = KTIME_MAX;
+
+ __hrtimer_run_queues(cpu_base, now);
+
/* Reevaluate the clock bases for the next expiry */
expires_next = __hrtimer_get_next_event(cpu_base);
/*
@@ -1310,8 +1329,7 @@ retry:
raw_spin_unlock(&cpu_base->lock);
/* Reprogramming necessary ? */
- if (expires_next.tv64 == KTIME_MAX ||
- !tick_program_event(expires_next, 0)) {
+ if (!tick_program_event(expires_next, 0)) {
cpu_base->hang_detected = 0;
return;
}
@@ -1344,8 +1362,8 @@ retry:
cpu_base->hang_detected = 1;
raw_spin_unlock(&cpu_base->lock);
delta = ktime_sub(now, entry_time);
- if (delta.tv64 > cpu_base->max_hang_time.tv64)
- cpu_base->max_hang_time = delta;
+ if ((unsigned int)delta.tv64 > cpu_base->max_hang_time)
+ cpu_base->max_hang_time = (unsigned int) delta.tv64;
/*
* Limit it to a sensible value as we enforce a longer
* delay. Give the CPU at least 100ms to catch up.
@@ -1363,7 +1381,7 @@ retry:
* local version of hrtimer_peek_ahead_timers() called with interrupts
* disabled.
*/
-static void __hrtimer_peek_ahead_timers(void)
+static inline void __hrtimer_peek_ahead_timers(void)
{
struct tick_device *td;
@@ -1375,29 +1393,6 @@ static void __hrtimer_peek_ahead_timers(void)
hrtimer_interrupt(td->evtdev);
}
-/**
- * hrtimer_peek_ahead_timers -- run soft-expired timers now
- *
- * hrtimer_peek_ahead_timers will peek at the timer queue of
- * the current cpu and check if there are any timers for which
- * the soft expires time has passed. If any such timers exist,
- * they are run immediately and then removed from the timer queue.
- *
- */
-void hrtimer_peek_ahead_timers(void)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __hrtimer_peek_ahead_timers();
- local_irq_restore(flags);
-}
-
-static void run_hrtimer_softirq(struct softirq_action *h)
-{
- hrtimer_peek_ahead_timers();
-}
-
#else /* CONFIG_HIGH_RES_TIMERS */
static inline void __hrtimer_peek_ahead_timers(void) { }
@@ -1405,66 +1400,32 @@ static inline void __hrtimer_peek_ahead_timers(void) { }
#endif /* !CONFIG_HIGH_RES_TIMERS */
/*
- * Called from timer softirq every jiffy, expire hrtimers:
- *
- * For HRT its the fall back code to run the softirq in the timer
- * softirq context in case the hrtimer initialization failed or has
- * not been done yet.
+ * Called from run_local_timers in hardirq context every jiffy
*/
-void hrtimer_run_pending(void)
+void hrtimer_run_queues(void)
{
- if (hrtimer_hres_active())
+ struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+ ktime_t now;
+
+ if (__hrtimer_hres_active(cpu_base))
return;
/*
- * This _is_ ugly: We have to check in the softirq context,
- * whether we can switch to highres and / or nohz mode. The
- * clocksource switch happens in the timer interrupt with
- * xtime_lock held. Notification from there only sets the
- * check bit in the tick_oneshot code, otherwise we might
- * deadlock vs. xtime_lock.
+ * This _is_ ugly: We have to check periodically, whether we
+ * can switch to highres and / or nohz mode. The clocksource
+ * switch happens with xtime_lock held. Notification from
+ * there only sets the check bit in the tick_oneshot code,
+ * otherwise we might deadlock vs. xtime_lock.
*/
- if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
+ if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) {
hrtimer_switch_to_hres();
-}
-
-/*
- * Called from hardirq context every jiffy
- */
-void hrtimer_run_queues(void)
-{
- struct timerqueue_node *node;
- struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
- struct hrtimer_clock_base *base;
- int index, gettime = 1;
-
- if (hrtimer_hres_active())
return;
-
- for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
- base = &cpu_base->clock_base[index];
- if (!timerqueue_getnext(&base->active))
- continue;
-
- if (gettime) {
- hrtimer_get_softirq_time(cpu_base);
- gettime = 0;
- }
-
- raw_spin_lock(&cpu_base->lock);
-
- while ((node = timerqueue_getnext(&base->active))) {
- struct hrtimer *timer;
-
- timer = container_of(node, struct hrtimer, node);
- if (base->softirq_time.tv64 <=
- hrtimer_get_expires_tv64(timer))
- break;
-
- __run_hrtimer(timer, &base->softirq_time);
- }
- raw_spin_unlock(&cpu_base->lock);
}
+
+ raw_spin_lock(&cpu_base->lock);
+ now = hrtimer_update_base(cpu_base);
+ __hrtimer_run_queues(cpu_base, now);
+ raw_spin_unlock(&cpu_base->lock);
}
/*
@@ -1497,8 +1458,6 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
do {
set_current_state(TASK_INTERRUPTIBLE);
hrtimer_start_expires(&t->timer, mode);
- if (!hrtimer_active(&t->timer))
- t->task = NULL;
if (likely(t->task))
freezable_schedule();
@@ -1642,11 +1601,11 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
debug_deactivate(timer);
/*
- * Mark it as STATE_MIGRATE not INACTIVE otherwise the
+ * Mark it as ENQUEUED not INACTIVE otherwise the
* timer could be seen as !active and just vanish away
* under us on another CPU
*/
- __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
+ __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);
timer->base = new_base;
/*
* Enqueue the timers on the new cpu. This does not
@@ -1657,9 +1616,6 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
* event device.
*/
enqueue_hrtimer(timer, new_base);
-
- /* Clear the migration state bit */
- timer->state &= ~HRTIMER_STATE_MIGRATE;
}
}
@@ -1731,9 +1687,6 @@ void __init hrtimers_init(void)
hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
register_cpu_notifier(&hrtimers_nb);
-#ifdef CONFIG_HIGH_RES_TIMERS
- open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
-#endif
}
/**
@@ -1772,8 +1725,6 @@ schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
hrtimer_init_sleeper(&t, current);
hrtimer_start_expires(&t.timer, mode);
- if (!hrtimer_active(&t.timer))
- t.task = NULL;
if (likely(t.task))
schedule();
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 7a681003001c..fb4d98c7fd43 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -35,6 +35,7 @@ unsigned long tick_nsec;
static u64 tick_length;
static u64 tick_length_base;
+#define SECS_PER_DAY 86400
#define MAX_TICKADJ 500LL /* usecs */
#define MAX_TICKADJ_SCALED \
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
@@ -76,6 +77,9 @@ static long time_adjust;
/* constant (boot-param configurable) NTP tick adjustment (upscaled) */
static s64 ntp_tick_adj;
+/* second value of the next pending leapsecond, or TIME64_MAX if no leap */
+static time64_t ntp_next_leap_sec = TIME64_MAX;
+
#ifdef CONFIG_NTP_PPS
/*
@@ -349,6 +353,7 @@ void ntp_clear(void)
tick_length = tick_length_base;
time_offset = 0;
+ ntp_next_leap_sec = TIME64_MAX;
/* Clear PPS state variables */
pps_clear();
}
@@ -359,6 +364,21 @@ u64 ntp_tick_length(void)
return tick_length;
}
+/**
+ * ntp_get_next_leap - Returns the next leapsecond in CLOCK_REALTIME ktime_t
+ *
+ * Provides the time of the next leapsecond against CLOCK_REALTIME in
+ * a ktime_t format. Returns KTIME_MAX if no leapsecond is pending.
+ */
+ktime_t ntp_get_next_leap(void)
+{
+ ktime_t ret;
+
+ if ((time_state == TIME_INS) && (time_status & STA_INS))
+ return ktime_set(ntp_next_leap_sec, 0);
+ ret.tv64 = KTIME_MAX;
+ return ret;
+}
/*
* this routine handles the overflow of the microsecond field
@@ -382,15 +402,21 @@ int second_overflow(unsigned long secs)
*/
switch (time_state) {
case TIME_OK:
- if (time_status & STA_INS)
+ if (time_status & STA_INS) {
time_state = TIME_INS;
- else if (time_status & STA_DEL)
+ ntp_next_leap_sec = secs + SECS_PER_DAY -
+ (secs % SECS_PER_DAY);
+ } else if (time_status & STA_DEL) {
time_state = TIME_DEL;
+ ntp_next_leap_sec = secs + SECS_PER_DAY -
+ ((secs+1) % SECS_PER_DAY);
+ }
break;
case TIME_INS:
- if (!(time_status & STA_INS))
+ if (!(time_status & STA_INS)) {
+ ntp_next_leap_sec = TIME64_MAX;
time_state = TIME_OK;
- else if (secs % 86400 == 0) {
+ } else if (secs % SECS_PER_DAY == 0) {
leap = -1;
time_state = TIME_OOP;
printk(KERN_NOTICE
@@ -398,19 +424,21 @@ int second_overflow(unsigned long secs)
}
break;
case TIME_DEL:
- if (!(time_status & STA_DEL))
+ if (!(time_status & STA_DEL)) {
+ ntp_next_leap_sec = TIME64_MAX;
time_state = TIME_OK;
- else if ((secs + 1) % 86400 == 0) {
+ } else if ((secs + 1) % SECS_PER_DAY == 0) {
leap = 1;
+ ntp_next_leap_sec = TIME64_MAX;
time_state = TIME_WAIT;
printk(KERN_NOTICE
"Clock: deleting leap second 23:59:59 UTC\n");
}
break;
case TIME_OOP:
+ ntp_next_leap_sec = TIME64_MAX;
time_state = TIME_WAIT;
break;
-
case TIME_WAIT:
if (!(time_status & (STA_INS | STA_DEL)))
time_state = TIME_OK;
@@ -547,6 +575,7 @@ static inline void process_adj_status(struct timex *txc, struct timespec64 *ts)
if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) {
time_state = TIME_OK;
time_status = STA_UNSYNC;
+ ntp_next_leap_sec = TIME64_MAX;
/* restart PPS frequency calibration */
pps_reset_freq_interval();
}
@@ -711,6 +740,24 @@ int __do_adjtimex(struct timex *txc, struct timespec64 *ts, s32 *time_tai)
if (!(time_status & STA_NANO))
txc->time.tv_usec /= NSEC_PER_USEC;
+ /* Handle leapsec adjustments */
+ if (unlikely(ts->tv_sec >= ntp_next_leap_sec)) {
+ if ((time_state == TIME_INS) && (time_status & STA_INS)) {
+ result = TIME_OOP;
+ txc->tai++;
+ txc->time.tv_sec--;
+ }
+ if ((time_state == TIME_DEL) && (time_status & STA_DEL)) {
+ result = TIME_WAIT;
+ txc->tai--;
+ txc->time.tv_sec++;
+ }
+ if ((time_state == TIME_OOP) &&
+ (ts->tv_sec == ntp_next_leap_sec)) {
+ result = TIME_WAIT;
+ }
+ }
+
return result;
}
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index bbd102ad9df7..65430504ca26 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -5,6 +5,7 @@ extern void ntp_init(void);
extern void ntp_clear(void);
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
extern u64 ntp_tick_length(void);
+extern ktime_t ntp_get_next_leap(void);
extern int second_overflow(unsigned long secs);
extern int ntp_validate_timex(struct timex *);
extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *);
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 31ea01f42e1f..31d11ac9fa47 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -272,13 +272,20 @@ static int posix_get_tai(clockid_t which_clock, struct timespec *tp)
return 0;
}
+static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec *tp)
+{
+ tp->tv_sec = 0;
+ tp->tv_nsec = hrtimer_resolution;
+ return 0;
+}
+
/*
* Initialize everything, well, just everything in Posix clocks/timers ;)
*/
static __init int init_posix_timers(void)
{
struct k_clock clock_realtime = {
- .clock_getres = hrtimer_get_res,
+ .clock_getres = posix_get_hrtimer_res,
.clock_get = posix_clock_realtime_get,
.clock_set = posix_clock_realtime_set,
.clock_adj = posix_clock_realtime_adj,
@@ -290,7 +297,7 @@ static __init int init_posix_timers(void)
.timer_del = common_timer_del,
};
struct k_clock clock_monotonic = {
- .clock_getres = hrtimer_get_res,
+ .clock_getres = posix_get_hrtimer_res,
.clock_get = posix_ktime_get_ts,
.nsleep = common_nsleep,
.nsleep_restart = hrtimer_nanosleep_restart,
@@ -300,7 +307,7 @@ static __init int init_posix_timers(void)
.timer_del = common_timer_del,
};
struct k_clock clock_monotonic_raw = {
- .clock_getres = hrtimer_get_res,
+ .clock_getres = posix_get_hrtimer_res,
.clock_get = posix_get_monotonic_raw,
};
struct k_clock clock_realtime_coarse = {
@@ -312,7 +319,7 @@ static __init int init_posix_timers(void)
.clock_get = posix_get_monotonic_coarse,
};
struct k_clock clock_tai = {
- .clock_getres = hrtimer_get_res,
+ .clock_getres = posix_get_hrtimer_res,
.clock_get = posix_get_tai,
.nsleep = common_nsleep,
.nsleep_restart = hrtimer_nanosleep_restart,
@@ -322,7 +329,7 @@ static __init int init_posix_timers(void)
.timer_del = common_timer_del,
};
struct k_clock clock_boottime = {
- .clock_getres = hrtimer_get_res,
+ .clock_getres = posix_get_hrtimer_res,
.clock_get = posix_get_boottime,
.nsleep = common_nsleep,
.nsleep_restart = hrtimer_nanosleep_restart,
diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
index 6aac4beedbbe..3e7db49a2381 100644
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -22,6 +22,7 @@ static void bc_set_mode(enum clock_event_mode mode,
struct clock_event_device *bc)
{
switch (mode) {
+ case CLOCK_EVT_MODE_UNUSED:
case CLOCK_EVT_MODE_SHUTDOWN:
/*
* Note, we cannot cancel the timer here as we might
@@ -66,9 +67,11 @@ static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
* hrtimer_{start/cancel} functions call into tracing,
* calls to these functions must be bound within RCU_NONIDLE.
*/
- RCU_NONIDLE(bc_moved = (hrtimer_try_to_cancel(&bctimer) >= 0) ?
- !hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED) :
- 0);
+ RCU_NONIDLE({
+ bc_moved = hrtimer_try_to_cancel(&bctimer) >= 0;
+ if (bc_moved)
+ hrtimer_start(&bctimer, expires,
+ HRTIMER_MODE_ABS_PINNED);});
if (bc_moved) {
/* Bind the "device" to the cpu */
bc->bound_on = smp_processor_id();
@@ -99,10 +102,13 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t)
{
ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer);
- if (ce_broadcast_hrtimer.next_event.tv64 == KTIME_MAX)
+ switch (ce_broadcast_hrtimer.mode) {
+ case CLOCK_EVT_MODE_ONESHOT:
+ if (ce_broadcast_hrtimer.next_event.tv64 != KTIME_MAX)
+ return HRTIMER_RESTART;
+ default:
return HRTIMER_NORESTART;
-
- return HRTIMER_RESTART;
+ }
}
void tick_setup_hrtimer_broadcast(void)
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 7e8ca4f448a8..d39f32cdd1b5 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -255,18 +255,18 @@ int tick_receive_broadcast(void)
/*
* Broadcast the event to the cpus, which are set in the mask (mangled).
*/
-static void tick_do_broadcast(struct cpumask *mask)
+static bool tick_do_broadcast(struct cpumask *mask)
{
int cpu = smp_processor_id();
struct tick_device *td;
+ bool local = false;
/*
* Check, if the current cpu is in the mask
*/
if (cpumask_test_cpu(cpu, mask)) {
cpumask_clear_cpu(cpu, mask);
- td = &per_cpu(tick_cpu_device, cpu);
- td->evtdev->event_handler(td->evtdev);
+ local = true;
}
if (!cpumask_empty(mask)) {
@@ -279,16 +279,17 @@ static void tick_do_broadcast(struct cpumask *mask)
td = &per_cpu(tick_cpu_device, cpumask_first(mask));
td->evtdev->broadcast(mask);
}
+ return local;
}
/*
* Periodic broadcast:
* - invoke the broadcast handlers
*/
-static void tick_do_periodic_broadcast(void)
+static bool tick_do_periodic_broadcast(void)
{
cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
- tick_do_broadcast(tmpmask);
+ return tick_do_broadcast(tmpmask);
}
/*
@@ -296,34 +297,26 @@ static void tick_do_periodic_broadcast(void)
*/
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
- ktime_t next;
+ struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
+ bool bc_local;
raw_spin_lock(&tick_broadcast_lock);
+ bc_local = tick_do_periodic_broadcast();
- tick_do_periodic_broadcast();
+ if (clockevent_state_oneshot(dev)) {
+ ktime_t next = ktime_add(dev->next_event, tick_period);
- /*
- * The device is in periodic mode. No reprogramming necessary:
- */
- if (dev->state == CLOCK_EVT_STATE_PERIODIC)
- goto unlock;
+ clockevents_program_event(dev, next, true);
+ }
+ raw_spin_unlock(&tick_broadcast_lock);
/*
- * Setup the next period for devices, which do not have
- * periodic mode. We read dev->next_event first and add to it
- * when the event already expired. clockevents_program_event()
- * sets dev->next_event only when the event is really
- * programmed to the device.
+ * We run the handler of the local cpu after dropping
+ * tick_broadcast_lock because the handler might deadlock when
+ * trying to switch to oneshot mode.
*/
- for (next = dev->next_event; ;) {
- next = ktime_add(next, tick_period);
-
- if (!clockevents_program_event(dev, next, false))
- goto unlock;
- tick_do_periodic_broadcast();
- }
-unlock:
- raw_spin_unlock(&tick_broadcast_lock);
+ if (bc_local)
+ td->evtdev->event_handler(td->evtdev);
}
/**
@@ -532,23 +525,19 @@ static void tick_broadcast_set_affinity(struct clock_event_device *bc,
irq_set_affinity(bc->irq, bc->cpumask);
}
-static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
- ktime_t expires, int force)
+static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
+ ktime_t expires)
{
- int ret;
-
- if (bc->state != CLOCK_EVT_STATE_ONESHOT)
- clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
+ if (!clockevent_state_oneshot(bc))
+ clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
- ret = clockevents_program_event(bc, expires, force);
- if (!ret)
- tick_broadcast_set_affinity(bc, cpumask_of(cpu));
- return ret;
+ clockevents_program_event(bc, expires, 1);
+ tick_broadcast_set_affinity(bc, cpumask_of(cpu));
}
static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
- clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
+ clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
}
/*
@@ -566,7 +555,7 @@ void tick_check_oneshot_broadcast_this_cpu(void)
* switched over, leave the device alone.
*/
if (td->mode == TICKDEV_MODE_ONESHOT) {
- clockevents_set_state(td->evtdev,
+ clockevents_switch_state(td->evtdev,
CLOCK_EVT_STATE_ONESHOT);
}
}
@@ -580,9 +569,9 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
struct tick_device *td;
ktime_t now, next_event;
int cpu, next_cpu = 0;
+ bool bc_local;
raw_spin_lock(&tick_broadcast_lock);
-again:
dev->next_event.tv64 = KTIME_MAX;
next_event.tv64 = KTIME_MAX;
cpumask_clear(tmpmask);
@@ -624,7 +613,7 @@ again:
/*
* Wakeup the cpus which have an expired event.
*/
- tick_do_broadcast(tmpmask);
+ bc_local = tick_do_broadcast(tmpmask);
/*
* Two reasons for reprogram:
@@ -636,15 +625,15 @@ again:
* - There are pending events on sleeping CPUs which were not
* in the event mask
*/
- if (next_event.tv64 != KTIME_MAX) {
- /*
- * Rearm the broadcast device. If event expired,
- * repeat the above
- */
- if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
- goto again;
- }
+ if (next_event.tv64 != KTIME_MAX)
+ tick_broadcast_set_event(dev, next_cpu, next_event);
+
raw_spin_unlock(&tick_broadcast_lock);
+
+ if (bc_local) {
+ td = this_cpu_ptr(&tick_cpu_device);
+ td->evtdev->event_handler(td->evtdev);
+ }
}
static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
@@ -670,7 +659,7 @@ static void broadcast_shutdown_local(struct clock_event_device *bc,
if (dev->next_event.tv64 < bc->next_event.tv64)
return;
}
- clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
+ clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
}
/**
@@ -726,7 +715,7 @@ int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
*/
if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
dev->next_event.tv64 < bc->next_event.tv64)
- tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
+ tick_broadcast_set_event(bc, cpu, dev->next_event);
}
/*
* If the current CPU owns the hrtimer broadcast
@@ -740,7 +729,7 @@ int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
} else {
if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
- clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
+ clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
/*
* The cpu which was handling the broadcast
* timer marked this cpu in the broadcast
@@ -842,7 +831,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
/* Set it up only once ! */
if (bc->event_handler != tick_handle_oneshot_broadcast) {
- int was_periodic = bc->state == CLOCK_EVT_STATE_PERIODIC;
+ int was_periodic = clockevent_state_periodic(bc);
bc->event_handler = tick_handle_oneshot_broadcast;
@@ -858,10 +847,10 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
tick_broadcast_oneshot_mask, tmpmask);
if (was_periodic && !cpumask_empty(tmpmask)) {
- clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
+ clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
tick_broadcast_init_next_event(tmpmask,
tick_next_period);
- tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
+ tick_broadcast_set_event(bc, cpu, tick_next_period);
} else
bc->next_event.tv64 = KTIME_MAX;
} else {
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 3ae6afa1eb98..17f144450050 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -102,7 +102,17 @@ void tick_handle_periodic(struct clock_event_device *dev)
tick_periodic(cpu);
- if (dev->state != CLOCK_EVT_STATE_ONESHOT)
+#if defined(CONFIG_HIGH_RES_TIMERS) || defined(CONFIG_NO_HZ_COMMON)
+ /*
+ * The cpu might have transitioned to HIGHRES or NOHZ mode via
+ * update_process_times() -> run_local_timers() ->
+ * hrtimer_run_queues().
+ */
+ if (dev->event_handler != tick_handle_periodic)
+ return;
+#endif
+
+ if (!clockevent_state_oneshot(dev))
return;
for (;;) {
/*
@@ -140,7 +150,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
!tick_broadcast_oneshot_active()) {
- clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC);
+ clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC);
} else {
unsigned long seq;
ktime_t next;
@@ -150,7 +160,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
next = tick_next_period;
} while (read_seqretry(&jiffies_lock, seq));
- clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
+ clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
for (;;) {
if (!clockevents_program_event(dev, next, false))
@@ -367,7 +377,7 @@ void tick_shutdown(unsigned int cpu)
* Prevent that the clock events layer tries to call
* the set mode function!
*/
- dev->state = CLOCK_EVT_STATE_DETACHED;
+ clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);
dev->mode = CLOCK_EVT_MODE_UNUSED;
clockevents_exchange_device(dev, NULL);
dev->event_handler = clockevents_handle_noop;
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index b64fdd8054c5..966a5a6fdd0a 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -36,11 +36,22 @@ static inline int tick_device_is_functional(struct clock_event_device *dev)
return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
}
+static inline enum clock_event_state clockevent_get_state(struct clock_event_device *dev)
+{
+ return dev->state_use_accessors;
+}
+
+static inline void clockevent_set_state(struct clock_event_device *dev,
+ enum clock_event_state state)
+{
+ dev->state_use_accessors = state;
+}
+
extern void clockevents_shutdown(struct clock_event_device *dev);
extern void clockevents_exchange_device(struct clock_event_device *old,
struct clock_event_device *new);
-extern void clockevents_set_state(struct clock_event_device *dev,
- enum clock_event_state state);
+extern void clockevents_switch_state(struct clock_event_device *dev,
+ enum clock_event_state state);
extern int clockevents_program_event(struct clock_event_device *dev,
ktime_t expires, bool force);
extern void clockevents_handle_noop(struct clock_event_device *dev);
@@ -137,3 +148,19 @@ extern void tick_nohz_init(void);
# else
static inline void tick_nohz_init(void) { }
#endif
+
+#ifdef CONFIG_NO_HZ_COMMON
+extern unsigned long tick_nohz_active;
+#else
+#define tick_nohz_active (0)
+#endif
+
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+extern void timers_update_migration(bool update_nohz);
+#else
+static inline void timers_update_migration(bool update_nohz) { }
+#endif
+
+DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
+
+extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 67a64b1670bf..b51344652330 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -28,6 +28,22 @@ int tick_program_event(ktime_t expires, int force)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
+ if (unlikely(expires.tv64 == KTIME_MAX)) {
+ /*
+ * We don't need the clock event device any more, stop it.
+ */
+ clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
+ return 0;
+ }
+
+ if (unlikely(clockevent_state_oneshot_stopped(dev))) {
+ /*
+ * We need the clock event again, configure it in ONESHOT mode
+ * before using it.
+ */
+ clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
+ }
+
return clockevents_program_event(dev, expires, force);
}
@@ -38,7 +54,7 @@ void tick_resume_oneshot(void)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
- clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
+ clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
clockevents_program_event(dev, ktime_get(), true);
}
@@ -50,7 +66,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev,
ktime_t next_event)
{
newdev->event_handler = handler;
- clockevents_set_state(newdev, CLOCK_EVT_STATE_ONESHOT);
+ clockevents_switch_state(newdev, CLOCK_EVT_STATE_ONESHOT);
clockevents_program_event(newdev, next_event, true);
}
@@ -81,7 +97,7 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
td->mode = TICKDEV_MODE_ONESHOT;
dev->event_handler = handler;
- clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
+ clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
tick_broadcast_switch_to_oneshot();
return 0;
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 914259128145..c792429e98c6 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -399,7 +399,7 @@ void __init tick_nohz_init(void)
* NO HZ enabled ?
*/
static int tick_nohz_enabled __read_mostly = 1;
-int tick_nohz_active __read_mostly;
+unsigned long tick_nohz_active __read_mostly;
/*
* Enable / Disable tickless mode
*/
@@ -565,156 +565,144 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
}
EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
+static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
+{
+ hrtimer_cancel(&ts->sched_timer);
+ hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
+
+ /* Forward the time to expire in the future */
+ hrtimer_forward(&ts->sched_timer, now, tick_period);
+
+ if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
+ hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
+ else
+ tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
+}
+
static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
ktime_t now, int cpu)
{
- unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
- ktime_t last_update, expires, ret = { .tv64 = 0 };
- unsigned long rcu_delta_jiffies;
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
- u64 time_delta;
-
- time_delta = timekeeping_max_deferment();
+ u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
+ unsigned long seq, basejiff;
+ ktime_t tick;
/* Read jiffies and the time when jiffies were updated last */
do {
seq = read_seqbegin(&jiffies_lock);
- last_update = last_jiffies_update;
- last_jiffies = jiffies;
+ basemono = last_jiffies_update.tv64;
+ basejiff = jiffies;
} while (read_seqretry(&jiffies_lock, seq));
+ ts->last_jiffies = basejiff;
- if (rcu_needs_cpu(&rcu_delta_jiffies) ||
+ if (rcu_needs_cpu(basemono, &next_rcu) ||
arch_needs_cpu() || irq_work_needs_cpu()) {
- next_jiffies = last_jiffies + 1;
- delta_jiffies = 1;
+ next_tick = basemono + TICK_NSEC;
} else {
- /* Get the next timer wheel timer */
- next_jiffies = get_next_timer_interrupt(last_jiffies);
- delta_jiffies = next_jiffies - last_jiffies;
- if (rcu_delta_jiffies < delta_jiffies) {
- next_jiffies = last_jiffies + rcu_delta_jiffies;
- delta_jiffies = rcu_delta_jiffies;
- }
+ /*
+ * Get the next pending timer. If high resolution
+ * timers are enabled this only takes the timer wheel
+ * timers into account. If high resolution timers are
+ * disabled this also looks at the next expiring
+ * hrtimer.
+ */
+ next_tmr = get_next_timer_interrupt(basejiff, basemono);
+ ts->next_timer = next_tmr;
+ /* Take the next rcu event into account */
+ next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
}
/*
- * Do not stop the tick, if we are only one off (or less)
- * or if the cpu is required for RCU:
+ * If the tick is due in the next period, keep it ticking or
+ * restart it proper.
*/
- if (!ts->tick_stopped && delta_jiffies <= 1)
- goto out;
-
- /* Schedule the tick, if we are at least one jiffie off */
- if ((long)delta_jiffies >= 1) {
-
- /*
- * If this cpu is the one which updates jiffies, then
- * give up the assignment and let it be taken by the
- * cpu which runs the tick timer next, which might be
- * this cpu as well. If we don't drop this here the
- * jiffies might be stale and do_timer() never
- * invoked. Keep track of the fact that it was the one
- * which had the do_timer() duty last. If this cpu is
- * the one which had the do_timer() duty last, we
- * limit the sleep time to the timekeeping
- * max_deferement value which we retrieved
- * above. Otherwise we can sleep as long as we want.
- */
- if (cpu == tick_do_timer_cpu) {
- tick_do_timer_cpu = TICK_DO_TIMER_NONE;
- ts->do_timer_last = 1;
- } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
- time_delta = KTIME_MAX;
- ts->do_timer_last = 0;
- } else if (!ts->do_timer_last) {
- time_delta = KTIME_MAX;
+ delta = next_tick - basemono;
+ if (delta <= (u64)TICK_NSEC) {
+ tick.tv64 = 0;
+ if (!ts->tick_stopped)
+ goto out;
+ if (delta == 0) {
+ /* Tick is stopped, but required now. Enforce it */
+ tick_nohz_restart(ts, now);
+ goto out;
}
+ }
+
+ /*
+ * If this cpu is the one which updates jiffies, then give up
+ * the assignment and let it be taken by the cpu which runs
+ * the tick timer next, which might be this cpu as well. If we
+ * don't drop this here the jiffies might be stale and
+ * do_timer() never invoked. Keep track of the fact that it
+ * was the one which had the do_timer() duty last. If this cpu
+ * is the one which had the do_timer() duty last, we limit the
+ * sleep time to the timekeeping max_deferement value.
+ * Otherwise we can sleep as long as we want.
+ */
+ delta = timekeeping_max_deferment();
+ if (cpu == tick_do_timer_cpu) {
+ tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+ ts->do_timer_last = 1;
+ } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+ delta = KTIME_MAX;
+ ts->do_timer_last = 0;
+ } else if (!ts->do_timer_last) {
+ delta = KTIME_MAX;
+ }
#ifdef CONFIG_NO_HZ_FULL
- if (!ts->inidle) {
- time_delta = min(time_delta,
- scheduler_tick_max_deferment());
- }
+ /* Limit the tick delta to the maximum scheduler deferment */
+ if (!ts->inidle)
+ delta = min(delta, scheduler_tick_max_deferment());
#endif
- /*
- * calculate the expiry time for the next timer wheel
- * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
- * that there is no timer pending or at least extremely
- * far into the future (12 days for HZ=1000). In this
- * case we set the expiry to the end of time.
- */
- if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) {
- /*
- * Calculate the time delta for the next timer event.
- * If the time delta exceeds the maximum time delta
- * permitted by the current clocksource then adjust
- * the time delta accordingly to ensure the
- * clocksource does not wrap.
- */
- time_delta = min_t(u64, time_delta,
- tick_period.tv64 * delta_jiffies);
- }
-
- if (time_delta < KTIME_MAX)
- expires = ktime_add_ns(last_update, time_delta);
- else
- expires.tv64 = KTIME_MAX;
-
- /* Skip reprogram of event if its not changed */
- if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
- goto out;
+ /* Calculate the next expiry time */
+ if (delta < (KTIME_MAX - basemono))
+ expires = basemono + delta;
+ else
+ expires = KTIME_MAX;
- ret = expires;
+ expires = min_t(u64, expires, next_tick);
+ tick.tv64 = expires;
- /*
- * nohz_stop_sched_tick can be called several times before
- * the nohz_restart_sched_tick is called. This happens when
- * interrupts arrive which do not cause a reschedule. In the
- * first call we save the current tick time, so we can restart
- * the scheduler tick in nohz_restart_sched_tick.
- */
- if (!ts->tick_stopped) {
- nohz_balance_enter_idle(cpu);
- calc_load_enter_idle();
+ /* Skip reprogram of event if its not changed */
+ if (ts->tick_stopped && (expires == dev->next_event.tv64))
+ goto out;
- ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
- ts->tick_stopped = 1;
- trace_tick_stop(1, " ");
- }
+ /*
+ * nohz_stop_sched_tick can be called several times before
+ * the nohz_restart_sched_tick is called. This happens when
+ * interrupts arrive which do not cause a reschedule. In the
+ * first call we save the current tick time, so we can restart
+ * the scheduler tick in nohz_restart_sched_tick.
+ */
+ if (!ts->tick_stopped) {
+ nohz_balance_enter_idle(cpu);
+ calc_load_enter_idle();
- /*
- * If the expiration time == KTIME_MAX, then
- * in this case we simply stop the tick timer.
- */
- if (unlikely(expires.tv64 == KTIME_MAX)) {
- if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
- hrtimer_cancel(&ts->sched_timer);
- goto out;
- }
+ ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
+ ts->tick_stopped = 1;
+ trace_tick_stop(1, " ");
+ }
- if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
- hrtimer_start(&ts->sched_timer, expires,
- HRTIMER_MODE_ABS_PINNED);
- /* Check, if the timer was already in the past */
- if (hrtimer_active(&ts->sched_timer))
- goto out;
- } else if (!tick_program_event(expires, 0))
- goto out;
- /*
- * We are past the event already. So we crossed a
- * jiffie boundary. Update jiffies and raise the
- * softirq.
- */
- tick_do_update_jiffies64(ktime_get());
+ /*
+ * If the expiration time == KTIME_MAX, then we simply stop
+ * the tick timer.
+ */
+ if (unlikely(expires == KTIME_MAX)) {
+ if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
+ hrtimer_cancel(&ts->sched_timer);
+ goto out;
}
- raise_softirq_irqoff(TIMER_SOFTIRQ);
+
+ if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
+ hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED);
+ else
+ tick_program_event(tick, 1);
out:
- ts->next_jiffies = next_jiffies;
- ts->last_jiffies = last_jiffies;
+ /* Update the estimated sleep length */
ts->sleep_length = ktime_sub(dev->next_event, now);
-
- return ret;
+ return tick;
}
static void tick_nohz_full_stop_tick(struct tick_sched *ts)
@@ -876,32 +864,6 @@ ktime_t tick_nohz_get_sleep_length(void)
return ts->sleep_length;
}
-static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
-{
- hrtimer_cancel(&ts->sched_timer);
- hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
-
- while (1) {
- /* Forward the time to expire in the future */
- hrtimer_forward(&ts->sched_timer, now, tick_period);
-
- if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
- hrtimer_start_expires(&ts->sched_timer,
- HRTIMER_MODE_ABS_PINNED);
- /* Check, if the timer was already in the past */
- if (hrtimer_active(&ts->sched_timer))
- break;
- } else {
- if (!tick_program_event(
- hrtimer_get_expires(&ts->sched_timer), 0))
- break;
- }
- /* Reread time and update jiffies */
- now = ktime_get();
- tick_do_update_jiffies64(now);
- }
-}
-
static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
{
/* Update jiffies first */
@@ -972,12 +934,6 @@ void tick_nohz_idle_exit(void)
local_irq_enable();
}
-static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
-{
- hrtimer_forward(&ts->sched_timer, now, tick_period);
- return tick_program_event(hrtimer_get_expires(&ts->sched_timer), 0);
-}
-
/*
* The nohz low res interrupt handler
*/
@@ -996,10 +952,18 @@ static void tick_nohz_handler(struct clock_event_device *dev)
if (unlikely(ts->tick_stopped))
return;
- while (tick_nohz_reprogram(ts, now)) {
- now = ktime_get();
- tick_do_update_jiffies64(now);
- }
+ hrtimer_forward(&ts->sched_timer, now, tick_period);
+ tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
+}
+
+static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
+{
+ if (!tick_nohz_enabled)
+ return;
+ ts->nohz_mode = mode;
+ /* One update is enough */
+ if (!test_and_set_bit(0, &tick_nohz_active))
+ timers_update_migration(true);
}
/**
@@ -1013,13 +977,8 @@ static void tick_nohz_switch_to_nohz(void)
if (!tick_nohz_enabled)
return;
- local_irq_disable();
- if (tick_switch_to_oneshot(tick_nohz_handler)) {
- local_irq_enable();
+ if (tick_switch_to_oneshot(tick_nohz_handler))
return;
- }
- tick_nohz_active = 1;
- ts->nohz_mode = NOHZ_MODE_LOWRES;
/*
* Recycle the hrtimer in ts, so we can share the
@@ -1029,13 +988,10 @@ static void tick_nohz_switch_to_nohz(void)
/* Get the next period */
next = tick_init_jiffy_update();
- for (;;) {
- hrtimer_set_expires(&ts->sched_timer, next);
- if (!tick_program_event(next, 0))
- break;
- next = ktime_add(next, tick_period);
- }
- local_irq_enable();
+ hrtimer_forward_now(&ts->sched_timer, tick_period);
+ hrtimer_set_expires(&ts->sched_timer, next);
+ tick_program_event(next, 1);
+ tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
}
/*
@@ -1087,6 +1043,7 @@ static inline void tick_nohz_irq_enter(void)
static inline void tick_nohz_switch_to_nohz(void) { }
static inline void tick_nohz_irq_enter(void) { }
+static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { }
#endif /* CONFIG_NO_HZ_COMMON */
@@ -1167,22 +1124,9 @@ void tick_setup_sched_timer(void)
hrtimer_add_expires_ns(&ts->sched_timer, offset);
}
- for (;;) {
- hrtimer_forward(&ts->sched_timer, now, tick_period);
- hrtimer_start_expires(&ts->sched_timer,
- HRTIMER_MODE_ABS_PINNED);
- /* Check, if the timer was already in the past */
- if (hrtimer_active(&ts->sched_timer))
- break;
- now = ktime_get();
- }
-
-#ifdef CONFIG_NO_HZ_COMMON
- if (tick_nohz_enabled) {
- ts->nohz_mode = NOHZ_MODE_HIGHRES;
- tick_nohz_active = 1;
- }
-#endif
+ hrtimer_forward(&ts->sched_timer, now, tick_period);
+ hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
+ tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
}
#endif /* HIGH_RES_TIMERS */
@@ -1227,7 +1171,7 @@ void tick_oneshot_notify(void)
* Called cyclic from the hrtimer softirq (driven by the timer
* softirq) allow_nohz signals, that we can switch into low-res nohz
* mode, because high resolution timers are disabled (either compile
- * or runtime).
+ * or runtime). Called with interrupts disabled.
*/
int tick_check_oneshot_change(int allow_nohz)
{
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 28b5da3e1a17..42fdf4958bcc 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -57,7 +57,7 @@ struct tick_sched {
ktime_t iowait_sleeptime;
ktime_t sleep_length;
unsigned long last_jiffies;
- unsigned long next_jiffies;
+ u64 next_timer;
ktime_t idle_expires;
int do_timer_last;
};
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 2c85b7724af4..85d5bb1d67eb 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -41,7 +41,7 @@
#include <asm/uaccess.h>
#include <asm/unistd.h>
-#include "timeconst.h"
+#include <generated/timeconst.h>
#include "timekeeping.h"
/*
@@ -173,6 +173,10 @@ int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
return error;
if (tz) {
+ /* Verify we're witin the +-15 hrs range */
+ if (tz->tz_minuteswest > 15*60 || tz->tz_minuteswest < -15*60)
+ return -EINVAL;
+
sys_tz = *tz;
update_vsyscall_tz();
if (firsttime) {
@@ -483,9 +487,11 @@ struct timespec64 ns_to_timespec64(const s64 nsec)
}
EXPORT_SYMBOL(ns_to_timespec64);
#endif
-/*
- * When we convert to jiffies then we interpret incoming values
- * the following way:
+/**
+ * msecs_to_jiffies: - convert milliseconds to jiffies
+ * @m: time in milliseconds
+ *
+ * conversion is done as follows:
*
* - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET)
*
@@ -493,66 +499,36 @@ EXPORT_SYMBOL(ns_to_timespec64);
* MAX_JIFFY_OFFSET values] mean 'infinite timeout' too.
*
* - all other values are converted to jiffies by either multiplying
- * the input value by a factor or dividing it with a factor
- *
- * We must also be careful about 32-bit overflows.
+ * the input value by a factor or dividing it with a factor and
+ * handling any 32-bit overflows.
+ * for the details see __msecs_to_jiffies()
+ *
+ * msecs_to_jiffies() checks for the passed in value being a constant
+ * via __builtin_constant_p() allowing gcc to eliminate most of the
+ * code, __msecs_to_jiffies() is called if the value passed does not
+ * allow constant folding and the actual conversion must be done at
+ * runtime.
+ * the _msecs_to_jiffies helpers are the HZ dependent conversion
+ * routines found in include/linux/jiffies.h
*/
-unsigned long msecs_to_jiffies(const unsigned int m)
+unsigned long __msecs_to_jiffies(const unsigned int m)
{
/*
* Negative value, means infinite timeout:
*/
if ((int)m < 0)
return MAX_JIFFY_OFFSET;
-
-#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
- /*
- * HZ is equal to or smaller than 1000, and 1000 is a nice
- * round multiple of HZ, divide with the factor between them,
- * but round upwards:
- */
- return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
-#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
- /*
- * HZ is larger than 1000, and HZ is a nice round multiple of
- * 1000 - simply multiply with the factor between them.
- *
- * But first make sure the multiplication result cannot
- * overflow:
- */
- if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
- return MAX_JIFFY_OFFSET;
-
- return m * (HZ / MSEC_PER_SEC);
-#else
- /*
- * Generic case - multiply, round and divide. But first
- * check that if we are doing a net multiplication, that
- * we wouldn't overflow:
- */
- if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
- return MAX_JIFFY_OFFSET;
-
- return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32)
- >> MSEC_TO_HZ_SHR32;
-#endif
+ return _msecs_to_jiffies(m);
}
-EXPORT_SYMBOL(msecs_to_jiffies);
+EXPORT_SYMBOL(__msecs_to_jiffies);
-unsigned long usecs_to_jiffies(const unsigned int u)
+unsigned long __usecs_to_jiffies(const unsigned int u)
{
if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET))
return MAX_JIFFY_OFFSET;
-#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
- return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ);
-#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
- return u * (HZ / USEC_PER_SEC);
-#else
- return (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32)
- >> USEC_TO_HZ_SHR32;
-#endif
+ return _usecs_to_jiffies(u);
}
-EXPORT_SYMBOL(usecs_to_jiffies);
+EXPORT_SYMBOL(__usecs_to_jiffies);
/*
* The TICK_NSEC - 1 rounds up the value to the next resolution. Note
diff --git a/kernel/time/timeconst.bc b/kernel/time/timeconst.bc
index 511bdf2cafda..c7388dee8635 100644
--- a/kernel/time/timeconst.bc
+++ b/kernel/time/timeconst.bc
@@ -50,7 +50,7 @@ define timeconst(hz) {
print "#include <linux/types.h>\n\n"
print "#if HZ != ", hz, "\n"
- print "#error \qkernel/timeconst.h has the wrong HZ value!\q\n"
+ print "#error \qinclude/generated/timeconst.h has the wrong HZ value!\q\n"
print "#endif\n\n"
if (hz < 2) {
@@ -105,4 +105,5 @@ define timeconst(hz) {
halt
}
+hz = read();
timeconst(hz)
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 946acb72179f..30b7a409bf1e 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -118,18 +118,6 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
#ifdef CONFIG_DEBUG_TIMEKEEPING
#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
-/*
- * These simple flag variables are managed
- * without locks, which is racy, but ok since
- * we don't really care about being super
- * precise about how many events were seen,
- * just that a problem was observed.
- */
-static int timekeeping_underflow_seen;
-static int timekeeping_overflow_seen;
-
-/* last_warning is only modified under the timekeeping lock */
-static long timekeeping_last_warning;
static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
{
@@ -149,29 +137,30 @@ static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
}
}
- if (timekeeping_underflow_seen) {
- if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
+ if (tk->underflow_seen) {
+ if (jiffies - tk->last_warning > WARNING_FREQ) {
printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
printk_deferred(" Your kernel is probably still fine.\n");
- timekeeping_last_warning = jiffies;
+ tk->last_warning = jiffies;
}
- timekeeping_underflow_seen = 0;
+ tk->underflow_seen = 0;
}
- if (timekeeping_overflow_seen) {
- if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
+ if (tk->overflow_seen) {
+ if (jiffies - tk->last_warning > WARNING_FREQ) {
printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
printk_deferred(" Your kernel is probably still fine.\n");
- timekeeping_last_warning = jiffies;
+ tk->last_warning = jiffies;
}
- timekeeping_overflow_seen = 0;
+ tk->overflow_seen = 0;
}
}
static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
{
+ struct timekeeper *tk = &tk_core.timekeeper;
cycle_t now, last, mask, max, delta;
unsigned int seq;
@@ -197,13 +186,13 @@ static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
* mask-relative negative values.
*/
if (unlikely((~delta & mask) < (mask >> 3))) {
- timekeeping_underflow_seen = 1;
+ tk->underflow_seen = 1;
delta = 0;
}
/* Cap delta value to the max_cycles values to avoid mult overflows */
if (unlikely(delta > max)) {
- timekeeping_overflow_seen = 1;
+ tk->overflow_seen = 1;
delta = tkr->clock->max_cycles;
}
@@ -551,6 +540,17 @@ int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
/*
+ * tk_update_leap_state - helper to update the next_leap_ktime
+ */
+static inline void tk_update_leap_state(struct timekeeper *tk)
+{
+ tk->next_leap_ktime = ntp_get_next_leap();
+ if (tk->next_leap_ktime.tv64 != KTIME_MAX)
+ /* Convert to monotonic time */
+ tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real);
+}
+
+/*
* Update the ktime_t based scalar nsec members of the timekeeper
*/
static inline void tk_update_ktime_data(struct timekeeper *tk)
@@ -591,17 +591,25 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
ntp_clear();
}
+ tk_update_leap_state(tk);
tk_update_ktime_data(tk);
update_vsyscall(tk);
update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
+ update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
+ update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
+
+ if (action & TK_CLOCK_WAS_SET)
+ tk->clock_was_set_seq++;
+ /*
+ * The mirroring of the data to the shadow-timekeeper needs
+ * to happen last here to ensure we don't over-write the
+ * timekeeper structure on the next update with stale data
+ */
if (action & TK_MIRROR)
memcpy(&shadow_timekeeper, &tk_core.timekeeper,
sizeof(tk_core.timekeeper));
-
- update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
- update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
}
/**
@@ -699,6 +707,23 @@ ktime_t ktime_get(void)
}
EXPORT_SYMBOL_GPL(ktime_get);
+u32 ktime_get_resolution_ns(void)
+{
+ struct timekeeper *tk = &tk_core.timekeeper;
+ unsigned int seq;
+ u32 nsecs;
+
+ WARN_ON(timekeeping_suspended);
+
+ do {
+ seq = read_seqcount_begin(&tk_core.seq);
+ nsecs = tk->tkr_mono.mult >> tk->tkr_mono.shift;
+ } while (read_seqcount_retry(&tk_core.seq, seq));
+
+ return nsecs;
+}
+EXPORT_SYMBOL_GPL(ktime_get_resolution_ns);
+
static ktime_t *offsets[TK_OFFS_MAX] = {
[TK_OFFS_REAL] = &tk_core.timekeeper.offs_real,
[TK_OFFS_BOOT] = &tk_core.timekeeper.offs_boot,
@@ -1179,28 +1204,20 @@ void __weak read_persistent_clock64(struct timespec64 *ts64)
}
/**
- * read_boot_clock - Return time of the system start.
+ * read_boot_clock64 - Return time of the system start.
*
* Weak dummy function for arches that do not yet support it.
* Function to read the exact time the system has been started.
- * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported.
+ * Returns a timespec64 with tv_sec=0 and tv_nsec=0 if unsupported.
*
* XXX - Do be sure to remove it once all arches implement it.
*/
-void __weak read_boot_clock(struct timespec *ts)
+void __weak read_boot_clock64(struct timespec64 *ts)
{
ts->tv_sec = 0;
ts->tv_nsec = 0;
}
-void __weak read_boot_clock64(struct timespec64 *ts64)
-{
- struct timespec ts;
-
- read_boot_clock(&ts);
- *ts64 = timespec_to_timespec64(ts);
-}
-
/* Flag for if timekeeping_resume() has injected sleeptime */
static bool sleeptime_injected;
@@ -1836,8 +1853,9 @@ void update_wall_time(void)
* memcpy under the tk_core.seq against one before we start
* updating.
*/
+ timekeeping_update(tk, clock_set);
memcpy(real_tk, tk, sizeof(*tk));
- timekeeping_update(real_tk, clock_set);
+ /* The memcpy must come last. Do not put anything here! */
write_seqcount_end(&tk_core.seq);
out:
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -1926,47 +1944,20 @@ void do_timer(unsigned long ticks)
}
/**
- * ktime_get_update_offsets_tick - hrtimer helper
- * @offs_real: pointer to storage for monotonic -> realtime offset
- * @offs_boot: pointer to storage for monotonic -> boottime offset
- * @offs_tai: pointer to storage for monotonic -> clock tai offset
- *
- * Returns monotonic time at last tick and various offsets
- */
-ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot,
- ktime_t *offs_tai)
-{
- struct timekeeper *tk = &tk_core.timekeeper;
- unsigned int seq;
- ktime_t base;
- u64 nsecs;
-
- do {
- seq = read_seqcount_begin(&tk_core.seq);
-
- base = tk->tkr_mono.base;
- nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
-
- *offs_real = tk->offs_real;
- *offs_boot = tk->offs_boot;
- *offs_tai = tk->offs_tai;
- } while (read_seqcount_retry(&tk_core.seq, seq));
-
- return ktime_add_ns(base, nsecs);
-}
-
-#ifdef CONFIG_HIGH_RES_TIMERS
-/**
* ktime_get_update_offsets_now - hrtimer helper
+ * @cwsseq: pointer to check and store the clock was set sequence number
* @offs_real: pointer to storage for monotonic -> realtime offset
* @offs_boot: pointer to storage for monotonic -> boottime offset
* @offs_tai: pointer to storage for monotonic -> clock tai offset
*
- * Returns current monotonic time and updates the offsets
+ * Returns current monotonic time and updates the offsets if the
+ * sequence number in @cwsseq and timekeeper.clock_was_set_seq are
+ * different.
+ *
* Called from hrtimer_interrupt() or retrigger_next_event()
*/
-ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot,
- ktime_t *offs_tai)
+ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
+ ktime_t *offs_boot, ktime_t *offs_tai)
{
struct timekeeper *tk = &tk_core.timekeeper;
unsigned int seq;
@@ -1978,15 +1969,23 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot,
base = tk->tkr_mono.base;
nsecs = timekeeping_get_ns(&tk->tkr_mono);
+ base = ktime_add_ns(base, nsecs);
+
+ if (*cwsseq != tk->clock_was_set_seq) {
+ *cwsseq = tk->clock_was_set_seq;
+ *offs_real = tk->offs_real;
+ *offs_boot = tk->offs_boot;
+ *offs_tai = tk->offs_tai;
+ }
+
+ /* Handle leapsecond insertion adjustments */
+ if (unlikely(base.tv64 >= tk->next_leap_ktime.tv64))
+ *offs_real = ktime_sub(tk->offs_real, ktime_set(1, 0));
- *offs_real = tk->offs_real;
- *offs_boot = tk->offs_boot;
- *offs_tai = tk->offs_tai;
} while (read_seqcount_retry(&tk_core.seq, seq));
- return ktime_add_ns(base, nsecs);
+ return base;
}
-#endif
/**
* do_adjtimex() - Accessor function to NTP __do_adjtimex function
@@ -2027,6 +2026,8 @@ int do_adjtimex(struct timex *txc)
__timekeeping_set_tai_offset(tk, tai);
timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
}
+ tk_update_leap_state(tk);
+
write_seqcount_end(&tk_core.seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index ead8794b9a4e..704f595ce83f 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -3,19 +3,16 @@
/*
* Internal interfaces for kernel/time/
*/
-extern ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real,
- ktime_t *offs_boot,
- ktime_t *offs_tai);
-extern ktime_t ktime_get_update_offsets_now(ktime_t *offs_real,
- ktime_t *offs_boot,
- ktime_t *offs_tai);
+extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq,
+ ktime_t *offs_real,
+ ktime_t *offs_boot,
+ ktime_t *offs_tai);
extern int timekeeping_valid_for_hres(void);
extern u64 timekeeping_max_deferment(void);
extern int timekeeping_inject_offset(struct timespec *ts);
extern s32 timekeeping_get_tai_offset(void);
extern void timekeeping_set_tai_offset(s32 tai_offset);
-extern void timekeeping_clocktai(struct timespec *ts);
extern int timekeeping_suspend(void);
extern void timekeeping_resume(void);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 2ece3aa5069c..520499dd85af 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -49,6 +49,8 @@
#include <asm/timex.h>
#include <asm/io.h>
+#include "tick-internal.h"
+
#define CREATE_TRACE_POINTS
#include <trace/events/timer.h>
@@ -68,11 +70,11 @@ EXPORT_SYMBOL(jiffies_64);
#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))
struct tvec {
- struct list_head vec[TVN_SIZE];
+ struct hlist_head vec[TVN_SIZE];
};
struct tvec_root {
- struct list_head vec[TVR_SIZE];
+ struct hlist_head vec[TVR_SIZE];
};
struct tvec_base {
@@ -83,6 +85,8 @@ struct tvec_base {
unsigned long active_timers;
unsigned long all_timers;
int cpu;
+ bool migration_enabled;
+ bool nohz_active;
struct tvec_root tv1;
struct tvec tv2;
struct tvec tv3;
@@ -90,43 +94,60 @@ struct tvec_base {
struct tvec tv5;
} ____cacheline_aligned;
-/*
- * __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've
- * made NULL special, hint: lock_timer_base()) and we cannot get a compile time
- * pointer to per-cpu entries because we don't know where we'll map the section,
- * even for the boot cpu.
- *
- * And so we use boot_tvec_bases for boot CPU and per-cpu __tvec_bases for the
- * rest of them.
- */
-struct tvec_base boot_tvec_bases;
-EXPORT_SYMBOL(boot_tvec_bases);
-static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
+static DEFINE_PER_CPU(struct tvec_base, tvec_bases);
+
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+unsigned int sysctl_timer_migration = 1;
-/* Functions below help us manage 'deferrable' flag */
-static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
+void timers_update_migration(bool update_nohz)
{
- return ((unsigned int)(unsigned long)base & TIMER_DEFERRABLE);
+ bool on = sysctl_timer_migration && tick_nohz_active;
+ unsigned int cpu;
+
+ /* Avoid the loop, if nothing to update */
+ if (this_cpu_read(tvec_bases.migration_enabled) == on)
+ return;
+
+ for_each_possible_cpu(cpu) {
+ per_cpu(tvec_bases.migration_enabled, cpu) = on;
+ per_cpu(hrtimer_bases.migration_enabled, cpu) = on;
+ if (!update_nohz)
+ continue;
+ per_cpu(tvec_bases.nohz_active, cpu) = true;
+ per_cpu(hrtimer_bases.nohz_active, cpu) = true;
+ }
}
-static inline unsigned int tbase_get_irqsafe(struct tvec_base *base)
+int timer_migration_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
{
- return ((unsigned int)(unsigned long)base & TIMER_IRQSAFE);
+ static DEFINE_MUTEX(mutex);
+ int ret;
+
+ mutex_lock(&mutex);
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+ if (!ret && write)
+ timers_update_migration(false);
+ mutex_unlock(&mutex);
+ return ret;
}
-static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
+static inline struct tvec_base *get_target_base(struct tvec_base *base,
+ int pinned)
{
- return ((struct tvec_base *)((unsigned long)base & ~TIMER_FLAG_MASK));
+ if (pinned || !base->migration_enabled)
+ return this_cpu_ptr(&tvec_bases);
+ return per_cpu_ptr(&tvec_bases, get_nohz_timer_target());
}
-
-static inline void
-timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
+#else
+static inline struct tvec_base *get_target_base(struct tvec_base *base,
+ int pinned)
{
- unsigned long flags = (unsigned long)timer->base & TIMER_FLAG_MASK;
-
- timer->base = (struct tvec_base *)((unsigned long)(new_base) | flags);
+ return this_cpu_ptr(&tvec_bases);
}
+#endif
static unsigned long round_jiffies_common(unsigned long j, int cpu,
bool force_up)
@@ -349,26 +370,12 @@ void set_timer_slack(struct timer_list *timer, int slack_hz)
}
EXPORT_SYMBOL_GPL(set_timer_slack);
-/*
- * If the list is empty, catch up ->timer_jiffies to the current time.
- * The caller must hold the tvec_base lock. Returns true if the list
- * was empty and therefore ->timer_jiffies was updated.
- */
-static bool catchup_timer_jiffies(struct tvec_base *base)
-{
- if (!base->all_timers) {
- base->timer_jiffies = jiffies;
- return true;
- }
- return false;
-}
-
static void
__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
unsigned long expires = timer->expires;
unsigned long idx = expires - base->timer_jiffies;
- struct list_head *vec;
+ struct hlist_head *vec;
if (idx < TVR_SIZE) {
int i = expires & TVR_MASK;
@@ -401,25 +408,25 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
vec = base->tv5.vec + i;
}
- /*
- * Timers are FIFO:
- */
- list_add_tail(&timer->entry, vec);
+
+ hlist_add_head(&timer->entry, vec);
}
static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
- (void)catchup_timer_jiffies(base);
+ /* Advance base->jiffies, if the base is empty */
+ if (!base->all_timers++)
+ base->timer_jiffies = jiffies;
+
__internal_add_timer(base, timer);
/*
* Update base->active_timers and base->next_timer
*/
- if (!tbase_get_deferrable(timer->base)) {
+ if (!(timer->flags & TIMER_DEFERRABLE)) {
if (!base->active_timers++ ||
time_before(timer->expires, base->next_timer))
base->next_timer = timer->expires;
}
- base->all_timers++;
/*
* Check whether the other CPU is in dynticks mode and needs
@@ -434,8 +441,11 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
* require special care against races with idle_cpu(), lets deal
* with that later.
*/
- if (!tbase_get_deferrable(base) || tick_nohz_full_cpu(base->cpu))
- wake_up_nohz_cpu(base->cpu);
+ if (base->nohz_active) {
+ if (!(timer->flags & TIMER_DEFERRABLE) ||
+ tick_nohz_full_cpu(base->cpu))
+ wake_up_nohz_cpu(base->cpu);
+ }
}
#ifdef CONFIG_TIMER_STATS
@@ -451,15 +461,12 @@ void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
static void timer_stats_account_timer(struct timer_list *timer)
{
- unsigned int flag = 0;
-
if (likely(!timer->start_site))
return;
- if (unlikely(tbase_get_deferrable(timer->base)))
- flag |= TIMER_STATS_FLAG_DEFERRABLE;
timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
- timer->function, timer->start_comm, flag);
+ timer->function, timer->start_comm,
+ timer->flags);
}
#else
@@ -516,8 +523,8 @@ static int timer_fixup_activate(void *addr, enum debug_obj_state state)
* statically initialized. We just make sure that it
* is tracked in the object tracker.
*/
- if (timer->entry.next == NULL &&
- timer->entry.prev == TIMER_ENTRY_STATIC) {
+ if (timer->entry.pprev == NULL &&
+ timer->entry.next == TIMER_ENTRY_STATIC) {
debug_object_init(timer, &timer_debug_descr);
debug_object_activate(timer, &timer_debug_descr);
return 0;
@@ -563,7 +570,7 @@ static int timer_fixup_assert_init(void *addr, enum debug_obj_state state)
switch (state) {
case ODEBUG_STATE_NOTAVAILABLE:
- if (timer->entry.prev == TIMER_ENTRY_STATIC) {
+ if (timer->entry.next == TIMER_ENTRY_STATIC) {
/*
* This is not really a fixup. The timer was
* statically initialized. We just make sure that it
@@ -648,7 +655,7 @@ static inline void
debug_activate(struct timer_list *timer, unsigned long expires)
{
debug_timer_activate(timer);
- trace_timer_start(timer, expires);
+ trace_timer_start(timer, expires, timer->flags);
}
static inline void debug_deactivate(struct timer_list *timer)
@@ -665,10 +672,8 @@ static inline void debug_assert_init(struct timer_list *timer)
static void do_init_timer(struct timer_list *timer, unsigned int flags,
const char *name, struct lock_class_key *key)
{
- struct tvec_base *base = raw_cpu_read(tvec_bases);
-
- timer->entry.next = NULL;
- timer->base = (void *)((unsigned long)base | flags);
+ timer->entry.pprev = NULL;
+ timer->flags = flags | raw_smp_processor_id();
timer->slack = -1;
#ifdef CONFIG_TIMER_STATS
timer->start_site = NULL;
@@ -699,24 +704,23 @@ EXPORT_SYMBOL(init_timer_key);
static inline void detach_timer(struct timer_list *timer, bool clear_pending)
{
- struct list_head *entry = &timer->entry;
+ struct hlist_node *entry = &timer->entry;
debug_deactivate(timer);
- __list_del(entry->prev, entry->next);
+ __hlist_del(entry);
if (clear_pending)
- entry->next = NULL;
- entry->prev = LIST_POISON2;
+ entry->pprev = NULL;
+ entry->next = LIST_POISON2;
}
static inline void
detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
{
detach_timer(timer, true);
- if (!tbase_get_deferrable(timer->base))
+ if (!(timer->flags & TIMER_DEFERRABLE))
base->active_timers--;
base->all_timers--;
- (void)catchup_timer_jiffies(base);
}
static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
@@ -726,13 +730,14 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
return 0;
detach_timer(timer, clear_pending);
- if (!tbase_get_deferrable(timer->base)) {
+ if (!(timer->flags & TIMER_DEFERRABLE)) {
base->active_timers--;
if (timer->expires == base->next_timer)
base->next_timer = base->timer_jiffies;
}
- base->all_timers--;
- (void)catchup_timer_jiffies(base);
+ /* If this was the last timer, advance base->jiffies */
+ if (!--base->all_timers)
+ base->timer_jiffies = jiffies;
return 1;
}
@@ -744,24 +749,22 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
* So __run_timers/migrate_timers can safely modify all timers which could
* be found on ->tvX lists.
*
- * When the timer's base is locked, and the timer removed from list, it is
- * possible to set timer->base = NULL and drop the lock: the timer remains
- * locked.
+ * When the timer's base is locked and removed from the list, the
+ * TIMER_MIGRATING flag is set, FIXME
*/
static struct tvec_base *lock_timer_base(struct timer_list *timer,
unsigned long *flags)
__acquires(timer->base->lock)
{
- struct tvec_base *base;
-
for (;;) {
- struct tvec_base *prelock_base = timer->base;
- base = tbase_get_base(prelock_base);
- if (likely(base != NULL)) {
+ u32 tf = timer->flags;
+ struct tvec_base *base;
+
+ if (!(tf & TIMER_MIGRATING)) {
+ base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK);
spin_lock_irqsave(&base->lock, *flags);
- if (likely(prelock_base == timer->base))
+ if (timer->flags == tf)
return base;
- /* The timer has migrated to another CPU */
spin_unlock_irqrestore(&base->lock, *flags);
}
cpu_relax();
@@ -770,11 +773,11 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
static inline int
__mod_timer(struct timer_list *timer, unsigned long expires,
- bool pending_only, int pinned)
+ bool pending_only, int pinned)
{
struct tvec_base *base, *new_base;
unsigned long flags;
- int ret = 0 , cpu;
+ int ret = 0;
timer_stats_timer_set_start_info(timer);
BUG_ON(!timer->function);
@@ -787,8 +790,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
debug_activate(timer, expires);
- cpu = get_nohz_timer_target(pinned);
- new_base = per_cpu(tvec_bases, cpu);
+ new_base = get_target_base(base, pinned);
if (base != new_base) {
/*
@@ -800,11 +802,13 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
*/
if (likely(base->running_timer != timer)) {
/* See the comment in lock_timer_base() */
- timer_set_base(timer, NULL);
+ timer->flags |= TIMER_MIGRATING;
+
spin_unlock(&base->lock);
base = new_base;
spin_lock(&base->lock);
- timer_set_base(timer, base);
+ timer->flags &= ~TIMER_BASEMASK;
+ timer->flags |= base->cpu;
}
}
@@ -966,13 +970,13 @@ EXPORT_SYMBOL(add_timer);
*/
void add_timer_on(struct timer_list *timer, int cpu)
{
- struct tvec_base *base = per_cpu(tvec_bases, cpu);
+ struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
unsigned long flags;
timer_stats_timer_set_start_info(timer);
BUG_ON(timer_pending(timer) || !timer->function);
spin_lock_irqsave(&base->lock, flags);
- timer_set_base(timer, base);
+ timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
debug_activate(timer, timer->expires);
internal_add_timer(base, timer);
spin_unlock_irqrestore(&base->lock, flags);
@@ -1037,8 +1041,6 @@ int try_to_del_timer_sync(struct timer_list *timer)
EXPORT_SYMBOL(try_to_del_timer_sync);
#ifdef CONFIG_SMP
-static DEFINE_PER_CPU(struct tvec_base, __tvec_bases);
-
/**
* del_timer_sync - deactivate a timer and wait for the handler to finish.
* @timer: the timer to be deactivated
@@ -1093,7 +1095,7 @@ int del_timer_sync(struct timer_list *timer)
* don't use it in hardirq context, because it
* could lead to deadlock.
*/
- WARN_ON(in_irq() && !tbase_get_irqsafe(timer->base));
+ WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
for (;;) {
int ret = try_to_del_timer_sync(timer);
if (ret >= 0)
@@ -1107,17 +1109,17 @@ EXPORT_SYMBOL(del_timer_sync);
static int cascade(struct tvec_base *base, struct tvec *tv, int index)
{
/* cascade all the timers from tv up one level */
- struct timer_list *timer, *tmp;
- struct list_head tv_list;
+ struct timer_list *timer;
+ struct hlist_node *tmp;
+ struct hlist_head tv_list;
- list_replace_init(tv->vec + index, &tv_list);
+ hlist_move_list(tv->vec + index, &tv_list);
/*
* We are removing _all_ timers from the list, so we
* don't have to detach them individually.
*/
- list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
- BUG_ON(tbase_get_base(timer->base) != base);
+ hlist_for_each_entry_safe(timer, tmp, &tv_list, entry) {
/* No accounting, while moving them */
__internal_add_timer(base, timer);
}
@@ -1182,14 +1184,18 @@ static inline void __run_timers(struct tvec_base *base)
struct timer_list *timer;
spin_lock_irq(&base->lock);
- if (catchup_timer_jiffies(base)) {
- spin_unlock_irq(&base->lock);
- return;
- }
+
while (time_after_eq(jiffies, base->timer_jiffies)) {
- struct list_head work_list;
- struct list_head *head = &work_list;
- int index = base->timer_jiffies & TVR_MASK;
+ struct hlist_head work_list;
+ struct hlist_head *head = &work_list;
+ int index;
+
+ if (!base->all_timers) {
+ base->timer_jiffies = jiffies;
+ break;
+ }
+
+ index = base->timer_jiffies & TVR_MASK;
/*
* Cascade timers:
@@ -1200,16 +1206,16 @@ static inline void __run_timers(struct tvec_base *base)
!cascade(base, &base->tv4, INDEX(2)))
cascade(base, &base->tv5, INDEX(3));
++base->timer_jiffies;
- list_replace_init(base->tv1.vec + index, head);
- while (!list_empty(head)) {
+ hlist_move_list(base->tv1.vec + index, head);
+ while (!hlist_empty(head)) {
void (*fn)(unsigned long);
unsigned long data;
bool irqsafe;
- timer = list_first_entry(head, struct timer_list,entry);
+ timer = hlist_entry(head->first, struct timer_list, entry);
fn = timer->function;
data = timer->data;
- irqsafe = tbase_get_irqsafe(timer->base);
+ irqsafe = timer->flags & TIMER_IRQSAFE;
timer_stats_account_timer(timer);
@@ -1248,8 +1254,8 @@ static unsigned long __next_timer_interrupt(struct tvec_base *base)
/* Look for timer events in tv1. */
index = slot = timer_jiffies & TVR_MASK;
do {
- list_for_each_entry(nte, base->tv1.vec + slot, entry) {
- if (tbase_get_deferrable(nte->base))
+ hlist_for_each_entry(nte, base->tv1.vec + slot, entry) {
+ if (nte->flags & TIMER_DEFERRABLE)
continue;
found = 1;
@@ -1279,8 +1285,8 @@ cascade:
index = slot = timer_jiffies & TVN_MASK;
do {
- list_for_each_entry(nte, varp->vec + slot, entry) {
- if (tbase_get_deferrable(nte->base))
+ hlist_for_each_entry(nte, varp->vec + slot, entry) {
+ if (nte->flags & TIMER_DEFERRABLE)
continue;
found = 1;
@@ -1311,54 +1317,48 @@ cascade:
* Check, if the next hrtimer event is before the next timer wheel
* event:
*/
-static unsigned long cmp_next_hrtimer_event(unsigned long now,
- unsigned long expires)
+static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
{
- ktime_t hr_delta = hrtimer_get_next_event();
- struct timespec tsdelta;
- unsigned long delta;
-
- if (hr_delta.tv64 == KTIME_MAX)
- return expires;
+ u64 nextevt = hrtimer_get_next_event();
/*
- * Expired timer available, let it expire in the next tick
+ * If high resolution timers are enabled
+ * hrtimer_get_next_event() returns KTIME_MAX.
*/
- if (hr_delta.tv64 <= 0)
- return now + 1;
-
- tsdelta = ktime_to_timespec(hr_delta);
- delta = timespec_to_jiffies(&tsdelta);
+ if (expires <= nextevt)
+ return expires;
/*
- * Limit the delta to the max value, which is checked in
- * tick_nohz_stop_sched_tick():
+ * If the next timer is already expired, return the tick base
+ * time so the tick is fired immediately.
*/
- if (delta > NEXT_TIMER_MAX_DELTA)
- delta = NEXT_TIMER_MAX_DELTA;
+ if (nextevt <= basem)
+ return basem;
/*
- * Take rounding errors in to account and make sure, that it
- * expires in the next tick. Otherwise we go into an endless
- * ping pong due to tick_nohz_stop_sched_tick() retriggering
- * the timer softirq
+ * Round up to the next jiffie. High resolution timers are
+ * off, so the hrtimers are expired in the tick and we need to
+ * make sure that this tick really expires the timer to avoid
+ * a ping pong of the nohz stop code.
+ *
+ * Use DIV_ROUND_UP_ULL to prevent gcc calling __divdi3
*/
- if (delta < 1)
- delta = 1;
- now += delta;
- if (time_before(now, expires))
- return now;
- return expires;
+ return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
}
/**
- * get_next_timer_interrupt - return the jiffy of the next pending timer
- * @now: current time (in jiffies)
+ * get_next_timer_interrupt - return the time (clock mono) of the next timer
+ * @basej: base time jiffies
+ * @basem: base time clock monotonic
+ *
+ * Returns the tick aligned clock monotonic time of the next pending
+ * timer or KTIME_MAX if no timer is pending.
*/
-unsigned long get_next_timer_interrupt(unsigned long now)
+u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
{
- struct tvec_base *base = __this_cpu_read(tvec_bases);
- unsigned long expires = now + NEXT_TIMER_MAX_DELTA;
+ struct tvec_base *base = this_cpu_ptr(&tvec_bases);
+ u64 expires = KTIME_MAX;
+ unsigned long nextevt;
/*
* Pretend that there is no timer pending if the cpu is offline.
@@ -1371,14 +1371,15 @@ unsigned long get_next_timer_interrupt(unsigned long now)
if (base->active_timers) {
if (time_before_eq(base->next_timer, base->timer_jiffies))
base->next_timer = __next_timer_interrupt(base);
- expires = base->next_timer;
+ nextevt = base->next_timer;
+ if (time_before_eq(nextevt, basej))
+ expires = basem;
+ else
+ expires = basem + (nextevt - basej) * TICK_NSEC;
}
spin_unlock(&base->lock);
- if (time_before_eq(expires, now))
- return now;
-
- return cmp_next_hrtimer_event(now, expires);
+ return cmp_next_hrtimer_event(basem, expires);
}
#endif
@@ -1407,9 +1408,7 @@ void update_process_times(int user_tick)
*/
static void run_timer_softirq(struct softirq_action *h)
{
- struct tvec_base *base = __this_cpu_read(tvec_bases);
-
- hrtimer_run_pending();
+ struct tvec_base *base = this_cpu_ptr(&tvec_bases);
if (time_after_eq(jiffies, base->timer_jiffies))
__run_timers(base);
@@ -1545,15 +1544,16 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
EXPORT_SYMBOL(schedule_timeout_uninterruptible);
#ifdef CONFIG_HOTPLUG_CPU
-static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
+static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head)
{
struct timer_list *timer;
+ int cpu = new_base->cpu;
- while (!list_empty(head)) {
- timer = list_first_entry(head, struct timer_list, entry);
+ while (!hlist_empty(head)) {
+ timer = hlist_entry(head->first, struct timer_list, entry);
/* We ignore the accounting on the dying cpu */
detach_timer(timer, false);
- timer_set_base(timer, new_base);
+ timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
internal_add_timer(new_base, timer);
}
}
@@ -1565,8 +1565,8 @@ static void migrate_timers(int cpu)
int i;
BUG_ON(cpu_online(cpu));
- old_base = per_cpu(tvec_bases, cpu);
- new_base = get_cpu_var(tvec_bases);
+ old_base = per_cpu_ptr(&tvec_bases, cpu);
+ new_base = this_cpu_ptr(&tvec_bases);
/*
* The caller is globally serialized and nobody else
* takes two locks at once, deadlock is not possible.
@@ -1590,7 +1590,6 @@ static void migrate_timers(int cpu)
spin_unlock(&old_base->lock);
spin_unlock_irq(&new_base->lock);
- put_cpu_var(tvec_bases);
}
static int timer_cpu_notify(struct notifier_block *self,
@@ -1616,52 +1615,27 @@ static inline void timer_register_cpu_notifier(void)
static inline void timer_register_cpu_notifier(void) { }
#endif /* CONFIG_HOTPLUG_CPU */
-static void __init init_timer_cpu(struct tvec_base *base, int cpu)
+static void __init init_timer_cpu(int cpu)
{
- int j;
-
- BUG_ON(base != tbase_get_base(base));
+ struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
base->cpu = cpu;
- per_cpu(tvec_bases, cpu) = base;
spin_lock_init(&base->lock);
- for (j = 0; j < TVN_SIZE; j++) {
- INIT_LIST_HEAD(base->tv5.vec + j);
- INIT_LIST_HEAD(base->tv4.vec + j);
- INIT_LIST_HEAD(base->tv3.vec + j);
- INIT_LIST_HEAD(base->tv2.vec + j);
- }
- for (j = 0; j < TVR_SIZE; j++)
- INIT_LIST_HEAD(base->tv1.vec + j);
-
base->timer_jiffies = jiffies;
base->next_timer = base->timer_jiffies;
}
static void __init init_timer_cpus(void)
{
- struct tvec_base *base;
- int local_cpu = smp_processor_id();
int cpu;
- for_each_possible_cpu(cpu) {
- if (cpu == local_cpu)
- base = &boot_tvec_bases;
-#ifdef CONFIG_SMP
- else
- base = per_cpu_ptr(&__tvec_bases, cpu);
-#endif
-
- init_timer_cpu(base, cpu);
- }
+ for_each_possible_cpu(cpu)
+ init_timer_cpu(cpu);
}
void __init init_timers(void)
{
- /* ensure there are enough low bits for flags in timer->base pointer */
- BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
-
init_timer_cpus();
init_timer_stats();
timer_register_cpu_notifier();
@@ -1697,14 +1671,14 @@ unsigned long msleep_interruptible(unsigned int msecs)
EXPORT_SYMBOL(msleep_interruptible);
-static int __sched do_usleep_range(unsigned long min, unsigned long max)
+static void __sched do_usleep_range(unsigned long min, unsigned long max)
{
ktime_t kmin;
unsigned long delta;
kmin = ktime_set(0, min * NSEC_PER_USEC);
delta = (max - min) * NSEC_PER_USEC;
- return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
+ schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
}
/**
@@ -1712,7 +1686,7 @@ static int __sched do_usleep_range(unsigned long min, unsigned long max)
* @min: Minimum time in usecs to sleep
* @max: Maximum time in usecs to sleep
*/
-void usleep_range(unsigned long min, unsigned long max)
+void __sched usleep_range(unsigned long min, unsigned long max)
{
__set_current_state(TASK_UNINTERRUPTIBLE);
do_usleep_range(min, max);
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index e878c2e0ba45..a4536e1e3e2a 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -29,19 +29,24 @@ struct timer_list_iter {
typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes);
-DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
-
/*
* This allows printing both to /proc/timer_list and
* to the console (on SysRq-Q):
*/
-#define SEQ_printf(m, x...) \
- do { \
- if (m) \
- seq_printf(m, x); \
- else \
- printk(x); \
- } while (0)
+__printf(2, 3)
+static void SEQ_printf(struct seq_file *m, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+
+ if (m)
+ seq_vprintf(m, fmt, args);
+ else
+ vprintk(fmt, args);
+
+ va_end(args);
+}
static void print_name_offset(struct seq_file *m, void *sym)
{
@@ -120,10 +125,10 @@ static void
print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now)
{
SEQ_printf(m, " .base: %pK\n", base);
- SEQ_printf(m, " .index: %d\n",
- base->index);
- SEQ_printf(m, " .resolution: %Lu nsecs\n",
- (unsigned long long)ktime_to_ns(base->resolution));
+ SEQ_printf(m, " .index: %d\n", base->index);
+
+ SEQ_printf(m, " .resolution: %u nsecs\n", (unsigned) hrtimer_resolution);
+
SEQ_printf(m, " .get_time: ");
print_name_offset(m, base->get_time);
SEQ_printf(m, "\n");
@@ -158,7 +163,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
P(nr_events);
P(nr_retries);
P(nr_hangs);
- P_ns(max_hang_time);
+ P(max_hang_time);
#endif
#undef P
#undef P_ns
@@ -184,7 +189,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
P_ns(idle_sleeptime);
P_ns(iowait_sleeptime);
P(last_jiffies);
- P(next_jiffies);
+ P(next_timer);
P_ns(idle_expires);
SEQ_printf(m, "jiffies: %Lu\n",
(unsigned long long)jiffies);
@@ -251,6 +256,12 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
SEQ_printf(m, "\n");
}
+ if (dev->set_state_oneshot_stopped) {
+ SEQ_printf(m, " oneshot stopped: ");
+ print_name_offset(m, dev->set_state_oneshot_stopped);
+ SEQ_printf(m, "\n");
+ }
+
if (dev->tick_resume) {
SEQ_printf(m, " resume: ");
print_name_offset(m, dev->tick_resume);
@@ -269,11 +280,11 @@ static void timer_list_show_tickdevices_header(struct seq_file *m)
{
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
print_tickdevice(m, tick_get_broadcast_device(), -1);
- SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
- cpumask_bits(tick_get_broadcast_mask())[0]);
+ SEQ_printf(m, "tick_broadcast_mask: %*pb\n",
+ cpumask_pr_args(tick_get_broadcast_mask()));
#ifdef CONFIG_TICK_ONESHOT
- SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n",
- cpumask_bits(tick_get_broadcast_oneshot_mask())[0]);
+ SEQ_printf(m, "tick_broadcast_oneshot_mask: %*pb\n",
+ cpumask_pr_args(tick_get_broadcast_oneshot_mask()));
#endif
SEQ_printf(m, "\n");
#endif
@@ -282,7 +293,7 @@ static void timer_list_show_tickdevices_header(struct seq_file *m)
static inline void timer_list_header(struct seq_file *m, u64 now)
{
- SEQ_printf(m, "Timer List Version: v0.7\n");
+ SEQ_printf(m, "Timer List Version: v0.8\n");
SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
SEQ_printf(m, "\n");
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index 1fb08f21302e..1adecb4b87c8 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -68,7 +68,7 @@ struct entry {
* Number of timeout events:
*/
unsigned long count;
- unsigned int timer_flag;
+ u32 flags;
/*
* We save the command-line string to preserve
@@ -227,13 +227,13 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm)
* @startf: pointer to the function which did the timer setup
* @timerf: pointer to the timer callback function of the timer
* @comm: name of the process which set up the timer
+ * @tflags: The flags field of the timer
*
* When the timer is already registered, then the event counter is
* incremented. Otherwise the timer is registered in a free slot.
*/
void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
- void *timerf, char *comm,
- unsigned int timer_flag)
+ void *timerf, char *comm, u32 tflags)
{
/*
* It doesn't matter which lock we take:
@@ -251,7 +251,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
input.start_func = startf;
input.expire_func = timerf;
input.pid = pid;
- input.timer_flag = timer_flag;
+ input.flags = tflags;
raw_spin_lock_irqsave(lock, flags);
if (!timer_stats_active)
@@ -306,7 +306,7 @@ static int tstats_show(struct seq_file *m, void *v)
for (i = 0; i < nr_entries; i++) {
entry = entries + i;
- if (entry->timer_flag & TIMER_STATS_FLAG_DEFERRABLE) {
+ if (entry->flags & TIMER_DEFERRABLE) {
seq_printf(m, "%4luD, %5d %-16s ",
entry->count, entry->pid, entry->comm);
} else {
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
index a382e4a32609..782ae8ca2c06 100644
--- a/lib/timerqueue.c
+++ b/lib/timerqueue.c
@@ -36,7 +36,7 @@
* Adds the timer node to the timerqueue, sorted by the
* node's expires value.
*/
-void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
+bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
{
struct rb_node **p = &head->head.rb_node;
struct rb_node *parent = NULL;
@@ -56,8 +56,11 @@ void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
rb_link_node(&node->node, parent, p);
rb_insert_color(&node->node, &head->head);
- if (!head->next || node->expires.tv64 < head->next->expires.tv64)
+ if (!head->next || node->expires.tv64 < head->next->expires.tv64) {
head->next = node;
+ return true;
+ }
+ return false;
}
EXPORT_SYMBOL_GPL(timerqueue_add);
@@ -69,7 +72,7 @@ EXPORT_SYMBOL_GPL(timerqueue_add);
*
* Removes the timer node from the timerqueue.
*/
-void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
+bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
{
WARN_ON_ONCE(RB_EMPTY_NODE(&node->node));
@@ -82,6 +85,7 @@ void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
}
rb_erase(&node->node, &head->head);
RB_CLEAR_NODE(&node->node);
+ return head->next != NULL;
}
EXPORT_SYMBOL_GPL(timerqueue_del);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 508155b283dd..54817d365366 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2212,8 +2212,6 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
do {
set_current_state(TASK_INTERRUPTIBLE);
hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
- if (!hrtimer_active(&t.timer))
- t.task = NULL;
if (likely(t.task))
schedule();
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 1e1c89e51a11..73a123daa2cc 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1885,13 +1885,10 @@ EXPORT_SYMBOL(tcf_destroy_chain);
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
- struct timespec ts;
-
- hrtimer_get_res(CLOCK_MONOTONIC, &ts);
seq_printf(seq, "%08x %08x %08x %08x\n",
(u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
1000000,
- (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
+ (u32)NSEC_PER_SEC / hrtimer_resolution);
return 0;
}
diff --git a/sound/core/hrtimer.c b/sound/core/hrtimer.c
index 886be7da989d..f845ecf7e172 100644
--- a/sound/core/hrtimer.c
+++ b/sound/core/hrtimer.c
@@ -121,16 +121,9 @@ static struct snd_timer *mytimer;
static int __init snd_hrtimer_init(void)
{
struct snd_timer *timer;
- struct timespec tp;
int err;
- hrtimer_get_res(CLOCK_MONOTONIC, &tp);
- if (tp.tv_sec > 0 || !tp.tv_nsec) {
- pr_err("snd-hrtimer: Invalid resolution %u.%09u",
- (unsigned)tp.tv_sec, (unsigned)tp.tv_nsec);
- return -EINVAL;
- }
- resolution = tp.tv_nsec;
+ resolution = hrtimer_resolution;
/* Create a new timer and set up the fields */
err = snd_timer_global_new("hrtimer", SNDRV_TIMER_GLOBAL_HRTIMER,
diff --git a/sound/drivers/pcsp/pcsp.c b/sound/drivers/pcsp/pcsp.c
index d9647bd84d0f..27e25bb78c97 100644
--- a/sound/drivers/pcsp/pcsp.c
+++ b/sound/drivers/pcsp/pcsp.c
@@ -42,16 +42,13 @@ struct snd_pcsp pcsp_chip;
static int snd_pcsp_create(struct snd_card *card)
{
static struct snd_device_ops ops = { };
- struct timespec tp;
- int err;
- int div, min_div, order;
-
- hrtimer_get_res(CLOCK_MONOTONIC, &tp);
+ unsigned int resolution = hrtimer_resolution;
+ int err, div, min_div, order;
if (!nopcm) {
- if (tp.tv_sec || tp.tv_nsec > PCSP_MAX_PERIOD_NS) {
+ if (resolution > PCSP_MAX_PERIOD_NS) {
printk(KERN_ERR "PCSP: Timer resolution is not sufficient "
- "(%linS)\n", tp.tv_nsec);
+ "(%unS)\n", resolution);
printk(KERN_ERR "PCSP: Make sure you have HPET and ACPI "
"enabled.\n");
printk(KERN_ERR "PCSP: Turned into nopcm mode.\n");
@@ -59,13 +56,13 @@ static int snd_pcsp_create(struct snd_card *card)
}
}
- if (loops_per_jiffy >= PCSP_MIN_LPJ && tp.tv_nsec <= PCSP_MIN_PERIOD_NS)
+ if (loops_per_jiffy >= PCSP_MIN_LPJ && resolution <= PCSP_MIN_PERIOD_NS)
min_div = MIN_DIV;
else
min_div = MAX_DIV;
#if PCSP_DEBUG
- printk(KERN_DEBUG "PCSP: lpj=%li, min_div=%i, res=%li\n",
- loops_per_jiffy, min_div, tp.tv_nsec);
+ printk(KERN_DEBUG "PCSP: lpj=%li, min_div=%i, res=%u\n",
+ loops_per_jiffy, min_div, resolution);
#endif
div = MAX_DIV / min_div;
diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c
index b8272e6c4b3b..fb46ad6ac92c 100644
--- a/tools/testing/selftests/timers/leap-a-day.c
+++ b/tools/testing/selftests/timers/leap-a-day.c
@@ -44,6 +44,7 @@
#include <time.h>
#include <sys/time.h>
#include <sys/timex.h>
+#include <sys/errno.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
@@ -63,6 +64,9 @@ static inline int ksft_exit_fail(void)
#define NSEC_PER_SEC 1000000000ULL
#define CLOCK_TAI 11
+time_t next_leap;
+int error_found;
+
/* returns 1 if a <= b, 0 otherwise */
static inline int in_order(struct timespec a, struct timespec b)
{
@@ -134,6 +138,35 @@ void handler(int unused)
exit(0);
}
+void sigalarm(int signo)
+{
+ struct timex tx;
+ int ret;
+
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+
+ if (tx.time.tv_sec < next_leap) {
+ printf("Error: Early timer expiration! (Should be %ld)\n", next_leap);
+ error_found = 1;
+ printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n",
+ tx.time.tv_sec,
+ tx.time.tv_usec,
+ tx.tai,
+ time_state_str(ret));
+ }
+ if (ret != TIME_WAIT) {
+ printf("Error: Timer seeing incorrect NTP state? (Should be TIME_WAIT)\n");
+ error_found = 1;
+ printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n",
+ tx.time.tv_sec,
+ tx.time.tv_usec,
+ tx.tai,
+ time_state_str(ret));
+ }
+}
+
+
/* Test for known hrtimer failure */
void test_hrtimer_failure(void)
{
@@ -144,12 +177,19 @@ void test_hrtimer_failure(void)
clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &target, NULL);
clock_gettime(CLOCK_REALTIME, &now);
- if (!in_order(target, now))
+ if (!in_order(target, now)) {
printf("ERROR: hrtimer early expiration failure observed.\n");
+ error_found = 1;
+ }
}
int main(int argc, char **argv)
{
+ timer_t tm1;
+ struct itimerspec its1;
+ struct sigevent se;
+ struct sigaction act;
+ int signum = SIGRTMAX;
int settime = 0;
int tai_time = 0;
int insert = 1;
@@ -191,6 +231,12 @@ int main(int argc, char **argv)
signal(SIGINT, handler);
signal(SIGKILL, handler);
+ /* Set up timer signal handler: */
+ sigfillset(&act.sa_mask);
+ act.sa_flags = 0;
+ act.sa_handler = sigalarm;
+ sigaction(signum, &act, NULL);
+
if (iterations < 0)
printf("This runs continuously. Press ctrl-c to stop\n");
else
@@ -201,7 +247,7 @@ int main(int argc, char **argv)
int ret;
struct timespec ts;
struct timex tx;
- time_t now, next_leap;
+ time_t now;
/* Get the current time */
clock_gettime(CLOCK_REALTIME, &ts);
@@ -251,10 +297,27 @@ int main(int argc, char **argv)
printf("Scheduling leap second for %s", ctime(&next_leap));
+ /* Set up timer */
+ printf("Setting timer for %ld - %s", next_leap, ctime(&next_leap));
+ memset(&se, 0, sizeof(se));
+ se.sigev_notify = SIGEV_SIGNAL;
+ se.sigev_signo = signum;
+ se.sigev_value.sival_int = 0;
+ if (timer_create(CLOCK_REALTIME, &se, &tm1) == -1) {
+ printf("Error: timer_create failed\n");
+ return ksft_exit_fail();
+ }
+ its1.it_value.tv_sec = next_leap;
+ its1.it_value.tv_nsec = 0;
+ its1.it_interval.tv_sec = 0;
+ its1.it_interval.tv_nsec = 0;
+ timer_settime(tm1, TIMER_ABSTIME, &its1, NULL);
+
/* Wake up 3 seconds before leap */
ts.tv_sec = next_leap - 3;
ts.tv_nsec = 0;
+
while (clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL))
printf("Something woke us up, returning to sleep\n");
@@ -276,6 +339,7 @@ int main(int argc, char **argv)
while (now < next_leap + 2) {
char buf[26];
struct timespec tai;
+ int ret;
tx.modes = 0;
ret = adjtimex(&tx);
@@ -308,8 +372,13 @@ int main(int argc, char **argv)
/* Note if kernel has known hrtimer failure */
test_hrtimer_failure();
- printf("Leap complete\n\n");
-
+ printf("Leap complete\n");
+ if (error_found) {
+ printf("Errors observed\n");
+ clear_time_state();
+ return ksft_exit_fail();
+ }
+ printf("\n");
if ((iterations != -1) && !(--iterations))
break;
}