summaryrefslogtreecommitdiff
path: root/drivers/cpufreq
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r--drivers/cpufreq/Kconfig59
-rw-r--r--drivers/cpufreq/Makefile8
-rw-r--r--drivers/cpufreq/cpufreq-dt.c7
-rw-r--r--drivers/cpufreq/cpufreq.c100
-rw-r--r--drivers/cpufreq/cpufreq_governor_attr_set.c84
-rw-r--r--drivers/cpufreq/cpufreq_interactive.c1374
-rw-r--r--drivers/cpufreq/cpufreq_times.c464
7 files changed, 2094 insertions, 2 deletions
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 949610360b14..75ded53a7470 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -46,6 +46,15 @@ config CPU_FREQ_STAT_DETAILS
If in doubt, say N.
+config CPU_FREQ_TIMES
+ bool "CPU frequency time-in-state statistics"
+ default y
+ help
+ This driver exports CPU time-in-state information through procfs file
+ system.
+
+ If in doubt, say N.
+
choice
prompt "Default CPUFreq governor"
default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1100_CPUFREQ || ARM_SA1110_CPUFREQ
@@ -102,6 +111,25 @@ config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
Be aware that not all cpufreq drivers support the conservative
governor. If unsure have a look at the help section of the
driver. Fallback governor will be the performance governor.
+
+config CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+ bool "interactive"
+ select CPU_FREQ_GOV_INTERACTIVE
+ help
+ Use the CPUFreq governor 'interactive' as default. This allows
+ you to get a full dynamic cpu frequency capable system by simply
+ loading your cpufreq low-level hardware driver, using the
+ 'interactive' governor for latency-sensitive workloads.
+
+config CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
+ bool "schedutil"
+ depends on SMP
+ select CPU_FREQ_GOV_SCHEDUTIL
+ select CPU_FREQ_GOV_PERFORMANCE
+ help
+ Use the 'schedutil' CPUFreq governor by default. If unsure,
+ have a look at the help section of that governor. The fallback
+ governor will be 'performance'.
endchoice
config CPU_FREQ_GOV_PERFORMANCE
@@ -159,6 +187,20 @@ config CPU_FREQ_GOV_ONDEMAND
If in doubt, say N.
+config CPU_FREQ_GOV_INTERACTIVE
+ bool "'interactive' cpufreq policy governor"
+ help
+ 'interactive' - This driver adds a dynamic cpufreq policy governor
+ designed for latency-sensitive workloads.
+
+ This governor attempts to reduce the latency of clock
+ increases so that the system is more responsive to
+ interactive workloads.
+
+ For details, take a look at linux/Documentation/cpu-freq.
+
+ If in doubt, say N.
+
config CPU_FREQ_GOV_CONSERVATIVE
tristate "'conservative' cpufreq governor"
depends on CPU_FREQ
@@ -183,6 +225,23 @@ config CPU_FREQ_GOV_CONSERVATIVE
If in doubt, say N.
+config CPU_FREQ_GOV_SCHEDUTIL
+ bool "'schedutil' cpufreq policy governor"
+ depends on CPU_FREQ && SMP
+ select CPU_FREQ_GOV_ATTR_SET
+ select IRQ_WORK
+ help
+ This governor makes decisions based on the utilization data provided
+ by the scheduler. It sets the CPU frequency to be proportional to
+ the utilization/capacity ratio coming from the scheduler. If the
+ utilization is frequency-invariant, the new frequency is also
+ proportional to the maximum available frequency. If that is not the
+ case, it is proportional to the current frequency of the CPU. The
+ frequency tipping point is at utilization/capacity equal to 80% in
+ both cases.
+
+ If in doubt, say N.
+
comment "CPU frequency scaling drivers"
config CPUFREQ_DT
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index c0af1a1281c8..54070bf413bf 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -1,15 +1,19 @@
# CPUfreq core
-obj-$(CONFIG_CPU_FREQ) += cpufreq.o freq_table.o
+obj-$(CONFIG_CPU_FREQ) += cpufreq.o freq_table.o cpufreq_governor_attr_set.o
# CPUfreq stats
obj-$(CONFIG_CPU_FREQ_STAT) += cpufreq_stats.o
-# CPUfreq governors
+# CPUfreq times
+obj-$(CONFIG_CPU_FREQ_TIMES) += cpufreq_times.o
+
+# CPUfreq governors
obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o
obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o
obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o
obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o
obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o
+obj-$(CONFIG_CPU_FREQ_GOV_INTERACTIVE) += cpufreq_interactive.o
obj-$(CONFIG_CPU_FREQ_GOV_COMMON) += cpufreq_governor.o
obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index f951f911786e..a72ae98b4838 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -279,6 +279,13 @@ static int cpufreq_init(struct cpufreq_policy *policy)
policy->cpuinfo.transition_latency = transition_latency;
+ /*
+ * Android: set default parameters for parity between schedutil and
+ * schedfreq
+ */
+ policy->up_transition_delay_us = transition_latency / NSEC_PER_USEC;
+ policy->down_transition_delay_us = 50000; /* 50ms */
+
return 0;
out_free_cpufreq_table:
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 68b604ad8413..fc0d9f90ad8d 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -19,6 +19,7 @@
#include <linux/cpu.h>
#include <linux/cpufreq.h>
+#include <linux/cpufreq_times.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/init.h>
@@ -29,6 +30,9 @@
#include <linux/suspend.h>
#include <linux/syscore_ops.h>
#include <linux/tick.h>
+#ifdef CONFIG_SMP
+#include <linux/sched.h>
+#endif
#include <trace/events/power.h>
static LIST_HEAD(cpufreq_policy_list);
@@ -154,6 +158,12 @@ bool have_governor_per_policy(void)
}
EXPORT_SYMBOL_GPL(have_governor_per_policy);
+bool cpufreq_driver_is_slow(void)
+{
+ return !(cpufreq_driver->flags & CPUFREQ_DRIVER_FAST);
+}
+EXPORT_SYMBOL_GPL(cpufreq_driver_is_slow);
+
struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy)
{
if (have_governor_per_policy())
@@ -347,6 +357,50 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
#endif
}
+/*********************************************************************
+ * FREQUENCY INVARIANT CPU CAPACITY *
+ *********************************************************************/
+
+static DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
+static DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE;
+
+static void
+scale_freq_capacity(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs)
+{
+ unsigned long cur = freqs ? freqs->new : policy->cur;
+ unsigned long scale = (cur << SCHED_CAPACITY_SHIFT) / policy->max;
+ struct cpufreq_cpuinfo *cpuinfo = &policy->cpuinfo;
+ int cpu;
+
+ pr_debug("cpus %*pbl cur/cur max freq %lu/%u kHz freq scale %lu\n",
+ cpumask_pr_args(policy->cpus), cur, policy->max, scale);
+
+ for_each_cpu(cpu, policy->cpus)
+ per_cpu(freq_scale, cpu) = scale;
+
+ if (freqs)
+ return;
+
+ scale = (policy->max << SCHED_CAPACITY_SHIFT) / cpuinfo->max_freq;
+
+ pr_debug("cpus %*pbl cur max/max freq %u/%u kHz max freq scale %lu\n",
+ cpumask_pr_args(policy->cpus), policy->max, cpuinfo->max_freq,
+ scale);
+
+ for_each_cpu(cpu, policy->cpus)
+ per_cpu(max_freq_scale, cpu) = scale;
+}
+
+unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu)
+{
+ return per_cpu(freq_scale, cpu);
+}
+
+unsigned long cpufreq_scale_max_freq_capacity(int cpu)
+{
+ return per_cpu(max_freq_scale, cpu);
+}
+
static void __cpufreq_notify_transition(struct cpufreq_policy *policy,
struct cpufreq_freqs *freqs, unsigned int state)
{
@@ -384,6 +438,7 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy,
pr_debug("FREQ: %lu - CPU: %lu\n",
(unsigned long)freqs->new, (unsigned long)freqs->cpu);
trace_cpu_frequency(freqs->new, freqs->cpu);
+ cpufreq_times_record_transition(freqs);
srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
CPUFREQ_POSTCHANGE, freqs);
if (likely(policy) && likely(policy->cpu == freqs->cpu))
@@ -423,6 +478,9 @@ static void cpufreq_notify_post_transition(struct cpufreq_policy *policy,
void cpufreq_freq_transition_begin(struct cpufreq_policy *policy,
struct cpufreq_freqs *freqs)
{
+#ifdef CONFIG_SMP
+ int cpu;
+#endif
/*
* Catch double invocations of _begin() which lead to self-deadlock.
@@ -450,6 +508,12 @@ wait:
spin_unlock(&policy->transition_lock);
+ scale_freq_capacity(policy, freqs);
+#ifdef CONFIG_SMP
+ for_each_cpu(cpu, policy->cpus)
+ trace_cpu_capacity(capacity_curr_of(cpu), cpu);
+#endif
+
cpufreq_notify_transition(policy, freqs, CPUFREQ_PRECHANGE);
}
EXPORT_SYMBOL_GPL(cpufreq_freq_transition_begin);
@@ -469,6 +533,38 @@ void cpufreq_freq_transition_end(struct cpufreq_policy *policy,
}
EXPORT_SYMBOL_GPL(cpufreq_freq_transition_end);
+/**
+ * cpufreq_driver_resolve_freq - Map a target frequency to a driver-supported
+ * one.
+ * @target_freq: target frequency to resolve.
+ *
+ * The target to driver frequency mapping is cached in the policy.
+ *
+ * Return: Lowest driver-supported frequency greater than or equal to the
+ * given target_freq, subject to policy (min/max) and driver limitations.
+ */
+unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy,
+ unsigned int target_freq)
+{
+ target_freq = clamp_val(target_freq, policy->min, policy->max);
+ policy->cached_target_freq = target_freq;
+
+ if (cpufreq_driver->target_index) {
+ int idx, rv;
+
+ rv = cpufreq_frequency_table_target(policy, policy->freq_table,
+ target_freq,
+ CPUFREQ_RELATION_L,
+ &idx);
+ if (rv)
+ return target_freq;
+ policy->cached_resolved_idx = idx;
+ return policy->freq_table[idx].frequency;
+ }
+
+ return target_freq;
+}
+EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq);
/*********************************************************************
* SYSFS INTERFACE *
@@ -1250,6 +1346,7 @@ static int cpufreq_online(unsigned int cpu)
goto out_exit_policy;
blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
CPUFREQ_CREATE_POLICY, policy);
+ cpufreq_times_create_policy(policy);
write_lock_irqsave(&cpufreq_driver_lock, flags);
list_add(&policy->policy_list, &cpufreq_policy_list);
@@ -2133,8 +2230,11 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
CPUFREQ_NOTIFY, new_policy);
+ scale_freq_capacity(new_policy, NULL);
+
policy->min = new_policy->min;
policy->max = new_policy->max;
+ trace_cpu_frequency_limits(policy->max, policy->min, policy->cpu);
pr_debug("new min and max freqs are %u - %u kHz\n",
policy->min, policy->max);
diff --git a/drivers/cpufreq/cpufreq_governor_attr_set.c b/drivers/cpufreq/cpufreq_governor_attr_set.c
new file mode 100644
index 000000000000..52841f807a7e
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_governor_attr_set.c
@@ -0,0 +1,84 @@
+/*
+ * Abstract code for CPUFreq governor tunable sysfs attributes.
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "cpufreq_governor.h"
+
+static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj)
+{
+ return container_of(kobj, struct gov_attr_set, kobj);
+}
+
+static inline struct governor_attr *to_gov_attr(struct attribute *attr)
+{
+ return container_of(attr, struct governor_attr, attr);
+}
+
+static ssize_t governor_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct governor_attr *gattr = to_gov_attr(attr);
+
+ return gattr->show(to_gov_attr_set(kobj), buf);
+}
+
+static ssize_t governor_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct gov_attr_set *attr_set = to_gov_attr_set(kobj);
+ struct governor_attr *gattr = to_gov_attr(attr);
+ int ret;
+
+ mutex_lock(&attr_set->update_lock);
+ ret = attr_set->usage_count ? gattr->store(attr_set, buf, count) : -EBUSY;
+ mutex_unlock(&attr_set->update_lock);
+ return ret;
+}
+
+const struct sysfs_ops governor_sysfs_ops = {
+ .show = governor_show,
+ .store = governor_store,
+};
+EXPORT_SYMBOL_GPL(governor_sysfs_ops);
+
+void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node)
+{
+ INIT_LIST_HEAD(&attr_set->policy_list);
+ mutex_init(&attr_set->update_lock);
+ attr_set->usage_count = 1;
+ list_add(list_node, &attr_set->policy_list);
+}
+EXPORT_SYMBOL_GPL(gov_attr_set_init);
+
+void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node)
+{
+ mutex_lock(&attr_set->update_lock);
+ attr_set->usage_count++;
+ list_add(list_node, &attr_set->policy_list);
+ mutex_unlock(&attr_set->update_lock);
+}
+EXPORT_SYMBOL_GPL(gov_attr_set_get);
+
+unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node)
+{
+ unsigned int count;
+
+ mutex_lock(&attr_set->update_lock);
+ list_del(list_node);
+ count = --attr_set->usage_count;
+ mutex_unlock(&attr_set->update_lock);
+ if (count)
+ return count;
+
+ kobject_put(&attr_set->kobj);
+ mutex_destroy(&attr_set->update_lock);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gov_attr_set_put);
diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c
new file mode 100644
index 000000000000..e0a586895136
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_interactive.c
@@ -0,0 +1,1374 @@
+/*
+ * drivers/cpufreq/cpufreq_interactive.c
+ *
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: Mike Chan (mike@android.com)
+ *
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/cpufreq.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/sched/rt.h>
+#include <linux/tick.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/cpufreq_interactive.h>
+
+struct cpufreq_interactive_cpuinfo {
+ struct timer_list cpu_timer;
+ struct timer_list cpu_slack_timer;
+ spinlock_t load_lock; /* protects the next 4 fields */
+ u64 time_in_idle;
+ u64 time_in_idle_timestamp;
+ u64 cputime_speedadj;
+ u64 cputime_speedadj_timestamp;
+ struct cpufreq_policy *policy;
+ struct cpufreq_frequency_table *freq_table;
+ spinlock_t target_freq_lock; /*protects target freq */
+ unsigned int target_freq;
+ unsigned int floor_freq;
+ u64 pol_floor_val_time; /* policy floor_validate_time */
+ u64 loc_floor_val_time; /* per-cpu floor_validate_time */
+ u64 pol_hispeed_val_time; /* policy hispeed_validate_time */
+ u64 loc_hispeed_val_time; /* per-cpu hispeed_validate_time */
+ struct rw_semaphore enable_sem;
+ int governor_enabled;
+};
+
+static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);
+
+/* realtime thread handles frequency scaling */
+static struct task_struct *speedchange_task;
+static cpumask_t speedchange_cpumask;
+static spinlock_t speedchange_cpumask_lock;
+static struct mutex gov_lock;
+
+/* Target load. Lower values result in higher CPU speeds. */
+#define DEFAULT_TARGET_LOAD 90
+static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD};
+
+#define DEFAULT_TIMER_RATE (20 * USEC_PER_MSEC)
+#define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_TIMER_RATE
+static unsigned int default_above_hispeed_delay[] = {
+ DEFAULT_ABOVE_HISPEED_DELAY };
+
+struct cpufreq_interactive_tunables {
+ int usage_count;
+ /* Hi speed to bump to from lo speed when load burst (default max) */
+ unsigned int hispeed_freq;
+ /* Go to hi speed when CPU load at or above this value. */
+#define DEFAULT_GO_HISPEED_LOAD 99
+ unsigned long go_hispeed_load;
+ /* Target load. Lower values result in higher CPU speeds. */
+ spinlock_t target_loads_lock;
+ unsigned int *target_loads;
+ int ntarget_loads;
+ /*
+ * The minimum amount of time to spend at a frequency before we can ramp
+ * down.
+ */
+#define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC)
+ unsigned long min_sample_time;
+ /*
+ * The sample rate of the timer used to increase frequency
+ */
+ unsigned long timer_rate;
+ /*
+ * Wait this long before raising speed above hispeed, by default a
+ * single timer interval.
+ */
+ spinlock_t above_hispeed_delay_lock;
+ unsigned int *above_hispeed_delay;
+ int nabove_hispeed_delay;
+ /* Non-zero means indefinite speed boost active */
+ int boost_val;
+ /* Duration of a boot pulse in usecs */
+ int boostpulse_duration_val;
+ /* End time of boost pulse in ktime converted to usecs */
+ u64 boostpulse_endtime;
+ bool boosted;
+ /*
+ * Max additional time to wait in idle, beyond timer_rate, at speeds
+ * above minimum before wakeup to reduce speed, or -1 if unnecessary.
+ */
+#define DEFAULT_TIMER_SLACK (4 * DEFAULT_TIMER_RATE)
+ int timer_slack_val;
+ bool io_is_busy;
+};
+
+/* For cases where we have single governor instance for system */
+static struct cpufreq_interactive_tunables *common_tunables;
+
+static struct attribute_group *get_sysfs_attr(void);
+
+static void cpufreq_interactive_timer_resched(
+ struct cpufreq_interactive_cpuinfo *pcpu)
+{
+ struct cpufreq_interactive_tunables *tunables =
+ pcpu->policy->governor_data;
+ unsigned long expires;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pcpu->load_lock, flags);
+ pcpu->time_in_idle =
+ get_cpu_idle_time(smp_processor_id(),
+ &pcpu->time_in_idle_timestamp,
+ tunables->io_is_busy);
+ pcpu->cputime_speedadj = 0;
+ pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp;
+ expires = jiffies + usecs_to_jiffies(tunables->timer_rate);
+ mod_timer_pinned(&pcpu->cpu_timer, expires);
+
+ if (tunables->timer_slack_val >= 0 &&
+ pcpu->target_freq > pcpu->policy->min) {
+ expires += usecs_to_jiffies(tunables->timer_slack_val);
+ mod_timer_pinned(&pcpu->cpu_slack_timer, expires);
+ }
+
+ spin_unlock_irqrestore(&pcpu->load_lock, flags);
+}
+
+/* The caller shall take enable_sem write semaphore to avoid any timer race.
+ * The cpu_timer and cpu_slack_timer must be deactivated when calling this
+ * function.
+ */
+static void cpufreq_interactive_timer_start(
+ struct cpufreq_interactive_tunables *tunables, int cpu)
+{
+ struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu);
+ unsigned long expires = jiffies +
+ usecs_to_jiffies(tunables->timer_rate);
+ unsigned long flags;
+
+ pcpu->cpu_timer.expires = expires;
+ add_timer_on(&pcpu->cpu_timer, cpu);
+ if (tunables->timer_slack_val >= 0 &&
+ pcpu->target_freq > pcpu->policy->min) {
+ expires += usecs_to_jiffies(tunables->timer_slack_val);
+ pcpu->cpu_slack_timer.expires = expires;
+ add_timer_on(&pcpu->cpu_slack_timer, cpu);
+ }
+
+ spin_lock_irqsave(&pcpu->load_lock, flags);
+ pcpu->time_in_idle =
+ get_cpu_idle_time(cpu, &pcpu->time_in_idle_timestamp,
+ tunables->io_is_busy);
+ pcpu->cputime_speedadj = 0;
+ pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp;
+ spin_unlock_irqrestore(&pcpu->load_lock, flags);
+}
+
+static unsigned int freq_to_above_hispeed_delay(
+ struct cpufreq_interactive_tunables *tunables,
+ unsigned int freq)
+{
+ int i;
+ unsigned int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags);
+
+ for (i = 0; i < tunables->nabove_hispeed_delay - 1 &&
+ freq >= tunables->above_hispeed_delay[i+1]; i += 2)
+ ;
+
+ ret = tunables->above_hispeed_delay[i];
+ spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags);
+ return ret;
+}
+
+static unsigned int freq_to_targetload(
+ struct cpufreq_interactive_tunables *tunables, unsigned int freq)
+{
+ int i;
+ unsigned int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tunables->target_loads_lock, flags);
+
+ for (i = 0; i < tunables->ntarget_loads - 1 &&
+ freq >= tunables->target_loads[i+1]; i += 2)
+ ;
+
+ ret = tunables->target_loads[i];
+ spin_unlock_irqrestore(&tunables->target_loads_lock, flags);
+ return ret;
+}
+
+/*
+ * If increasing frequencies never map to a lower target load then
+ * choose_freq() will find the minimum frequency that does not exceed its
+ * target load given the current load.
+ */
+static unsigned int choose_freq(struct cpufreq_interactive_cpuinfo *pcpu,
+ unsigned int loadadjfreq)
+{
+ unsigned int freq = pcpu->policy->cur;
+ unsigned int prevfreq, freqmin, freqmax;
+ unsigned int tl;
+ int index;
+
+ freqmin = 0;
+ freqmax = UINT_MAX;
+
+ do {
+ prevfreq = freq;
+ tl = freq_to_targetload(pcpu->policy->governor_data, freq);
+
+ /*
+ * Find the lowest frequency where the computed load is less
+ * than or equal to the target load.
+ */
+
+ if (cpufreq_frequency_table_target(
+ pcpu->policy, pcpu->freq_table, loadadjfreq / tl,
+ CPUFREQ_RELATION_L, &index))
+ break;
+ freq = pcpu->freq_table[index].frequency;
+
+ if (freq > prevfreq) {
+ /* The previous frequency is too low. */
+ freqmin = prevfreq;
+
+ if (freq >= freqmax) {
+ /*
+ * Find the highest frequency that is less
+ * than freqmax.
+ */
+ if (cpufreq_frequency_table_target(
+ pcpu->policy, pcpu->freq_table,
+ freqmax - 1, CPUFREQ_RELATION_H,
+ &index))
+ break;
+ freq = pcpu->freq_table[index].frequency;
+
+ if (freq == freqmin) {
+ /*
+ * The first frequency below freqmax
+ * has already been found to be too
+ * low. freqmax is the lowest speed
+ * we found that is fast enough.
+ */
+ freq = freqmax;
+ break;
+ }
+ }
+ } else if (freq < prevfreq) {
+ /* The previous frequency is high enough. */
+ freqmax = prevfreq;
+
+ if (freq <= freqmin) {
+ /*
+ * Find the lowest frequency that is higher
+ * than freqmin.
+ */
+ if (cpufreq_frequency_table_target(
+ pcpu->policy, pcpu->freq_table,
+ freqmin + 1, CPUFREQ_RELATION_L,
+ &index))
+ break;
+ freq = pcpu->freq_table[index].frequency;
+
+ /*
+ * If freqmax is the first frequency above
+ * freqmin then we have already found that
+ * this speed is fast enough.
+ */
+ if (freq == freqmax)
+ break;
+ }
+ }
+
+ /* If same frequency chosen as previous then done. */
+ } while (freq != prevfreq);
+
+ return freq;
+}
+
+static u64 update_load(int cpu)
+{
+ struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu);
+ struct cpufreq_interactive_tunables *tunables =
+ pcpu->policy->governor_data;
+ u64 now;
+ u64 now_idle;
+ u64 delta_idle;
+ u64 delta_time;
+ u64 active_time;
+
+ now_idle = get_cpu_idle_time(cpu, &now, tunables->io_is_busy);
+ delta_idle = (now_idle - pcpu->time_in_idle);
+ delta_time = (now - pcpu->time_in_idle_timestamp);
+
+ if (delta_time <= delta_idle)
+ active_time = 0;
+ else
+ active_time = delta_time - delta_idle;
+
+ pcpu->cputime_speedadj += active_time * pcpu->policy->cur;
+
+ pcpu->time_in_idle = now_idle;
+ pcpu->time_in_idle_timestamp = now;
+ return now;
+}
+
+static void cpufreq_interactive_timer(unsigned long data)
+{
+ u64 now;
+ unsigned int delta_time;
+ u64 cputime_speedadj;
+ int cpu_load;
+ struct cpufreq_interactive_cpuinfo *pcpu =
+ &per_cpu(cpuinfo, data);
+ struct cpufreq_interactive_tunables *tunables =
+ pcpu->policy->governor_data;
+ unsigned int new_freq;
+ unsigned int loadadjfreq;
+ unsigned int index;
+ unsigned long flags;
+ u64 max_fvtime;
+
+ if (!down_read_trylock(&pcpu->enable_sem))
+ return;
+ if (!pcpu->governor_enabled)
+ goto exit;
+
+ spin_lock_irqsave(&pcpu->load_lock, flags);
+ now = update_load(data);
+ delta_time = (unsigned int)(now - pcpu->cputime_speedadj_timestamp);
+ cputime_speedadj = pcpu->cputime_speedadj;
+ spin_unlock_irqrestore(&pcpu->load_lock, flags);
+
+ if (WARN_ON_ONCE(!delta_time))
+ goto rearm;
+
+ spin_lock_irqsave(&pcpu->target_freq_lock, flags);
+ do_div(cputime_speedadj, delta_time);
+ loadadjfreq = (unsigned int)cputime_speedadj * 100;
+ cpu_load = loadadjfreq / pcpu->policy->cur;
+ tunables->boosted = tunables->boost_val || now < tunables->boostpulse_endtime;
+
+ if (cpu_load >= tunables->go_hispeed_load || tunables->boosted) {
+ if (pcpu->policy->cur < tunables->hispeed_freq) {
+ new_freq = tunables->hispeed_freq;
+ } else {
+ new_freq = choose_freq(pcpu, loadadjfreq);
+
+ if (new_freq < tunables->hispeed_freq)
+ new_freq = tunables->hispeed_freq;
+ }
+ } else {
+ new_freq = choose_freq(pcpu, loadadjfreq);
+ if (new_freq > tunables->hispeed_freq &&
+ pcpu->policy->cur < tunables->hispeed_freq)
+ new_freq = tunables->hispeed_freq;
+ }
+
+ if (pcpu->policy->cur >= tunables->hispeed_freq &&
+ new_freq > pcpu->policy->cur &&
+ now - pcpu->pol_hispeed_val_time <
+ freq_to_above_hispeed_delay(tunables, pcpu->policy->cur)) {
+ trace_cpufreq_interactive_notyet(
+ data, cpu_load, pcpu->target_freq,
+ pcpu->policy->cur, new_freq);
+ spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
+ goto rearm;
+ }
+
+ pcpu->loc_hispeed_val_time = now;
+
+ if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
+ new_freq, CPUFREQ_RELATION_L,
+ &index)) {
+ spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
+ goto rearm;
+ }
+
+ new_freq = pcpu->freq_table[index].frequency;
+
+ /*
+ * Do not scale below floor_freq unless we have been at or above the
+ * floor frequency for the minimum sample time since last validated.
+ */
+ max_fvtime = max(pcpu->pol_floor_val_time, pcpu->loc_floor_val_time);
+ if (new_freq < pcpu->floor_freq &&
+ pcpu->target_freq >= pcpu->policy->cur) {
+ if (now - max_fvtime < tunables->min_sample_time) {
+ trace_cpufreq_interactive_notyet(
+ data, cpu_load, pcpu->target_freq,
+ pcpu->policy->cur, new_freq);
+ spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
+ goto rearm;
+ }
+ }
+
+ /*
+ * Update the timestamp for checking whether speed has been held at
+ * or above the selected frequency for a minimum of min_sample_time,
+ * if not boosted to hispeed_freq. If boosted to hispeed_freq then we
+ * allow the speed to drop as soon as the boostpulse duration expires
+ * (or the indefinite boost is turned off).
+ */
+
+ if (!tunables->boosted || new_freq > tunables->hispeed_freq) {
+ pcpu->floor_freq = new_freq;
+ if (pcpu->target_freq >= pcpu->policy->cur ||
+ new_freq >= pcpu->policy->cur)
+ pcpu->loc_floor_val_time = now;
+ }
+
+ if (pcpu->target_freq == new_freq &&
+ pcpu->target_freq <= pcpu->policy->cur) {
+ trace_cpufreq_interactive_already(
+ data, cpu_load, pcpu->target_freq,
+ pcpu->policy->cur, new_freq);
+ spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
+ goto rearm;
+ }
+
+ trace_cpufreq_interactive_target(data, cpu_load, pcpu->target_freq,
+ pcpu->policy->cur, new_freq);
+
+ pcpu->target_freq = new_freq;
+ spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
+ spin_lock_irqsave(&speedchange_cpumask_lock, flags);
+ cpumask_set_cpu(data, &speedchange_cpumask);
+ spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);
+ wake_up_process(speedchange_task);
+
+rearm:
+ if (!timer_pending(&pcpu->cpu_timer))
+ cpufreq_interactive_timer_resched(pcpu);
+
+exit:
+ up_read(&pcpu->enable_sem);
+ return;
+}
+
+static void cpufreq_interactive_idle_end(void)
+{
+ struct cpufreq_interactive_cpuinfo *pcpu =
+ &per_cpu(cpuinfo, smp_processor_id());
+
+ if (!down_read_trylock(&pcpu->enable_sem))
+ return;
+ if (!pcpu->governor_enabled) {
+ up_read(&pcpu->enable_sem);
+ return;
+ }
+
+ /* Arm the timer for 1-2 ticks later if not already. */
+ if (!timer_pending(&pcpu->cpu_timer)) {
+ cpufreq_interactive_timer_resched(pcpu);
+ } else if (time_after_eq(jiffies, pcpu->cpu_timer.expires)) {
+ del_timer(&pcpu->cpu_timer);
+ del_timer(&pcpu->cpu_slack_timer);
+ cpufreq_interactive_timer(smp_processor_id());
+ }
+
+ up_read(&pcpu->enable_sem);
+}
+
+static void cpufreq_interactive_get_policy_info(struct cpufreq_policy *policy,
+ unsigned int *pmax_freq,
+ u64 *phvt, u64 *pfvt)
+{
+ struct cpufreq_interactive_cpuinfo *pcpu;
+ unsigned int max_freq = 0;
+ u64 hvt = ~0ULL, fvt = 0;
+ unsigned int i;
+
+ for_each_cpu(i, policy->cpus) {
+ pcpu = &per_cpu(cpuinfo, i);
+
+ fvt = max(fvt, pcpu->loc_floor_val_time);
+ if (pcpu->target_freq > max_freq) {
+ max_freq = pcpu->target_freq;
+ hvt = pcpu->loc_hispeed_val_time;
+ } else if (pcpu->target_freq == max_freq) {
+ hvt = min(hvt, pcpu->loc_hispeed_val_time);
+ }
+ }
+
+ *pmax_freq = max_freq;
+ *phvt = hvt;
+ *pfvt = fvt;
+}
+
+static void cpufreq_interactive_adjust_cpu(unsigned int cpu,
+ struct cpufreq_policy *policy)
+{
+ struct cpufreq_interactive_cpuinfo *pcpu;
+ u64 hvt, fvt;
+ unsigned int max_freq;
+ int i;
+
+ cpufreq_interactive_get_policy_info(policy, &max_freq, &hvt, &fvt);
+
+ for_each_cpu(i, policy->cpus) {
+ pcpu = &per_cpu(cpuinfo, i);
+ pcpu->pol_floor_val_time = fvt;
+ }
+
+ if (max_freq != policy->cur) {
+ __cpufreq_driver_target(policy, max_freq, CPUFREQ_RELATION_H);
+ for_each_cpu(i, policy->cpus) {
+ pcpu = &per_cpu(cpuinfo, i);
+ pcpu->pol_hispeed_val_time = hvt;
+ }
+ }
+
+ trace_cpufreq_interactive_setspeed(cpu, max_freq, policy->cur);
+}
+
+static int cpufreq_interactive_speedchange_task(void *data)
+{
+ unsigned int cpu;
+ cpumask_t tmp_mask;
+ unsigned long flags;
+ struct cpufreq_interactive_cpuinfo *pcpu;
+
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ spin_lock_irqsave(&speedchange_cpumask_lock, flags);
+
+ if (cpumask_empty(&speedchange_cpumask)) {
+ spin_unlock_irqrestore(&speedchange_cpumask_lock,
+ flags);
+ schedule();
+
+ if (kthread_should_stop())
+ break;
+
+ spin_lock_irqsave(&speedchange_cpumask_lock, flags);
+ }
+
+ set_current_state(TASK_RUNNING);
+ tmp_mask = speedchange_cpumask;
+ cpumask_clear(&speedchange_cpumask);
+ spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);
+
+ for_each_cpu(cpu, &tmp_mask) {
+ pcpu = &per_cpu(cpuinfo, cpu);
+
+ down_write(&pcpu->policy->rwsem);
+
+ if (likely(down_read_trylock(&pcpu->enable_sem))) {
+ if (likely(pcpu->governor_enabled))
+ cpufreq_interactive_adjust_cpu(cpu,
+ pcpu->policy);
+ up_read(&pcpu->enable_sem);
+ }
+
+ up_write(&pcpu->policy->rwsem);
+ }
+ }
+
+ return 0;
+}
+
+static void cpufreq_interactive_boost(struct cpufreq_interactive_tunables *tunables)
+{
+ int i;
+ int anyboost = 0;
+ unsigned long flags[2];
+ struct cpufreq_interactive_cpuinfo *pcpu;
+
+ tunables->boosted = true;
+
+ spin_lock_irqsave(&speedchange_cpumask_lock, flags[0]);
+
+ for_each_online_cpu(i) {
+ pcpu = &per_cpu(cpuinfo, i);
+
+ if (!down_read_trylock(&pcpu->enable_sem))
+ continue;
+
+ if (!pcpu->governor_enabled) {
+ up_read(&pcpu->enable_sem);
+ continue;
+ }
+
+ if (tunables != pcpu->policy->governor_data) {
+ up_read(&pcpu->enable_sem);
+ continue;
+ }
+
+ spin_lock_irqsave(&pcpu->target_freq_lock, flags[1]);
+ if (pcpu->target_freq < tunables->hispeed_freq) {
+ pcpu->target_freq = tunables->hispeed_freq;
+ cpumask_set_cpu(i, &speedchange_cpumask);
+ pcpu->pol_hispeed_val_time =
+ ktime_to_us(ktime_get());
+ anyboost = 1;
+ }
+ spin_unlock_irqrestore(&pcpu->target_freq_lock, flags[1]);
+
+ up_read(&pcpu->enable_sem);
+ }
+
+ spin_unlock_irqrestore(&speedchange_cpumask_lock, flags[0]);
+
+ if (anyboost)
+ wake_up_process(speedchange_task);
+}
+
+static int cpufreq_interactive_notifier(
+ struct notifier_block *nb, unsigned long val, void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ struct cpufreq_interactive_cpuinfo *pcpu;
+ int cpu;
+ unsigned long flags;
+
+ if (val == CPUFREQ_POSTCHANGE) {
+ pcpu = &per_cpu(cpuinfo, freq->cpu);
+ if (!down_read_trylock(&pcpu->enable_sem))
+ return 0;
+ if (!pcpu->governor_enabled) {
+ up_read(&pcpu->enable_sem);
+ return 0;
+ }
+
+ for_each_cpu(cpu, pcpu->policy->cpus) {
+ struct cpufreq_interactive_cpuinfo *pjcpu =
+ &per_cpu(cpuinfo, cpu);
+ if (cpu != freq->cpu) {
+ if (!down_read_trylock(&pjcpu->enable_sem))
+ continue;
+ if (!pjcpu->governor_enabled) {
+ up_read(&pjcpu->enable_sem);
+ continue;
+ }
+ }
+ spin_lock_irqsave(&pjcpu->load_lock, flags);
+ update_load(cpu);
+ spin_unlock_irqrestore(&pjcpu->load_lock, flags);
+ if (cpu != freq->cpu)
+ up_read(&pjcpu->enable_sem);
+ }
+
+ up_read(&pcpu->enable_sem);
+ }
+ return 0;
+}
+
+static struct notifier_block cpufreq_notifier_block = {
+ .notifier_call = cpufreq_interactive_notifier,
+};
+
+static unsigned int *get_tokenized_data(const char *buf, int *num_tokens)
+{
+ const char *cp;
+ int i;
+ int ntokens = 1;
+ unsigned int *tokenized_data;
+ int err = -EINVAL;
+
+ cp = buf;
+ while ((cp = strpbrk(cp + 1, " :")))
+ ntokens++;
+
+ if (!(ntokens & 0x1))
+ goto err;
+
+ tokenized_data = kmalloc(ntokens * sizeof(unsigned int), GFP_KERNEL);
+ if (!tokenized_data) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ cp = buf;
+ i = 0;
+ while (i < ntokens) {
+ if (sscanf(cp, "%u", &tokenized_data[i++]) != 1)
+ goto err_kfree;
+
+ cp = strpbrk(cp, " :");
+ if (!cp)
+ break;
+ cp++;
+ }
+
+ if (i != ntokens)
+ goto err_kfree;
+
+ *num_tokens = ntokens;
+ return tokenized_data;
+
+err_kfree:
+ kfree(tokenized_data);
+err:
+ return ERR_PTR(err);
+}
+
+static ssize_t show_target_loads(
+ struct cpufreq_interactive_tunables *tunables,
+ char *buf)
+{
+ int i;
+ ssize_t ret = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tunables->target_loads_lock, flags);
+
+ for (i = 0; i < tunables->ntarget_loads; i++)
+ ret += sprintf(buf + ret, "%u%s", tunables->target_loads[i],
+ i & 0x1 ? ":" : " ");
+
+ sprintf(buf + ret - 1, "\n");
+ spin_unlock_irqrestore(&tunables->target_loads_lock, flags);
+ return ret;
+}
+
+static ssize_t store_target_loads(
+ struct cpufreq_interactive_tunables *tunables,
+ const char *buf, size_t count)
+{
+ int ntokens;
+ unsigned int *new_target_loads = NULL;
+ unsigned long flags;
+
+ new_target_loads = get_tokenized_data(buf, &ntokens);
+ if (IS_ERR(new_target_loads))
+ return PTR_RET(new_target_loads);
+
+ spin_lock_irqsave(&tunables->target_loads_lock, flags);
+ if (tunables->target_loads != default_target_loads)
+ kfree(tunables->target_loads);
+ tunables->target_loads = new_target_loads;
+ tunables->ntarget_loads = ntokens;
+ spin_unlock_irqrestore(&tunables->target_loads_lock, flags);
+ return count;
+}
+
+static ssize_t show_above_hispeed_delay(
+ struct cpufreq_interactive_tunables *tunables, char *buf)
+{
+ int i;
+ ssize_t ret = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags);
+
+ for (i = 0; i < tunables->nabove_hispeed_delay; i++)
+ ret += sprintf(buf + ret, "%u%s",
+ tunables->above_hispeed_delay[i],
+ i & 0x1 ? ":" : " ");
+
+ sprintf(buf + ret - 1, "\n");
+ spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags);
+ return ret;
+}
+
+static ssize_t store_above_hispeed_delay(
+ struct cpufreq_interactive_tunables *tunables,
+ const char *buf, size_t count)
+{
+ int ntokens;
+ unsigned int *new_above_hispeed_delay = NULL;
+ unsigned long flags;
+
+ new_above_hispeed_delay = get_tokenized_data(buf, &ntokens);
+ if (IS_ERR(new_above_hispeed_delay))
+ return PTR_RET(new_above_hispeed_delay);
+
+ spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags);
+ if (tunables->above_hispeed_delay != default_above_hispeed_delay)
+ kfree(tunables->above_hispeed_delay);
+ tunables->above_hispeed_delay = new_above_hispeed_delay;
+ tunables->nabove_hispeed_delay = ntokens;
+ spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags);
+ return count;
+
+}
+
+static ssize_t show_hispeed_freq(struct cpufreq_interactive_tunables *tunables,
+ char *buf)
+{
+ return sprintf(buf, "%u\n", tunables->hispeed_freq);
+}
+
+static ssize_t store_hispeed_freq(struct cpufreq_interactive_tunables *tunables,
+ const char *buf, size_t count)
+{
+ int ret;
+ long unsigned int val;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+ tunables->hispeed_freq = val;
+ return count;
+}
+
+static ssize_t show_go_hispeed_load(struct cpufreq_interactive_tunables
+ *tunables, char *buf)
+{
+ return sprintf(buf, "%lu\n", tunables->go_hispeed_load);
+}
+
+static ssize_t store_go_hispeed_load(struct cpufreq_interactive_tunables
+ *tunables, const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+ tunables->go_hispeed_load = val;
+ return count;
+}
+
+static ssize_t show_min_sample_time(struct cpufreq_interactive_tunables
+ *tunables, char *buf)
+{
+ return sprintf(buf, "%lu\n", tunables->min_sample_time);
+}
+
+static ssize_t store_min_sample_time(struct cpufreq_interactive_tunables
+ *tunables, const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+ tunables->min_sample_time = val;
+ return count;
+}
+
+static ssize_t show_timer_rate(struct cpufreq_interactive_tunables *tunables,
+ char *buf)
+{
+ return sprintf(buf, "%lu\n", tunables->timer_rate);
+}
+
+static ssize_t store_timer_rate(struct cpufreq_interactive_tunables *tunables,
+ const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val, val_round;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+
+ val_round = jiffies_to_usecs(usecs_to_jiffies(val));
+ if (val != val_round)
+ pr_warn("timer_rate not aligned to jiffy. Rounded up to %lu\n",
+ val_round);
+
+ tunables->timer_rate = val_round;
+ return count;
+}
+
+static ssize_t show_timer_slack(struct cpufreq_interactive_tunables *tunables,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", tunables->timer_slack_val);
+}
+
+static ssize_t store_timer_slack(struct cpufreq_interactive_tunables *tunables,
+ const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = kstrtol(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ tunables->timer_slack_val = val;
+ return count;
+}
+
+static ssize_t show_boost(struct cpufreq_interactive_tunables *tunables,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", tunables->boost_val);
+}
+
+static ssize_t store_boost(struct cpufreq_interactive_tunables *tunables,
+ const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+
+ tunables->boost_val = val;
+
+ if (tunables->boost_val) {
+ trace_cpufreq_interactive_boost("on");
+ if (!tunables->boosted)
+ cpufreq_interactive_boost(tunables);
+ } else {
+ tunables->boostpulse_endtime = ktime_to_us(ktime_get());
+ trace_cpufreq_interactive_unboost("off");
+ }
+
+ return count;
+}
+
+static ssize_t store_boostpulse(struct cpufreq_interactive_tunables *tunables,
+ const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+
+ tunables->boostpulse_endtime = ktime_to_us(ktime_get()) +
+ tunables->boostpulse_duration_val;
+ trace_cpufreq_interactive_boost("pulse");
+ if (!tunables->boosted)
+ cpufreq_interactive_boost(tunables);
+ return count;
+}
+
+static ssize_t show_boostpulse_duration(struct cpufreq_interactive_tunables
+ *tunables, char *buf)
+{
+ return sprintf(buf, "%d\n", tunables->boostpulse_duration_val);
+}
+
+static ssize_t store_boostpulse_duration(struct cpufreq_interactive_tunables
+ *tunables, const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+
+ tunables->boostpulse_duration_val = val;
+ return count;
+}
+
+static ssize_t show_io_is_busy(struct cpufreq_interactive_tunables *tunables,
+ char *buf)
+{
+ return sprintf(buf, "%u\n", tunables->io_is_busy);
+}
+
+static ssize_t store_io_is_busy(struct cpufreq_interactive_tunables *tunables,
+ const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+ tunables->io_is_busy = val;
+ return count;
+}
+
+/*
+ * Create show/store routines
+ * - sys: One governor instance for complete SYSTEM
+ * - pol: One governor instance per struct cpufreq_policy
+ */
+#define show_gov_pol_sys(file_name) \
+static ssize_t show_##file_name##_gov_sys \
+(struct kobject *kobj, struct attribute *attr, char *buf) \
+{ \
+ return show_##file_name(common_tunables, buf); \
+} \
+ \
+static ssize_t show_##file_name##_gov_pol \
+(struct cpufreq_policy *policy, char *buf) \
+{ \
+ return show_##file_name(policy->governor_data, buf); \
+}
+
+#define store_gov_pol_sys(file_name) \
+static ssize_t store_##file_name##_gov_sys \
+(struct kobject *kobj, struct attribute *attr, const char *buf, \
+ size_t count) \
+{ \
+ return store_##file_name(common_tunables, buf, count); \
+} \
+ \
+static ssize_t store_##file_name##_gov_pol \
+(struct cpufreq_policy *policy, const char *buf, size_t count) \
+{ \
+ return store_##file_name(policy->governor_data, buf, count); \
+}
+
+#define show_store_gov_pol_sys(file_name) \
+show_gov_pol_sys(file_name); \
+store_gov_pol_sys(file_name)
+
+show_store_gov_pol_sys(target_loads);
+show_store_gov_pol_sys(above_hispeed_delay);
+show_store_gov_pol_sys(hispeed_freq);
+show_store_gov_pol_sys(go_hispeed_load);
+show_store_gov_pol_sys(min_sample_time);
+show_store_gov_pol_sys(timer_rate);
+show_store_gov_pol_sys(timer_slack);
+show_store_gov_pol_sys(boost);
+store_gov_pol_sys(boostpulse);
+show_store_gov_pol_sys(boostpulse_duration);
+show_store_gov_pol_sys(io_is_busy);
+
+#define gov_sys_attr_rw(_name) \
+static struct global_attr _name##_gov_sys = \
+__ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys)
+
+#define gov_pol_attr_rw(_name) \
+static struct freq_attr _name##_gov_pol = \
+__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol)
+
+#define gov_sys_pol_attr_rw(_name) \
+ gov_sys_attr_rw(_name); \
+ gov_pol_attr_rw(_name)
+
+gov_sys_pol_attr_rw(target_loads);
+gov_sys_pol_attr_rw(above_hispeed_delay);
+gov_sys_pol_attr_rw(hispeed_freq);
+gov_sys_pol_attr_rw(go_hispeed_load);
+gov_sys_pol_attr_rw(min_sample_time);
+gov_sys_pol_attr_rw(timer_rate);
+gov_sys_pol_attr_rw(timer_slack);
+gov_sys_pol_attr_rw(boost);
+gov_sys_pol_attr_rw(boostpulse_duration);
+gov_sys_pol_attr_rw(io_is_busy);
+
+static struct global_attr boostpulse_gov_sys =
+ __ATTR(boostpulse, 0200, NULL, store_boostpulse_gov_sys);
+
+static struct freq_attr boostpulse_gov_pol =
+ __ATTR(boostpulse, 0200, NULL, store_boostpulse_gov_pol);
+
+/* One Governor instance for entire system */
+static struct attribute *interactive_attributes_gov_sys[] = {
+ &target_loads_gov_sys.attr,
+ &above_hispeed_delay_gov_sys.attr,
+ &hispeed_freq_gov_sys.attr,
+ &go_hispeed_load_gov_sys.attr,
+ &min_sample_time_gov_sys.attr,
+ &timer_rate_gov_sys.attr,
+ &timer_slack_gov_sys.attr,
+ &boost_gov_sys.attr,
+ &boostpulse_gov_sys.attr,
+ &boostpulse_duration_gov_sys.attr,
+ &io_is_busy_gov_sys.attr,
+ NULL,
+};
+
+static struct attribute_group interactive_attr_group_gov_sys = {
+ .attrs = interactive_attributes_gov_sys,
+ .name = "interactive",
+};
+
+/* Per policy governor instance */
+static struct attribute *interactive_attributes_gov_pol[] = {
+ &target_loads_gov_pol.attr,
+ &above_hispeed_delay_gov_pol.attr,
+ &hispeed_freq_gov_pol.attr,
+ &go_hispeed_load_gov_pol.attr,
+ &min_sample_time_gov_pol.attr,
+ &timer_rate_gov_pol.attr,
+ &timer_slack_gov_pol.attr,
+ &boost_gov_pol.attr,
+ &boostpulse_gov_pol.attr,
+ &boostpulse_duration_gov_pol.attr,
+ &io_is_busy_gov_pol.attr,
+ NULL,
+};
+
+static struct attribute_group interactive_attr_group_gov_pol = {
+ .attrs = interactive_attributes_gov_pol,
+ .name = "interactive",
+};
+
+static struct attribute_group *get_sysfs_attr(void)
+{
+ if (have_governor_per_policy())
+ return &interactive_attr_group_gov_pol;
+ else
+ return &interactive_attr_group_gov_sys;
+}
+
+static int cpufreq_interactive_idle_notifier(struct notifier_block *nb,
+ unsigned long val,
+ void *data)
+{
+ if (val == IDLE_END)
+ cpufreq_interactive_idle_end();
+
+ return 0;
+}
+
+static struct notifier_block cpufreq_interactive_idle_nb = {
+ .notifier_call = cpufreq_interactive_idle_notifier,
+};
+
+static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ int rc;
+ unsigned int j;
+ struct cpufreq_interactive_cpuinfo *pcpu;
+ struct cpufreq_frequency_table *freq_table;
+ struct cpufreq_interactive_tunables *tunables;
+ unsigned long flags;
+
+ if (have_governor_per_policy())
+ tunables = policy->governor_data;
+ else
+ tunables = common_tunables;
+
+ if (WARN_ON(!tunables && (event != CPUFREQ_GOV_POLICY_INIT)))
+ return -EINVAL;
+
+ switch (event) {
+ case CPUFREQ_GOV_POLICY_INIT:
+ if (have_governor_per_policy()) {
+ WARN_ON(tunables);
+ } else if (tunables) {
+ tunables->usage_count++;
+ policy->governor_data = tunables;
+ return 0;
+ }
+
+ tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
+ if (!tunables) {
+ pr_err("%s: POLICY_INIT: kzalloc failed\n", __func__);
+ return -ENOMEM;
+ }
+
+ tunables->usage_count = 1;
+ tunables->above_hispeed_delay = default_above_hispeed_delay;
+ tunables->nabove_hispeed_delay =
+ ARRAY_SIZE(default_above_hispeed_delay);
+ tunables->go_hispeed_load = DEFAULT_GO_HISPEED_LOAD;
+ tunables->target_loads = default_target_loads;
+ tunables->ntarget_loads = ARRAY_SIZE(default_target_loads);
+ tunables->min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
+ tunables->timer_rate = DEFAULT_TIMER_RATE;
+ tunables->boostpulse_duration_val = DEFAULT_MIN_SAMPLE_TIME;
+ tunables->timer_slack_val = DEFAULT_TIMER_SLACK;
+
+ spin_lock_init(&tunables->target_loads_lock);
+ spin_lock_init(&tunables->above_hispeed_delay_lock);
+
+ policy->governor_data = tunables;
+ if (!have_governor_per_policy()) {
+ common_tunables = tunables;
+ }
+
+ rc = sysfs_create_group(get_governor_parent_kobj(policy),
+ get_sysfs_attr());
+ if (rc) {
+ kfree(tunables);
+ policy->governor_data = NULL;
+ if (!have_governor_per_policy()) {
+ common_tunables = NULL;
+ }
+ return rc;
+ }
+
+ if (!policy->governor->initialized) {
+ idle_notifier_register(&cpufreq_interactive_idle_nb);
+ cpufreq_register_notifier(&cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ }
+
+ break;
+
+ case CPUFREQ_GOV_POLICY_EXIT:
+ if (!--tunables->usage_count) {
+ if (policy->governor->initialized == 1) {
+ cpufreq_unregister_notifier(&cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ idle_notifier_unregister(&cpufreq_interactive_idle_nb);
+ }
+
+ sysfs_remove_group(get_governor_parent_kobj(policy),
+ get_sysfs_attr());
+
+ kfree(tunables);
+ common_tunables = NULL;
+ }
+
+ policy->governor_data = NULL;
+ break;
+
+ case CPUFREQ_GOV_START:
+ mutex_lock(&gov_lock);
+
+ freq_table = cpufreq_frequency_get_table(policy->cpu);
+ if (!tunables->hispeed_freq)
+ tunables->hispeed_freq = policy->max;
+
+ for_each_cpu(j, policy->cpus) {
+ pcpu = &per_cpu(cpuinfo, j);
+ pcpu->policy = policy;
+ pcpu->target_freq = policy->cur;
+ pcpu->freq_table = freq_table;
+ pcpu->floor_freq = pcpu->target_freq;
+ pcpu->pol_floor_val_time =
+ ktime_to_us(ktime_get());
+ pcpu->loc_floor_val_time = pcpu->pol_floor_val_time;
+ pcpu->pol_hispeed_val_time = pcpu->pol_floor_val_time;
+ pcpu->loc_hispeed_val_time = pcpu->pol_floor_val_time;
+ down_write(&pcpu->enable_sem);
+ del_timer_sync(&pcpu->cpu_timer);
+ del_timer_sync(&pcpu->cpu_slack_timer);
+ cpufreq_interactive_timer_start(tunables, j);
+ pcpu->governor_enabled = 1;
+ up_write(&pcpu->enable_sem);
+ }
+
+ mutex_unlock(&gov_lock);
+ break;
+
+ case CPUFREQ_GOV_STOP:
+ mutex_lock(&gov_lock);
+ for_each_cpu(j, policy->cpus) {
+ pcpu = &per_cpu(cpuinfo, j);
+ down_write(&pcpu->enable_sem);
+ pcpu->governor_enabled = 0;
+ del_timer_sync(&pcpu->cpu_timer);
+ del_timer_sync(&pcpu->cpu_slack_timer);
+ up_write(&pcpu->enable_sem);
+ }
+
+ mutex_unlock(&gov_lock);
+ break;
+
+ case CPUFREQ_GOV_LIMITS:
+ if (policy->max < policy->cur)
+ __cpufreq_driver_target(policy,
+ policy->max, CPUFREQ_RELATION_H);
+ else if (policy->min > policy->cur)
+ __cpufreq_driver_target(policy,
+ policy->min, CPUFREQ_RELATION_L);
+ for_each_cpu(j, policy->cpus) {
+ pcpu = &per_cpu(cpuinfo, j);
+
+ down_read(&pcpu->enable_sem);
+ if (pcpu->governor_enabled == 0) {
+ up_read(&pcpu->enable_sem);
+ continue;
+ }
+
+ spin_lock_irqsave(&pcpu->target_freq_lock, flags);
+ if (policy->max < pcpu->target_freq)
+ pcpu->target_freq = policy->max;
+ else if (policy->min > pcpu->target_freq)
+ pcpu->target_freq = policy->min;
+
+ spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
+ up_read(&pcpu->enable_sem);
+ }
+ break;
+ }
+ return 0;
+}
+
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+static
+#endif
+struct cpufreq_governor cpufreq_gov_interactive = {
+ .name = "interactive",
+ .governor = cpufreq_governor_interactive,
+ .max_transition_latency = 10000000,
+ .owner = THIS_MODULE,
+};
+
+static void cpufreq_interactive_nop_timer(unsigned long data)
+{
+}
+
+static int __init cpufreq_interactive_init(void)
+{
+ unsigned int i;
+ struct cpufreq_interactive_cpuinfo *pcpu;
+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+ int ret = 0;
+
+ /* Initalize per-cpu timers */
+ for_each_possible_cpu(i) {
+ pcpu = &per_cpu(cpuinfo, i);
+ init_timer_deferrable(&pcpu->cpu_timer);
+ pcpu->cpu_timer.function = cpufreq_interactive_timer;
+ pcpu->cpu_timer.data = i;
+ init_timer(&pcpu->cpu_slack_timer);
+ pcpu->cpu_slack_timer.function = cpufreq_interactive_nop_timer;
+ spin_lock_init(&pcpu->load_lock);
+ spin_lock_init(&pcpu->target_freq_lock);
+ init_rwsem(&pcpu->enable_sem);
+ }
+
+ spin_lock_init(&speedchange_cpumask_lock);
+ mutex_init(&gov_lock);
+ speedchange_task =
+ kthread_create(cpufreq_interactive_speedchange_task, NULL,
+ "cfinteractive");
+ if (IS_ERR(speedchange_task))
+ return PTR_ERR(speedchange_task);
+
+ sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, &param);
+ get_task_struct(speedchange_task);
+
+ /* NB: wake up so the thread does not look hung to the freezer */
+ wake_up_process(speedchange_task);
+
+ ret = cpufreq_register_governor(&cpufreq_gov_interactive);
+ if (ret) {
+ kthread_stop(speedchange_task);
+ put_task_struct(speedchange_task);
+ }
+ return ret;
+}
+
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+fs_initcall(cpufreq_interactive_init);
+#else
+module_init(cpufreq_interactive_init);
+#endif
+
+static void __exit cpufreq_interactive_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_interactive);
+ kthread_stop(speedchange_task);
+ put_task_struct(speedchange_task);
+}
+
+module_exit(cpufreq_interactive_exit);
+
+MODULE_AUTHOR("Mike Chan <mike@android.com>");
+MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for "
+ "Latency sensitive workloads");
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/cpufreq_times.c b/drivers/cpufreq/cpufreq_times.c
new file mode 100644
index 000000000000..e7a8b636a5f4
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_times.c
@@ -0,0 +1,464 @@
+/* drivers/cpufreq/cpufreq_times.c
+ *
+ * Copyright (C) 2018 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/cpufreq_times.h>
+#include <linux/cputime.h>
+#include <linux/hashtable.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/threads.h>
+
+#define UID_HASH_BITS 10
+
+DECLARE_HASHTABLE(uid_hash_table, UID_HASH_BITS);
+
+static DEFINE_SPINLOCK(task_time_in_state_lock); /* task->time_in_state */
+static DEFINE_SPINLOCK(uid_lock); /* uid_hash_table */
+
+struct uid_entry {
+ uid_t uid;
+ unsigned int max_state;
+ struct hlist_node hash;
+ struct rcu_head rcu;
+ u64 time_in_state[0];
+};
+
+/**
+ * struct cpu_freqs - per-cpu frequency information
+ * @offset: start of these freqs' stats in task time_in_state array
+ * @max_state: number of entries in freq_table
+ * @last_index: index in freq_table of last frequency switched to
+ * @freq_table: list of available frequencies
+ */
+struct cpu_freqs {
+ unsigned int offset;
+ unsigned int max_state;
+ unsigned int last_index;
+ unsigned int freq_table[0];
+};
+
+static struct cpu_freqs *all_freqs[NR_CPUS];
+
+static unsigned int next_offset;
+
+
+/* Caller must hold rcu_read_lock() */
+static struct uid_entry *find_uid_entry_rcu(uid_t uid)
+{
+ struct uid_entry *uid_entry;
+
+ hash_for_each_possible_rcu(uid_hash_table, uid_entry, hash, uid) {
+ if (uid_entry->uid == uid)
+ return uid_entry;
+ }
+ return NULL;
+}
+
+/* Caller must hold uid lock */
+static struct uid_entry *find_uid_entry_locked(uid_t uid)
+{
+ struct uid_entry *uid_entry;
+
+ hash_for_each_possible(uid_hash_table, uid_entry, hash, uid) {
+ if (uid_entry->uid == uid)
+ return uid_entry;
+ }
+ return NULL;
+}
+
+/* Caller must hold uid lock */
+static struct uid_entry *find_or_register_uid_locked(uid_t uid)
+{
+ struct uid_entry *uid_entry, *temp;
+ unsigned int max_state = READ_ONCE(next_offset);
+ size_t alloc_size = sizeof(*uid_entry) + max_state *
+ sizeof(uid_entry->time_in_state[0]);
+
+ uid_entry = find_uid_entry_locked(uid);
+ if (uid_entry) {
+ if (uid_entry->max_state == max_state)
+ return uid_entry;
+ /* uid_entry->time_in_state is too small to track all freqs, so
+ * expand it.
+ */
+ temp = __krealloc(uid_entry, alloc_size, GFP_ATOMIC);
+ if (!temp)
+ return uid_entry;
+ temp->max_state = max_state;
+ memset(temp->time_in_state + uid_entry->max_state, 0,
+ (max_state - uid_entry->max_state) *
+ sizeof(uid_entry->time_in_state[0]));
+ if (temp != uid_entry) {
+ hlist_replace_rcu(&uid_entry->hash, &temp->hash);
+ kfree_rcu(uid_entry, rcu);
+ }
+ return temp;
+ }
+
+ uid_entry = kzalloc(alloc_size, GFP_ATOMIC);
+ if (!uid_entry)
+ return NULL;
+
+ uid_entry->uid = uid;
+ uid_entry->max_state = max_state;
+
+ hash_add_rcu(uid_hash_table, &uid_entry->hash, uid);
+
+ return uid_entry;
+}
+
+static bool freq_index_invalid(unsigned int index)
+{
+ unsigned int cpu;
+ struct cpu_freqs *freqs;
+
+ for_each_possible_cpu(cpu) {
+ freqs = all_freqs[cpu];
+ if (!freqs || index < freqs->offset ||
+ freqs->offset + freqs->max_state <= index)
+ continue;
+ return freqs->freq_table[index - freqs->offset] ==
+ CPUFREQ_ENTRY_INVALID;
+ }
+ return true;
+}
+
+static int single_uid_time_in_state_show(struct seq_file *m, void *ptr)
+{
+ struct uid_entry *uid_entry;
+ unsigned int i;
+ u64 time;
+ uid_t uid = from_kuid_munged(current_user_ns(), *(kuid_t *)m->private);
+
+ if (uid == overflowuid)
+ return -EINVAL;
+
+ rcu_read_lock();
+
+ uid_entry = find_uid_entry_rcu(uid);
+ if (!uid_entry) {
+ rcu_read_unlock();
+ return 0;
+ }
+
+ for (i = 0; i < uid_entry->max_state; ++i) {
+ if (freq_index_invalid(i))
+ continue;
+ time = cputime_to_clock_t(uid_entry->time_in_state[i]);
+ seq_write(m, &time, sizeof(time));
+ }
+
+ rcu_read_unlock();
+
+ return 0;
+}
+
+static void *uid_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ if (*pos >= HASH_SIZE(uid_hash_table))
+ return NULL;
+
+ return &uid_hash_table[*pos];
+}
+
+static void *uid_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ (*pos)++;
+
+ if (*pos >= HASH_SIZE(uid_hash_table))
+ return NULL;
+
+ return &uid_hash_table[*pos];
+}
+
+static void uid_seq_stop(struct seq_file *seq, void *v) { }
+
+static int uid_time_in_state_seq_show(struct seq_file *m, void *v)
+{
+ struct uid_entry *uid_entry;
+ struct cpu_freqs *freqs, *last_freqs = NULL;
+ int i, cpu;
+
+ if (v == uid_hash_table) {
+ seq_puts(m, "uid:");
+ for_each_possible_cpu(cpu) {
+ freqs = all_freqs[cpu];
+ if (!freqs || freqs == last_freqs)
+ continue;
+ last_freqs = freqs;
+ for (i = 0; i < freqs->max_state; i++) {
+ if (freqs->freq_table[i] ==
+ CPUFREQ_ENTRY_INVALID)
+ continue;
+ seq_printf(m, " %d", freqs->freq_table[i]);
+ }
+ }
+ seq_putc(m, '\n');
+ }
+
+ rcu_read_lock();
+
+ hlist_for_each_entry_rcu(uid_entry, (struct hlist_head *)v, hash) {
+ if (uid_entry->max_state)
+ seq_printf(m, "%d:", uid_entry->uid);
+ for (i = 0; i < uid_entry->max_state; ++i) {
+ if (freq_index_invalid(i))
+ continue;
+ seq_printf(m, " %lu", (unsigned long)cputime_to_clock_t(
+ uid_entry->time_in_state[i]));
+ }
+ if (uid_entry->max_state)
+ seq_putc(m, '\n');
+ }
+
+ rcu_read_unlock();
+ return 0;
+}
+
+void cpufreq_task_times_init(struct task_struct *p)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&task_time_in_state_lock, flags);
+ p->time_in_state = NULL;
+ spin_unlock_irqrestore(&task_time_in_state_lock, flags);
+ p->max_state = 0;
+}
+
+void cpufreq_task_times_alloc(struct task_struct *p)
+{
+ void *temp;
+ unsigned long flags;
+ unsigned int max_state = READ_ONCE(next_offset);
+
+ /* We use one array to avoid multiple allocs per task */
+ temp = kcalloc(max_state, sizeof(p->time_in_state[0]), GFP_ATOMIC);
+ if (!temp)
+ return;
+
+ spin_lock_irqsave(&task_time_in_state_lock, flags);
+ p->time_in_state = temp;
+ spin_unlock_irqrestore(&task_time_in_state_lock, flags);
+ p->max_state = max_state;
+}
+
+/* Caller must hold task_time_in_state_lock */
+static int cpufreq_task_times_realloc_locked(struct task_struct *p)
+{
+ void *temp;
+ unsigned int max_state = READ_ONCE(next_offset);
+
+ temp = krealloc(p->time_in_state, max_state * sizeof(u64), GFP_ATOMIC);
+ if (!temp)
+ return -ENOMEM;
+ p->time_in_state = temp;
+ memset(p->time_in_state + p->max_state, 0,
+ (max_state - p->max_state) * sizeof(u64));
+ p->max_state = max_state;
+ return 0;
+}
+
+void cpufreq_task_times_exit(struct task_struct *p)
+{
+ unsigned long flags;
+ void *temp;
+
+ if (!p->time_in_state)
+ return;
+
+ spin_lock_irqsave(&task_time_in_state_lock, flags);
+ temp = p->time_in_state;
+ p->time_in_state = NULL;
+ spin_unlock_irqrestore(&task_time_in_state_lock, flags);
+ kfree(temp);
+}
+
+int proc_time_in_state_show(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *p)
+{
+ unsigned int cpu, i;
+ cputime_t cputime;
+ unsigned long flags;
+ struct cpu_freqs *freqs;
+ struct cpu_freqs *last_freqs = NULL;
+
+ spin_lock_irqsave(&task_time_in_state_lock, flags);
+ for_each_possible_cpu(cpu) {
+ freqs = all_freqs[cpu];
+ if (!freqs || freqs == last_freqs)
+ continue;
+ last_freqs = freqs;
+
+ seq_printf(m, "cpu%u\n", cpu);
+ for (i = 0; i < freqs->max_state; i++) {
+ if (freqs->freq_table[i] == CPUFREQ_ENTRY_INVALID)
+ continue;
+ cputime = 0;
+ if (freqs->offset + i < p->max_state &&
+ p->time_in_state)
+ cputime = p->time_in_state[freqs->offset + i];
+ seq_printf(m, "%u %lu\n", freqs->freq_table[i],
+ (unsigned long)cputime_to_clock_t(cputime));
+ }
+ }
+ spin_unlock_irqrestore(&task_time_in_state_lock, flags);
+ return 0;
+}
+
+void cpufreq_acct_update_power(struct task_struct *p, cputime_t cputime)
+{
+ unsigned long flags;
+ unsigned int state;
+ struct uid_entry *uid_entry;
+ struct cpu_freqs *freqs = all_freqs[task_cpu(p)];
+ uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
+
+ if (!freqs || p->flags & PF_EXITING)
+ return;
+
+ state = freqs->offset + READ_ONCE(freqs->last_index);
+
+ spin_lock_irqsave(&task_time_in_state_lock, flags);
+ if ((state < p->max_state || !cpufreq_task_times_realloc_locked(p)) &&
+ p->time_in_state)
+ p->time_in_state[state] += cputime;
+ spin_unlock_irqrestore(&task_time_in_state_lock, flags);
+
+ spin_lock_irqsave(&uid_lock, flags);
+ uid_entry = find_or_register_uid_locked(uid);
+ if (uid_entry && state < uid_entry->max_state)
+ uid_entry->time_in_state[state] += cputime;
+ spin_unlock_irqrestore(&uid_lock, flags);
+}
+
+void cpufreq_times_create_policy(struct cpufreq_policy *policy)
+{
+ int cpu, index;
+ unsigned int count = 0;
+ struct cpufreq_frequency_table *pos, *table;
+ struct cpu_freqs *freqs;
+ void *tmp;
+
+ if (all_freqs[policy->cpu])
+ return;
+
+ table = cpufreq_frequency_get_table(policy->cpu);
+ if (!table)
+ return;
+
+ cpufreq_for_each_entry(pos, table)
+ count++;
+
+ tmp = kzalloc(sizeof(*freqs) + sizeof(freqs->freq_table[0]) * count,
+ GFP_KERNEL);
+ if (!tmp)
+ return;
+
+ freqs = tmp;
+ freqs->max_state = count;
+
+ index = cpufreq_frequency_table_get_index(policy, policy->cur);
+ if (index >= 0)
+ WRITE_ONCE(freqs->last_index, index);
+
+ cpufreq_for_each_entry(pos, table)
+ freqs->freq_table[pos - table] = pos->frequency;
+
+ freqs->offset = next_offset;
+ WRITE_ONCE(next_offset, freqs->offset + count);
+ for_each_cpu(cpu, policy->related_cpus)
+ all_freqs[cpu] = freqs;
+}
+
+void cpufreq_task_times_remove_uids(uid_t uid_start, uid_t uid_end)
+{
+ struct uid_entry *uid_entry;
+ struct hlist_node *tmp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&uid_lock, flags);
+
+ for (; uid_start <= uid_end; uid_start++) {
+ hash_for_each_possible_safe(uid_hash_table, uid_entry, tmp,
+ hash, uid_start) {
+ if (uid_start == uid_entry->uid) {
+ hash_del_rcu(&uid_entry->hash);
+ kfree_rcu(uid_entry, rcu);
+ }
+ }
+ }
+
+ spin_unlock_irqrestore(&uid_lock, flags);
+}
+
+void cpufreq_times_record_transition(struct cpufreq_freqs *freq)
+{
+ int index;
+ struct cpu_freqs *freqs = all_freqs[freq->cpu];
+ struct cpufreq_policy *policy;
+
+ if (!freqs)
+ return;
+
+ policy = cpufreq_cpu_get(freq->cpu);
+ if (!policy)
+ return;
+
+ index = cpufreq_frequency_table_get_index(policy, freq->new);
+ if (index >= 0)
+ WRITE_ONCE(freqs->last_index, index);
+
+ cpufreq_cpu_put(policy);
+}
+
+static const struct seq_operations uid_time_in_state_seq_ops = {
+ .start = uid_seq_start,
+ .next = uid_seq_next,
+ .stop = uid_seq_stop,
+ .show = uid_time_in_state_seq_show,
+};
+
+static int uid_time_in_state_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &uid_time_in_state_seq_ops);
+}
+
+int single_uid_time_in_state_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, single_uid_time_in_state_show,
+ &(inode->i_uid));
+}
+
+static const struct file_operations uid_time_in_state_fops = {
+ .open = uid_time_in_state_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static int __init cpufreq_times_init(void)
+{
+ proc_create_data("uid_time_in_state", 0444, NULL,
+ &uid_time_in_state_fops, NULL);
+
+ return 0;
+}
+
+early_initcall(cpufreq_times_init);