diff options
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r-- | drivers/cpufreq/Kconfig | 59 | ||||
-rw-r--r-- | drivers/cpufreq/Makefile | 8 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq-dt.c | 7 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq.c | 100 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_governor_attr_set.c | 84 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_interactive.c | 1374 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_times.c | 464 |
7 files changed, 2094 insertions, 2 deletions
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 949610360b14..75ded53a7470 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -46,6 +46,15 @@ config CPU_FREQ_STAT_DETAILS If in doubt, say N. +config CPU_FREQ_TIMES + bool "CPU frequency time-in-state statistics" + default y + help + This driver exports CPU time-in-state information through procfs file + system. + + If in doubt, say N. + choice prompt "Default CPUFreq governor" default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1100_CPUFREQ || ARM_SA1110_CPUFREQ @@ -102,6 +111,25 @@ config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE Be aware that not all cpufreq drivers support the conservative governor. If unsure have a look at the help section of the driver. Fallback governor will be the performance governor. + +config CPU_FREQ_DEFAULT_GOV_INTERACTIVE + bool "interactive" + select CPU_FREQ_GOV_INTERACTIVE + help + Use the CPUFreq governor 'interactive' as default. This allows + you to get a full dynamic cpu frequency capable system by simply + loading your cpufreq low-level hardware driver, using the + 'interactive' governor for latency-sensitive workloads. + +config CPU_FREQ_DEFAULT_GOV_SCHEDUTIL + bool "schedutil" + depends on SMP + select CPU_FREQ_GOV_SCHEDUTIL + select CPU_FREQ_GOV_PERFORMANCE + help + Use the 'schedutil' CPUFreq governor by default. If unsure, + have a look at the help section of that governor. The fallback + governor will be 'performance'. endchoice config CPU_FREQ_GOV_PERFORMANCE @@ -159,6 +187,20 @@ config CPU_FREQ_GOV_ONDEMAND If in doubt, say N. +config CPU_FREQ_GOV_INTERACTIVE + bool "'interactive' cpufreq policy governor" + help + 'interactive' - This driver adds a dynamic cpufreq policy governor + designed for latency-sensitive workloads. + + This governor attempts to reduce the latency of clock + increases so that the system is more responsive to + interactive workloads. + + For details, take a look at linux/Documentation/cpu-freq. + + If in doubt, say N. + config CPU_FREQ_GOV_CONSERVATIVE tristate "'conservative' cpufreq governor" depends on CPU_FREQ @@ -183,6 +225,23 @@ config CPU_FREQ_GOV_CONSERVATIVE If in doubt, say N. +config CPU_FREQ_GOV_SCHEDUTIL + bool "'schedutil' cpufreq policy governor" + depends on CPU_FREQ && SMP + select CPU_FREQ_GOV_ATTR_SET + select IRQ_WORK + help + This governor makes decisions based on the utilization data provided + by the scheduler. It sets the CPU frequency to be proportional to + the utilization/capacity ratio coming from the scheduler. If the + utilization is frequency-invariant, the new frequency is also + proportional to the maximum available frequency. If that is not the + case, it is proportional to the current frequency of the CPU. The + frequency tipping point is at utilization/capacity equal to 80% in + both cases. + + If in doubt, say N. + comment "CPU frequency scaling drivers" config CPUFREQ_DT diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index c0af1a1281c8..54070bf413bf 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -1,15 +1,19 @@ # CPUfreq core -obj-$(CONFIG_CPU_FREQ) += cpufreq.o freq_table.o +obj-$(CONFIG_CPU_FREQ) += cpufreq.o freq_table.o cpufreq_governor_attr_set.o # CPUfreq stats obj-$(CONFIG_CPU_FREQ_STAT) += cpufreq_stats.o -# CPUfreq governors +# CPUfreq times +obj-$(CONFIG_CPU_FREQ_TIMES) += cpufreq_times.o + +# CPUfreq governors obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o +obj-$(CONFIG_CPU_FREQ_GOV_INTERACTIVE) += cpufreq_interactive.o obj-$(CONFIG_CPU_FREQ_GOV_COMMON) += cpufreq_governor.o obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index f951f911786e..a72ae98b4838 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -279,6 +279,13 @@ static int cpufreq_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = transition_latency; + /* + * Android: set default parameters for parity between schedutil and + * schedfreq + */ + policy->up_transition_delay_us = transition_latency / NSEC_PER_USEC; + policy->down_transition_delay_us = 50000; /* 50ms */ + return 0; out_free_cpufreq_table: diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 68b604ad8413..fc0d9f90ad8d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -19,6 +19,7 @@ #include <linux/cpu.h> #include <linux/cpufreq.h> +#include <linux/cpufreq_times.h> #include <linux/delay.h> #include <linux/device.h> #include <linux/init.h> @@ -29,6 +30,9 @@ #include <linux/suspend.h> #include <linux/syscore_ops.h> #include <linux/tick.h> +#ifdef CONFIG_SMP +#include <linux/sched.h> +#endif #include <trace/events/power.h> static LIST_HEAD(cpufreq_policy_list); @@ -154,6 +158,12 @@ bool have_governor_per_policy(void) } EXPORT_SYMBOL_GPL(have_governor_per_policy); +bool cpufreq_driver_is_slow(void) +{ + return !(cpufreq_driver->flags & CPUFREQ_DRIVER_FAST); +} +EXPORT_SYMBOL_GPL(cpufreq_driver_is_slow); + struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy) { if (have_governor_per_policy()) @@ -347,6 +357,50 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) #endif } +/********************************************************************* + * FREQUENCY INVARIANT CPU CAPACITY * + *********************************************************************/ + +static DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE; +static DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE; + +static void +scale_freq_capacity(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs) +{ + unsigned long cur = freqs ? freqs->new : policy->cur; + unsigned long scale = (cur << SCHED_CAPACITY_SHIFT) / policy->max; + struct cpufreq_cpuinfo *cpuinfo = &policy->cpuinfo; + int cpu; + + pr_debug("cpus %*pbl cur/cur max freq %lu/%u kHz freq scale %lu\n", + cpumask_pr_args(policy->cpus), cur, policy->max, scale); + + for_each_cpu(cpu, policy->cpus) + per_cpu(freq_scale, cpu) = scale; + + if (freqs) + return; + + scale = (policy->max << SCHED_CAPACITY_SHIFT) / cpuinfo->max_freq; + + pr_debug("cpus %*pbl cur max/max freq %u/%u kHz max freq scale %lu\n", + cpumask_pr_args(policy->cpus), policy->max, cpuinfo->max_freq, + scale); + + for_each_cpu(cpu, policy->cpus) + per_cpu(max_freq_scale, cpu) = scale; +} + +unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu) +{ + return per_cpu(freq_scale, cpu); +} + +unsigned long cpufreq_scale_max_freq_capacity(int cpu) +{ + return per_cpu(max_freq_scale, cpu); +} + static void __cpufreq_notify_transition(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs, unsigned int state) { @@ -384,6 +438,7 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy, pr_debug("FREQ: %lu - CPU: %lu\n", (unsigned long)freqs->new, (unsigned long)freqs->cpu); trace_cpu_frequency(freqs->new, freqs->cpu); + cpufreq_times_record_transition(freqs); srcu_notifier_call_chain(&cpufreq_transition_notifier_list, CPUFREQ_POSTCHANGE, freqs); if (likely(policy) && likely(policy->cpu == freqs->cpu)) @@ -423,6 +478,9 @@ static void cpufreq_notify_post_transition(struct cpufreq_policy *policy, void cpufreq_freq_transition_begin(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs) { +#ifdef CONFIG_SMP + int cpu; +#endif /* * Catch double invocations of _begin() which lead to self-deadlock. @@ -450,6 +508,12 @@ wait: spin_unlock(&policy->transition_lock); + scale_freq_capacity(policy, freqs); +#ifdef CONFIG_SMP + for_each_cpu(cpu, policy->cpus) + trace_cpu_capacity(capacity_curr_of(cpu), cpu); +#endif + cpufreq_notify_transition(policy, freqs, CPUFREQ_PRECHANGE); } EXPORT_SYMBOL_GPL(cpufreq_freq_transition_begin); @@ -469,6 +533,38 @@ void cpufreq_freq_transition_end(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_freq_transition_end); +/** + * cpufreq_driver_resolve_freq - Map a target frequency to a driver-supported + * one. + * @target_freq: target frequency to resolve. + * + * The target to driver frequency mapping is cached in the policy. + * + * Return: Lowest driver-supported frequency greater than or equal to the + * given target_freq, subject to policy (min/max) and driver limitations. + */ +unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + target_freq = clamp_val(target_freq, policy->min, policy->max); + policy->cached_target_freq = target_freq; + + if (cpufreq_driver->target_index) { + int idx, rv; + + rv = cpufreq_frequency_table_target(policy, policy->freq_table, + target_freq, + CPUFREQ_RELATION_L, + &idx); + if (rv) + return target_freq; + policy->cached_resolved_idx = idx; + return policy->freq_table[idx].frequency; + } + + return target_freq; +} +EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); /********************************************************************* * SYSFS INTERFACE * @@ -1250,6 +1346,7 @@ static int cpufreq_online(unsigned int cpu) goto out_exit_policy; blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_CREATE_POLICY, policy); + cpufreq_times_create_policy(policy); write_lock_irqsave(&cpufreq_driver_lock, flags); list_add(&policy->policy_list, &cpufreq_policy_list); @@ -2133,8 +2230,11 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_NOTIFY, new_policy); + scale_freq_capacity(new_policy, NULL); + policy->min = new_policy->min; policy->max = new_policy->max; + trace_cpu_frequency_limits(policy->max, policy->min, policy->cpu); pr_debug("new min and max freqs are %u - %u kHz\n", policy->min, policy->max); diff --git a/drivers/cpufreq/cpufreq_governor_attr_set.c b/drivers/cpufreq/cpufreq_governor_attr_set.c new file mode 100644 index 000000000000..52841f807a7e --- /dev/null +++ b/drivers/cpufreq/cpufreq_governor_attr_set.c @@ -0,0 +1,84 @@ +/* + * Abstract code for CPUFreq governor tunable sysfs attributes. + * + * Copyright (C) 2016, Intel Corporation + * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "cpufreq_governor.h" + +static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj) +{ + return container_of(kobj, struct gov_attr_set, kobj); +} + +static inline struct governor_attr *to_gov_attr(struct attribute *attr) +{ + return container_of(attr, struct governor_attr, attr); +} + +static ssize_t governor_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct governor_attr *gattr = to_gov_attr(attr); + + return gattr->show(to_gov_attr_set(kobj), buf); +} + +static ssize_t governor_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct gov_attr_set *attr_set = to_gov_attr_set(kobj); + struct governor_attr *gattr = to_gov_attr(attr); + int ret; + + mutex_lock(&attr_set->update_lock); + ret = attr_set->usage_count ? gattr->store(attr_set, buf, count) : -EBUSY; + mutex_unlock(&attr_set->update_lock); + return ret; +} + +const struct sysfs_ops governor_sysfs_ops = { + .show = governor_show, + .store = governor_store, +}; +EXPORT_SYMBOL_GPL(governor_sysfs_ops); + +void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node) +{ + INIT_LIST_HEAD(&attr_set->policy_list); + mutex_init(&attr_set->update_lock); + attr_set->usage_count = 1; + list_add(list_node, &attr_set->policy_list); +} +EXPORT_SYMBOL_GPL(gov_attr_set_init); + +void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node) +{ + mutex_lock(&attr_set->update_lock); + attr_set->usage_count++; + list_add(list_node, &attr_set->policy_list); + mutex_unlock(&attr_set->update_lock); +} +EXPORT_SYMBOL_GPL(gov_attr_set_get); + +unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node) +{ + unsigned int count; + + mutex_lock(&attr_set->update_lock); + list_del(list_node); + count = --attr_set->usage_count; + mutex_unlock(&attr_set->update_lock); + if (count) + return count; + + kobject_put(&attr_set->kobj); + mutex_destroy(&attr_set->update_lock); + return 0; +} +EXPORT_SYMBOL_GPL(gov_attr_set_put); diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c new file mode 100644 index 000000000000..e0a586895136 --- /dev/null +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -0,0 +1,1374 @@ +/* + * drivers/cpufreq/cpufreq_interactive.c + * + * Copyright (C) 2010 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Author: Mike Chan (mike@android.com) + * + */ + +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/cpufreq.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/rwsem.h> +#include <linux/sched.h> +#include <linux/sched/rt.h> +#include <linux/tick.h> +#include <linux/time.h> +#include <linux/timer.h> +#include <linux/workqueue.h> +#include <linux/kthread.h> +#include <linux/slab.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/cpufreq_interactive.h> + +struct cpufreq_interactive_cpuinfo { + struct timer_list cpu_timer; + struct timer_list cpu_slack_timer; + spinlock_t load_lock; /* protects the next 4 fields */ + u64 time_in_idle; + u64 time_in_idle_timestamp; + u64 cputime_speedadj; + u64 cputime_speedadj_timestamp; + struct cpufreq_policy *policy; + struct cpufreq_frequency_table *freq_table; + spinlock_t target_freq_lock; /*protects target freq */ + unsigned int target_freq; + unsigned int floor_freq; + u64 pol_floor_val_time; /* policy floor_validate_time */ + u64 loc_floor_val_time; /* per-cpu floor_validate_time */ + u64 pol_hispeed_val_time; /* policy hispeed_validate_time */ + u64 loc_hispeed_val_time; /* per-cpu hispeed_validate_time */ + struct rw_semaphore enable_sem; + int governor_enabled; +}; + +static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo); + +/* realtime thread handles frequency scaling */ +static struct task_struct *speedchange_task; +static cpumask_t speedchange_cpumask; +static spinlock_t speedchange_cpumask_lock; +static struct mutex gov_lock; + +/* Target load. Lower values result in higher CPU speeds. */ +#define DEFAULT_TARGET_LOAD 90 +static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD}; + +#define DEFAULT_TIMER_RATE (20 * USEC_PER_MSEC) +#define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_TIMER_RATE +static unsigned int default_above_hispeed_delay[] = { + DEFAULT_ABOVE_HISPEED_DELAY }; + +struct cpufreq_interactive_tunables { + int usage_count; + /* Hi speed to bump to from lo speed when load burst (default max) */ + unsigned int hispeed_freq; + /* Go to hi speed when CPU load at or above this value. */ +#define DEFAULT_GO_HISPEED_LOAD 99 + unsigned long go_hispeed_load; + /* Target load. Lower values result in higher CPU speeds. */ + spinlock_t target_loads_lock; + unsigned int *target_loads; + int ntarget_loads; + /* + * The minimum amount of time to spend at a frequency before we can ramp + * down. + */ +#define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC) + unsigned long min_sample_time; + /* + * The sample rate of the timer used to increase frequency + */ + unsigned long timer_rate; + /* + * Wait this long before raising speed above hispeed, by default a + * single timer interval. + */ + spinlock_t above_hispeed_delay_lock; + unsigned int *above_hispeed_delay; + int nabove_hispeed_delay; + /* Non-zero means indefinite speed boost active */ + int boost_val; + /* Duration of a boot pulse in usecs */ + int boostpulse_duration_val; + /* End time of boost pulse in ktime converted to usecs */ + u64 boostpulse_endtime; + bool boosted; + /* + * Max additional time to wait in idle, beyond timer_rate, at speeds + * above minimum before wakeup to reduce speed, or -1 if unnecessary. + */ +#define DEFAULT_TIMER_SLACK (4 * DEFAULT_TIMER_RATE) + int timer_slack_val; + bool io_is_busy; +}; + +/* For cases where we have single governor instance for system */ +static struct cpufreq_interactive_tunables *common_tunables; + +static struct attribute_group *get_sysfs_attr(void); + +static void cpufreq_interactive_timer_resched( + struct cpufreq_interactive_cpuinfo *pcpu) +{ + struct cpufreq_interactive_tunables *tunables = + pcpu->policy->governor_data; + unsigned long expires; + unsigned long flags; + + spin_lock_irqsave(&pcpu->load_lock, flags); + pcpu->time_in_idle = + get_cpu_idle_time(smp_processor_id(), + &pcpu->time_in_idle_timestamp, + tunables->io_is_busy); + pcpu->cputime_speedadj = 0; + pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp; + expires = jiffies + usecs_to_jiffies(tunables->timer_rate); + mod_timer_pinned(&pcpu->cpu_timer, expires); + + if (tunables->timer_slack_val >= 0 && + pcpu->target_freq > pcpu->policy->min) { + expires += usecs_to_jiffies(tunables->timer_slack_val); + mod_timer_pinned(&pcpu->cpu_slack_timer, expires); + } + + spin_unlock_irqrestore(&pcpu->load_lock, flags); +} + +/* The caller shall take enable_sem write semaphore to avoid any timer race. + * The cpu_timer and cpu_slack_timer must be deactivated when calling this + * function. + */ +static void cpufreq_interactive_timer_start( + struct cpufreq_interactive_tunables *tunables, int cpu) +{ + struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu); + unsigned long expires = jiffies + + usecs_to_jiffies(tunables->timer_rate); + unsigned long flags; + + pcpu->cpu_timer.expires = expires; + add_timer_on(&pcpu->cpu_timer, cpu); + if (tunables->timer_slack_val >= 0 && + pcpu->target_freq > pcpu->policy->min) { + expires += usecs_to_jiffies(tunables->timer_slack_val); + pcpu->cpu_slack_timer.expires = expires; + add_timer_on(&pcpu->cpu_slack_timer, cpu); + } + + spin_lock_irqsave(&pcpu->load_lock, flags); + pcpu->time_in_idle = + get_cpu_idle_time(cpu, &pcpu->time_in_idle_timestamp, + tunables->io_is_busy); + pcpu->cputime_speedadj = 0; + pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp; + spin_unlock_irqrestore(&pcpu->load_lock, flags); +} + +static unsigned int freq_to_above_hispeed_delay( + struct cpufreq_interactive_tunables *tunables, + unsigned int freq) +{ + int i; + unsigned int ret; + unsigned long flags; + + spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); + + for (i = 0; i < tunables->nabove_hispeed_delay - 1 && + freq >= tunables->above_hispeed_delay[i+1]; i += 2) + ; + + ret = tunables->above_hispeed_delay[i]; + spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); + return ret; +} + +static unsigned int freq_to_targetload( + struct cpufreq_interactive_tunables *tunables, unsigned int freq) +{ + int i; + unsigned int ret; + unsigned long flags; + + spin_lock_irqsave(&tunables->target_loads_lock, flags); + + for (i = 0; i < tunables->ntarget_loads - 1 && + freq >= tunables->target_loads[i+1]; i += 2) + ; + + ret = tunables->target_loads[i]; + spin_unlock_irqrestore(&tunables->target_loads_lock, flags); + return ret; +} + +/* + * If increasing frequencies never map to a lower target load then + * choose_freq() will find the minimum frequency that does not exceed its + * target load given the current load. + */ +static unsigned int choose_freq(struct cpufreq_interactive_cpuinfo *pcpu, + unsigned int loadadjfreq) +{ + unsigned int freq = pcpu->policy->cur; + unsigned int prevfreq, freqmin, freqmax; + unsigned int tl; + int index; + + freqmin = 0; + freqmax = UINT_MAX; + + do { + prevfreq = freq; + tl = freq_to_targetload(pcpu->policy->governor_data, freq); + + /* + * Find the lowest frequency where the computed load is less + * than or equal to the target load. + */ + + if (cpufreq_frequency_table_target( + pcpu->policy, pcpu->freq_table, loadadjfreq / tl, + CPUFREQ_RELATION_L, &index)) + break; + freq = pcpu->freq_table[index].frequency; + + if (freq > prevfreq) { + /* The previous frequency is too low. */ + freqmin = prevfreq; + + if (freq >= freqmax) { + /* + * Find the highest frequency that is less + * than freqmax. + */ + if (cpufreq_frequency_table_target( + pcpu->policy, pcpu->freq_table, + freqmax - 1, CPUFREQ_RELATION_H, + &index)) + break; + freq = pcpu->freq_table[index].frequency; + + if (freq == freqmin) { + /* + * The first frequency below freqmax + * has already been found to be too + * low. freqmax is the lowest speed + * we found that is fast enough. + */ + freq = freqmax; + break; + } + } + } else if (freq < prevfreq) { + /* The previous frequency is high enough. */ + freqmax = prevfreq; + + if (freq <= freqmin) { + /* + * Find the lowest frequency that is higher + * than freqmin. + */ + if (cpufreq_frequency_table_target( + pcpu->policy, pcpu->freq_table, + freqmin + 1, CPUFREQ_RELATION_L, + &index)) + break; + freq = pcpu->freq_table[index].frequency; + + /* + * If freqmax is the first frequency above + * freqmin then we have already found that + * this speed is fast enough. + */ + if (freq == freqmax) + break; + } + } + + /* If same frequency chosen as previous then done. */ + } while (freq != prevfreq); + + return freq; +} + +static u64 update_load(int cpu) +{ + struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu); + struct cpufreq_interactive_tunables *tunables = + pcpu->policy->governor_data; + u64 now; + u64 now_idle; + u64 delta_idle; + u64 delta_time; + u64 active_time; + + now_idle = get_cpu_idle_time(cpu, &now, tunables->io_is_busy); + delta_idle = (now_idle - pcpu->time_in_idle); + delta_time = (now - pcpu->time_in_idle_timestamp); + + if (delta_time <= delta_idle) + active_time = 0; + else + active_time = delta_time - delta_idle; + + pcpu->cputime_speedadj += active_time * pcpu->policy->cur; + + pcpu->time_in_idle = now_idle; + pcpu->time_in_idle_timestamp = now; + return now; +} + +static void cpufreq_interactive_timer(unsigned long data) +{ + u64 now; + unsigned int delta_time; + u64 cputime_speedadj; + int cpu_load; + struct cpufreq_interactive_cpuinfo *pcpu = + &per_cpu(cpuinfo, data); + struct cpufreq_interactive_tunables *tunables = + pcpu->policy->governor_data; + unsigned int new_freq; + unsigned int loadadjfreq; + unsigned int index; + unsigned long flags; + u64 max_fvtime; + + if (!down_read_trylock(&pcpu->enable_sem)) + return; + if (!pcpu->governor_enabled) + goto exit; + + spin_lock_irqsave(&pcpu->load_lock, flags); + now = update_load(data); + delta_time = (unsigned int)(now - pcpu->cputime_speedadj_timestamp); + cputime_speedadj = pcpu->cputime_speedadj; + spin_unlock_irqrestore(&pcpu->load_lock, flags); + + if (WARN_ON_ONCE(!delta_time)) + goto rearm; + + spin_lock_irqsave(&pcpu->target_freq_lock, flags); + do_div(cputime_speedadj, delta_time); + loadadjfreq = (unsigned int)cputime_speedadj * 100; + cpu_load = loadadjfreq / pcpu->policy->cur; + tunables->boosted = tunables->boost_val || now < tunables->boostpulse_endtime; + + if (cpu_load >= tunables->go_hispeed_load || tunables->boosted) { + if (pcpu->policy->cur < tunables->hispeed_freq) { + new_freq = tunables->hispeed_freq; + } else { + new_freq = choose_freq(pcpu, loadadjfreq); + + if (new_freq < tunables->hispeed_freq) + new_freq = tunables->hispeed_freq; + } + } else { + new_freq = choose_freq(pcpu, loadadjfreq); + if (new_freq > tunables->hispeed_freq && + pcpu->policy->cur < tunables->hispeed_freq) + new_freq = tunables->hispeed_freq; + } + + if (pcpu->policy->cur >= tunables->hispeed_freq && + new_freq > pcpu->policy->cur && + now - pcpu->pol_hispeed_val_time < + freq_to_above_hispeed_delay(tunables, pcpu->policy->cur)) { + trace_cpufreq_interactive_notyet( + data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); + goto rearm; + } + + pcpu->loc_hispeed_val_time = now; + + if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table, + new_freq, CPUFREQ_RELATION_L, + &index)) { + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); + goto rearm; + } + + new_freq = pcpu->freq_table[index].frequency; + + /* + * Do not scale below floor_freq unless we have been at or above the + * floor frequency for the minimum sample time since last validated. + */ + max_fvtime = max(pcpu->pol_floor_val_time, pcpu->loc_floor_val_time); + if (new_freq < pcpu->floor_freq && + pcpu->target_freq >= pcpu->policy->cur) { + if (now - max_fvtime < tunables->min_sample_time) { + trace_cpufreq_interactive_notyet( + data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); + goto rearm; + } + } + + /* + * Update the timestamp for checking whether speed has been held at + * or above the selected frequency for a minimum of min_sample_time, + * if not boosted to hispeed_freq. If boosted to hispeed_freq then we + * allow the speed to drop as soon as the boostpulse duration expires + * (or the indefinite boost is turned off). + */ + + if (!tunables->boosted || new_freq > tunables->hispeed_freq) { + pcpu->floor_freq = new_freq; + if (pcpu->target_freq >= pcpu->policy->cur || + new_freq >= pcpu->policy->cur) + pcpu->loc_floor_val_time = now; + } + + if (pcpu->target_freq == new_freq && + pcpu->target_freq <= pcpu->policy->cur) { + trace_cpufreq_interactive_already( + data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); + goto rearm; + } + + trace_cpufreq_interactive_target(data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); + + pcpu->target_freq = new_freq; + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); + spin_lock_irqsave(&speedchange_cpumask_lock, flags); + cpumask_set_cpu(data, &speedchange_cpumask); + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); + wake_up_process(speedchange_task); + +rearm: + if (!timer_pending(&pcpu->cpu_timer)) + cpufreq_interactive_timer_resched(pcpu); + +exit: + up_read(&pcpu->enable_sem); + return; +} + +static void cpufreq_interactive_idle_end(void) +{ + struct cpufreq_interactive_cpuinfo *pcpu = + &per_cpu(cpuinfo, smp_processor_id()); + + if (!down_read_trylock(&pcpu->enable_sem)) + return; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + return; + } + + /* Arm the timer for 1-2 ticks later if not already. */ + if (!timer_pending(&pcpu->cpu_timer)) { + cpufreq_interactive_timer_resched(pcpu); + } else if (time_after_eq(jiffies, pcpu->cpu_timer.expires)) { + del_timer(&pcpu->cpu_timer); + del_timer(&pcpu->cpu_slack_timer); + cpufreq_interactive_timer(smp_processor_id()); + } + + up_read(&pcpu->enable_sem); +} + +static void cpufreq_interactive_get_policy_info(struct cpufreq_policy *policy, + unsigned int *pmax_freq, + u64 *phvt, u64 *pfvt) +{ + struct cpufreq_interactive_cpuinfo *pcpu; + unsigned int max_freq = 0; + u64 hvt = ~0ULL, fvt = 0; + unsigned int i; + + for_each_cpu(i, policy->cpus) { + pcpu = &per_cpu(cpuinfo, i); + + fvt = max(fvt, pcpu->loc_floor_val_time); + if (pcpu->target_freq > max_freq) { + max_freq = pcpu->target_freq; + hvt = pcpu->loc_hispeed_val_time; + } else if (pcpu->target_freq == max_freq) { + hvt = min(hvt, pcpu->loc_hispeed_val_time); + } + } + + *pmax_freq = max_freq; + *phvt = hvt; + *pfvt = fvt; +} + +static void cpufreq_interactive_adjust_cpu(unsigned int cpu, + struct cpufreq_policy *policy) +{ + struct cpufreq_interactive_cpuinfo *pcpu; + u64 hvt, fvt; + unsigned int max_freq; + int i; + + cpufreq_interactive_get_policy_info(policy, &max_freq, &hvt, &fvt); + + for_each_cpu(i, policy->cpus) { + pcpu = &per_cpu(cpuinfo, i); + pcpu->pol_floor_val_time = fvt; + } + + if (max_freq != policy->cur) { + __cpufreq_driver_target(policy, max_freq, CPUFREQ_RELATION_H); + for_each_cpu(i, policy->cpus) { + pcpu = &per_cpu(cpuinfo, i); + pcpu->pol_hispeed_val_time = hvt; + } + } + + trace_cpufreq_interactive_setspeed(cpu, max_freq, policy->cur); +} + +static int cpufreq_interactive_speedchange_task(void *data) +{ + unsigned int cpu; + cpumask_t tmp_mask; + unsigned long flags; + struct cpufreq_interactive_cpuinfo *pcpu; + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + spin_lock_irqsave(&speedchange_cpumask_lock, flags); + + if (cpumask_empty(&speedchange_cpumask)) { + spin_unlock_irqrestore(&speedchange_cpumask_lock, + flags); + schedule(); + + if (kthread_should_stop()) + break; + + spin_lock_irqsave(&speedchange_cpumask_lock, flags); + } + + set_current_state(TASK_RUNNING); + tmp_mask = speedchange_cpumask; + cpumask_clear(&speedchange_cpumask); + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); + + for_each_cpu(cpu, &tmp_mask) { + pcpu = &per_cpu(cpuinfo, cpu); + + down_write(&pcpu->policy->rwsem); + + if (likely(down_read_trylock(&pcpu->enable_sem))) { + if (likely(pcpu->governor_enabled)) + cpufreq_interactive_adjust_cpu(cpu, + pcpu->policy); + up_read(&pcpu->enable_sem); + } + + up_write(&pcpu->policy->rwsem); + } + } + + return 0; +} + +static void cpufreq_interactive_boost(struct cpufreq_interactive_tunables *tunables) +{ + int i; + int anyboost = 0; + unsigned long flags[2]; + struct cpufreq_interactive_cpuinfo *pcpu; + + tunables->boosted = true; + + spin_lock_irqsave(&speedchange_cpumask_lock, flags[0]); + + for_each_online_cpu(i) { + pcpu = &per_cpu(cpuinfo, i); + + if (!down_read_trylock(&pcpu->enable_sem)) + continue; + + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + continue; + } + + if (tunables != pcpu->policy->governor_data) { + up_read(&pcpu->enable_sem); + continue; + } + + spin_lock_irqsave(&pcpu->target_freq_lock, flags[1]); + if (pcpu->target_freq < tunables->hispeed_freq) { + pcpu->target_freq = tunables->hispeed_freq; + cpumask_set_cpu(i, &speedchange_cpumask); + pcpu->pol_hispeed_val_time = + ktime_to_us(ktime_get()); + anyboost = 1; + } + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags[1]); + + up_read(&pcpu->enable_sem); + } + + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags[0]); + + if (anyboost) + wake_up_process(speedchange_task); +} + +static int cpufreq_interactive_notifier( + struct notifier_block *nb, unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + struct cpufreq_interactive_cpuinfo *pcpu; + int cpu; + unsigned long flags; + + if (val == CPUFREQ_POSTCHANGE) { + pcpu = &per_cpu(cpuinfo, freq->cpu); + if (!down_read_trylock(&pcpu->enable_sem)) + return 0; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + return 0; + } + + for_each_cpu(cpu, pcpu->policy->cpus) { + struct cpufreq_interactive_cpuinfo *pjcpu = + &per_cpu(cpuinfo, cpu); + if (cpu != freq->cpu) { + if (!down_read_trylock(&pjcpu->enable_sem)) + continue; + if (!pjcpu->governor_enabled) { + up_read(&pjcpu->enable_sem); + continue; + } + } + spin_lock_irqsave(&pjcpu->load_lock, flags); + update_load(cpu); + spin_unlock_irqrestore(&pjcpu->load_lock, flags); + if (cpu != freq->cpu) + up_read(&pjcpu->enable_sem); + } + + up_read(&pcpu->enable_sem); + } + return 0; +} + +static struct notifier_block cpufreq_notifier_block = { + .notifier_call = cpufreq_interactive_notifier, +}; + +static unsigned int *get_tokenized_data(const char *buf, int *num_tokens) +{ + const char *cp; + int i; + int ntokens = 1; + unsigned int *tokenized_data; + int err = -EINVAL; + + cp = buf; + while ((cp = strpbrk(cp + 1, " :"))) + ntokens++; + + if (!(ntokens & 0x1)) + goto err; + + tokenized_data = kmalloc(ntokens * sizeof(unsigned int), GFP_KERNEL); + if (!tokenized_data) { + err = -ENOMEM; + goto err; + } + + cp = buf; + i = 0; + while (i < ntokens) { + if (sscanf(cp, "%u", &tokenized_data[i++]) != 1) + goto err_kfree; + + cp = strpbrk(cp, " :"); + if (!cp) + break; + cp++; + } + + if (i != ntokens) + goto err_kfree; + + *num_tokens = ntokens; + return tokenized_data; + +err_kfree: + kfree(tokenized_data); +err: + return ERR_PTR(err); +} + +static ssize_t show_target_loads( + struct cpufreq_interactive_tunables *tunables, + char *buf) +{ + int i; + ssize_t ret = 0; + unsigned long flags; + + spin_lock_irqsave(&tunables->target_loads_lock, flags); + + for (i = 0; i < tunables->ntarget_loads; i++) + ret += sprintf(buf + ret, "%u%s", tunables->target_loads[i], + i & 0x1 ? ":" : " "); + + sprintf(buf + ret - 1, "\n"); + spin_unlock_irqrestore(&tunables->target_loads_lock, flags); + return ret; +} + +static ssize_t store_target_loads( + struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ntokens; + unsigned int *new_target_loads = NULL; + unsigned long flags; + + new_target_loads = get_tokenized_data(buf, &ntokens); + if (IS_ERR(new_target_loads)) + return PTR_RET(new_target_loads); + + spin_lock_irqsave(&tunables->target_loads_lock, flags); + if (tunables->target_loads != default_target_loads) + kfree(tunables->target_loads); + tunables->target_loads = new_target_loads; + tunables->ntarget_loads = ntokens; + spin_unlock_irqrestore(&tunables->target_loads_lock, flags); + return count; +} + +static ssize_t show_above_hispeed_delay( + struct cpufreq_interactive_tunables *tunables, char *buf) +{ + int i; + ssize_t ret = 0; + unsigned long flags; + + spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); + + for (i = 0; i < tunables->nabove_hispeed_delay; i++) + ret += sprintf(buf + ret, "%u%s", + tunables->above_hispeed_delay[i], + i & 0x1 ? ":" : " "); + + sprintf(buf + ret - 1, "\n"); + spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); + return ret; +} + +static ssize_t store_above_hispeed_delay( + struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ntokens; + unsigned int *new_above_hispeed_delay = NULL; + unsigned long flags; + + new_above_hispeed_delay = get_tokenized_data(buf, &ntokens); + if (IS_ERR(new_above_hispeed_delay)) + return PTR_RET(new_above_hispeed_delay); + + spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); + if (tunables->above_hispeed_delay != default_above_hispeed_delay) + kfree(tunables->above_hispeed_delay); + tunables->above_hispeed_delay = new_above_hispeed_delay; + tunables->nabove_hispeed_delay = ntokens; + spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); + return count; + +} + +static ssize_t show_hispeed_freq(struct cpufreq_interactive_tunables *tunables, + char *buf) +{ + return sprintf(buf, "%u\n", tunables->hispeed_freq); +} + +static ssize_t store_hispeed_freq(struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ret; + long unsigned int val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + tunables->hispeed_freq = val; + return count; +} + +static ssize_t show_go_hispeed_load(struct cpufreq_interactive_tunables + *tunables, char *buf) +{ + return sprintf(buf, "%lu\n", tunables->go_hispeed_load); +} + +static ssize_t store_go_hispeed_load(struct cpufreq_interactive_tunables + *tunables, const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + tunables->go_hispeed_load = val; + return count; +} + +static ssize_t show_min_sample_time(struct cpufreq_interactive_tunables + *tunables, char *buf) +{ + return sprintf(buf, "%lu\n", tunables->min_sample_time); +} + +static ssize_t store_min_sample_time(struct cpufreq_interactive_tunables + *tunables, const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + tunables->min_sample_time = val; + return count; +} + +static ssize_t show_timer_rate(struct cpufreq_interactive_tunables *tunables, + char *buf) +{ + return sprintf(buf, "%lu\n", tunables->timer_rate); +} + +static ssize_t store_timer_rate(struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ret; + unsigned long val, val_round; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + val_round = jiffies_to_usecs(usecs_to_jiffies(val)); + if (val != val_round) + pr_warn("timer_rate not aligned to jiffy. Rounded up to %lu\n", + val_round); + + tunables->timer_rate = val_round; + return count; +} + +static ssize_t show_timer_slack(struct cpufreq_interactive_tunables *tunables, + char *buf) +{ + return sprintf(buf, "%d\n", tunables->timer_slack_val); +} + +static ssize_t store_timer_slack(struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtol(buf, 10, &val); + if (ret < 0) + return ret; + + tunables->timer_slack_val = val; + return count; +} + +static ssize_t show_boost(struct cpufreq_interactive_tunables *tunables, + char *buf) +{ + return sprintf(buf, "%d\n", tunables->boost_val); +} + +static ssize_t store_boost(struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + tunables->boost_val = val; + + if (tunables->boost_val) { + trace_cpufreq_interactive_boost("on"); + if (!tunables->boosted) + cpufreq_interactive_boost(tunables); + } else { + tunables->boostpulse_endtime = ktime_to_us(ktime_get()); + trace_cpufreq_interactive_unboost("off"); + } + + return count; +} + +static ssize_t store_boostpulse(struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + tunables->boostpulse_endtime = ktime_to_us(ktime_get()) + + tunables->boostpulse_duration_val; + trace_cpufreq_interactive_boost("pulse"); + if (!tunables->boosted) + cpufreq_interactive_boost(tunables); + return count; +} + +static ssize_t show_boostpulse_duration(struct cpufreq_interactive_tunables + *tunables, char *buf) +{ + return sprintf(buf, "%d\n", tunables->boostpulse_duration_val); +} + +static ssize_t store_boostpulse_duration(struct cpufreq_interactive_tunables + *tunables, const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + tunables->boostpulse_duration_val = val; + return count; +} + +static ssize_t show_io_is_busy(struct cpufreq_interactive_tunables *tunables, + char *buf) +{ + return sprintf(buf, "%u\n", tunables->io_is_busy); +} + +static ssize_t store_io_is_busy(struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + tunables->io_is_busy = val; + return count; +} + +/* + * Create show/store routines + * - sys: One governor instance for complete SYSTEM + * - pol: One governor instance per struct cpufreq_policy + */ +#define show_gov_pol_sys(file_name) \ +static ssize_t show_##file_name##_gov_sys \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + return show_##file_name(common_tunables, buf); \ +} \ + \ +static ssize_t show_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, char *buf) \ +{ \ + return show_##file_name(policy->governor_data, buf); \ +} + +#define store_gov_pol_sys(file_name) \ +static ssize_t store_##file_name##_gov_sys \ +(struct kobject *kobj, struct attribute *attr, const char *buf, \ + size_t count) \ +{ \ + return store_##file_name(common_tunables, buf, count); \ +} \ + \ +static ssize_t store_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, const char *buf, size_t count) \ +{ \ + return store_##file_name(policy->governor_data, buf, count); \ +} + +#define show_store_gov_pol_sys(file_name) \ +show_gov_pol_sys(file_name); \ +store_gov_pol_sys(file_name) + +show_store_gov_pol_sys(target_loads); +show_store_gov_pol_sys(above_hispeed_delay); +show_store_gov_pol_sys(hispeed_freq); +show_store_gov_pol_sys(go_hispeed_load); +show_store_gov_pol_sys(min_sample_time); +show_store_gov_pol_sys(timer_rate); +show_store_gov_pol_sys(timer_slack); +show_store_gov_pol_sys(boost); +store_gov_pol_sys(boostpulse); +show_store_gov_pol_sys(boostpulse_duration); +show_store_gov_pol_sys(io_is_busy); + +#define gov_sys_attr_rw(_name) \ +static struct global_attr _name##_gov_sys = \ +__ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys) + +#define gov_pol_attr_rw(_name) \ +static struct freq_attr _name##_gov_pol = \ +__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol) + +#define gov_sys_pol_attr_rw(_name) \ + gov_sys_attr_rw(_name); \ + gov_pol_attr_rw(_name) + +gov_sys_pol_attr_rw(target_loads); +gov_sys_pol_attr_rw(above_hispeed_delay); +gov_sys_pol_attr_rw(hispeed_freq); +gov_sys_pol_attr_rw(go_hispeed_load); +gov_sys_pol_attr_rw(min_sample_time); +gov_sys_pol_attr_rw(timer_rate); +gov_sys_pol_attr_rw(timer_slack); +gov_sys_pol_attr_rw(boost); +gov_sys_pol_attr_rw(boostpulse_duration); +gov_sys_pol_attr_rw(io_is_busy); + +static struct global_attr boostpulse_gov_sys = + __ATTR(boostpulse, 0200, NULL, store_boostpulse_gov_sys); + +static struct freq_attr boostpulse_gov_pol = + __ATTR(boostpulse, 0200, NULL, store_boostpulse_gov_pol); + +/* One Governor instance for entire system */ +static struct attribute *interactive_attributes_gov_sys[] = { + &target_loads_gov_sys.attr, + &above_hispeed_delay_gov_sys.attr, + &hispeed_freq_gov_sys.attr, + &go_hispeed_load_gov_sys.attr, + &min_sample_time_gov_sys.attr, + &timer_rate_gov_sys.attr, + &timer_slack_gov_sys.attr, + &boost_gov_sys.attr, + &boostpulse_gov_sys.attr, + &boostpulse_duration_gov_sys.attr, + &io_is_busy_gov_sys.attr, + NULL, +}; + +static struct attribute_group interactive_attr_group_gov_sys = { + .attrs = interactive_attributes_gov_sys, + .name = "interactive", +}; + +/* Per policy governor instance */ +static struct attribute *interactive_attributes_gov_pol[] = { + &target_loads_gov_pol.attr, + &above_hispeed_delay_gov_pol.attr, + &hispeed_freq_gov_pol.attr, + &go_hispeed_load_gov_pol.attr, + &min_sample_time_gov_pol.attr, + &timer_rate_gov_pol.attr, + &timer_slack_gov_pol.attr, + &boost_gov_pol.attr, + &boostpulse_gov_pol.attr, + &boostpulse_duration_gov_pol.attr, + &io_is_busy_gov_pol.attr, + NULL, +}; + +static struct attribute_group interactive_attr_group_gov_pol = { + .attrs = interactive_attributes_gov_pol, + .name = "interactive", +}; + +static struct attribute_group *get_sysfs_attr(void) +{ + if (have_governor_per_policy()) + return &interactive_attr_group_gov_pol; + else + return &interactive_attr_group_gov_sys; +} + +static int cpufreq_interactive_idle_notifier(struct notifier_block *nb, + unsigned long val, + void *data) +{ + if (val == IDLE_END) + cpufreq_interactive_idle_end(); + + return 0; +} + +static struct notifier_block cpufreq_interactive_idle_nb = { + .notifier_call = cpufreq_interactive_idle_notifier, +}; + +static int cpufreq_governor_interactive(struct cpufreq_policy *policy, + unsigned int event) +{ + int rc; + unsigned int j; + struct cpufreq_interactive_cpuinfo *pcpu; + struct cpufreq_frequency_table *freq_table; + struct cpufreq_interactive_tunables *tunables; + unsigned long flags; + + if (have_governor_per_policy()) + tunables = policy->governor_data; + else + tunables = common_tunables; + + if (WARN_ON(!tunables && (event != CPUFREQ_GOV_POLICY_INIT))) + return -EINVAL; + + switch (event) { + case CPUFREQ_GOV_POLICY_INIT: + if (have_governor_per_policy()) { + WARN_ON(tunables); + } else if (tunables) { + tunables->usage_count++; + policy->governor_data = tunables; + return 0; + } + + tunables = kzalloc(sizeof(*tunables), GFP_KERNEL); + if (!tunables) { + pr_err("%s: POLICY_INIT: kzalloc failed\n", __func__); + return -ENOMEM; + } + + tunables->usage_count = 1; + tunables->above_hispeed_delay = default_above_hispeed_delay; + tunables->nabove_hispeed_delay = + ARRAY_SIZE(default_above_hispeed_delay); + tunables->go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; + tunables->target_loads = default_target_loads; + tunables->ntarget_loads = ARRAY_SIZE(default_target_loads); + tunables->min_sample_time = DEFAULT_MIN_SAMPLE_TIME; + tunables->timer_rate = DEFAULT_TIMER_RATE; + tunables->boostpulse_duration_val = DEFAULT_MIN_SAMPLE_TIME; + tunables->timer_slack_val = DEFAULT_TIMER_SLACK; + + spin_lock_init(&tunables->target_loads_lock); + spin_lock_init(&tunables->above_hispeed_delay_lock); + + policy->governor_data = tunables; + if (!have_governor_per_policy()) { + common_tunables = tunables; + } + + rc = sysfs_create_group(get_governor_parent_kobj(policy), + get_sysfs_attr()); + if (rc) { + kfree(tunables); + policy->governor_data = NULL; + if (!have_governor_per_policy()) { + common_tunables = NULL; + } + return rc; + } + + if (!policy->governor->initialized) { + idle_notifier_register(&cpufreq_interactive_idle_nb); + cpufreq_register_notifier(&cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + + break; + + case CPUFREQ_GOV_POLICY_EXIT: + if (!--tunables->usage_count) { + if (policy->governor->initialized == 1) { + cpufreq_unregister_notifier(&cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + idle_notifier_unregister(&cpufreq_interactive_idle_nb); + } + + sysfs_remove_group(get_governor_parent_kobj(policy), + get_sysfs_attr()); + + kfree(tunables); + common_tunables = NULL; + } + + policy->governor_data = NULL; + break; + + case CPUFREQ_GOV_START: + mutex_lock(&gov_lock); + + freq_table = cpufreq_frequency_get_table(policy->cpu); + if (!tunables->hispeed_freq) + tunables->hispeed_freq = policy->max; + + for_each_cpu(j, policy->cpus) { + pcpu = &per_cpu(cpuinfo, j); + pcpu->policy = policy; + pcpu->target_freq = policy->cur; + pcpu->freq_table = freq_table; + pcpu->floor_freq = pcpu->target_freq; + pcpu->pol_floor_val_time = + ktime_to_us(ktime_get()); + pcpu->loc_floor_val_time = pcpu->pol_floor_val_time; + pcpu->pol_hispeed_val_time = pcpu->pol_floor_val_time; + pcpu->loc_hispeed_val_time = pcpu->pol_floor_val_time; + down_write(&pcpu->enable_sem); + del_timer_sync(&pcpu->cpu_timer); + del_timer_sync(&pcpu->cpu_slack_timer); + cpufreq_interactive_timer_start(tunables, j); + pcpu->governor_enabled = 1; + up_write(&pcpu->enable_sem); + } + + mutex_unlock(&gov_lock); + break; + + case CPUFREQ_GOV_STOP: + mutex_lock(&gov_lock); + for_each_cpu(j, policy->cpus) { + pcpu = &per_cpu(cpuinfo, j); + down_write(&pcpu->enable_sem); + pcpu->governor_enabled = 0; + del_timer_sync(&pcpu->cpu_timer); + del_timer_sync(&pcpu->cpu_slack_timer); + up_write(&pcpu->enable_sem); + } + + mutex_unlock(&gov_lock); + break; + + case CPUFREQ_GOV_LIMITS: + if (policy->max < policy->cur) + __cpufreq_driver_target(policy, + policy->max, CPUFREQ_RELATION_H); + else if (policy->min > policy->cur) + __cpufreq_driver_target(policy, + policy->min, CPUFREQ_RELATION_L); + for_each_cpu(j, policy->cpus) { + pcpu = &per_cpu(cpuinfo, j); + + down_read(&pcpu->enable_sem); + if (pcpu->governor_enabled == 0) { + up_read(&pcpu->enable_sem); + continue; + } + + spin_lock_irqsave(&pcpu->target_freq_lock, flags); + if (policy->max < pcpu->target_freq) + pcpu->target_freq = policy->max; + else if (policy->min > pcpu->target_freq) + pcpu->target_freq = policy->min; + + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); + up_read(&pcpu->enable_sem); + } + break; + } + return 0; +} + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE +static +#endif +struct cpufreq_governor cpufreq_gov_interactive = { + .name = "interactive", + .governor = cpufreq_governor_interactive, + .max_transition_latency = 10000000, + .owner = THIS_MODULE, +}; + +static void cpufreq_interactive_nop_timer(unsigned long data) +{ +} + +static int __init cpufreq_interactive_init(void) +{ + unsigned int i; + struct cpufreq_interactive_cpuinfo *pcpu; + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + int ret = 0; + + /* Initalize per-cpu timers */ + for_each_possible_cpu(i) { + pcpu = &per_cpu(cpuinfo, i); + init_timer_deferrable(&pcpu->cpu_timer); + pcpu->cpu_timer.function = cpufreq_interactive_timer; + pcpu->cpu_timer.data = i; + init_timer(&pcpu->cpu_slack_timer); + pcpu->cpu_slack_timer.function = cpufreq_interactive_nop_timer; + spin_lock_init(&pcpu->load_lock); + spin_lock_init(&pcpu->target_freq_lock); + init_rwsem(&pcpu->enable_sem); + } + + spin_lock_init(&speedchange_cpumask_lock); + mutex_init(&gov_lock); + speedchange_task = + kthread_create(cpufreq_interactive_speedchange_task, NULL, + "cfinteractive"); + if (IS_ERR(speedchange_task)) + return PTR_ERR(speedchange_task); + + sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, ¶m); + get_task_struct(speedchange_task); + + /* NB: wake up so the thread does not look hung to the freezer */ + wake_up_process(speedchange_task); + + ret = cpufreq_register_governor(&cpufreq_gov_interactive); + if (ret) { + kthread_stop(speedchange_task); + put_task_struct(speedchange_task); + } + return ret; +} + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE +fs_initcall(cpufreq_interactive_init); +#else +module_init(cpufreq_interactive_init); +#endif + +static void __exit cpufreq_interactive_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_interactive); + kthread_stop(speedchange_task); + put_task_struct(speedchange_task); +} + +module_exit(cpufreq_interactive_exit); + +MODULE_AUTHOR("Mike Chan <mike@android.com>"); +MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for " + "Latency sensitive workloads"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/cpufreq_times.c b/drivers/cpufreq/cpufreq_times.c new file mode 100644 index 000000000000..e7a8b636a5f4 --- /dev/null +++ b/drivers/cpufreq/cpufreq_times.c @@ -0,0 +1,464 @@ +/* drivers/cpufreq/cpufreq_times.c + * + * Copyright (C) 2018 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/cpufreq.h> +#include <linux/cpufreq_times.h> +#include <linux/cputime.h> +#include <linux/hashtable.h> +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/threads.h> + +#define UID_HASH_BITS 10 + +DECLARE_HASHTABLE(uid_hash_table, UID_HASH_BITS); + +static DEFINE_SPINLOCK(task_time_in_state_lock); /* task->time_in_state */ +static DEFINE_SPINLOCK(uid_lock); /* uid_hash_table */ + +struct uid_entry { + uid_t uid; + unsigned int max_state; + struct hlist_node hash; + struct rcu_head rcu; + u64 time_in_state[0]; +}; + +/** + * struct cpu_freqs - per-cpu frequency information + * @offset: start of these freqs' stats in task time_in_state array + * @max_state: number of entries in freq_table + * @last_index: index in freq_table of last frequency switched to + * @freq_table: list of available frequencies + */ +struct cpu_freqs { + unsigned int offset; + unsigned int max_state; + unsigned int last_index; + unsigned int freq_table[0]; +}; + +static struct cpu_freqs *all_freqs[NR_CPUS]; + +static unsigned int next_offset; + + +/* Caller must hold rcu_read_lock() */ +static struct uid_entry *find_uid_entry_rcu(uid_t uid) +{ + struct uid_entry *uid_entry; + + hash_for_each_possible_rcu(uid_hash_table, uid_entry, hash, uid) { + if (uid_entry->uid == uid) + return uid_entry; + } + return NULL; +} + +/* Caller must hold uid lock */ +static struct uid_entry *find_uid_entry_locked(uid_t uid) +{ + struct uid_entry *uid_entry; + + hash_for_each_possible(uid_hash_table, uid_entry, hash, uid) { + if (uid_entry->uid == uid) + return uid_entry; + } + return NULL; +} + +/* Caller must hold uid lock */ +static struct uid_entry *find_or_register_uid_locked(uid_t uid) +{ + struct uid_entry *uid_entry, *temp; + unsigned int max_state = READ_ONCE(next_offset); + size_t alloc_size = sizeof(*uid_entry) + max_state * + sizeof(uid_entry->time_in_state[0]); + + uid_entry = find_uid_entry_locked(uid); + if (uid_entry) { + if (uid_entry->max_state == max_state) + return uid_entry; + /* uid_entry->time_in_state is too small to track all freqs, so + * expand it. + */ + temp = __krealloc(uid_entry, alloc_size, GFP_ATOMIC); + if (!temp) + return uid_entry; + temp->max_state = max_state; + memset(temp->time_in_state + uid_entry->max_state, 0, + (max_state - uid_entry->max_state) * + sizeof(uid_entry->time_in_state[0])); + if (temp != uid_entry) { + hlist_replace_rcu(&uid_entry->hash, &temp->hash); + kfree_rcu(uid_entry, rcu); + } + return temp; + } + + uid_entry = kzalloc(alloc_size, GFP_ATOMIC); + if (!uid_entry) + return NULL; + + uid_entry->uid = uid; + uid_entry->max_state = max_state; + + hash_add_rcu(uid_hash_table, &uid_entry->hash, uid); + + return uid_entry; +} + +static bool freq_index_invalid(unsigned int index) +{ + unsigned int cpu; + struct cpu_freqs *freqs; + + for_each_possible_cpu(cpu) { + freqs = all_freqs[cpu]; + if (!freqs || index < freqs->offset || + freqs->offset + freqs->max_state <= index) + continue; + return freqs->freq_table[index - freqs->offset] == + CPUFREQ_ENTRY_INVALID; + } + return true; +} + +static int single_uid_time_in_state_show(struct seq_file *m, void *ptr) +{ + struct uid_entry *uid_entry; + unsigned int i; + u64 time; + uid_t uid = from_kuid_munged(current_user_ns(), *(kuid_t *)m->private); + + if (uid == overflowuid) + return -EINVAL; + + rcu_read_lock(); + + uid_entry = find_uid_entry_rcu(uid); + if (!uid_entry) { + rcu_read_unlock(); + return 0; + } + + for (i = 0; i < uid_entry->max_state; ++i) { + if (freq_index_invalid(i)) + continue; + time = cputime_to_clock_t(uid_entry->time_in_state[i]); + seq_write(m, &time, sizeof(time)); + } + + rcu_read_unlock(); + + return 0; +} + +static void *uid_seq_start(struct seq_file *seq, loff_t *pos) +{ + if (*pos >= HASH_SIZE(uid_hash_table)) + return NULL; + + return &uid_hash_table[*pos]; +} + +static void *uid_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + (*pos)++; + + if (*pos >= HASH_SIZE(uid_hash_table)) + return NULL; + + return &uid_hash_table[*pos]; +} + +static void uid_seq_stop(struct seq_file *seq, void *v) { } + +static int uid_time_in_state_seq_show(struct seq_file *m, void *v) +{ + struct uid_entry *uid_entry; + struct cpu_freqs *freqs, *last_freqs = NULL; + int i, cpu; + + if (v == uid_hash_table) { + seq_puts(m, "uid:"); + for_each_possible_cpu(cpu) { + freqs = all_freqs[cpu]; + if (!freqs || freqs == last_freqs) + continue; + last_freqs = freqs; + for (i = 0; i < freqs->max_state; i++) { + if (freqs->freq_table[i] == + CPUFREQ_ENTRY_INVALID) + continue; + seq_printf(m, " %d", freqs->freq_table[i]); + } + } + seq_putc(m, '\n'); + } + + rcu_read_lock(); + + hlist_for_each_entry_rcu(uid_entry, (struct hlist_head *)v, hash) { + if (uid_entry->max_state) + seq_printf(m, "%d:", uid_entry->uid); + for (i = 0; i < uid_entry->max_state; ++i) { + if (freq_index_invalid(i)) + continue; + seq_printf(m, " %lu", (unsigned long)cputime_to_clock_t( + uid_entry->time_in_state[i])); + } + if (uid_entry->max_state) + seq_putc(m, '\n'); + } + + rcu_read_unlock(); + return 0; +} + +void cpufreq_task_times_init(struct task_struct *p) +{ + unsigned long flags; + + spin_lock_irqsave(&task_time_in_state_lock, flags); + p->time_in_state = NULL; + spin_unlock_irqrestore(&task_time_in_state_lock, flags); + p->max_state = 0; +} + +void cpufreq_task_times_alloc(struct task_struct *p) +{ + void *temp; + unsigned long flags; + unsigned int max_state = READ_ONCE(next_offset); + + /* We use one array to avoid multiple allocs per task */ + temp = kcalloc(max_state, sizeof(p->time_in_state[0]), GFP_ATOMIC); + if (!temp) + return; + + spin_lock_irqsave(&task_time_in_state_lock, flags); + p->time_in_state = temp; + spin_unlock_irqrestore(&task_time_in_state_lock, flags); + p->max_state = max_state; +} + +/* Caller must hold task_time_in_state_lock */ +static int cpufreq_task_times_realloc_locked(struct task_struct *p) +{ + void *temp; + unsigned int max_state = READ_ONCE(next_offset); + + temp = krealloc(p->time_in_state, max_state * sizeof(u64), GFP_ATOMIC); + if (!temp) + return -ENOMEM; + p->time_in_state = temp; + memset(p->time_in_state + p->max_state, 0, + (max_state - p->max_state) * sizeof(u64)); + p->max_state = max_state; + return 0; +} + +void cpufreq_task_times_exit(struct task_struct *p) +{ + unsigned long flags; + void *temp; + + if (!p->time_in_state) + return; + + spin_lock_irqsave(&task_time_in_state_lock, flags); + temp = p->time_in_state; + p->time_in_state = NULL; + spin_unlock_irqrestore(&task_time_in_state_lock, flags); + kfree(temp); +} + +int proc_time_in_state_show(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *p) +{ + unsigned int cpu, i; + cputime_t cputime; + unsigned long flags; + struct cpu_freqs *freqs; + struct cpu_freqs *last_freqs = NULL; + + spin_lock_irqsave(&task_time_in_state_lock, flags); + for_each_possible_cpu(cpu) { + freqs = all_freqs[cpu]; + if (!freqs || freqs == last_freqs) + continue; + last_freqs = freqs; + + seq_printf(m, "cpu%u\n", cpu); + for (i = 0; i < freqs->max_state; i++) { + if (freqs->freq_table[i] == CPUFREQ_ENTRY_INVALID) + continue; + cputime = 0; + if (freqs->offset + i < p->max_state && + p->time_in_state) + cputime = p->time_in_state[freqs->offset + i]; + seq_printf(m, "%u %lu\n", freqs->freq_table[i], + (unsigned long)cputime_to_clock_t(cputime)); + } + } + spin_unlock_irqrestore(&task_time_in_state_lock, flags); + return 0; +} + +void cpufreq_acct_update_power(struct task_struct *p, cputime_t cputime) +{ + unsigned long flags; + unsigned int state; + struct uid_entry *uid_entry; + struct cpu_freqs *freqs = all_freqs[task_cpu(p)]; + uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p)); + + if (!freqs || p->flags & PF_EXITING) + return; + + state = freqs->offset + READ_ONCE(freqs->last_index); + + spin_lock_irqsave(&task_time_in_state_lock, flags); + if ((state < p->max_state || !cpufreq_task_times_realloc_locked(p)) && + p->time_in_state) + p->time_in_state[state] += cputime; + spin_unlock_irqrestore(&task_time_in_state_lock, flags); + + spin_lock_irqsave(&uid_lock, flags); + uid_entry = find_or_register_uid_locked(uid); + if (uid_entry && state < uid_entry->max_state) + uid_entry->time_in_state[state] += cputime; + spin_unlock_irqrestore(&uid_lock, flags); +} + +void cpufreq_times_create_policy(struct cpufreq_policy *policy) +{ + int cpu, index; + unsigned int count = 0; + struct cpufreq_frequency_table *pos, *table; + struct cpu_freqs *freqs; + void *tmp; + + if (all_freqs[policy->cpu]) + return; + + table = cpufreq_frequency_get_table(policy->cpu); + if (!table) + return; + + cpufreq_for_each_entry(pos, table) + count++; + + tmp = kzalloc(sizeof(*freqs) + sizeof(freqs->freq_table[0]) * count, + GFP_KERNEL); + if (!tmp) + return; + + freqs = tmp; + freqs->max_state = count; + + index = cpufreq_frequency_table_get_index(policy, policy->cur); + if (index >= 0) + WRITE_ONCE(freqs->last_index, index); + + cpufreq_for_each_entry(pos, table) + freqs->freq_table[pos - table] = pos->frequency; + + freqs->offset = next_offset; + WRITE_ONCE(next_offset, freqs->offset + count); + for_each_cpu(cpu, policy->related_cpus) + all_freqs[cpu] = freqs; +} + +void cpufreq_task_times_remove_uids(uid_t uid_start, uid_t uid_end) +{ + struct uid_entry *uid_entry; + struct hlist_node *tmp; + unsigned long flags; + + spin_lock_irqsave(&uid_lock, flags); + + for (; uid_start <= uid_end; uid_start++) { + hash_for_each_possible_safe(uid_hash_table, uid_entry, tmp, + hash, uid_start) { + if (uid_start == uid_entry->uid) { + hash_del_rcu(&uid_entry->hash); + kfree_rcu(uid_entry, rcu); + } + } + } + + spin_unlock_irqrestore(&uid_lock, flags); +} + +void cpufreq_times_record_transition(struct cpufreq_freqs *freq) +{ + int index; + struct cpu_freqs *freqs = all_freqs[freq->cpu]; + struct cpufreq_policy *policy; + + if (!freqs) + return; + + policy = cpufreq_cpu_get(freq->cpu); + if (!policy) + return; + + index = cpufreq_frequency_table_get_index(policy, freq->new); + if (index >= 0) + WRITE_ONCE(freqs->last_index, index); + + cpufreq_cpu_put(policy); +} + +static const struct seq_operations uid_time_in_state_seq_ops = { + .start = uid_seq_start, + .next = uid_seq_next, + .stop = uid_seq_stop, + .show = uid_time_in_state_seq_show, +}; + +static int uid_time_in_state_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &uid_time_in_state_seq_ops); +} + +int single_uid_time_in_state_open(struct inode *inode, struct file *file) +{ + return single_open(file, single_uid_time_in_state_show, + &(inode->i_uid)); +} + +static const struct file_operations uid_time_in_state_fops = { + .open = uid_time_in_state_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init cpufreq_times_init(void) +{ + proc_create_data("uid_time_in_state", 0444, NULL, + &uid_time_in_state_fops, NULL); + + return 0; +} + +early_initcall(cpufreq_times_init); |