[PATCH OpenHarmony-5.10 1/3] sched: optimization for Energy Aware Scheduling (EAS)

From: Srinath Sridharan <srinathsr@google.com>

ohos inclusion
category: feature
issue: #I4TNS2
CVE: NA

Signed-off-by: Hu Zhaodong <huzhaodong@huawei.com>

-------------------------------------------

EAS scheduler optimization

Allow migrating a running CFS task from the clock interrupt (tick).

Signed-off-by: Satya Durga Srinivasu Prabhala <satyap@codeaurora.org>
Signed-off-by: Vikram Mulukutla <markivx@codeaurora.org>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
---
 init/Kconfig         |   6 ++
 kernel/sched/core.c  |  35 +++++++++++
 kernel/sched/fair.c  | 141 ++++++++++++++++++++++++++++++++++++++++++-
 kernel/sched/sched.h |   7 +++
 4 files changed, 188 insertions(+), 1 deletion(-)

diff --git a/init/Kconfig b/init/Kconfig
index 1d248e9c5a89..db7449e779c6 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -860,6 +860,12 @@ config UCLAMP_BUCKETS_COUNT
 
 source "kernel/sched/rtg/Kconfig"
 
+config SCHED_EAS
+	bool "EAS scheduler optimization"
+	default n
+	help
+	  Check and migrate the CFS process to a more suitable CPU in the tick.
+
 endmenu
 
 #
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8e506f6efc73..471b2129ea84 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4134,6 +4134,11 @@ void scheduler_tick(void)
 #ifdef CONFIG_SMP
 	rq->idle_balance = idle_cpu(cpu);
 	trigger_load_balance(rq);
+
+#ifdef CONFIG_SCHED_EAS
+	if (curr->sched_class->check_for_migration)
+		curr->sched_class->check_for_migration(rq, curr);
+#endif
 #endif
 }
 
@@ -7025,6 +7030,32 @@ void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf,
 	attach_tasks_core(&tasks, rq);
 }
 
+#ifdef CONFIG_SCHED_EAS
+static void clear_eas_migration_request(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	clear_reserved(cpu);
+	if (rq->push_task) {
+		struct task_struct *push_task = NULL;
+
+		raw_spin_lock_irqsave(&rq->lock, flags);
+		if (rq->push_task) {
+			clear_reserved(rq->push_cpu);
+			push_task = rq->push_task;
+			rq->push_task = NULL;
+		}
+		rq->active_balance = 0;
+		raw_spin_unlock_irqrestore(&rq->lock, flags);
+		if (push_task)
+			put_task_struct(push_task);
+	}
+}
+#else
+static inline void clear_eas_migration_request(int cpu) {}
+#endif
+
 #ifdef CONFIG_CPU_ISOLATION_OPT
 int do_isolation_work_cpu_stop(void *data)
 {
@@ -7058,6 +7089,7 @@ int do_isolation_work_cpu_stop(void *data)
 		set_rq_online(rq);
 	rq_unlock(rq, &rf);
 
+	clear_eas_migration_request(cpu);
 	local_irq_enable();
 	return 0;
 }
@@ -7425,6 +7457,7 @@ int sched_cpu_starting(unsigned int cpu)
 {
 	sched_rq_cpu_starting(cpu);
 	sched_tick_start(cpu);
+	clear_eas_migration_request(cpu);
 	return 0;
 }
 
@@ -7447,6 +7480,8 @@ int sched_cpu_dying(unsigned int cpu)
 	BUG_ON(rq->nr_running != 1);
 	rq_unlock_irqrestore(rq, &rf);
 
+	clear_eas_migration_request(cpu);
+
 	calc_load_migrate(rq);
 	update_max_interval();
 	nohz_balance_exit_idle(rq);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3b8d6c1dfc30..9789a385fecd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10110,9 +10110,13 @@ static int active_load_balance_cpu_stop(void *data)
 	int busiest_cpu = cpu_of(busiest_rq);
 	int target_cpu = busiest_rq->push_cpu;
 	struct rq *target_rq = cpu_rq(target_cpu);
-	struct sched_domain *sd;
+	struct sched_domain *sd = NULL;
 	struct task_struct *p = NULL;
 	struct rq_flags rf;
+#ifdef CONFIG_SCHED_EAS
+	struct task_struct *push_task;
+	int push_task_detached = 0;
+#endif
 
 	rq_lock_irq(busiest_rq, &rf);
 	/*
@@ -10139,6 +10143,32 @@ static int active_load_balance_cpu_stop(void *data)
 	 */
 	BUG_ON(busiest_rq == target_rq);
 
+#ifdef CONFIG_SCHED_EAS
+	push_task = busiest_rq->push_task;
+	target_cpu = busiest_rq->push_cpu;
+	if (push_task) {
+		struct lb_env env = {
+			.sd		= sd,
+			.dst_cpu	= target_cpu,
+			.dst_rq		= target_rq,
+			.src_cpu	= busiest_rq->cpu,
+			.src_rq		= busiest_rq,
+			.idle		= CPU_IDLE,
+			.flags		= 0,
+			.loop		= 0,
+		};
+		if (task_on_rq_queued(push_task) &&
+		    push_task->state == TASK_RUNNING &&
+		    task_cpu(push_task) == busiest_cpu &&
+		    cpu_online(target_cpu)) {
+			update_rq_clock(busiest_rq);
+			detach_task(push_task, &env);
+			push_task_detached = 1;
+		}
+		goto out_unlock;
+	}
+#endif
+
 	/* Search for an sd spanning us and the target CPU. */
 	rcu_read_lock();
 	for_each_domain(target_cpu, sd) {
@@ -10178,8 +10208,23 @@ static int active_load_balance_cpu_stop(void *data)
 	rcu_read_unlock();
 out_unlock:
 	busiest_rq->active_balance = 0;
+
+#ifdef CONFIG_SCHED_EAS
+	push_task = busiest_rq->push_task;
+	if (push_task)
+		busiest_rq->push_task = NULL;
+#endif
 	rq_unlock(busiest_rq, &rf);
 
+#ifdef CONFIG_SCHED_EAS
+	if (push_task) {
+		if (push_task_detached)
+			attach_one_task(target_rq, push_task);
+
+		put_task_struct(push_task);
+	}
+#endif
+
 	if (p)
 		attach_one_task(target_rq, p);
 
@@ -10979,6 +11024,97 @@ static void rq_offline_fair(struct rq *rq)
 	unthrottle_offline_cfs_rqs(rq);
 }
 
+#ifdef CONFIG_SCHED_EAS
+static inline int
+kick_active_balance(struct rq *rq, struct task_struct *p, int new_cpu)
+{
+	unsigned long flags;
+	int rc = 0;
+
+	if (cpu_of(rq) == new_cpu)
+		return rc;
+
+	/* Invoke active balance to force migrate currently running task */
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	if (!rq->active_balance) {
+		rq->active_balance = 1;
+		rq->push_cpu = new_cpu;
+		get_task_struct(p);
+		rq->push_task = p;
+		rc = 1;
+	}
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+	return rc;
+}
+
+DEFINE_RAW_SPINLOCK(migration_lock);
+void check_for_migration_fair(struct rq *rq, struct task_struct *p)
+{
+	int active_balance;
+	int new_cpu = -1;
+	int prev_cpu = task_cpu(p);
+	int ret;
+
+#ifdef CONFIG_SCHED_RTG
+	bool need_down_migrate = false;
+	struct cpumask *rtg_target = find_rtg_target(p);
+
+	if (rtg_target &&
+	    (capacity_orig_of(prev_cpu) >
+	     capacity_orig_of(cpumask_first(rtg_target))))
+		need_down_migrate = true;
+#endif
+
+	if (rq->misfit_task_load) {
+		if (rq->curr->state != TASK_RUNNING ||
+		    rq->curr->nr_cpus_allowed == 1)
+			return;
+
+		raw_spin_lock(&migration_lock);
+#ifdef CONFIG_SCHED_RTG
+		if (rtg_target) {
+			new_cpu = find_rtg_cpu(p);
+
+			if (new_cpu != -1 && need_down_migrate &&
+			    cpumask_test_cpu(new_cpu, rtg_target) &&
+			    idle_cpu(new_cpu))
+				goto do_active_balance;
+
+			if (new_cpu != -1 &&
+			    capacity_orig_of(new_cpu) > capacity_orig_of(prev_cpu))
+				goto do_active_balance;
+
+			goto out_unlock;
+		}
+#endif
+		rcu_read_lock();
+		new_cpu = find_energy_efficient_cpu(p, prev_cpu);
+		rcu_read_unlock();
+
+		if (new_cpu == -1 ||
+		    capacity_orig_of(new_cpu) <= capacity_orig_of(prev_cpu))
+			goto out_unlock;
+#ifdef CONFIG_SCHED_RTG
+do_active_balance:
+#endif
+		active_balance = kick_active_balance(rq, p, new_cpu);
+		if (active_balance) {
+			mark_reserved(new_cpu);
+			raw_spin_unlock(&migration_lock);
+			ret = stop_one_cpu_nowait(prev_cpu,
+						  active_load_balance_cpu_stop, rq,
+						  &rq->active_balance_work);
+			if (!ret)
+				clear_reserved(new_cpu);
+			else
+				wake_up_if_idle(new_cpu);
+			return;
+		}
+out_unlock:
+		raw_spin_unlock(&migration_lock);
+	}
+}
+#endif /* CONFIG_SCHED_EAS */
 #endif /* CONFIG_SMP */
 
 /*
@@ -11530,6 +11666,9 @@ const struct sched_class fair_sched_class
 #ifdef CONFIG_SCHED_WALT
 	.fixup_walt_sched_stats = walt_fixup_sched_stats_fair,
 #endif
+#ifdef CONFIG_SCHED_EAS
+	.check_for_migration = check_for_migration_fair,
+#endif
 };
 
 #ifdef CONFIG_SCHED_DEBUG
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d79744dcc048..1a4f1806eb78 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1047,6 +1047,9 @@ struct rq {
 	/* For active balancing */
 	int			active_balance;
 	int			push_cpu;
+#ifdef CONFIG_SCHED_EAS
+	struct task_struct	*push_task;
+#endif
 	struct cpu_stop_work	active_balance_work;
 
 	/* CPU of this runqueue: */
@@ -1925,6 +1928,9 @@ struct sched_class {
 	void (*fixup_walt_sched_stats)(struct rq *rq, struct task_struct *p,
 				       u16 updated_demand_scaled);
 #endif
+#ifdef CONFIG_SCHED_EAS
+	void (*check_for_migration)(struct rq *rq, struct task_struct *p);
+#endif
 } __aligned(STRUCT_ALIGNMENT); /* STRUCT_ALIGN(), vmlinux.lds.h */
 
 static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
@@ -2768,6 +2774,7 @@ extern bool task_fits_max(struct task_struct *p, int cpu);
 extern unsigned long capacity_spare_without(int cpu, struct task_struct *p);
 extern int update_preferred_cluster(struct related_thread_group *grp,
 			struct task_struct *p, u32 old_load, bool from_tick);
+extern struct cpumask *find_rtg_target(struct task_struct *p);
 #endif
 
 #ifdef CONFIG_SCHED_WALT
-- 
2.25.1
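
For readers who want the gating rule of check_for_migration_fair() without the
surrounding scheduler context, here is a small standalone C sketch. It is only
an illustration: the capacity values, the helper name and the NR_CPUS layout
are hypothetical, and RTG handling, reservation and locking are omitted; the
kernel code in the patch above is authoritative.

/*
 * Toy model of the tick-time up-migration decision: a misfit CFS task is
 * only pushed when it is not pinned and the candidate CPU offers strictly
 * more capacity_orig than the CPU it currently runs on.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

/* Assumed capacities for a 4 little + 4 big system (scale of 1024). */
static const unsigned long capacity_orig[NR_CPUS] = {
	462, 462, 462, 462, 1024, 1024, 1024, 1024
};

static bool should_kick_active_balance(int prev_cpu, int new_cpu,
				       bool misfit, int nr_cpus_allowed)
{
	if (!misfit)			/* only misfit tasks are considered */
		return false;
	if (nr_cpus_allowed == 1)	/* a pinned task cannot be moved */
		return false;
	if (new_cpu == -1)		/* no candidate CPU was found */
		return false;
	/* up-migration only: candidate must have strictly higher capacity */
	return capacity_orig[new_cpu] > capacity_orig[prev_cpu];
}

int main(void)
{
	printf("little->big: %d\n", should_kick_active_balance(1, 5, true, 8));
	printf("big->big:    %d\n", should_kick_active_balance(4, 5, true, 8));
	printf("pinned:      %d\n", should_kick_active_balance(1, 5, true, 1));
	return 0;
}

When the rule fires, the real code takes the active-balance path:
kick_active_balance() pins the task with get_task_struct() and
stop_one_cpu_nowait() runs active_load_balance_cpu_stop() on the source CPU
to detach and re-attach it on the reserved target.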

From: Hu Zhaodong <huzhaodong@huawei.com>

ohos inclusion
category: feature
issue: #I4TNS2
CVE: NA

Signed-off-by: Hu Zhaodong <huzhaodong@huawei.com>

-------------------------------------------

Allow migrating a running RT task from the clock interrupt (tick).

Signed-off-by: gaochao <gaochao49@huawei.com>
---
 include/linux/sched/sysctl.h |  4 ++
 init/Kconfig                 |  7 +++
 kernel/sched/rt.c            | 90 ++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h         |  7 +++
 kernel/sysctl.c              |  9 ++++
 5 files changed, 117 insertions(+)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 210909cd4141..acec3b1fd469 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -41,6 +41,10 @@ sysctl_sched_walt_init_task_load_pct_sysctl_handler(struct ctl_table *table,
 	int write, void __user *buffer, size_t *length, loff_t *ppos);
 #endif
 
+#ifdef CONFIG_SCHED_RT_ACTIVE_LB
+extern unsigned int sysctl_sched_enable_rt_active_lb;
+#endif
+
 enum sched_tunable_scaling {
 	SCHED_TUNABLESCALING_NONE,
 	SCHED_TUNABLESCALING_LOG,
diff --git a/init/Kconfig b/init/Kconfig
index db7449e779c6..57554d795040 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -866,6 +866,13 @@ config SCHED_EAS
 	help
 	  Check and migrate the CFS process to a more suitable CPU in the tick.
 
+config SCHED_RT_ACTIVE_LB
+	bool "RT Capacity Aware Misfit Task"
+	depends on SCHED_EAS
+	default n
+	help
+	  Check and migrate the RT process to a more suitable CPU in the tick.
+
 endmenu
 
 #
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 6c1475950441..9adcbf0e0bee 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -17,6 +17,10 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
 
 struct rt_bandwidth def_rt_bandwidth;
 
+#ifdef CONFIG_SCHED_RT_ACTIVE_LB
+unsigned int sysctl_sched_enable_rt_active_lb = 1;
+#endif
+
 static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
 {
 	struct rt_bandwidth *rt_b =
@@ -2443,6 +2447,89 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 	}
 }
 
+#ifdef CONFIG_SCHED_RT_ACTIVE_LB
+static int rt_active_load_balance_cpu_stop(void *data)
+{
+	struct rq *busiest_rq = data;
+	struct task_struct *next_task = busiest_rq->rt_push_task;
+	struct rq *lowest_rq = NULL;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&busiest_rq->lock, flags);
+	busiest_rq->rt_active_balance = 0;
+
+	/* find_lock_lowest_rq locks the rq if found */
+	lowest_rq = find_lock_lowest_rq(next_task, busiest_rq);
+	if (!lowest_rq)
+		goto out;
+
+	if (capacity_orig_of(cpu_of(lowest_rq)) <= capacity_orig_of(task_cpu(next_task)))
+		goto unlock;
+
+	deactivate_task(busiest_rq, next_task, 0);
+	set_task_cpu(next_task, lowest_rq->cpu);
+	activate_task(lowest_rq, next_task, 0);
+
+	resched_curr(lowest_rq);
+unlock:
+	double_unlock_balance(busiest_rq, lowest_rq);
+out:
+	put_task_struct(next_task);
+	raw_spin_unlock_irqrestore(&busiest_rq->lock, flags);
+
+	return 0;
+}
+
+void check_for_migration_rt(struct rq *rq, struct task_struct *p)
+{
+	bool need_active_lb = false;
+	bool misfit_task = false;
+	int cpu = task_cpu(p);
+	unsigned long cpu_orig_cap;
+#ifdef CONFIG_SCHED_RTG
+	struct cpumask *rtg_target = NULL;
+#endif
+
+	if (!sysctl_sched_enable_rt_active_lb)
+		return;
+
+	if (p->nr_cpus_allowed == 1)
+		return;
+
+	cpu_orig_cap = capacity_orig_of(cpu);
+	/* cpu has max capacity, no need to do balance */
+	if (cpu_orig_cap == rq->rd->max_cpu_capacity)
+		return;
+
+#ifdef CONFIG_SCHED_RTG
+	rtg_target = find_rtg_target(p);
+	if (rtg_target)
+		misfit_task = capacity_orig_of(cpumask_first(rtg_target)) >
+			      cpu_orig_cap;
+	else
+		misfit_task = !rt_task_fits_capacity(p, cpu);
+#else
+	misfit_task = !rt_task_fits_capacity(p, cpu);
+#endif
+
+	if (misfit_task) {
+		raw_spin_lock(&rq->lock);
+		if (!rq->active_balance && !rq->rt_active_balance) {
+			rq->rt_active_balance = 1;
+			rq->rt_push_task = p;
+			get_task_struct(p);
+			need_active_lb = true;
+		}
+		raw_spin_unlock(&rq->lock);
+
+		if (need_active_lb)
+			stop_one_cpu_nowait(task_cpu(p),
+					    rt_active_load_balance_cpu_stop,
+					    rq, &rq->rt_active_balance_work);
+	}
+}
+#endif
+
 static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 {
 	/*
@@ -2491,6 +2578,9 @@ const struct sched_class rt_sched_class
 #ifdef CONFIG_SCHED_WALT
 	.fixup_walt_sched_stats = fixup_walt_sched_stats_common,
 #endif
+#ifdef CONFIG_SCHED_RT_ACTIVE_LB
+	.check_for_migration = check_for_migration_rt,
+#endif
 };
 
 #ifdef CONFIG_RT_GROUP_SCHED
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 1a4f1806eb78..09ad491bed45 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1052,6 +1052,13 @@ struct rq {
 #endif
 	struct cpu_stop_work	active_balance_work;
 
+	/* For rt active balancing */
+#ifdef CONFIG_SCHED_RT_ACTIVE_LB
+	int			rt_active_balance;
+	struct task_struct	*rt_push_task;
+	struct cpu_stop_work	rt_active_balance_work;
+#endif
+
 	/* CPU of this runqueue: */
 	int			cpu;
 	int			online;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f13b9e456f50..d5fef7aba276 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1659,6 +1659,15 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+#ifdef CONFIG_SCHED_RT_ACTIVE_LB
+	{
+		.procname	= "sched_enable_rt_active_lb",
+		.data		= &sysctl_sched_enable_rt_active_lb,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
 #ifdef CONFIG_SCHED_WALT
 	{
 		.procname	= "sched_use_walt_cpu_util",
-- 
2.25.1
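
Since the switch is registered in kern_table, it should be exposed as
/proc/sys/kernel/sched_enable_rt_active_lb (default 1 = enabled). The sketch
below shows one way to toggle it from userspace; the path follows from the
ctl_table entry above, but treat it as an assumption until verified on a
running system.

/* Minimal userspace sketch: disable RT active load balancing. */
#include <stdio.h>

static int write_sysctl(const char *path, unsigned int val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fprintf(f, "%u\n", val);
	fclose(f);
	return 0;
}

int main(void)
{
	/* Set to 1 again to re-enable the tick-time RT migration check. */
	return write_sysctl("/proc/sys/kernel/sched_enable_rt_active_lb", 0);
}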

From: Hu Zhaodong <huzhaodong@huawei.com>

ohos inclusion
category: feature
issue: #I4TNS2
CVE: NA

Signed-off-by: Hu Zhaodong <huzhaodong@huawei.com>

-------------------------------------------

RT tasks take CPU capacity into account during CPU selection.

Signed-off-by: gaochao <gaochao49@huawei.com>
---
 include/linux/sched/sysctl.h     |   3 +
 include/trace/events/eas_sched.h |  76 +++++++++++++
 include/trace/events/sched.h     |   4 +
 init/Kconfig                     |   7 ++
 kernel/sched/fair.c              |   8 ++
 kernel/sched/rt.c                | 177 +++++++++++++++++++++++++++++++
 kernel/sched/sched.h             |  17 +++
 kernel/sched/topology.c          |  14 +++
 kernel/sysctl.c                  |   9 ++
 9 files changed, 315 insertions(+)
 create mode 100644 include/trace/events/eas_sched.h

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index acec3b1fd469..a08551ebd23d 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -41,6 +41,9 @@ sysctl_sched_walt_init_task_load_pct_sysctl_handler(struct ctl_table *table,
 	int write, void __user *buffer, size_t *length, loff_t *ppos);
 #endif
 
+#ifdef CONFIG_SCHED_RT_CAS
+extern unsigned int sysctl_sched_enable_rt_cas;
+#endif
 #ifdef CONFIG_SCHED_RT_ACTIVE_LB
 extern unsigned int sysctl_sched_enable_rt_active_lb;
 #endif
diff --git a/include/trace/events/eas_sched.h b/include/trace/events/eas_sched.h
new file mode 100644
index 000000000000..bd24c9ef5b6e
--- /dev/null
+++ b/include/trace/events/eas_sched.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef CONFIG_SCHED_RT_CAS
+TRACE_EVENT(sched_find_cas_cpu_each,
+
+	TP_PROTO(struct task_struct *task, int cpu, int target_cpu,
+		 int isolated, int idle, unsigned long task_util,
+		 unsigned long cpu_util, int cpu_cap),
+
+	TP_ARGS(task, cpu, target_cpu, isolated, idle, task_util, cpu_util, cpu_cap),
+
+	TP_STRUCT__entry(
+		__array(char, comm, TASK_COMM_LEN)
+		__field(pid_t, pid)
+		__field(int, prio)
+		__field(int, cpu)
+		__field(int, target_cpu)
+		__field(int, isolated)
+		__field(unsigned long, idle)
+		__field(unsigned long, task_util)
+		__field(unsigned long, cpu_util)
+		__field(unsigned long, cpu_cap)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+		__entry->pid = task->pid;
+		__entry->prio = task->prio;
+		__entry->cpu = cpu;
+		__entry->target_cpu = target_cpu;
+		__entry->isolated = isolated;
+		__entry->idle = idle;
+		__entry->task_util = task_util;
+		__entry->cpu_util = cpu_util;
+		__entry->cpu_cap = cpu_cap;
+	),
+
+	TP_printk("comm=%s pid=%d prio=%d cpu=%d target_cpu=%d isolated=%d idle=%lu task_util=%lu cpu_util=%lu cpu_cap=%lu",
+		  __entry->comm, __entry->pid, __entry->prio,
+		  __entry->cpu, __entry->target_cpu, __entry->isolated,
+		  __entry->idle, __entry->task_util,
+		  __entry->cpu_util, __entry->cpu_cap)
+);
+
+TRACE_EVENT(sched_find_cas_cpu,
+
+	TP_PROTO(struct task_struct *task, struct cpumask *lowest_mask,
+		 unsigned long tutil, int prev_cpu, int target_cpu),
+
+	TP_ARGS(task, lowest_mask, tutil, prev_cpu, target_cpu),
+
+	TP_STRUCT__entry(
+		__array(char, comm, TASK_COMM_LEN)
+		__field(pid_t, pid)
+		__field(unsigned int, prio)
+		__bitmask(lowest, num_possible_cpus())
+		__field(unsigned long, tutil)
+		__field(int, prev_cpu)
+		__field(int, target_cpu)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+		__entry->pid = task->pid;
+		__entry->prio = task->prio;
+		__assign_bitmask(lowest, cpumask_bits(lowest_mask), num_possible_cpus());
+		__entry->tutil = tutil;
+		__entry->prev_cpu = prev_cpu;
+		__entry->target_cpu = target_cpu;
+	),
+
+	TP_printk("comm=%s pid=%d prio=%d lowest_mask=%s tutil=%lu prev=%d target=%d",
+		  __entry->comm, __entry->pid, __entry->prio,
+		  __get_bitmask(lowest), __entry->tutil,
+		  __entry->prev_cpu, __entry->target_cpu)
+);
+#endif /* CONFIG_SCHED_RT_CAS */
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 27b6ed3c9e58..dd5fff2bb1b2 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -10,6 +10,10 @@
 #include <linux/tracepoint.h>
 #include <linux/binfmts.h>
 
+#ifdef CONFIG_SCHED_RT_CAS
+#include "eas_sched.h"
+#endif
+
 /*
  * Tracepoint for calling kthread_stop, performed to end a kthread:
  */
diff --git a/init/Kconfig b/init/Kconfig
index 57554d795040..ded631516e22 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -866,6 +866,13 @@ config SCHED_EAS
 	help
 	  Check and migrate the CFS process to a more suitable CPU in the tick.
 
+config SCHED_RT_CAS
+	bool "rt-cas optimization"
+	depends on SCHED_EAS
+	default n
+	help
+	  RT task detects capacity during CPU selection
+
 config SCHED_RT_ACTIVE_LB
 	bool "RT Capacity Aware Misfit Task"
 	depends on SCHED_EAS
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9789a385fecd..945bd3fb2478 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3953,14 +3953,22 @@ static inline unsigned long task_util_est(struct task_struct *p)
 }
 
 #ifdef CONFIG_UCLAMP_TASK
+#ifdef CONFIG_SCHED_RT_CAS
+unsigned long uclamp_task_util(struct task_struct *p)
+#else
 static inline unsigned long uclamp_task_util(struct task_struct *p)
+#endif
 {
 	return clamp(task_util_est(p),
 		     uclamp_eff_value(p, UCLAMP_MIN),
 		     uclamp_eff_value(p, UCLAMP_MAX));
 }
 #else
+#ifdef CONFIG_SCHED_RT_CAS
+unsigned long uclamp_task_util(struct task_struct *p)
+#else
 static inline unsigned long uclamp_task_util(struct task_struct *p)
+#endif
 {
 	return task_util_est(p);
 }
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 9adcbf0e0bee..5926d6f6efbb 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -17,6 +17,10 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
 
 struct rt_bandwidth def_rt_bandwidth;
 
+#ifdef CONFIG_SCHED_RT_CAS
+unsigned int sysctl_sched_enable_rt_cas = 1;
+#endif
+
 #ifdef CONFIG_SCHED_RT_ACTIVE_LB
 unsigned int sysctl_sched_enable_rt_active_lb = 1;
 #endif
@@ -1709,6 +1713,170 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
 	return NULL;
 }
 
+#ifdef CONFIG_SCHED_RT_CAS
+int find_cas_cpu(struct sched_domain *sd,
+		 struct task_struct *task, struct cpumask *lowest_mask)
+{
+	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+	struct sched_group *sg = NULL;
+	struct sched_group *sg_target = NULL;
+	struct sched_group *sg_backup = NULL;
+	struct cpumask search_cpu, backup_search_cpu;
+	int cpu = -1;
+	int target_cpu = -1;
+	unsigned long cpu_capacity;
+	unsigned long boosted_tutil = uclamp_task_util(task);
+	unsigned long target_capacity = ULONG_MAX;
+	unsigned long util;
+	unsigned long target_cpu_util = ULONG_MAX;
+	int prev_cpu = task_cpu(task);
+#ifdef CONFIG_SCHED_RTG
+	struct cpumask *rtg_target = NULL;
+#endif
+	bool boosted = uclamp_boosted(task);
+
+	if (!sysctl_sched_enable_rt_cas)
+		return -1;
+
+	rcu_read_lock();
+
+#ifdef CONFIG_SCHED_RTG
+	rtg_target = find_rtg_target(task);
+#endif
+
+	sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, 0));
+	if (!sd) {
+		rcu_read_unlock();
+		return -1;
+	}
+
+	sg = sd->groups;
+	do {
+		if (!cpumask_intersects(lowest_mask, sched_group_span(sg)))
+			continue;
+
+		if (boosted) {
+			if (cpumask_test_cpu(rd->max_cap_orig_cpu,
+					     sched_group_span(sg))) {
+				sg_target = sg;
+				break;
+			}
+		}
+
+		cpu = group_first_cpu(sg);
+#ifdef CONFIG_SCHED_RTG
+		/* honor the rtg tasks */
+		if (rtg_target) {
+			if (cpumask_test_cpu(cpu, rtg_target)) {
+				sg_target = sg;
+				break;
+			}
+
+			/* active LB or big_task favor cpus with more capacity */
+			if (task->state == TASK_RUNNING || boosted) {
+				if (capacity_orig_of(cpu) >
+				    capacity_orig_of(cpumask_any(rtg_target))) {
+					sg_target = sg;
+					break;
+				}
+
+				sg_backup = sg;
+				continue;
+			}
+		}
+#endif
+		/*
+		 * 1. add margin to support task migration
+		 * 2. if task_util is higher than all cpus, make sure the
+		 *    sg_backup with the most powerful cpus is selected
+		 */
+		if (!rt_task_fits_capacity(task, cpu)) {
+			sg_backup = sg;
+			continue;
+		}
+
+		/* support task boost */
+		cpu_capacity = capacity_orig_of(cpu);
+		if (boosted_tutil > cpu_capacity) {
+			sg_backup = sg;
+			continue;
+		}
+
+		/* sg_target: select the sg with smaller capacity */
+		if (cpu_capacity < target_capacity) {
+			target_capacity = cpu_capacity;
+			sg_target = sg;
+		}
+	} while (sg = sg->next, sg != sd->groups);
+
+	if (!sg_target)
+		sg_target = sg_backup;
+
+	if (sg_target) {
+		cpumask_and(&search_cpu, lowest_mask, sched_group_span(sg_target));
+		cpumask_copy(&backup_search_cpu, lowest_mask);
+		cpumask_andnot(&backup_search_cpu, &backup_search_cpu, &search_cpu);
+	} else {
+		cpumask_copy(&search_cpu, lowest_mask);
+		cpumask_clear(&backup_search_cpu);
+	}
+
+retry:
+	cpu = cpumask_first(&search_cpu);
+	do {
+		trace_sched_find_cas_cpu_each(task, cpu, target_cpu,
+					      cpu_isolated(cpu),
+					      idle_cpu(cpu), boosted_tutil, cpu_util(cpu),
+					      capacity_orig_of(cpu));
+
+		if (cpu_isolated(cpu))
+			continue;
+
+		if (!cpumask_test_cpu(cpu, task->cpus_ptr))
+			continue;
+
+		/* find best cpu with smallest max_capacity */
+		if (target_cpu != -1 &&
+		    capacity_orig_of(cpu) > capacity_orig_of(target_cpu))
+			continue;
+
+		util = cpu_util(cpu);
+
+		/* Find the least loaded CPU */
+		if (util > target_cpu_util)
+			continue;
+
+		/*
+		 * If the previous CPU has same load, keep it as
+		 * target_cpu
+		 */
+		if (target_cpu_util == util && target_cpu == prev_cpu)
+			continue;
+
+		/*
+		 * If candidate CPU is the previous CPU, select it.
+		 * If all above conditions are same, select the least
+		 * cumulative window demand CPU.
+		 */
+		target_cpu_util = util;
+		target_cpu = cpu;
+	} while ((cpu = cpumask_next(cpu, &search_cpu)) < nr_cpu_ids);
+
+	if (target_cpu != -1 && cpumask_test_cpu(target_cpu, lowest_mask)) {
+		goto done;
+	} else if (!cpumask_empty(&backup_search_cpu)) {
+		cpumask_copy(&search_cpu, &backup_search_cpu);
+		cpumask_clear(&backup_search_cpu);
+		goto retry;
+	}
+
+done:
+	trace_sched_find_cas_cpu(task, lowest_mask, boosted_tutil, prev_cpu, target_cpu);
+	rcu_read_unlock();
+	return target_cpu;
+}
+#endif
+
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
 
 static int find_lowest_rq(struct task_struct *task)
@@ -1718,6 +1886,9 @@ static int find_lowest_rq(struct task_struct *task)
 	int this_cpu = smp_processor_id();
 	int cpu      = task_cpu(task);
 	int ret;
+#ifdef CONFIG_SCHED_RT_CAS
+	int cas_cpu;
+#endif
 
 	/* Make sure the mask is initialized first */
 	if (unlikely(!lowest_mask))
@@ -1744,6 +1915,12 @@ static int find_lowest_rq(struct task_struct *task)
 	if (!ret)
 		return -1; /* No targets found */
 
+#ifdef CONFIG_SCHED_RT_CAS
+	cas_cpu = find_cas_cpu(sd, task, lowest_mask);
+	if (cas_cpu != -1)
+		return cas_cpu;
+#endif
+
 	/*
 	 * At this point we have built a mask of CPUs representing the
 	 * lowest priority tasks in the system.  Now we want to elect
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 09ad491bed45..e4c65d96185e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -87,6 +87,10 @@ struct rq;
 struct cpuidle_state;
 
+#ifdef CONFIG_SCHED_RT_CAS
+extern unsigned long uclamp_task_util(struct task_struct *p);
+#endif
+
 #ifdef CONFIG_SCHED_WALT
 extern unsigned int sched_ravg_window;
 extern unsigned int walt_cpu_util_freq_divisor;
@@ -893,6 +897,9 @@ struct root_domain {
 	 * CPUs of the rd. Protected by RCU.
 	 */
 	struct perf_domain __rcu *pd;
+#ifdef CONFIG_SCHED_RT_CAS
+	int max_cap_orig_cpu;
+#endif
 };
 
 extern void init_defrootdomain(void);
@@ -2582,6 +2589,11 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
 	return clamp(util, min_util, max_util);
 }
 
+static inline bool uclamp_boosted(struct task_struct *p)
+{
+	return uclamp_eff_value(p, UCLAMP_MIN) > 0;
+}
+
 /*
  * When uclamp is compiled in, the aggregation at rq level is 'turned off'
  * by default in the fast path and only gets turned on once userspace performs
@@ -2602,6 +2614,11 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
 	return util;
 }
 
+static inline bool uclamp_boosted(struct task_struct *p)
+{
+	return false;
+}
+
 static inline bool uclamp_is_used(void)
 {
 	return false;
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index b30b62f0d683..9191e5daaa3c 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -522,6 +522,10 @@ static int init_rootdomain(struct root_domain *rd)
 
 	if (cpupri_init(&rd->cpupri) != 0)
 		goto free_cpudl;
+
+#ifdef CONFIG_SCHED_RT_CAS
+	rd->max_cap_orig_cpu = -1;
+#endif
 	return 0;
 
 free_cpudl:
@@ -2121,9 +2125,19 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
 	/* Attach the domains */
 	rcu_read_lock();
 	for_each_cpu(i, cpu_map) {
+#ifdef CONFIG_SCHED_RT_CAS
+		int max_cpu = READ_ONCE(d.rd->max_cap_orig_cpu);
+#endif
+
 		rq = cpu_rq(i);
 		sd = *per_cpu_ptr(d.sd, i);
 
+#ifdef CONFIG_SCHED_RT_CAS
+		if (max_cpu < 0 || arch_scale_cpu_capacity(i) >
+		    arch_scale_cpu_capacity(max_cpu))
+			WRITE_ONCE(d.rd->max_cap_orig_cpu, i);
+#endif
+
 		/* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */
 		if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
 			WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d5fef7aba276..e34d6937594c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1659,6 +1659,15 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+#ifdef CONFIG_SCHED_RT_CAS
+	{
+		.procname	= "sched_enable_rt_cas",
+		.data		= &sysctl_sched_enable_rt_cas,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
 #ifdef CONFIG_SCHED_RT_ACTIVE_LB
 	{
 		.procname	= "sched_enable_rt_active_lb",
-- 
2.25.1
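
The selection order inside find_cas_cpu() can be summarised by the standalone
model below. It is only an illustration with hypothetical capacity and
utilisation numbers: sched-group iteration, RTG preferences, boosting,
isolation and the backup-mask retry are all omitted, and the helper name is
invented for the sketch; the kernel code above is authoritative.

/*
 * Toy model of capacity-aware RT CPU selection: among allowed candidates,
 * skip CPUs whose capacity cannot hold the clamped task utilisation, prefer
 * the smallest-capacity CPU that fits, break ties by lowest current
 * utilisation, and keep the previous CPU when utilisation is equal.
 */
#include <stdio.h>

#define NR_CPUS 8

struct cpu_state {
	unsigned long capacity_orig;	/* assumed per-CPU max capacity */
	unsigned long util;		/* assumed current utilisation */
	int allowed;			/* in lowest_mask and cpus_ptr */
};

static int find_cas_cpu_model(const struct cpu_state *cpus, int nr,
			      unsigned long task_util, int prev_cpu)
{
	int target = -1;
	unsigned long target_util = ~0UL;

	for (int cpu = 0; cpu < nr; cpu++) {
		if (!cpus[cpu].allowed)
			continue;
		if (task_util > cpus[cpu].capacity_orig)
			continue;	/* task does not fit this CPU */
		if (target != -1 &&
		    cpus[cpu].capacity_orig > cpus[target].capacity_orig)
			continue;	/* prefer the smallest fitting capacity */
		if (cpus[cpu].util > target_util)
			continue;	/* prefer the least-loaded CPU */
		if (cpus[cpu].util == target_util && target == prev_cpu)
			continue;	/* on a tie, stay on the previous CPU */
		target_util = cpus[cpu].util;
		target = cpu;
	}
	return target;
}

int main(void)
{
	struct cpu_state cpus[NR_CPUS] = {
		{ 462, 100, 1 }, { 462,  50, 1 }, { 462, 300, 1 }, { 462, 200, 0 },
		{ 1024, 80, 1 }, { 1024, 600, 1 }, { 1024, 20, 1 }, { 1024,  0, 0 },
	};

	/* A 600-unit task does not fit a little CPU; least-loaded big wins (CPU 6). */
	printf("target=%d\n", find_cas_cpu_model(cpus, NR_CPUS, 600, 1));
	/* A 100-unit task fits the littles; least-loaded allowed little wins (CPU 1). */
	printf("target=%d\n", find_cas_cpu_model(cpus, NR_CPUS, 100, 1));
	return 0;
}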