2023-12-12 12:10:53 +01:00
|
|
|
From b6a7058a13f345d5aa5426466f9104da43d47ce4 Mon Sep 17 00:00:00 2001
|
|
|
|
From: Piotr Gorski <lucjan.lucjanov@gmail.com>
|
|
|
|
Date: Tue, 5 Dec 2023 15:49:10 +0100
|
2023-11-04 20:11:33 +01:00
|
|
|
Subject: [PATCH] bore-cachy
|
|
|
|
|
2023-12-12 12:10:53 +01:00
|
|
|
Signed-off-by: Piotr Gorski <lucjan.lucjanov@gmail.com>
|
2023-11-04 20:11:33 +01:00
|
|
|
---
|
2023-12-12 12:10:53 +01:00
|
|
|
include/linux/sched.h | 10 ++
|
|
|
|
init/Kconfig | 19 ++++
|
|
|
|
kernel/sched/core.c | 128 ++++++++++++++++++++++++
|
|
|
|
kernel/sched/debug.c | 3 +
|
|
|
|
kernel/sched/fair.c | 216 +++++++++++++++++++++++++++++++++++++---
|
|
|
|
kernel/sched/features.h | 4 +
|
|
|
|
6 files changed, 366 insertions(+), 14 deletions(-)
|
2023-11-04 20:11:33 +01:00
|
|
|
|
|
|
|
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
2023-12-12 12:10:53 +01:00
|
|
|
index 77f01ac38..01f2839ad 100644
|
2023-11-04 20:11:33 +01:00
|
|
|
--- a/include/linux/sched.h
|
|
|
|
+++ b/include/linux/sched.h
|
2023-12-12 12:10:53 +01:00
|
|
|
@@ -559,6 +559,16 @@ struct sched_entity {
|
2023-11-04 20:11:33 +01:00
|
|
|
u64 sum_exec_runtime;
|
|
|
|
u64 prev_sum_exec_runtime;
|
|
|
|
u64 vruntime;
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
|
|
|
+ u64 burst_time;
|
2023-12-12 12:10:53 +01:00
|
|
|
+ u8 prev_burst_penalty;
|
|
|
|
+ u8 curr_burst_penalty;
|
|
|
|
+ u8 burst_penalty;
|
|
|
|
+ u8 slice_score;
|
|
|
|
+ u8 child_burst;
|
|
|
|
+ u16 child_burst_cnt;
|
|
|
|
+ u64 child_burst_last_cached;
|
2023-11-04 20:11:33 +01:00
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
s64 vlag;
|
|
|
|
u64 slice;
|
|
|
|
|
|
|
|
diff --git a/init/Kconfig b/init/Kconfig
|
2023-12-12 12:10:53 +01:00
|
|
|
index 9dee4c100..49d343e97 100644
|
2023-11-04 20:11:33 +01:00
|
|
|
--- a/init/Kconfig
|
|
|
|
+++ b/init/Kconfig
|
|
|
|
@@ -1278,6 +1278,25 @@ config CHECKPOINT_RESTORE
|
|
|
|
|
|
|
|
If unsure, say N here.
|
|
|
|
|
|
|
|
+config SCHED_BORE
|
|
|
|
+ bool "Burst-Oriented Response Enhancer"
|
|
|
|
+ default y
|
|
|
|
+ help
|
|
|
|
+ In Desktop and Mobile computing, one might prefer interactive
+ tasks to stay responsive no matter what runs in the background.
|
|
|
|
+
|
|
|
|
+ Enabling this kernel feature modifies the scheduler to discriminate
|
|
|
|
+ tasks by their burst time (runtime since the task last slept or
+ yielded) and prioritize those that run in shorter bursts.
|
|
|
|
+ Such tasks usually include window compositor, widgets backend,
|
|
|
|
+ terminal emulator, video playback, games and so on.
|
|
|
|
+ With a little impact to scheduling fairness, it may improve
|
|
|
|
+ responsiveness especially under heavy background workload.
|
|
|
|
+
|
|
|
|
+ You can turn it off by setting the sysctl kernel.sched_bore = 0.
|
|
|
|
+
|
2023-12-12 12:10:53 +01:00
|
|
|
+ If unsure, say Y here.
|
2023-11-04 20:11:33 +01:00
|
|
|
+
|
|
|
|
config SCHED_AUTOGROUP
|
|
|
|
bool "Automatic process group scheduling"
|
|
|
|
select CGROUPS
|
|
|
|
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
|
2023-12-12 12:10:53 +01:00
|
|
|
index a854b7183..a98cfa7ab 100644
|
2023-11-04 20:11:33 +01:00
|
|
|
--- a/kernel/sched/core.c
|
|
|
|
+++ b/kernel/sched/core.c
|
2023-12-12 12:10:53 +01:00
|
|
|
@@ -4488,6 +4488,123 @@ int wake_up_state(struct task_struct *p, unsigned int state)
|
2023-11-04 20:11:33 +01:00
|
|
|
return try_to_wake_up(p, state, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
2023-12-12 12:10:53 +01:00
|
|
|
+extern bool sched_bore;
|
|
|
|
+extern u8 sched_burst_fork_atavistic;
|
|
|
|
+extern uint sched_burst_cache_lifetime;
|
2023-11-04 20:11:33 +01:00
|
|
|
+
|
|
|
|
+/*
+ * Zero the BORE bookkeeping of init_task's scheduling entity: the burst
+ * time, the three burst penalties, the slice score and the timestamp of
+ * the child-burst cache.
+ */
+void __init sched_init_bore(void)
+{
+	struct sched_entity *se = &init_task.se;
+
+	se->child_burst_last_cached = 0;
+	se->slice_score = 0;
+	se->burst_penalty = 0;
+	se->curr_burst_penalty = 0;
+	se->prev_burst_penalty = 0;
+	se->burst_time = 0;
+}
|
|
|
|
+
|
|
|
|
+/*
+ * Reset the BORE state of task @p at fork time. Only the four fields
+ * assigned below are cleared; every other BORE field of @p keeps the
+ * value it already holds.
+ */
+inline void sched_fork_bore(struct task_struct *p)
+{
+	struct sched_entity *se = &p->se;
+
+	se->child_burst_last_cached = 0;
+	se->slice_score = 0;
+	se->curr_burst_penalty = 0;
+	se->burst_time = 0;
+}
|
|
|
|
+
|
|
|
|
+/* Return the number of entries linked on @p's children list. */
+static u32 count_child_tasks(struct task_struct *p)
+{
+	struct list_head *pos;
+	u32 nr = 0;
+
+	list_for_each(pos, &p->children)
+		nr++;
+
+	return nr;
+}
|
|
|
|
+
|
|
|
|
+/*
+ * True when @now lies strictly past the point at which @p's child-burst
+ * cache stops being valid, i.e. past
+ * child_burst_last_cached + sched_burst_cache_lifetime.
+ */
+static inline bool child_burst_cache_expired(struct task_struct *p, u64 now)
+{
+	u64 valid_until = p->se.child_burst_last_cached + sched_burst_cache_lifetime;
+
+	return now > valid_until;
+}
|
|
|
|
+
|
|
|
|
+/*
+ * Store a fresh child-burst summary in @p.
+ *
+ * @cnt: number of tasks that contributed to @sum
+ * @sum: sum of the contributing burst penalties
+ * @now: timestamp recorded as the moment the cache was refreshed
+ *
+ * The cached value is the average @sum / @cnt (0 when @cnt is 0), but
+ * never lower than @p's own burst_penalty.
+ */
+static void __update_child_burst_cache(
+	struct task_struct *p, u32 cnt, u32 sum, u64 now)
+{
+	struct sched_entity *se = &p->se;
+	u8 avg = cnt ? sum / cnt : 0;
+
+	se->child_burst_last_cached = now;
+	se->child_burst_cnt = cnt;
+	se->child_burst = max(avg, se->burst_penalty);
+}
|
|
|
|
+
|
|
|
|
+/*
+ * Refresh @p's child-burst cache from its direct children only.
+ * Children whose sched_class is not fair_sched_class are left out of
+ * both the count and the sum.
+ */
+static void update_child_burst_cache(struct task_struct *p, u64 now)
+{
+	struct task_struct *kid;
+	u32 nr_fair = 0;
+	u32 total = 0;
+
+	list_for_each_entry(kid, &p->children, sibling) {
+		if (kid->sched_class == &fair_sched_class) {
+			total += kid->se.burst_penalty;
+			nr_fair++;
+		}
+	}
+
+	__update_child_burst_cache(p, nr_fair, total, now);
+}
|
|
|
|
+
|
|
|
|
+/*
+ * Refresh @p's child-burst cache, looking further down the task tree
+ * than the direct children.
+ *
+ * @p:     task whose cache is rebuilt
+ * @now:   timestamp used for expiry checks and stored in the cache
+ * @depth: remaining recursion budget; 0 stops the descent
+ * @acnt:  caller's running count, incremented by this call's count
+ * @asum:  caller's running sum, incremented by this call's sum
+ *
+ * For every child, chains of tasks that have exactly one child are
+ * skipped over first. The task reached that way then contributes in one
+ * of three ways:
+ *  - it has no children, or @depth is 0: its own burst_penalty is added,
+ *    provided it belongs to fair_sched_class;
+ *  - its cache has expired: it is refreshed recursively with @depth - 1;
+ *  - otherwise: its cached average and count are reused.
+ */
+static void update_child_burst_cache_atavistic(
+	struct task_struct *p, u64 now, u32 depth, u32 *acnt, u32 *asum)
+{
+	struct task_struct *child;
+	u32 cnt = 0;
+	u32 sum = 0;
+
+	list_for_each_entry(child, &p->children, sibling) {
+		struct task_struct *dec = child;
+		u32 nr_kids;
+
+		/* Descend while the current task has exactly one child. */
+		for (nr_kids = count_child_tasks(dec);
+		     nr_kids == 1;
+		     nr_kids = count_child_tasks(dec))
+			dec = list_first_entry(&dec->children, struct task_struct, sibling);
+
+		if (nr_kids == 0 || depth == 0) {
+			if (dec->sched_class == &fair_sched_class) {
+				sum += dec->se.burst_penalty;
+				cnt++;
+			}
+		} else if (child_burst_cache_expired(dec, now)) {
+			update_child_burst_cache_atavistic(dec, now, depth - 1, &cnt, &sum);
+		} else {
+			/* Cached average times cached count rebuilds the cached sum. */
+			cnt += dec->se.child_burst_cnt;
+			sum += (u32)dec->se.child_burst * dec->se.child_burst_cnt;
+		}
+	}
+
+	__update_child_burst_cache(p, cnt, sum, now);
+	*acnt += cnt;
+	*asum += sum;
+}
|
|
|
|
+
|
2023-12-12 12:10:53 +01:00
|
|
|
+/*
+ * Seed the burst penalty of the newly forked task @p.
+ *
+ * With sched_bore enabled, an ancestor is chosen under tasklist_lock
+ * (read side), its child-burst cache is refreshed if it has expired,
+ * and @p's prev_burst_penalty is raised to that cached value when the
+ * cached value is larger. In every case burst_penalty ends up equal to
+ * prev_burst_penalty.
+ */
+static void sched_post_fork_bore(struct task_struct *p)
+{
+	struct sched_entity *se = &p->se;
+	struct task_struct *anc;
+	u32 cnt = 0, sum = 0, depth;
+	u8 burst_cache;
+	u64 now;
+
+	if (unlikely(!sched_bore))
+		goto out;
+
+	now = ktime_get_ns();
+	read_lock(&tasklist_lock);
+
+	anc = p->real_parent;
+	depth = sched_burst_fork_atavistic;
+
+	/*
+	 * A non-zero depth climbs past ancestors that have a single child,
+	 * stopping at a task that is its own real_parent.
+	 */
+	if (likely(depth)) {
+		while (anc->real_parent != anc && count_child_tasks(anc) == 1)
+			anc = anc->real_parent;
+	}
+
+	if (child_burst_cache_expired(anc, now)) {
+		if (depth)
+			update_child_burst_cache_atavistic(
+				anc, now, depth - 1, &cnt, &sum);
+		else
+			update_child_burst_cache(anc, now);
+	}
+
+	burst_cache = anc->se.child_burst;
+
+	read_unlock(&tasklist_lock);
+	se->prev_burst_penalty = max(se->prev_burst_penalty, burst_cache);
+out:
+	se->burst_penalty = se->prev_burst_penalty;
+}
|
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
+
|
|
|
|
/*
|
|
|
|
* Perform scheduler related setup for a newly forked process p.
|
|
|
|
* p is forked by current.
|
2023-12-12 12:10:53 +01:00
|
|
|
@@ -4504,6 +4621,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
|
2023-11-04 20:11:33 +01:00
|
|
|
p->se.prev_sum_exec_runtime = 0;
|
|
|
|
p->se.nr_migrations = 0;
|
|
|
|
p->se.vruntime = 0;
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
|
|
|
+ sched_fork_bore(p);
|
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
p->se.vlag = 0;
|
2023-12-12 12:10:53 +01:00
|
|
|
p->se.slice = sysctl_sched_base_slice;
|
2023-11-04 20:11:33 +01:00
|
|
|
INIT_LIST_HEAD(&p->se.group_node);
|
2023-12-12 12:10:53 +01:00
|
|
|
@@ -4823,6 +4943,9 @@ void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
|
2023-11-04 20:11:33 +01:00
|
|
|
|
|
|
|
void sched_post_fork(struct task_struct *p)
|
|
|
|
{
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
2023-12-12 12:10:53 +01:00
|
|
|
+ sched_post_fork_bore(p);
|
2023-11-04 20:11:33 +01:00
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
uclamp_post_fork(p);
|
|
|
|
}
|
|
|
|
|
2023-12-12 12:10:53 +01:00
|
|
|
@@ -9922,6 +10045,11 @@ void __init sched_init(void)
|
2023-11-04 20:11:33 +01:00
|
|
|
BUG_ON(&dl_sched_class != &stop_sched_class + 1);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
|
|
|
+ sched_init_bore();
|
2023-12-12 12:10:53 +01:00
|
|
|
+ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 3.5.7 by Masahito Suzuki");
|
2023-11-04 20:11:33 +01:00
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
+
|
|
|
|
wait_bit_init();
|
|
|
|
|
|
|
|
#ifdef CONFIG_FAIR_GROUP_SCHED
|
|
|
|
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
|
2023-12-12 12:10:53 +01:00
|
|
|
index 4c3d0d9f3..e37fdfad1 100644
|
2023-11-04 20:11:33 +01:00
|
|
|
--- a/kernel/sched/debug.c
|
|
|
|
+++ b/kernel/sched/debug.c
|
|
|
|
@@ -595,6 +595,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
|
|
|
|
SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
|
|
|
|
SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));
|
|
|
|
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
2023-12-12 12:10:53 +01:00
|
|
|
+ SEQ_printf(m, " %2d", p->se.slice_score);
|
2023-11-04 20:11:33 +01:00
|
|
|
+#endif
|
|
|
|
#ifdef CONFIG_NUMA_BALANCING
|
|
|
|
SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
|
|
|
|
#endif
|
|
|
|
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
2023-12-12 12:10:53 +01:00
|
|
|
index fa9fff0f9..5e4f0ccff 100644
|
2023-11-04 20:11:33 +01:00
|
|
|
--- a/kernel/sched/fair.c
|
|
|
|
+++ b/kernel/sched/fair.c
|
|
|
|
@@ -19,6 +19,9 @@
|
|
|
|
*
|
|
|
|
* Adaptive scheduling granularity, math enhancements by Peter Zijlstra
|
|
|
|
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
|
|
|
|
+ *
|
|
|
|
+ * Burst-Oriented Response Enhancer (BORE) CPU Scheduler
|
|
|
|
+ * Copyright (C) 2021-2023 Masahito Suzuki <firelzrd@gmail.com>
|
|
|
|
*/
|
|
|
|
#include <linux/energy_model.h>
|
|
|
|
#include <linux/mmap_lock.h>
|
|
|
|
@@ -66,17 +69,28 @@
|
|
|
|
* SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
|
|
|
|
* SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
|
|
|
|
*
|
|
|
|
- * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
|
|
|
|
+ * (BORE default SCHED_TUNABLESCALING_NONE = *1 constant)
|
|
|
|
+ * (EEVDF default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
|
|
|
|
*/
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
|
|
|
+unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
|
|
|
|
+#else // CONFIG_SCHED_BORE
|
|
|
|
unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
|
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Minimal preemption granularity for CPU-bound tasks:
|
|
|
|
*
|
|
|
|
- * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
|
|
|
|
+ * (BORE default: 3 msec constant, units: nanoseconds)
|
|
|
|
+ * (EEVDF default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
|
|
|
|
*/
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
|
|
|
+unsigned int sysctl_sched_base_slice = 3000000ULL;
|
|
|
|
+static unsigned int normalized_sysctl_sched_base_slice = 3000000ULL;
|
|
|
|
+#else // CONFIG_SCHED_BORE
|
|
|
|
unsigned int sysctl_sched_base_slice = 750000ULL;
|
|
|
|
static unsigned int normalized_sysctl_sched_base_slice = 750000ULL;
|
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
|
|
|
|
/*
|
|
|
|
* After fork, child runs first. If set to 0 (default) then
|
2023-12-12 12:10:53 +01:00
|
|
|
@@ -86,6 +100,68 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
|
2023-11-04 20:11:33 +01:00
|
|
|
|
|
|
|
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
|
|
|
|
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
2023-12-12 12:10:53 +01:00
|
|
|
+/*
+ * BORE tunables with their default values. Each one is published through
+ * an entry in sched_fair_sysctls.
+ */
+/* Master switch tested by the BORE code paths before they act. */
+bool __read_mostly sched_bore                   = 1;
+/* When set, update_slice_score() adds 2 to the penalty before shifting. */
+bool __read_mostly sched_burst_score_rounding   = 0;
+/* Right-shift applied by binary_smooth() to a rising / falling penalty. */
+bool __read_mostly sched_burst_smoothness_long  = 1;
+bool __read_mostly sched_burst_smoothness_short = 0;
+/* Descent depth used by sched_post_fork_bore(); 0 selects the flat scan. */
+u8   __read_mostly sched_burst_fork_atavistic   = 2;
+/* Shifted left by 8 and subtracted from the greed in calc_burst_penalty(). */
+u8   __read_mostly sched_burst_penalty_offset   = 22;
+/* Multiplier applied in calc_burst_penalty(); the product is shifted >> 16. */
+uint __read_mostly sched_burst_penalty_scale    = 1280;
+/* Added to child_burst_last_cached and compared against ktime_get_ns(). */
+uint __read_mostly sched_burst_cache_lifetime   = 60000000;
+
+/*
+ * Upper bounds handed to the sysctl handlers through .extra2.
+ *
+ * proc_dou8vec_minmax() and proc_douintvec_minmax() both read the bound
+ * through an unsigned int pointer, so each bound must be int-sized even
+ * when the tunable it limits is a u8. A one-byte object here would make
+ * the handler read past it and use an unpredictable maximum.
+ *
+ * The bounds are referenced only under CONFIG_SYSCTL, hence
+ * __maybe_unused.
+ */
+static uint __maybe_unused sixty_four     = 64;
+static uint __maybe_unused maxval_12_bits = 4095;
+
+/* 39 << 2 == 156: the largest value calc_burst_penalty() returns. */
+#define MAX_BURST_PENALTY (39U <<2)
|
2023-11-04 20:11:33 +01:00
|
|
|
+
|
|
|
|
+/*
+ * Pack fls64(@v) into bits 8 and up of the result, and the eight bits of
+ * @v that sit directly below its most significant set bit into bits 0-7.
+ * Values with fewer than nine significant bits are shifted left to line
+ * up the same way. @v == 0 yields 0.
+ */
+static inline u32 log2plus1_u64_u32f8(u64 v)
+{
+	u32 int_part = fls64(v);
+	s32 shift = int_part - 9;
+	u8 frac;
+
+	/* The u8 assignment drops the leading set bit and keeps 8 bits. */
+	if (shift < 0)
+		frac = v << -shift;
+	else
+		frac = v >> shift;
+
+	return int_part << 8 | frac;
+}
|
|
|
|
+
|
|
|
|
+/*
+ * Convert @burst_time into a burst penalty.
+ *
+ * The penalty is the amount by which log2plus1_u64_u32f8(@burst_time)
+ * exceeds sched_burst_penalty_offset << 8 (never negative), multiplied
+ * by sched_burst_penalty_scale, shifted right by 16 and capped at
+ * MAX_BURST_PENALTY.
+ */
+static inline u32 calc_burst_penalty(u64 burst_time)
+{
+	u32 greed = log2plus1_u64_u32f8(burst_time);
+	u32 tolerance = sched_burst_penalty_offset << 8;
+	s32 excess = (s32)greed - (s32)tolerance;
+	u32 penalty = excess > 0 ? excess : 0;
+	u32 scaled_penalty = penalty * sched_burst_penalty_scale >> 16;
+
+	if (scaled_penalty > MAX_BURST_PENALTY)
+		return MAX_BURST_PENALTY;
+
+	return scaled_penalty;
+}
|
|
|
|
+
|
2023-12-12 12:10:53 +01:00
|
|
|
+/*
+ * Recompute @se's current burst penalty from its accumulated burst_time
+ * and set burst_penalty to the larger of the previous and current
+ * penalties.
+ */
+static inline void update_burst_penalty(struct sched_entity *se)
+{
+	u8 curr = calc_burst_penalty(se->burst_time);
+
+	se->curr_burst_penalty = curr;
+	se->burst_penalty = (curr > se->prev_burst_penalty) ?
+		curr : se->prev_burst_penalty;
+}
|
|
|
|
+
|
2023-12-12 12:10:53 +01:00
|
|
|
+/*
+ * Derive @se's slice_score from its burst_penalty by dropping the two
+ * low bits. With sched_burst_score_rounding set, 2 is added first so the
+ * result is rounded rather than truncated.
+ */
+static inline void update_slice_score(struct sched_entity *se)
+{
+	u32 bias = sched_burst_score_rounding ? 0x2U : 0;
+
+	se->slice_score = (se->burst_penalty + bias) >> 2;
+}
|
|
|
|
+
|
|
|
|
+/*
+ * Scale @delta by the sched_prio_to_wmult[] entry selected by @se's
+ * slice_score, then shift the product right by 22 bits.
+ */
+static inline u64 scale_slice(u64 delta, struct sched_entity *se)
+{
+	u32 wmult = sched_prio_to_wmult[se->slice_score];
+
+	return mul_u64_u32_shr(delta, wmult, 22);
+}
|
|
|
|
+
|
|
|
|
+/*
+ * Move @old towards @new. An upward step is shifted right by
+ * sched_burst_smoothness_long, a downward step by
+ * sched_burst_smoothness_short, before being applied to @old.
+ */
+static inline u32 binary_smooth(u32 new, u32 old)
+{
+	int delta = new - old;
+
+	if (delta < 0)
+		return old - (-delta >> (int)sched_burst_smoothness_short);
+
+	return old + (delta >> (int)sched_burst_smoothness_long);
+}
|
|
|
|
+
|
|
|
|
+/*
+ * Close @se's current burst: fold the current penalty into the previous
+ * one through binary_smooth(), publish the result as burst_penalty, and
+ * clear the current penalty and the accumulated burst time.
+ */
+static void restart_burst(struct sched_entity *se)
+{
+	u8 smoothed = binary_smooth(se->curr_burst_penalty, se->prev_burst_penalty);
+
+	se->prev_burst_penalty = smoothed;
+	se->burst_penalty = smoothed;
+	se->burst_time = 0;
+	se->curr_burst_penalty = 0;
+}
|
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
+
|
|
|
|
int sched_thermal_decay_shift;
|
|
|
|
static int __init setup_sched_thermal_decay_shift(char *str)
|
|
|
|
{
|
2023-12-12 12:10:53 +01:00
|
|
|
@@ -145,6 +221,70 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536;
|
2023-11-04 20:11:33 +01:00
|
|
|
|
|
|
|
#ifdef CONFIG_SYSCTL
|
|
|
|
static struct ctl_table sched_fair_sysctls[] = {
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
|
|
|
+ {
|
|
|
|
+ .procname = "sched_bore",
|
|
|
|
+ .data = &sched_bore,
|
2023-12-12 12:10:53 +01:00
|
|
|
+ .maxlen = sizeof(bool),
|
2023-11-04 20:11:33 +01:00
|
|
|
+ .mode = 0644,
|
2023-12-12 12:10:53 +01:00
|
|
|
+ .proc_handler = &proc_dobool,
|
2023-11-04 20:11:33 +01:00
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ .procname = "sched_burst_cache_lifetime",
|
|
|
|
+ .data = &sched_burst_cache_lifetime,
|
2023-12-12 12:10:53 +01:00
|
|
|
+ .maxlen = sizeof(uint),
|
2023-11-04 20:11:33 +01:00
|
|
|
+ .mode = 0644,
|
2023-12-12 12:10:53 +01:00
|
|
|
+ .proc_handler = proc_douintvec,
|
2023-11-04 20:11:33 +01:00
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ .procname = "sched_burst_fork_atavistic",
|
|
|
|
+ .data = &sched_burst_fork_atavistic,
|
2023-12-12 12:10:53 +01:00
|
|
|
+ .maxlen = sizeof(u8),
|
2023-11-04 20:11:33 +01:00
|
|
|
+ .mode = 0644,
|
2023-12-12 12:10:53 +01:00
|
|
|
+ .proc_handler = &proc_dou8vec_minmax,
|
2023-11-04 20:11:33 +01:00
|
|
|
+ .extra1 = SYSCTL_ZERO,
|
2023-12-12 12:10:53 +01:00
|
|
|
+ .extra2 = SYSCTL_THREE,
|
2023-11-04 20:11:33 +01:00
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ .procname = "sched_burst_penalty_offset",
|
|
|
|
+ .data = &sched_burst_penalty_offset,
|
2023-12-12 12:10:53 +01:00
|
|
|
+ .maxlen = sizeof(u8),
|
2023-11-04 20:11:33 +01:00
|
|
|
+ .mode = 0644,
|
2023-12-12 12:10:53 +01:00
|
|
|
+ .proc_handler = &proc_dou8vec_minmax,
|
2023-11-04 20:11:33 +01:00
|
|
|
+ .extra1 = SYSCTL_ZERO,
|
|
|
|
+ .extra2 = &sixty_four,
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ .procname = "sched_burst_penalty_scale",
|
|
|
|
+ .data = &sched_burst_penalty_scale,
|
2023-12-12 12:10:53 +01:00
|
|
|
+ .maxlen = sizeof(uint),
|
2023-11-04 20:11:33 +01:00
|
|
|
+ .mode = 0644,
|
2023-12-12 12:10:53 +01:00
|
|
|
+ .proc_handler = &proc_douintvec_minmax,
|
2023-11-04 20:11:33 +01:00
|
|
|
+ .extra1 = SYSCTL_ZERO,
|
|
|
|
+ .extra2 = &maxval_12_bits,
|
|
|
|
+ },
|
|
|
|
+ {
|
2023-12-12 12:10:53 +01:00
|
|
|
+ .procname = "sched_burst_score_rounding",
|
|
|
|
+ .data = &sched_burst_score_rounding,
|
|
|
|
+ .maxlen = sizeof(bool),
|
2023-11-04 20:11:33 +01:00
|
|
|
+ .mode = 0644,
|
2023-12-12 12:10:53 +01:00
|
|
|
+ .proc_handler = &proc_dobool,
|
2023-11-04 20:11:33 +01:00
|
|
|
+ },
|
|
|
|
+ {
|
2023-12-12 12:10:53 +01:00
|
|
|
+ .procname = "sched_burst_smoothness_long",
|
|
|
|
+ .data = &sched_burst_smoothness_long,
|
|
|
|
+ .maxlen = sizeof(bool),
|
2023-11-04 20:11:33 +01:00
|
|
|
+ .mode = 0644,
|
2023-12-12 12:10:53 +01:00
|
|
|
+ .proc_handler = &proc_dobool,
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ .procname = "sched_burst_smoothness_short",
|
|
|
|
+ .data = &sched_burst_smoothness_short,
|
|
|
|
+ .maxlen = sizeof(bool),
|
|
|
|
+ .mode = 0644,
|
|
|
|
+ .proc_handler = &proc_dobool,
|
2023-11-04 20:11:33 +01:00
|
|
|
+ },
|
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
{
|
|
|
|
.procname = "sched_child_runs_first",
|
|
|
|
.data = &sysctl_sched_child_runs_first,
|
2023-12-12 12:10:53 +01:00
|
|
|
@@ -313,6 +453,9 @@ static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
|
2023-11-04 20:11:33 +01:00
|
|
|
if (unlikely(se->load.weight != NICE_0_LOAD))
|
|
|
|
delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
|
|
|
|
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
2023-12-12 12:10:53 +01:00
|
|
|
+ if (likely(sched_bore)) delta = scale_slice(delta, se);
|
2023-11-04 20:11:33 +01:00
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
return delta;
|
|
|
|
}
|
|
|
|
|
2023-12-12 12:10:53 +01:00
|
|
|
@@ -668,7 +811,7 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
|
2023-11-04 20:11:33 +01:00
|
|
|
* Specifically: avg_runtime() + 0 must result in entity_eligible() := true
|
|
|
|
* For this to be so, the result of this function must have a left bias.
|
|
|
|
*/
|
|
|
|
-u64 avg_vruntime(struct cfs_rq *cfs_rq)
|
|
|
|
+static u64 avg_key(struct cfs_rq *cfs_rq)
|
|
|
|
{
|
|
|
|
struct sched_entity *curr = cfs_rq->curr;
|
|
|
|
s64 avg = cfs_rq->avg_vruntime;
|
2023-12-12 12:10:53 +01:00
|
|
|
@@ -688,7 +831,11 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
|
2023-11-04 20:11:33 +01:00
|
|
|
avg = div_s64(avg, load);
|
|
|
|
}
|
|
|
|
|
|
|
|
- return cfs_rq->min_vruntime + avg;
|
|
|
|
+ return avg;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+inline u64 avg_vruntime(struct cfs_rq *cfs_rq) {
|
|
|
|
+ return cfs_rq->min_vruntime + avg_key(cfs_rq);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2023-12-12 12:10:53 +01:00
|
|
|
@@ -709,13 +856,8 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
|
|
|
|
*/
|
|
|
|
static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
|
|
|
{
|
|
|
|
- s64 lag, limit;
|
|
|
|
-
|
|
|
|
SCHED_WARN_ON(!se->on_rq);
|
|
|
|
- lag = avg_vruntime(cfs_rq) - se->vruntime;
|
|
|
|
-
|
|
|
|
- limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
|
|
|
|
- se->vlag = clamp(lag, -limit, limit);
|
|
|
|
+ se->vlag = avg_vruntime(cfs_rq) - se->vruntime;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
@@ -981,7 +1123,6 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
|
2023-11-04 20:11:33 +01:00
|
|
|
return se;
|
|
|
|
}
|
|
|
|
|
|
|
|
-#ifdef CONFIG_SCHED_DEBUG
|
|
|
|
struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
|
|
|
|
{
|
|
|
|
struct rb_node *last = rb_last(&cfs_rq->tasks_timeline.rb_root);
|
2023-12-12 12:10:53 +01:00
|
|
|
@@ -995,6 +1136,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
|
2023-11-04 20:11:33 +01:00
|
|
|
/**************************************************************
|
|
|
|
* Scheduling class statistics methods:
|
|
|
|
*/
|
|
|
|
+#ifdef CONFIG_SCHED_DEBUG
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
int sched_update_scaling(void)
|
|
|
|
{
|
2023-12-12 12:10:53 +01:00
|
|
|
@@ -1031,6 +1173,9 @@ static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
|
|
|
/*
|
|
|
|
* EEVDF: vd_i = ve_i + r_i / w_i
|
|
|
|
*/
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
|
|
|
+ update_slice_score(se);
|
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
se->deadline = se->vruntime + calc_delta_fair(se->slice, se);
|
|
|
|
|
|
|
|
/*
|
|
|
|
@@ -1173,7 +1318,11 @@ static void update_curr(struct cfs_rq *cfs_rq)
|
2023-11-04 20:11:33 +01:00
|
|
|
curr->sum_exec_runtime += delta_exec;
|
|
|
|
schedstat_add(cfs_rq->exec_clock, delta_exec);
|
|
|
|
|
|
|
|
- curr->vruntime += calc_delta_fair(delta_exec, curr);
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
|
|
|
+ curr->burst_time += delta_exec;
|
|
|
|
+ update_burst_penalty(curr);
|
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
+ curr->vruntime += max(1ULL, calc_delta_fair(delta_exec, curr));
|
|
|
|
update_deadline(cfs_rq, curr);
|
|
|
|
update_min_vruntime(cfs_rq);
|
|
|
|
|
2023-12-12 12:10:53 +01:00
|
|
|
@@ -5066,6 +5215,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
|
|
|
s64 lag = 0;
|
|
|
|
|
|
|
|
se->slice = sysctl_sched_base_slice;
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
|
|
|
+ update_slice_score(se);
|
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
vslice = calc_delta_fair(se->slice, se);
|
|
|
|
|
|
|
|
/*
|
|
|
|
@@ -5080,7 +5232,13 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
|
|
|
struct sched_entity *curr = cfs_rq->curr;
|
|
|
|
unsigned long load;
|
|
|
|
|
|
|
|
- lag = se->vlag;
|
|
|
|
+ u64 slice = se->slice;
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
|
|
|
+ if (unlikely(!sched_bore))
|
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
+ slice *= 2;
|
|
|
|
+ s64 limit = calc_delta_fair(max_t(u64, slice, TICK_NSEC), se);
|
|
|
|
+ lag = clamp(se->vlag, -limit, limit);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we want to place a task and preserve lag, we have to
|
|
|
|
@@ -5142,6 +5300,21 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
2023-11-04 20:11:33 +01:00
|
|
|
if (WARN_ON_ONCE(!load))
|
|
|
|
load = 1;
|
|
|
|
lag = div_s64(lag, load);
|
|
|
|
+
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
|
|
|
+ if (flags & ENQUEUE_MIGRATED && likely(sched_bore)) {
|
|
|
|
+ s64 left_vruntime = vruntime, right_vruntime = vruntime;
|
2023-12-12 12:10:53 +01:00
|
|
|
+ struct sched_entity *first = __pick_first_entity(cfs_rq),
|
|
|
|
+ *last = __pick_last_entity(cfs_rq);
|
2023-11-04 20:11:33 +01:00
|
|
|
+
|
2023-12-12 12:10:53 +01:00
|
|
|
+ if (first) left_vruntime = first->vruntime;
|
|
|
|
+ if (last) right_vruntime = last->vruntime;
|
2023-11-04 20:11:33 +01:00
|
|
|
+
|
|
|
|
+ lag = clamp(lag,
|
|
|
|
+ (s64)vruntime - right_vruntime,
|
|
|
|
+ (s64)vruntime - left_vruntime);
|
|
|
|
+ }
|
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
}
|
|
|
|
|
|
|
|
se->vruntime = vruntime - lag;
|
2023-12-12 12:10:53 +01:00
|
|
|
@@ -6698,6 +6871,12 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
|
|
|
bool was_sched_idle = sched_idle_rq(rq);
|
2023-11-04 20:11:33 +01:00
|
|
|
|
2023-12-12 12:10:53 +01:00
|
|
|
util_est_dequeue(&rq->cfs, p);
|
2023-11-04 20:11:33 +01:00
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
2023-12-12 12:10:53 +01:00
|
|
|
+ if (task_sleep) {
|
|
|
|
+ update_curr(cfs_rq_of(se));
|
|
|
|
+ restart_burst(se);
|
|
|
|
+ }
|
2023-11-04 20:11:33 +01:00
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
|
2023-12-12 12:10:53 +01:00
|
|
|
for_each_sched_entity(se) {
|
|
|
|
cfs_rq = cfs_rq_of(se);
|
|
|
|
@@ -8429,8 +8608,13 @@ static void yield_task_fair(struct rq *rq)
|
2023-11-04 20:11:33 +01:00
|
|
|
/*
|
|
|
|
* Are we the only task in the tree?
|
|
|
|
*/
|
|
|
|
- if (unlikely(rq->nr_running == 1))
|
|
|
|
+ if (unlikely(rq->nr_running == 1)) {
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
|
|
|
+ restart_burst(se);
|
2023-12-12 12:10:53 +01:00
|
|
|
+ update_slice_score(se);
|
2023-11-04 20:11:33 +01:00
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
return;
|
|
|
|
+ }
|
|
|
|
|
|
|
|
clear_buddies(cfs_rq, se);
|
|
|
|
|
2023-12-12 12:10:53 +01:00
|
|
|
@@ -8439,6 +8623,10 @@ static void yield_task_fair(struct rq *rq)
|
2023-11-04 20:11:33 +01:00
|
|
|
* Update run-time statistics of the 'current'.
|
|
|
|
*/
|
|
|
|
update_curr(cfs_rq);
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
|
|
|
+ restart_burst(se);
|
2023-12-12 12:10:53 +01:00
|
|
|
+ update_slice_score(se);
|
2023-11-04 20:11:33 +01:00
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
/*
|
|
|
|
* Tell update_rq_clock() that we've just updated,
|
|
|
|
* so we don't do microscopic update in schedule()
|
|
|
|
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
|
2023-12-12 12:10:53 +01:00
|
|
|
index f77016823..a2e09c04f 100644
|
2023-11-04 20:11:33 +01:00
|
|
|
--- a/kernel/sched/features.h
|
|
|
|
+++ b/kernel/sched/features.h
|
|
|
|
@@ -6,7 +6,11 @@
|
|
|
|
*/
|
|
|
|
SCHED_FEAT(PLACE_LAG, true)
|
|
|
|
SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
|
|
|
|
+#ifdef CONFIG_SCHED_BORE
|
|
|
|
+SCHED_FEAT(RUN_TO_PARITY, false)
|
|
|
|
+#else // CONFIG_SCHED_BORE
|
|
|
|
SCHED_FEAT(RUN_TO_PARITY, true)
|
|
|
|
+#endif // CONFIG_SCHED_BORE
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Prefer to schedule the task we woke last (assuming it failed
|
|
|
|
--
|
2023-12-12 12:10:53 +01:00
|
|
|
2.43.0.rc2
|
|
|
|
|