Update to 6.2.1 and update patches for it

This commit is contained in:
ferrreo 2023-02-25 20:36:40 +00:00
parent 185ea71a08
commit 99e843f61e
3 changed files with 874 additions and 350 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
From 78440b24f24a021daf660c0bd212c936e50e5f0a Mon Sep 17 00:00:00 2001 From 10300b929dc0a52e458b6bcd9af801f6df967d42 Mon Sep 17 00:00:00 2001
From: Peter Jung <admin@ptr1337.dev> From: Peter Jung <admin@ptr1337.dev>
Date: Fri, 17 Feb 2023 15:38:09 +0100 Date: Fri, 24 Feb 2023 11:16:15 +0100
Subject: [PATCH] Add latency priority for CFS class Subject: [PATCH] Add latency priority for CFS class
This patchset restarts the work about adding a latency priority to describe This patchset restarts the work about adding a latency priority to describe
@ -38,8 +38,6 @@ sensitive task (priority < 0) is preempted by high priority task (RT/DL)
or fails to preempt them. This patch ensures that tasks will have at least or fails to preempt them. This patch ensures that tasks will have at least
a slice of sched_min_granularity in priority at wakeup. a slice of sched_min_granularity in priority at wakeup.
Patch [9] removes useless check after adding a latency rb tree.
I have also backported the patchset on a dragonboard RB3 with an android I have also backported the patchset on a dragonboard RB3 with an android
mainline kernel based on v5.18 for a quick test. I have used the mainline kernel based on v5.18 for a quick test. I have used the
TouchLatency app which is part of AOSP and described to be a very good TouchLatency app which is part of AOSP and described to be a very good
@ -69,7 +67,7 @@ reconsider the augmented rbtree once the use of negative latency_nice will
be more widely deployed. For now, the different tests that I have done, be more widely deployed. For now, the different tests that I have done,
have not shown improvements with augmented rbtree. have not shown improvements with augmented rbtree.
Below are some hackbench results: Below are some hackbench results (from v10):
2 rbtrees augmented rbtree augmented rbtree 2 rbtrees augmented rbtree augmented rbtree
sorted by vruntime sorted by wakeup_vruntime sorted by vruntime sorted by wakeup_vruntime
sched pipe sched pipe
@ -95,6 +93,14 @@ vs tip -0,02 % -3,56 % -4,01 %
[1] https://source.android.com/docs/core/debug/eval_perf#touchlatency [1] https://source.android.com/docs/core/debug/eval_perf#touchlatency
Change since v11:
- init latency_node of task group entity
Change since v10:
- remove sched_latency_to_weight array and use a calc_latency_offset() instead
- save latency_prio for the task group instead of the latency offset
- enqueue back an entity when changing the latency nice prio of a task group
Change since v9: Change since v9:
- Rebase - Rebase
- add tags - add tags
@ -156,13 +162,12 @@ Parth Shah (3):
task task
sched: Allow sched_{get,set}attr to change latency_nice of the task sched: Allow sched_{get,set}attr to change latency_nice of the task
Vincent Guittot (6): Vincent Guittot (5):
sched/fair: fix unfairness at wakeup sched/fair: fix unfairness at wakeup
sched/fair: Take into account latency priority at wakeup sched/fair: Take into account latency priority at wakeup
sched/fair: Add sched group latency support sched/fair: Add sched group latency support
sched/core: Support latency priority with sched core sched/core: Support latency priority with sched core
sched/fair: Add latency list sched/fair: Add latency list
sched/fair: remove check_preempt_from_others
Signed-off-by: Peter Jung <admin@ptr1337.dev> Signed-off-by: Peter Jung <admin@ptr1337.dev>
--- ---
@ -170,14 +175,14 @@ Signed-off-by: Peter Jung <admin@ptr1337.dev>
include/linux/sched.h | 4 + include/linux/sched.h | 4 +
include/linux/sched/prio.h | 27 +++ include/linux/sched/prio.h | 27 +++
include/uapi/linux/sched.h | 4 +- include/uapi/linux/sched.h | 4 +-
include/uapi/linux/sched/types.h | 19 +++ include/uapi/linux/sched/types.h | 19 ++
init/init_task.c | 1 + init/init_task.c | 1 +
kernel/sched/core.c | 106 ++++++++++++ kernel/sched/core.c | 65 +++++++
kernel/sched/debug.c | 1 + kernel/sched/debug.c | 1 +
kernel/sched/fair.c | 209 ++++++++++++++++++++---- kernel/sched/fair.c | 222 ++++++++++++++++++++----
kernel/sched/sched.h | 45 ++++- kernel/sched/sched.h | 50 +++++-
tools/include/uapi/linux/sched.h | 4 +- tools/include/uapi/linux/sched.h | 4 +-
11 files changed, 394 insertions(+), 36 deletions(-) 11 files changed, 371 insertions(+), 36 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 74cec76be9f2..2e511d4a4c6a 100644 index 74cec76be9f2..2e511d4a4c6a 100644
@ -343,27 +348,22 @@ index ff6c4b9bfe6b..071deff8dbd1 100644
.cpus_ptr = &init_task.cpus_mask, .cpus_ptr = &init_task.cpus_mask,
.user_cpus_ptr = NULL, .user_cpus_ptr = NULL,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5237639786b7..5d6a283a4da9 100644 index 5237639786b7..e1a9f9898b30 100644
--- a/kernel/sched/core.c --- a/kernel/sched/core.c
+++ b/kernel/sched/core.c +++ b/kernel/sched/core.c
@@ -1283,6 +1283,16 @@ static void set_load_weight(struct task_struct *p, bool update_load) @@ -1283,6 +1283,11 @@ static void set_load_weight(struct task_struct *p, bool update_load)
} }
} }
+static void set_latency_offset(struct task_struct *p) +static void set_latency_offset(struct task_struct *p)
+{ +{
+ long weight = sched_latency_to_weight[p->latency_prio]; + p->se.latency_offset = calc_latency_offset(p->latency_prio);
+ s64 offset;
+
+ offset = weight * get_sleep_latency(false);
+ offset = div_s64(offset, NICE_LATENCY_WEIGHT_MAX);
+ p->se.latency_offset = (long)offset;
+} +}
+ +
#ifdef CONFIG_UCLAMP_TASK #ifdef CONFIG_UCLAMP_TASK
/* /*
* Serializes updates of utilization clamp values * Serializes updates of utilization clamp values
@@ -4432,6 +4442,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) @@ -4432,6 +4437,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->se.dur_avg = 0; p->se.dur_avg = 0;
p->se.prev_sleep_sum_runtime = 0; p->se.prev_sleep_sum_runtime = 0;
INIT_LIST_HEAD(&p->se.group_node); INIT_LIST_HEAD(&p->se.group_node);
@ -371,7 +371,7 @@ index 5237639786b7..5d6a283a4da9 100644
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
p->se.cfs_rq = NULL; p->se.cfs_rq = NULL;
@@ -4684,6 +4695,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) @@ -4684,6 +4690,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
p->prio = p->normal_prio = p->static_prio; p->prio = p->normal_prio = p->static_prio;
set_load_weight(p, false); set_load_weight(p, false);
@ -381,7 +381,7 @@ index 5237639786b7..5d6a283a4da9 100644
/* /*
* We don't need the reset flag anymore after the fork. It has * We don't need the reset flag anymore after the fork. It has
* fulfilled its duty: * fulfilled its duty:
@@ -7444,6 +7458,16 @@ static void __setscheduler_params(struct task_struct *p, @@ -7444,6 +7453,16 @@ static void __setscheduler_params(struct task_struct *p,
p->rt_priority = attr->sched_priority; p->rt_priority = attr->sched_priority;
p->normal_prio = normal_prio(p); p->normal_prio = normal_prio(p);
set_load_weight(p, true); set_load_weight(p, true);
@ -398,7 +398,7 @@ index 5237639786b7..5d6a283a4da9 100644
} }
/* /*
@@ -7586,6 +7610,13 @@ static int __sched_setscheduler(struct task_struct *p, @@ -7586,6 +7605,13 @@ static int __sched_setscheduler(struct task_struct *p,
return retval; return retval;
} }
@ -412,7 +412,7 @@ index 5237639786b7..5d6a283a4da9 100644
if (pi) if (pi)
cpuset_read_lock(); cpuset_read_lock();
@@ -7620,6 +7651,9 @@ static int __sched_setscheduler(struct task_struct *p, @@ -7620,6 +7646,9 @@ static int __sched_setscheduler(struct task_struct *p,
goto change; goto change;
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
goto change; goto change;
@ -422,7 +422,7 @@ index 5237639786b7..5d6a283a4da9 100644
p->sched_reset_on_fork = reset_on_fork; p->sched_reset_on_fork = reset_on_fork;
retval = 0; retval = 0;
@@ -7708,6 +7742,7 @@ static int __sched_setscheduler(struct task_struct *p, @@ -7708,6 +7737,7 @@ static int __sched_setscheduler(struct task_struct *p,
__setscheduler_params(p, attr); __setscheduler_params(p, attr);
__setscheduler_prio(p, newprio); __setscheduler_prio(p, newprio);
} }
@ -430,7 +430,7 @@ index 5237639786b7..5d6a283a4da9 100644
__setscheduler_uclamp(p, attr); __setscheduler_uclamp(p, attr);
if (queued) { if (queued) {
@@ -7918,6 +7953,9 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a @@ -7918,6 +7948,9 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
size < SCHED_ATTR_SIZE_VER1) size < SCHED_ATTR_SIZE_VER1)
return -EINVAL; return -EINVAL;
@ -440,7 +440,7 @@ index 5237639786b7..5d6a283a4da9 100644
/* /*
* XXX: Do we want to be lenient like existing syscalls; or do we want * XXX: Do we want to be lenient like existing syscalls; or do we want
* to be strict and return an error on out-of-bounds values? * to be strict and return an error on out-of-bounds values?
@@ -8155,6 +8193,8 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, @@ -8155,6 +8188,8 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
get_params(p, &kattr); get_params(p, &kattr);
kattr.sched_flags &= SCHED_FLAG_ALL; kattr.sched_flags &= SCHED_FLAG_ALL;
@ -449,7 +449,7 @@ index 5237639786b7..5d6a283a4da9 100644
#ifdef CONFIG_UCLAMP_TASK #ifdef CONFIG_UCLAMP_TASK
/* /*
* This could race with another potential updater, but this is fine * This could race with another potential updater, but this is fine
@@ -11027,6 +11067,47 @@ static int cpu_idle_write_s64(struct cgroup_subsys_state *css, @@ -11027,6 +11062,25 @@ static int cpu_idle_write_s64(struct cgroup_subsys_state *css,
{ {
return sched_group_set_idle(css_tg(css), idle); return sched_group_set_idle(css_tg(css), idle);
} }
@ -457,47 +457,25 @@ index 5237639786b7..5d6a283a4da9 100644
+static s64 cpu_latency_nice_read_s64(struct cgroup_subsys_state *css, +static s64 cpu_latency_nice_read_s64(struct cgroup_subsys_state *css,
+ struct cftype *cft) + struct cftype *cft)
+{ +{
+ int prio, delta, last_delta = INT_MAX; + return LATENCY_TO_NICE(css_tg(css)->latency_prio);
+ s64 weight;
+
+ weight = css_tg(css)->latency_offset * NICE_LATENCY_WEIGHT_MAX;
+ weight = div_s64(weight, get_sleep_latency(false));
+
+ /* Find the closest nice value to the current weight */
+ for (prio = 0; prio < ARRAY_SIZE(sched_latency_to_weight); prio++) {
+ delta = abs(sched_latency_to_weight[prio] - weight);
+ if (delta >= last_delta)
+ break;
+ last_delta = delta;
+ }
+
+ return LATENCY_TO_NICE(prio-1);
+} +}
+ +
+static int cpu_latency_nice_write_s64(struct cgroup_subsys_state *css, +static int cpu_latency_nice_write_s64(struct cgroup_subsys_state *css,
+ struct cftype *cft, s64 nice) + struct cftype *cft, s64 nice)
+{ +{
+ s64 latency_offset; + int prio;
+ long weight;
+ int idx;
+ +
+ if (nice < MIN_LATENCY_NICE || nice > MAX_LATENCY_NICE) + if (nice < MIN_LATENCY_NICE || nice > MAX_LATENCY_NICE)
+ return -ERANGE; + return -ERANGE;
+ +
+ idx = NICE_TO_LATENCY(nice); + prio = NICE_TO_LATENCY(nice);
+ idx = array_index_nospec(idx, LATENCY_NICE_WIDTH);
+ weight = sched_latency_to_weight[idx];
+ +
+ latency_offset = weight * get_sleep_latency(false); + return sched_group_set_latency(css_tg(css), prio);
+ latency_offset = div_s64(latency_offset, NICE_LATENCY_WEIGHT_MAX);
+
+ return sched_group_set_latency(css_tg(css), latency_offset);
+} +}
+
#endif #endif
static struct cftype cpu_legacy_files[] = { static struct cftype cpu_legacy_files[] = {
@@ -11041,6 +11122,11 @@ static struct cftype cpu_legacy_files[] = { @@ -11041,6 +11095,11 @@ static struct cftype cpu_legacy_files[] = {
.read_s64 = cpu_idle_read_s64, .read_s64 = cpu_idle_read_s64,
.write_s64 = cpu_idle_write_s64, .write_s64 = cpu_idle_write_s64,
}, },
@ -509,7 +487,7 @@ index 5237639786b7..5d6a283a4da9 100644
#endif #endif
#ifdef CONFIG_CFS_BANDWIDTH #ifdef CONFIG_CFS_BANDWIDTH
{ {
@@ -11258,6 +11344,12 @@ static struct cftype cpu_files[] = { @@ -11258,6 +11317,12 @@ static struct cftype cpu_files[] = {
.read_s64 = cpu_idle_read_s64, .read_s64 = cpu_idle_read_s64,
.write_s64 = cpu_idle_write_s64, .write_s64 = cpu_idle_write_s64,
}, },
@ -522,27 +500,6 @@ index 5237639786b7..5d6a283a4da9 100644
#endif #endif
#ifdef CONFIG_CFS_BANDWIDTH #ifdef CONFIG_CFS_BANDWIDTH
{ {
@@ -11368,6 +11460,20 @@ const u32 sched_prio_to_wmult[40] = {
/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
};
+/*
+ * latency weight for wakeup preemption
+ */
+const int sched_latency_to_weight[40] = {
+ /* -20 */ -1024, -973, -922, -870, -819,
+ /* -15 */ -768, -717, -666, -614, -563,
+ /* -10 */ -512, -461, -410, -358, -307,
+ /* -5 */ -256, -205, -154, -102, -51,
+ /* 0 */ 0, 51, 102, 154, 205,
+ /* 5 */ 256, 307, 358, 410, 461,
+ /* 10 */ 512, 563, 614, 666, 717,
+ /* 15 */ 768, 819, 870, 922, 973,
+};
+
void call_trace_sched_update_nr_running(struct rq *rq, int count)
{
trace_sched_update_nr_running_tp(rq, count);
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 8d64fba16cfe..177934290ec4 100644 index 8d64fba16cfe..177934290ec4 100644
--- a/kernel/sched/debug.c --- a/kernel/sched/debug.c
@ -556,10 +513,10 @@ index 8d64fba16cfe..177934290ec4 100644
P(dl.runtime); P(dl.runtime);
P(dl.deadline); P(dl.deadline);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b38a1ce1be49..5ef893ce5734 100644 index b38a1ce1be49..e0a5049f6b80 100644
--- a/kernel/sched/fair.c --- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c +++ b/kernel/sched/fair.c
@@ -698,7 +698,76 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) @@ -698,7 +698,85 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
return __node_2_se(last); return __node_2_se(last);
} }
@ -598,10 +555,12 @@ index b38a1ce1be49..5ef893ce5734 100644
+ return; + return;
+ +
+ /* + /*
+ * An execution time less than sysctl_sched_min_granularity means that + * The entity is always added to the latency list at wakeup.
+ * Then, a non-waking entity that is put back in the list after an
+ * execution time less than sysctl_sched_min_granularity, means that
+ * the entity has been preempted by a higher sched class or an entity + * the entity has been preempted by a higher sched class or an entity
+ * with higher latency constraint. + * with higher latency constraint. In this case, the entity is also put
+ * Put it back in the list so it gets a chance to run 1st during the + * back in the latency list so it gets a chance to run 1st during the
+ * next slice. + * next slice.
+ */ + */
+ if (!(flags & ENQUEUE_WAKEUP)) { + if (!(flags & ENQUEUE_WAKEUP)) {
@ -610,16 +569,23 @@ index b38a1ce1be49..5ef893ce5734 100644
+ if (delta_exec >= sysctl_sched_min_granularity) + if (delta_exec >= sysctl_sched_min_granularity)
+ return; + return;
+ } + }
+
+ rb_add_cached(&se->latency_node, &cfs_rq->latency_timeline, __latency_less); + rb_add_cached(&se->latency_node, &cfs_rq->latency_timeline, __latency_less);
+} +}
+ +
+static void __dequeue_latency(struct cfs_rq *cfs_rq, struct sched_entity *se) +/*
+ * Dequeue an entity from the latency rb-tree and return true if it was really
+ * part of the rb-tree:
+ */
+static bool __dequeue_latency(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{ +{
+ if (!RB_EMPTY_NODE(&se->latency_node)) { + if (!RB_EMPTY_NODE(&se->latency_node)) {
+ rb_erase_cached(&se->latency_node, &cfs_rq->latency_timeline); + rb_erase_cached(&se->latency_node, &cfs_rq->latency_timeline);
+ RB_CLEAR_NODE(&se->latency_node); + RB_CLEAR_NODE(&se->latency_node);
+ return true;
+ } + }
+
+ return false;
+} +}
+ +
+static struct sched_entity *__pick_first_latency(struct cfs_rq *cfs_rq) +static struct sched_entity *__pick_first_latency(struct cfs_rq *cfs_rq)
@ -631,12 +597,12 @@ index b38a1ce1be49..5ef893ce5734 100644
+ +
+ return __latency_node_2_se(left); + return __latency_node_2_se(left);
+} +}
+
+#ifdef CONFIG_SCHED_DEBUG +#ifdef CONFIG_SCHED_DEBUG
/************************************************************** /**************************************************************
* Scheduling class statistics methods: * Scheduling class statistics methods:
*/ */
@@ -4672,33 +4741,17 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) @@ -4672,33 +4750,17 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
u64 vruntime = cfs_rq->min_vruntime; u64 vruntime = cfs_rq->min_vruntime;
u64 sleep_time; u64 sleep_time;
@ -679,7 +645,7 @@ index b38a1ce1be49..5ef893ce5734 100644
/* /*
* Pull vruntime of the entity being placed to the base level of * Pull vruntime of the entity being placed to the base level of
@@ -4792,8 +4845,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) @@ -4792,8 +4854,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
check_schedstat_required(); check_schedstat_required();
update_stats_enqueue_fair(cfs_rq, se, flags); update_stats_enqueue_fair(cfs_rq, se, flags);
check_spread(cfs_rq, se); check_spread(cfs_rq, se);
@ -691,7 +657,7 @@ index b38a1ce1be49..5ef893ce5734 100644
se->on_rq = 1; se->on_rq = 1;
if (cfs_rq->nr_running == 1) { if (cfs_rq->nr_running == 1) {
@@ -4879,8 +4934,10 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) @@ -4879,8 +4943,10 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
clear_buddies(cfs_rq, se); clear_buddies(cfs_rq, se);
@ -703,7 +669,7 @@ index b38a1ce1be49..5ef893ce5734 100644
se->on_rq = 0; se->on_rq = 0;
account_entity_dequeue(cfs_rq, se); account_entity_dequeue(cfs_rq, se);
@@ -4911,6 +4968,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) @@ -4911,6 +4977,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
update_idle_cfs_rq_clock_pelt(cfs_rq); update_idle_cfs_rq_clock_pelt(cfs_rq);
} }
@ -712,7 +678,7 @@ index b38a1ce1be49..5ef893ce5734 100644
/* /*
* Preempt the current task with a newly woken task if needed: * Preempt the current task with a newly woken task if needed:
*/ */
@@ -4919,7 +4978,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) @@ -4919,7 +4987,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{ {
unsigned long ideal_runtime, delta_exec; unsigned long ideal_runtime, delta_exec;
struct sched_entity *se; struct sched_entity *se;
@ -721,7 +687,7 @@ index b38a1ce1be49..5ef893ce5734 100644
/* /*
* When many tasks blow up the sched_period; it is possible that * When many tasks blow up the sched_period; it is possible that
@@ -4950,10 +5009,12 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) @@ -4950,10 +5018,12 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
se = __pick_first_entity(cfs_rq); se = __pick_first_entity(cfs_rq);
delta = curr->vruntime - se->vruntime; delta = curr->vruntime - se->vruntime;
@ -736,7 +702,7 @@ index b38a1ce1be49..5ef893ce5734 100644
resched_curr(rq_of(cfs_rq)); resched_curr(rq_of(cfs_rq));
} }
@@ -4971,6 +5032,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -4971,6 +5041,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
*/ */
update_stats_wait_end_fair(cfs_rq, se); update_stats_wait_end_fair(cfs_rq, se);
__dequeue_entity(cfs_rq, se); __dequeue_entity(cfs_rq, se);
@ -744,7 +710,7 @@ index b38a1ce1be49..5ef893ce5734 100644
update_load_avg(cfs_rq, se, UPDATE_TG); update_load_avg(cfs_rq, se, UPDATE_TG);
} }
@@ -5009,7 +5071,7 @@ static struct sched_entity * @@ -5009,7 +5080,7 @@ static struct sched_entity *
pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{ {
struct sched_entity *left = __pick_first_entity(cfs_rq); struct sched_entity *left = __pick_first_entity(cfs_rq);
@ -753,7 +719,7 @@ index b38a1ce1be49..5ef893ce5734 100644
/* /*
* If curr is set we have to see if its left of the leftmost entity * If curr is set we have to see if its left of the leftmost entity
@@ -5051,6 +5113,12 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) @@ -5051,6 +5122,12 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
se = cfs_rq->last; se = cfs_rq->last;
} }
@ -766,7 +732,7 @@ index b38a1ce1be49..5ef893ce5734 100644
return se; return se;
} }
@@ -5074,6 +5142,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) @@ -5074,6 +5151,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
update_stats_wait_start_fair(cfs_rq, prev); update_stats_wait_start_fair(cfs_rq, prev);
/* Put 'current' back into the tree. */ /* Put 'current' back into the tree. */
__enqueue_entity(cfs_rq, prev); __enqueue_entity(cfs_rq, prev);
@ -774,7 +740,7 @@ index b38a1ce1be49..5ef893ce5734 100644
/* in !on_rq case, update occurred at dequeue */ /* in !on_rq case, update occurred at dequeue */
update_load_avg(cfs_rq, prev, 0); update_load_avg(cfs_rq, prev, 0);
} }
@@ -7735,6 +7804,23 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) @@ -7735,6 +7813,23 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
} }
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
@ -798,7 +764,7 @@ index b38a1ce1be49..5ef893ce5734 100644
static unsigned long wakeup_gran(struct sched_entity *se) static unsigned long wakeup_gran(struct sched_entity *se)
{ {
unsigned long gran = sysctl_sched_wakeup_granularity; unsigned long gran = sysctl_sched_wakeup_granularity;
@@ -7773,11 +7859,24 @@ static int @@ -7773,11 +7868,24 @@ static int
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
{ {
s64 gran, vdiff = curr->vruntime - se->vruntime; s64 gran, vdiff = curr->vruntime - se->vruntime;
@ -825,17 +791,17 @@ index b38a1ce1be49..5ef893ce5734 100644
if (vdiff > gran) if (vdiff > gran)
return 1; return 1;
@@ -11995,6 +12094,9 @@ bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool in_fi) @@ -11995,6 +12103,9 @@ bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool in_fi)
delta = (s64)(sea->vruntime - seb->vruntime) + delta = (s64)(sea->vruntime - seb->vruntime) +
(s64)(cfs_rqb->min_vruntime_fi - cfs_rqa->min_vruntime_fi); (s64)(cfs_rqb->min_vruntime_fi - cfs_rqa->min_vruntime_fi);
+ /* Take into account latency prio */ + /* Take into account latency offset */
+ delta -= wakeup_latency_gran(sea, seb); + delta -= wakeup_latency_gran(sea, seb);
+ +
return delta > 0; return delta > 0;
} }
#else #else
@@ -12265,6 +12367,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first) @@ -12265,6 +12376,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
void init_cfs_rq(struct cfs_rq *cfs_rq) void init_cfs_rq(struct cfs_rq *cfs_rq)
{ {
cfs_rq->tasks_timeline = RB_ROOT_CACHED; cfs_rq->tasks_timeline = RB_ROOT_CACHED;
@ -843,56 +809,60 @@ index b38a1ce1be49..5ef893ce5734 100644
u64_u32_store(cfs_rq->min_vruntime, (u64)(-(1LL << 20))); u64_u32_store(cfs_rq->min_vruntime, (u64)(-(1LL << 20)));
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
raw_spin_lock_init(&cfs_rq->removed.lock); raw_spin_lock_init(&cfs_rq->removed.lock);
@@ -12320,6 +12423,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) @@ -12320,6 +12432,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
goto err; goto err;
tg->shares = NICE_0_LOAD; tg->shares = NICE_0_LOAD;
+ tg->latency_offset = 0; + tg->latency_prio = DEFAULT_LATENCY_PRIO;
init_cfs_bandwidth(tg_cfs_bandwidth(tg)); init_cfs_bandwidth(tg_cfs_bandwidth(tg));
@@ -12418,6 +12522,9 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, @@ -12418,6 +12531,10 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
} }
se->my_q = cfs_rq; se->my_q = cfs_rq;
+ +
+ se->latency_offset = tg->latency_offset; + se->latency_offset = calc_latency_offset(tg->latency_prio);
+ RB_CLEAR_NODE(&se->latency_node);
+ +
/* guarantee group entities always have weight */ /* guarantee group entities always have weight */
update_load_set(&se->load, NICE_0_LOAD); update_load_set(&se->load, NICE_0_LOAD);
se->parent = parent; se->parent = parent;
@@ -12548,6 +12655,42 @@ int sched_group_set_idle(struct task_group *tg, long idle) @@ -12548,6 +12665,45 @@ int sched_group_set_idle(struct task_group *tg, long idle)
return 0; return 0;
} }
+int sched_group_set_latency(struct task_group *tg, s64 latency) +int sched_group_set_latency(struct task_group *tg, int prio)
+{ +{
+ long latency_offset;
+ int i; + int i;
+ +
+ if (tg == &root_task_group) + if (tg == &root_task_group)
+ return -EINVAL; + return -EINVAL;
+ +
+ if (abs(latency) > sysctl_sched_latency)
+ return -EINVAL;
+
+ mutex_lock(&shares_mutex); + mutex_lock(&shares_mutex);
+ +
+ if (tg->latency_offset == latency) { + if (tg->latency_prio == prio) {
+ mutex_unlock(&shares_mutex); + mutex_unlock(&shares_mutex);
+ return 0; + return 0;
+ } + }
+ +
+ tg->latency_offset = latency; + tg->latency_prio = prio;
+ latency_offset = calc_latency_offset(prio);
+ +
+ for_each_possible_cpu(i) { + for_each_possible_cpu(i) {
+ struct sched_entity *se = tg->se[i]; + struct sched_entity *se = tg->se[i];
+ struct rq *rq = cpu_rq(i); + struct rq *rq = cpu_rq(i);
+ struct rq_flags rf; + struct rq_flags rf;
+ bool queued;
+ +
+ rq_lock_irqsave(rq, &rf); + rq_lock_irqsave(rq, &rf);
+ +
+ __dequeue_latency(se->cfs_rq, se); + queued = __dequeue_latency(se->cfs_rq, se);
+ WRITE_ONCE(se->latency_offset, latency); + WRITE_ONCE(se->latency_offset, latency_offset);
+ if (queued)
+ __enqueue_latency(se->cfs_rq, se, ENQUEUE_WAKEUP);
+
+ +
+ rq_unlock_irqrestore(rq, &rf); + rq_unlock_irqrestore(rq, &rf);
+ } + }
@ -905,40 +875,28 @@ index b38a1ce1be49..5ef893ce5734 100644
void free_fair_sched_group(struct task_group *tg) { } void free_fair_sched_group(struct task_group *tg) { }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9e8bb6278604..c47198dbf740 100644 index 9e8bb6278604..a9fedf20c869 100644
--- a/kernel/sched/sched.h --- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h +++ b/kernel/sched/sched.h
@@ -125,6 +125,11 @@ extern int sched_rr_timeslice; @@ -378,6 +378,8 @@ struct task_group {
*/
#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
+/* Maximum nice latency weight used to scale the latency_offset */
+
+#define NICE_LATENCY_SHIFT (SCHED_FIXEDPOINT_SHIFT)
+#define NICE_LATENCY_WEIGHT_MAX (1L << NICE_LATENCY_SHIFT)
+
/*
* Increase resolution of nice-level calculations for 64-bit architectures.
* The extra resolution improves shares distribution and load balancing of
@@ -378,6 +383,8 @@ struct task_group {
/* A positive value indicates that this is a SCHED_IDLE group. */ /* A positive value indicates that this is a SCHED_IDLE group. */
int idle; int idle;
+ /* latency constraint of the group. */ + /* latency priority of the group. */
+ int latency_offset; + int latency_prio;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
/* /*
@@ -488,6 +495,8 @@ extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); @@ -488,6 +490,8 @@ extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
extern int sched_group_set_idle(struct task_group *tg, long idle); extern int sched_group_set_idle(struct task_group *tg, long idle);
+extern int sched_group_set_latency(struct task_group *tg, s64 latency); +extern int sched_group_set_latency(struct task_group *tg, int prio);
+ +
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
extern void set_task_rq_fair(struct sched_entity *se, extern void set_task_rq_fair(struct sched_entity *se,
struct cfs_rq *prev, struct cfs_rq *next); struct cfs_rq *prev, struct cfs_rq *next);
@@ -566,6 +575,7 @@ struct cfs_rq { @@ -566,6 +570,7 @@ struct cfs_rq {
#endif #endif
struct rb_root_cached tasks_timeline; struct rb_root_cached tasks_timeline;
@ -946,15 +904,7 @@ index 9e8bb6278604..c47198dbf740 100644
/* /*
* 'curr' points to currently running entity on this cfs_rq. * 'curr' points to currently running entity on this cfs_rq.
@@ -2123,6 +2133,7 @@ static_assert(WF_TTWU == SD_BALANCE_WAKE); @@ -2461,9 +2466,9 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
extern const int sched_prio_to_weight[40];
extern const u32 sched_prio_to_wmult[40];
+extern const int sched_latency_to_weight[40];
/*
* {de,en}queue flags:
@@ -2461,9 +2472,9 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
extern const_debug unsigned int sysctl_sched_nr_migrate; extern const_debug unsigned int sysctl_sched_nr_migrate;
extern const_debug unsigned int sysctl_sched_migration_cost; extern const_debug unsigned int sysctl_sched_migration_cost;
@ -965,7 +915,7 @@ index 9e8bb6278604..c47198dbf740 100644
extern unsigned int sysctl_sched_idle_min_granularity; extern unsigned int sysctl_sched_idle_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity; extern unsigned int sysctl_sched_wakeup_granularity;
extern int sysctl_resched_latency_warn_ms; extern int sysctl_resched_latency_warn_ms;
@@ -2478,6 +2489,38 @@ extern unsigned int sysctl_numa_balancing_scan_size; @@ -2478,6 +2483,49 @@ extern unsigned int sysctl_numa_balancing_scan_size;
extern unsigned int sysctl_numa_balancing_hot_threshold; extern unsigned int sysctl_numa_balancing_hot_threshold;
#endif #endif
@ -988,6 +938,17 @@ index 9e8bb6278604..c47198dbf740 100644
+ return thresh; + return thresh;
+} +}
+ +
+/*
+ * Calculate the latency offset for a priority level.
+ * We use a linear mapping of the priority in the range:
+ * [-sysctl_sched_latency:sysctl_sched_latency]
+ */
+static inline long calc_latency_offset(int prio)
+{
+ return (long)get_sleep_latency(false) * LATENCY_TO_NICE(prio) /
+ (LATENCY_NICE_WIDTH/2);
+}
+
+static inline unsigned long get_latency_max(void) +static inline unsigned long get_latency_max(void)
+{ +{
+ unsigned long thresh = get_sleep_latency(false); + unsigned long thresh = get_sleep_latency(false);

View File

@ -2,7 +2,7 @@
echo "Pika Kernel - Getting source" echo "Pika Kernel - Getting source"
wget -nv https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.2.tar.gz wget -nv https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.2.1.tar.gz
tar -zxf ./linux-6.2.tar.gz tar -zxf ./linux-6.2.1.tar.gz
cd linux-6.2 cd linux-6.2.1