Update to 6.2.1 and update patches for it

ferrreo 2023-02-25 20:36:40 +00:00
parent 185ea71a08
commit 99e843f61e
3 changed files with 874 additions and 350 deletions

File diff suppressed because it is too large


@@ -1,6 +1,6 @@
From 78440b24f24a021daf660c0bd212c936e50e5f0a Mon Sep 17 00:00:00 2001
From 10300b929dc0a52e458b6bcd9af801f6df967d42 Mon Sep 17 00:00:00 2001
From: Peter Jung <admin@ptr1337.dev>
Date: Fri, 17 Feb 2023 15:38:09 +0100
Date: Fri, 24 Feb 2023 11:16:15 +0100
Subject: [PATCH] Add latency priority for CFS class
This patchset restarts the work about adding a latency priority to describe
@@ -38,8 +38,6 @@ sensitive task (priority < 0) is preempted by high priority task (RT/DL)
or fails to preempt them. This patch ensures that tasks will have at least
a slice of sched_min_granularity in priority at wakeup.
Patch [9] removes useless check after adding a latency rb tree.
I have also backported the patchset on a dragonboard RB3 with an android
mainline kernel based on v5.18 for a quick test. I have used the
TouchLatency app which is part of AOSP and described to be a very good
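For context on the interface this patchset exposes: a task opts in from userspace through the extended sched_attr. The sketch below is illustrative only; the sched_latency_nice field and SCHED_FLAG_LATENCY_NICE flag follow this patchset's uapi additions, and the 0x80 flag value is an assumption here, not taken from this diff.

/* Minimal sketch: request latency_nice = -20 for the calling task.
 * Assumes the sched_latency_nice field and SCHED_FLAG_LATENCY_NICE
 * flag added by this patchset; not a standard glibc interface.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef SCHED_FLAG_LATENCY_NICE
#define SCHED_FLAG_LATENCY_NICE	0x80	/* assumed value */
#endif

struct sched_attr_v2 {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime, sched_deadline, sched_period;
	uint32_t sched_util_min, sched_util_max;
	int32_t  sched_latency_nice;	/* new field from this patchset */
};

int main(void)
{
	struct sched_attr_v2 attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.sched_policy = SCHED_OTHER;
	attr.sched_flags = SCHED_FLAG_LATENCY_NICE;
	attr.sched_latency_nice = -20;	/* most latency sensitive */

	if (syscall(SYS_sched_setattr, 0, &attr, 0))
		perror("sched_setattr");
	return 0;
}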
@@ -69,32 +67,40 @@ reconsider the augmented rbtree once the use of negative latency_nice will
be more widely deployed. For now, the different tests that I have done
have not shown improvements with the augmented rbtree.
Below are some hackbench results:
Below are some hackbench results (from v10):
              2 rbtrees      augmented rbtree       augmented rbtree
                             sorted by vruntime     sorted by wakeup_vruntime
sched pipe
  avg         26311,000      25976,667              25839,556
  stdev       0,15 %         0,28 %                 0,24 %
  vs tip      0,50 %         -0,78 %                -1,31 %
hackbench 1 group
  avg         1,315          1,344                  1,359
  stdev       0,88 %         1,55 %                 1,82 %
  vs tip      -0,47 %        -2,68 %                -3,87 %
hackbench 4 groups
  avg         1,339          1,365                  1,367
  stdev       2,39 %         2,26 %                 3,58 %
  vs tip      -0,08 %        -2,01 %                -2,22 %
hackbench 8 groups
  avg         1,233          1,286                  1,301
  stdev       0,74 %         1,09 %                 1,52 %
  vs tip      0,29 %         -4,05 %                -5,27 %
hackbench 16 groups
  avg         1,268          1,313                  1,319
  stdev       0,85 %         1,60 %                 0,68 %
  vs tip      -0,02 %        -3,56 %                -4,01 %
[1] https://source.android.com/docs/core/debug/eval_perf#touchlatency
Change since v11:
- init latency_node of task group entity
Change since v10:
- remove sched_latency_to_weight array and use a calc_latency_offset() instead
- save latency_prio for the task group instead of the latency offset
- enqueue back an entity when changing the latency nice prio of a task group
Change since v9:
- Rebase
- add tags
@@ -156,13 +162,12 @@ Parth Shah (3):
task
sched: Allow sched_{get,set}attr to change latency_nice of the task
Vincent Guittot (6):
Vincent Guittot (5):
sched/fair: fix unfairness at wakeup
sched/fair: Take into account latency priority at wakeup
sched/fair: Add sched group latency support
sched/core: Support latency priority with sched core
sched/fair: Add latency list
sched/fair: remove check_preempt_from_others
Signed-off-by: Peter Jung <admin@ptr1337.dev>
---
@@ -170,14 +175,14 @@ Signed-off-by: Peter Jung <admin@ptr1337.dev>
include/linux/sched.h | 4 +
include/linux/sched/prio.h | 27 +++
include/uapi/linux/sched.h | 4 +-
include/uapi/linux/sched/types.h | 19 +++
include/uapi/linux/sched/types.h | 19 ++
init/init_task.c | 1 +
kernel/sched/core.c | 106 ++++++++++++
kernel/sched/core.c | 65 +++++++
kernel/sched/debug.c | 1 +
kernel/sched/fair.c | 209 ++++++++++++++++++++----
kernel/sched/sched.h | 45 ++++-
kernel/sched/fair.c | 222 ++++++++++++++++++++----
kernel/sched/sched.h | 50 +++++-
tools/include/uapi/linux/sched.h | 4 +-
11 files changed, 394 insertions(+), 36 deletions(-)
11 files changed, 371 insertions(+), 36 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 74cec76be9f2..2e511d4a4c6a 100644
@@ -343,27 +348,22 @@ index ff6c4b9bfe6b..071deff8dbd1 100644
.cpus_ptr = &init_task.cpus_mask,
.user_cpus_ptr = NULL,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5237639786b7..5d6a283a4da9 100644
index 5237639786b7..e1a9f9898b30 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1283,6 +1283,16 @@ static void set_load_weight(struct task_struct *p, bool update_load)
@@ -1283,6 +1283,11 @@ static void set_load_weight(struct task_struct *p, bool update_load)
}
}
+static void set_latency_offset(struct task_struct *p)
+{
+ long weight = sched_latency_to_weight[p->latency_prio];
+ s64 offset;
+
+ offset = weight * get_sleep_latency(false);
+ offset = div_s64(offset, NICE_LATENCY_WEIGHT_MAX);
+ p->se.latency_offset = (long)offset;
+ p->se.latency_offset = calc_latency_offset(p->latency_prio);
+}
+
#ifdef CONFIG_UCLAMP_TASK
/*
* Serializes updates of utilization clamp values
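The arithmetic behind the new calc_latency_offset() helper, defined later in this patch, is a plain linear map from a latency nice value to a nanosecond offset. A standalone sketch, assuming a sysctl_sched_latency of 24 ms; in reality that is a tunable which also scales with CPU count:

/* Standalone check of the linear latency-nice -> offset mapping:
 * offset = sleep_latency * nice / (LATENCY_NICE_WIDTH / 2).
 * The 24 ms sleep latency is an assumed example value.
 */
#include <stdio.h>

#define LATENCY_NICE_WIDTH	40
#define LATENCY_TO_NICE(prio)	((prio) - (LATENCY_NICE_WIDTH / 2))

/* Mirrors calc_latency_offset(); sleep latency passed in explicitly. */
static long latency_offset(int prio, long sleep_latency_ns)
{
	return sleep_latency_ns * LATENCY_TO_NICE(prio) / (LATENCY_NICE_WIDTH / 2);
}

int main(void)
{
	const long sleep_latency_ns = 24000000L;	/* assumed 24 ms */
	const int prios[] = { 0, 20, 39 };		/* nice -20, 0, +19 */

	/* prints -24000000, 0, 22800000 */
	for (int i = 0; i < 3; i++)
		printf("prio %2d (nice %3d) -> %ld ns\n", prios[i],
		       LATENCY_TO_NICE(prios[i]),
		       latency_offset(prios[i], sleep_latency_ns));
	return 0;
}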
@@ -4432,6 +4442,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
@@ -4432,6 +4437,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->se.dur_avg = 0;
p->se.prev_sleep_sum_runtime = 0;
INIT_LIST_HEAD(&p->se.group_node);
@@ -371,7 +371,7 @@ index 5237639786b7..5d6a283a4da9 100644
#ifdef CONFIG_FAIR_GROUP_SCHED
p->se.cfs_rq = NULL;
@@ -4684,6 +4695,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
@@ -4684,6 +4690,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
p->prio = p->normal_prio = p->static_prio;
set_load_weight(p, false);
@@ -381,7 +381,7 @@ index 5237639786b7..5d6a283a4da9 100644
/*
* We don't need the reset flag anymore after the fork. It has
* fulfilled its duty:
@@ -7444,6 +7458,16 @@ static void __setscheduler_params(struct task_struct *p,
@@ -7444,6 +7453,16 @@ static void __setscheduler_params(struct task_struct *p,
p->rt_priority = attr->sched_priority;
p->normal_prio = normal_prio(p);
set_load_weight(p, true);
@@ -398,7 +398,7 @@ index 5237639786b7..5d6a283a4da9 100644
}
/*
@@ -7586,6 +7610,13 @@ static int __sched_setscheduler(struct task_struct *p,
@@ -7586,6 +7605,13 @@ static int __sched_setscheduler(struct task_struct *p,
return retval;
}
@@ -412,7 +412,7 @@ index 5237639786b7..5d6a283a4da9 100644
if (pi)
cpuset_read_lock();
@@ -7620,6 +7651,9 @@ static int __sched_setscheduler(struct task_struct *p,
@@ -7620,6 +7646,9 @@ static int __sched_setscheduler(struct task_struct *p,
goto change;
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
goto change;
@@ -422,7 +422,7 @@ index 5237639786b7..5d6a283a4da9 100644
p->sched_reset_on_fork = reset_on_fork;
retval = 0;
@@ -7708,6 +7742,7 @@ static int __sched_setscheduler(struct task_struct *p,
@@ -7708,6 +7737,7 @@ static int __sched_setscheduler(struct task_struct *p,
__setscheduler_params(p, attr);
__setscheduler_prio(p, newprio);
}
@@ -430,7 +430,7 @@ index 5237639786b7..5d6a283a4da9 100644
__setscheduler_uclamp(p, attr);
if (queued) {
@@ -7918,6 +7953,9 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
@@ -7918,6 +7948,9 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
size < SCHED_ATTR_SIZE_VER1)
return -EINVAL;
@ -440,7 +440,7 @@ index 5237639786b7..5d6a283a4da9 100644
/*
* XXX: Do we want to be lenient like existing syscalls; or do we want
* to be strict and return an error on out-of-bounds values?
@@ -8155,6 +8193,8 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
@@ -8155,6 +8188,8 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
get_params(p, &kattr);
kattr.sched_flags &= SCHED_FLAG_ALL;
@@ -449,7 +449,7 @@ index 5237639786b7..5d6a283a4da9 100644
#ifdef CONFIG_UCLAMP_TASK
/*
* This could race with another potential updater, but this is fine
@@ -11027,6 +11067,47 @@ static int cpu_idle_write_s64(struct cgroup_subsys_state *css,
@@ -11027,6 +11062,25 @@ static int cpu_idle_write_s64(struct cgroup_subsys_state *css,
{
return sched_group_set_idle(css_tg(css), idle);
}
@@ -457,47 +457,25 @@ index 5237639786b7..5d6a283a4da9 100644
+static s64 cpu_latency_nice_read_s64(struct cgroup_subsys_state *css,
+ struct cftype *cft)
+{
+ int prio, delta, last_delta = INT_MAX;
+ s64 weight;
+
+ weight = css_tg(css)->latency_offset * NICE_LATENCY_WEIGHT_MAX;
+ weight = div_s64(weight, get_sleep_latency(false));
+
+ /* Find the closest nice value to the current weight */
+ for (prio = 0; prio < ARRAY_SIZE(sched_latency_to_weight); prio++) {
+ delta = abs(sched_latency_to_weight[prio] - weight);
+ if (delta >= last_delta)
+ break;
+ last_delta = delta;
+ }
+
+ return LATENCY_TO_NICE(prio-1);
+ return LATENCY_TO_NICE(css_tg(css)->latency_prio);
+}
+
+static int cpu_latency_nice_write_s64(struct cgroup_subsys_state *css,
+ struct cftype *cft, s64 nice)
+{
+ s64 latency_offset;
+ long weight;
+ int idx;
+ int prio;
+
+ if (nice < MIN_LATENCY_NICE || nice > MAX_LATENCY_NICE)
+ return -ERANGE;
+
+ idx = NICE_TO_LATENCY(nice);
+ idx = array_index_nospec(idx, LATENCY_NICE_WIDTH);
+ weight = sched_latency_to_weight[idx];
+ prio = NICE_TO_LATENCY(nice);
+
+ latency_offset = weight * get_sleep_latency(false);
+ latency_offset = div_s64(latency_offset, NICE_LATENCY_WEIGHT_MAX);
+
+ return sched_group_set_latency(css_tg(css), latency_offset);
+ return sched_group_set_latency(css_tg(css), prio);
+}
+
#endif
static struct cftype cpu_legacy_files[] = {
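Usage-wise, the cftype added above surfaces as a cpu.latency.nice file in each CPU controller group. A hedged sketch of writing it; the cgroup mount point and group name are assumptions:

/* Sketch: mark a cgroup latency sensitive via cpu.latency.nice.
 * The /sys/fs/cgroup/demo path is an assumption; the file name comes
 * from the cftype added above. Values outside [-20, 19] get -ERANGE.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/fs/cgroup/demo/cpu.latency.nice";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "-20\n");	/* most latency sensitive */
	fclose(f);
	return 0;
}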
@@ -11041,6 +11122,11 @@ static struct cftype cpu_legacy_files[] = {
@@ -11041,6 +11095,11 @@ static struct cftype cpu_legacy_files[] = {
.read_s64 = cpu_idle_read_s64,
.write_s64 = cpu_idle_write_s64,
},
@@ -509,7 +487,7 @@ index 5237639786b7..5d6a283a4da9 100644
#endif
#ifdef CONFIG_CFS_BANDWIDTH
{
@@ -11258,6 +11344,12 @@ static struct cftype cpu_files[] = {
@@ -11258,6 +11317,12 @@ static struct cftype cpu_files[] = {
.read_s64 = cpu_idle_read_s64,
.write_s64 = cpu_idle_write_s64,
},
@@ -522,27 +500,6 @@ index 5237639786b7..5d6a283a4da9 100644
#endif
#ifdef CONFIG_CFS_BANDWIDTH
{
@@ -11368,6 +11460,20 @@ const u32 sched_prio_to_wmult[40] = {
/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
};
+/*
+ * latency weight for wakeup preemption
+ */
+const int sched_latency_to_weight[40] = {
+ /* -20 */ -1024, -973, -922, -870, -819,
+ /* -15 */ -768, -717, -666, -614, -563,
+ /* -10 */ -512, -461, -410, -358, -307,
+ /* -5 */ -256, -205, -154, -102, -51,
+ /* 0 */ 0, 51, 102, 154, 205,
+ /* 5 */ 256, 307, 358, 410, 461,
+ /* 10 */ 512, 563, 614, 666, 717,
+ /* 15 */ 768, 819, 870, 922, 973,
+};
+
void call_trace_sched_update_nr_running(struct rq *rq, int count)
{
trace_sched_update_nr_running_tp(rq, count);
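The deleted table was itself linear, roughly 1024/20 = 51,2 per nice step, which is why a computed offset can replace it: weight * sleep_latency / 1024 and nice * sleep_latency / 20 agree up to integer rounding. A quick cross-check at a few priorities, with 24 ms of sleep latency assumed:

/* Compare the removed sched_latency_to_weight entries against the new
 * linear mapping; the 24 ms sleep latency is an assumed example value.
 */
#include <stdio.h>

int main(void)
{
	const int weight[] = { -1024, -512, 0, 512, 973 };	/* prio 0, 10, 20, 30, 39 */
	const int nice[]   = { -20, -10, 0, 10, 19 };
	const long sleep_ns = 24000000L;

	for (int i = 0; i < 5; i++)
		printf("nice %3d: old %ld ns, new %ld ns\n", nice[i],
		       sleep_ns * weight[i] / 1024, sleep_ns * nice[i] / 20);
	return 0;
}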
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 8d64fba16cfe..177934290ec4 100644
--- a/kernel/sched/debug.c
@@ -556,10 +513,10 @@ index 8d64fba16cfe..177934290ec4 100644
P(dl.runtime);
P(dl.deadline);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b38a1ce1be49..5ef893ce5734 100644
index b38a1ce1be49..e0a5049f6b80 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -698,7 +698,76 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
@@ -698,7 +698,85 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
return __node_2_se(last);
}
@@ -598,10 +555,12 @@ index b38a1ce1be49..5ef893ce5734 100644
+ return;
+
+ /*
+ * An execution time less than sysctl_sched_min_granularity means that
+ * The entity is always added to the latency list at wakeup.
+ * Then, a not waking up entity that is put back in the list after an
+ * execution time less than sysctl_sched_min_granularity, means that
+ * the entity has been preempted by a higher sched class or an entity
+ * with higher latency constraint.
+ * Put it back in the list so it gets a chance to run 1st during the
+ * with higher latency constraint. In this case, the entity is also put
+ * back in the latency list so it gets a chance to run 1st during the
+ * next slice.
+ */
+ if (!(flags & ENQUEUE_WAKEUP)) {
@@ -610,16 +569,23 @@ index b38a1ce1be49..5ef893ce5734 100644
+ if (delta_exec >= sysctl_sched_min_granularity)
+ return;
+ }
+
+ rb_add_cached(&se->latency_node, &cfs_rq->latency_timeline, __latency_less);
+}
+
+static void __dequeue_latency(struct cfs_rq *cfs_rq, struct sched_entity *se)
+/*
+ * Dequeue an entity from the latency rb-tree and return true if it was really
+ * part of the rb-tree:
+ */
+static bool __dequeue_latency(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ if (!RB_EMPTY_NODE(&se->latency_node)) {
+ rb_erase_cached(&se->latency_node, &cfs_rq->latency_timeline);
+ RB_CLEAR_NODE(&se->latency_node);
+ return true;
+ }
+
+ return false;
+}
+
+static struct sched_entity *__pick_first_latency(struct cfs_rq *cfs_rq)
@@ -631,12 +597,12 @@ index b38a1ce1be49..5ef893ce5734 100644
+
+ return __latency_node_2_se(left);
+}
+
+#ifdef CONFIG_SCHED_DEBUG
/**************************************************************
* Scheduling class statistics methods:
*/
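The latency_timeline added above keeps entities ordered by latency_offset, most negative (most latency sensitive) leftmost, and rb_add_cached() caches that leftmost node so __pick_first_latency() is O(1). A userspace sketch of just the ordering invariant; a sorted array stands in for the kernel rb-tree:

/* Illustrates the __latency_less ordering: the entity with the most
 * negative latency_offset sorts first, mirroring __pick_first_latency().
 * Userspace stand-in only; the kernel uses an rb_root_cached tree.
 */
#include <stdio.h>
#include <stdlib.h>

struct ent { const char *name; long latency_offset; };

static int latency_less(const void *a, const void *b)
{
	const struct ent *ea = a, *eb = b;

	return (ea->latency_offset > eb->latency_offset) -
	       (ea->latency_offset < eb->latency_offset);
}

int main(void)
{
	struct ent e[] = {
		{ "normal", 0 }, { "audio", -24000000L }, { "ui", -12000000L },
	};

	qsort(e, 3, sizeof(e[0]), latency_less);
	printf("picked first: %s\n", e[0].name);	/* audio */
	return 0;
}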
@@ -4672,33 +4741,17 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
@@ -4672,33 +4750,17 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
u64 vruntime = cfs_rq->min_vruntime;
u64 sleep_time;
@@ -679,7 +645,7 @@ index b38a1ce1be49..5ef893ce5734 100644
/*
* Pull vruntime of the entity being placed to the base level of
@@ -4792,8 +4845,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
@@ -4792,8 +4854,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
check_schedstat_required();
update_stats_enqueue_fair(cfs_rq, se, flags);
check_spread(cfs_rq, se);
@@ -691,7 +657,7 @@ index b38a1ce1be49..5ef893ce5734 100644
se->on_rq = 1;
if (cfs_rq->nr_running == 1) {
@@ -4879,8 +4934,10 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
@@ -4879,8 +4943,10 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
clear_buddies(cfs_rq, se);
@@ -703,7 +669,7 @@ index b38a1ce1be49..5ef893ce5734 100644
se->on_rq = 0;
account_entity_dequeue(cfs_rq, se);
@@ -4911,6 +4968,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
@@ -4911,6 +4977,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
update_idle_cfs_rq_clock_pelt(cfs_rq);
}
@@ -712,7 +678,7 @@ index b38a1ce1be49..5ef893ce5734 100644
/*
* Preempt the current task with a newly woken task if needed:
*/
@@ -4919,7 +4978,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
@@ -4919,7 +4987,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{
unsigned long ideal_runtime, delta_exec;
struct sched_entity *se;
@@ -721,7 +687,7 @@ index b38a1ce1be49..5ef893ce5734 100644
/*
* When many tasks blow up the sched_period; it is possible that
@@ -4950,10 +5009,12 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
@@ -4950,10 +5018,12 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
se = __pick_first_entity(cfs_rq);
delta = curr->vruntime - se->vruntime;
@@ -736,7 +702,7 @@ index b38a1ce1be49..5ef893ce5734 100644
resched_curr(rq_of(cfs_rq));
}
@@ -4971,6 +5032,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -4971,6 +5041,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
*/
update_stats_wait_end_fair(cfs_rq, se);
__dequeue_entity(cfs_rq, se);
@@ -744,7 +710,7 @@ index b38a1ce1be49..5ef893ce5734 100644
update_load_avg(cfs_rq, se, UPDATE_TG);
}
@@ -5009,7 +5071,7 @@ static struct sched_entity *
@@ -5009,7 +5080,7 @@ static struct sched_entity *
pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{
struct sched_entity *left = __pick_first_entity(cfs_rq);
@@ -753,7 +719,7 @@ index b38a1ce1be49..5ef893ce5734 100644
/*
* If curr is set we have to see if its left of the leftmost entity
@@ -5051,6 +5113,12 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
@@ -5051,6 +5122,12 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
se = cfs_rq->last;
}
@@ -766,7 +732,7 @@ index b38a1ce1be49..5ef893ce5734 100644
return se;
}
@@ -5074,6 +5142,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
@@ -5074,6 +5151,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
update_stats_wait_start_fair(cfs_rq, prev);
/* Put 'current' back into the tree. */
__enqueue_entity(cfs_rq, prev);
@@ -774,7 +740,7 @@ index b38a1ce1be49..5ef893ce5734 100644
/* in !on_rq case, update occurred at dequeue */
update_load_avg(cfs_rq, prev, 0);
}
@@ -7735,6 +7804,23 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
@@ -7735,6 +7813,23 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
}
#endif /* CONFIG_SMP */
@@ -798,7 +764,7 @@ index b38a1ce1be49..5ef893ce5734 100644
static unsigned long wakeup_gran(struct sched_entity *se)
{
unsigned long gran = sysctl_sched_wakeup_granularity;
@@ -7773,11 +7859,24 @@ static int
@@ -7773,11 +7868,24 @@ static int
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
{
s64 gran, vdiff = curr->vruntime - se->vruntime;
@@ -825,17 +791,17 @@ index b38a1ce1be49..5ef893ce5734 100644
if (vdiff > gran)
return 1;
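Effect of the wakeup_latency_gran() hook on the check above: the waking entity's latency offset is folded into vdiff, so a negative offset widens its preemption window. A worked sketch with illustrative numbers only; the in-kernel version also clamps the granularity against get_latency_max():

/* Worked example of the wakeup check with latency offsets folded in.
 * All numbers are assumed for illustration.
 */
#include <stdio.h>

static long wakeup_latency_gran(long curr_off, long se_off)
{
	long off = se_off;

	/* only evaluate offsets when one side is latency sensitive (< 0) */
	if (se_off < 0 || curr_off < 0)
		off -= curr_off;
	return off;
}

int main(void)
{
	long vdiff = -1000000L;		/* waking task 1 ms behind curr */
	long gran = 4000000L;		/* assumed wakeup granularity */
	long se_off = -24000000L;	/* waking task, latency_nice -20 */
	long curr_off = 0;		/* current task, latency_nice 0 */

	vdiff -= wakeup_latency_gran(curr_off, se_off);
	/* -1 ms + 24 ms = 23 ms > gran -> preempt */
	printf("adjusted vdiff: %ld -> %s\n", vdiff,
	       vdiff > gran ? "preempt" : "no preempt");
	return 0;
}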
@@ -11995,6 +12094,9 @@ bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool in_fi)
@@ -11995,6 +12103,9 @@ bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool in_fi)
delta = (s64)(sea->vruntime - seb->vruntime) +
(s64)(cfs_rqb->min_vruntime_fi - cfs_rqa->min_vruntime_fi);
+ /* Take into account latency prio */
+ /* Take into account latency offset */
+ delta -= wakeup_latency_gran(sea, seb);
+
return delta > 0;
}
#else
@@ -12265,6 +12367,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
@@ -12265,6 +12376,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
void init_cfs_rq(struct cfs_rq *cfs_rq)
{
cfs_rq->tasks_timeline = RB_ROOT_CACHED;
@ -843,56 +809,60 @@ index b38a1ce1be49..5ef893ce5734 100644
u64_u32_store(cfs_rq->min_vruntime, (u64)(-(1LL << 20)));
#ifdef CONFIG_SMP
raw_spin_lock_init(&cfs_rq->removed.lock);
@@ -12320,6 +12423,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
@@ -12320,6 +12432,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
goto err;
tg->shares = NICE_0_LOAD;
+ tg->latency_offset = 0;
+ tg->latency_prio = DEFAULT_LATENCY_PRIO;
init_cfs_bandwidth(tg_cfs_bandwidth(tg));
@@ -12418,6 +12522,9 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
@@ -12418,6 +12531,10 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
}
se->my_q = cfs_rq;
+
+ se->latency_offset = tg->latency_offset;
+ se->latency_offset = calc_latency_offset(tg->latency_prio);
+ RB_CLEAR_NODE(&se->latency_node);
+
/* guarantee group entities always have weight */
update_load_set(&se->load, NICE_0_LOAD);
se->parent = parent;
@@ -12548,6 +12655,42 @@ int sched_group_set_idle(struct task_group *tg, long idle)
@@ -12548,6 +12665,45 @@ int sched_group_set_idle(struct task_group *tg, long idle)
return 0;
}
+int sched_group_set_latency(struct task_group *tg, s64 latency)
+int sched_group_set_latency(struct task_group *tg, int prio)
+{
+ long latency_offset;
+ int i;
+
+ if (tg == &root_task_group)
+ return -EINVAL;
+
+ if (abs(latency) > sysctl_sched_latency)
+ return -EINVAL;
+
+ mutex_lock(&shares_mutex);
+
+ if (tg->latency_offset == latency) {
+ if (tg->latency_prio == prio) {
+ mutex_unlock(&shares_mutex);
+ return 0;
+ }
+
+ tg->latency_offset = latency;
+ tg->latency_prio = prio;
+ latency_offset = calc_latency_offset(prio);
+
+ for_each_possible_cpu(i) {
+ struct sched_entity *se = tg->se[i];
+ struct rq *rq = cpu_rq(i);
+ struct rq_flags rf;
+ bool queued;
+
+ rq_lock_irqsave(rq, &rf);
+
+ __dequeue_latency(se->cfs_rq, se);
+ WRITE_ONCE(se->latency_offset, latency);
+ queued = __dequeue_latency(se->cfs_rq, se);
+ WRITE_ONCE(se->latency_offset, latency_offset);
+ if (queued)
+ __enqueue_latency(se->cfs_rq, se, ENQUEUE_WAKEUP);
+
+
+ rq_unlock_irqrestore(rq, &rf);
+ }
@ -905,40 +875,28 @@ index b38a1ce1be49..5ef893ce5734 100644
void free_fair_sched_group(struct task_group *tg) { }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9e8bb6278604..c47198dbf740 100644
index 9e8bb6278604..a9fedf20c869 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -125,6 +125,11 @@ extern int sched_rr_timeslice;
*/
#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
+/* Maximum nice latency weight used to scale the latency_offset */
+
+#define NICE_LATENCY_SHIFT (SCHED_FIXEDPOINT_SHIFT)
+#define NICE_LATENCY_WEIGHT_MAX (1L << NICE_LATENCY_SHIFT)
+
/*
* Increase resolution of nice-level calculations for 64-bit architectures.
* The extra resolution improves shares distribution and load balancing of
@@ -378,6 +383,8 @@ struct task_group {
@@ -378,6 +378,8 @@ struct task_group {
/* A positive value indicates that this is a SCHED_IDLE group. */
int idle;
+ /* latency constraint of the group. */
+ int latency_offset;
+ /* latency priority of the group. */
+ int latency_prio;
#ifdef CONFIG_SMP
/*
@@ -488,6 +495,8 @@ extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
@@ -488,6 +490,8 @@ extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
extern int sched_group_set_idle(struct task_group *tg, long idle);
+extern int sched_group_set_latency(struct task_group *tg, s64 latency);
+extern int sched_group_set_latency(struct task_group *tg, int prio);
+
#ifdef CONFIG_SMP
extern void set_task_rq_fair(struct sched_entity *se,
struct cfs_rq *prev, struct cfs_rq *next);
@@ -566,6 +575,7 @@ struct cfs_rq {
@@ -566,6 +570,7 @@ struct cfs_rq {
#endif
struct rb_root_cached tasks_timeline;
@ -946,15 +904,7 @@ index 9e8bb6278604..c47198dbf740 100644
/*
* 'curr' points to currently running entity on this cfs_rq.
@@ -2123,6 +2133,7 @@ static_assert(WF_TTWU == SD_BALANCE_WAKE);
extern const int sched_prio_to_weight[40];
extern const u32 sched_prio_to_wmult[40];
+extern const int sched_latency_to_weight[40];
/*
* {de,en}queue flags:
@@ -2461,9 +2472,9 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
@@ -2461,9 +2466,9 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
extern const_debug unsigned int sysctl_sched_nr_migrate;
extern const_debug unsigned int sysctl_sched_migration_cost;
@ -965,7 +915,7 @@ index 9e8bb6278604..c47198dbf740 100644
extern unsigned int sysctl_sched_idle_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern int sysctl_resched_latency_warn_ms;
@@ -2478,6 +2489,38 @@ extern unsigned int sysctl_numa_balancing_scan_size;
@@ -2478,6 +2483,49 @@ extern unsigned int sysctl_numa_balancing_scan_size;
extern unsigned int sysctl_numa_balancing_hot_threshold;
#endif
@ -988,6 +938,17 @@ index 9e8bb6278604..c47198dbf740 100644
+static inline unsigned long get_sleep_latency(bool idle)
+{
+	unsigned long thresh;
+
+	if (idle)
+		thresh = sysctl_sched_min_granularity;
+	else
+		thresh = sysctl_sched_latency;
+
+	return thresh;
+}
+
+/*
+ * Calculate the latency offset for a priority level.
+ * We use a linear mapping of the priority in the range:
+ * [-sysctl_sched_latency:sysctl_sched_latency]
+ */
+static inline long calc_latency_offset(int prio)
+{
+ return (long)get_sleep_latency(false) * LATENCY_TO_NICE(prio) /
+ (LATENCY_NICE_WIDTH/2);
+}
+
+static inline unsigned long get_latency_max(void)
+{
+ unsigned long thresh = get_sleep_latency(false);


@@ -2,7 +2,7 @@
echo "Pika Kernel - Getting source"
wget -nv https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.2.tar.gz
tar -zxf ./linux-6.2.tar.gz
wget -nv https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.2.1.tar.gz
tar -zxf ./linux-6.2.1.tar.gz
cd linux-6.2
cd linux-6.2.1