Update to 6.2.1 and update patches for it
This commit is contained in:
parent
185ea71a08
commit
99e843f61e
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
From 78440b24f24a021daf660c0bd212c936e50e5f0a Mon Sep 17 00:00:00 2001
|
||||
From 10300b929dc0a52e458b6bcd9af801f6df967d42 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Jung <admin@ptr1337.dev>
|
||||
Date: Fri, 17 Feb 2023 15:38:09 +0100
|
||||
Date: Fri, 24 Feb 2023 11:16:15 +0100
|
||||
Subject: [PATCH] Add latency priority for CFS class
|
||||
|
||||
This patchset restarts the work about adding a latency priority to describe
|
||||
@ -38,8 +38,6 @@ sensitive task (priority < 0) is preempted by high priority task (RT/DL)
|
||||
or fails to preempt them. This patch ensures that tasks will have at least
|
||||
a slice of sched_min_granularity in priority at wakeup.
|
||||
|
||||
Patch [9] removes useless check after adding a latency rb tree.
|
||||
|
||||
I have also backported the patchset on a dragonboard RB3 with an android
|
||||
mainline kernel based on v5.18 for a quick test. I have used the
|
||||
TouchLatency app which is part of AOSP and described to be a very good
|
||||
@ -69,7 +67,7 @@ reconsider the augmented rbtree once the use of negative latency_nice will
|
||||
be more widely deployed. For now, the different tests that I have done,
|
||||
have not shown improvements with augmented rbtree.
|
||||
|
||||
Below are some hackbench results:
|
||||
Below are some hackbench results (from v10):
|
||||
2 rbtrees augmented rbtree augmented rbtree
|
||||
sorted by vruntime sorted by wakeup_vruntime
|
||||
sched pipe
|
||||
@ -95,6 +93,14 @@ vs tip -0,02 % -3,56 % -4,01 %
|
||||
|
||||
[1] https://source.android.com/docs/core/debug/eval_perf#touchlatency
|
||||
|
||||
Change since v11:
|
||||
- init latency_node of task group entity
|
||||
|
||||
Change since v10:
|
||||
- remove sched_latency_to_weight array and use a calc_latency_offset() instead
|
||||
- save latency_prio for task group instead of latency offset
|
||||
- enqueue back an entity when changing the latency nice prio of a task group
|
||||
|
||||
Change since v9:
|
||||
- Rebase
|
||||
- add tags
|
||||
@ -156,13 +162,12 @@ Parth Shah (3):
|
||||
task
|
||||
sched: Allow sched_{get,set}attr to change latency_nice of the task
|
||||
|
||||
Vincent Guittot (6):
|
||||
Vincent Guittot (5):
|
||||
sched/fair: fix unfairness at wakeup
|
||||
sched/fair: Take into account latency priority at wakeup
|
||||
sched/fair: Add sched group latency support
|
||||
sched/core: Support latency priority with sched core
|
||||
sched/fair: Add latency list
|
||||
sched/fair: remove check_preempt_from_others
|
||||
|
||||
Signed-off-by: Peter Jung <admin@ptr1337.dev>
|
||||
---
|
||||
@ -170,14 +175,14 @@ Signed-off-by: Peter Jung <admin@ptr1337.dev>
|
||||
include/linux/sched.h | 4 +
|
||||
include/linux/sched/prio.h | 27 +++
|
||||
include/uapi/linux/sched.h | 4 +-
|
||||
include/uapi/linux/sched/types.h | 19 +++
|
||||
include/uapi/linux/sched/types.h | 19 ++
|
||||
init/init_task.c | 1 +
|
||||
kernel/sched/core.c | 106 ++++++++++++
|
||||
kernel/sched/core.c | 65 +++++++
|
||||
kernel/sched/debug.c | 1 +
|
||||
kernel/sched/fair.c | 209 ++++++++++++++++++++----
|
||||
kernel/sched/sched.h | 45 ++++-
|
||||
kernel/sched/fair.c | 222 ++++++++++++++++++++----
|
||||
kernel/sched/sched.h | 50 +++++-
|
||||
tools/include/uapi/linux/sched.h | 4 +-
|
||||
11 files changed, 394 insertions(+), 36 deletions(-)
|
||||
11 files changed, 371 insertions(+), 36 deletions(-)
|
||||
|
||||
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
|
||||
index 74cec76be9f2..2e511d4a4c6a 100644
|
||||
@ -343,27 +348,22 @@ index ff6c4b9bfe6b..071deff8dbd1 100644
|
||||
.cpus_ptr = &init_task.cpus_mask,
|
||||
.user_cpus_ptr = NULL,
|
||||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
|
||||
index 5237639786b7..5d6a283a4da9 100644
|
||||
index 5237639786b7..e1a9f9898b30 100644
|
||||
--- a/kernel/sched/core.c
|
||||
+++ b/kernel/sched/core.c
|
||||
@@ -1283,6 +1283,16 @@ static void set_load_weight(struct task_struct *p, bool update_load)
|
||||
@@ -1283,6 +1283,11 @@ static void set_load_weight(struct task_struct *p, bool update_load)
|
||||
}
|
||||
}
|
||||
|
||||
+static void set_latency_offset(struct task_struct *p)
|
||||
+{
|
||||
+ long weight = sched_latency_to_weight[p->latency_prio];
|
||||
+ s64 offset;
|
||||
+
|
||||
+ offset = weight * get_sleep_latency(false);
|
||||
+ offset = div_s64(offset, NICE_LATENCY_WEIGHT_MAX);
|
||||
+ p->se.latency_offset = (long)offset;
|
||||
+ p->se.latency_offset = calc_latency_offset(p->latency_prio);
|
||||
+}
|
||||
+
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
/*
|
||||
* Serializes updates of utilization clamp values
|
||||
@@ -4432,6 +4442,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
|
||||
@@ -4432,6 +4437,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
|
||||
p->se.dur_avg = 0;
|
||||
p->se.prev_sleep_sum_runtime = 0;
|
||||
INIT_LIST_HEAD(&p->se.group_node);
|
||||
@ -371,7 +371,7 @@ index 5237639786b7..5d6a283a4da9 100644
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
p->se.cfs_rq = NULL;
|
||||
@@ -4684,6 +4695,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
|
||||
@@ -4684,6 +4690,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
|
||||
p->prio = p->normal_prio = p->static_prio;
|
||||
set_load_weight(p, false);
|
||||
|
||||
@ -381,7 +381,7 @@ index 5237639786b7..5d6a283a4da9 100644
|
||||
/*
|
||||
* We don't need the reset flag anymore after the fork. It has
|
||||
* fulfilled its duty:
|
||||
@@ -7444,6 +7458,16 @@ static void __setscheduler_params(struct task_struct *p,
|
||||
@@ -7444,6 +7453,16 @@ static void __setscheduler_params(struct task_struct *p,
|
||||
p->rt_priority = attr->sched_priority;
|
||||
p->normal_prio = normal_prio(p);
|
||||
set_load_weight(p, true);
|
||||
@ -398,7 +398,7 @@ index 5237639786b7..5d6a283a4da9 100644
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -7586,6 +7610,13 @@ static int __sched_setscheduler(struct task_struct *p,
|
||||
@@ -7586,6 +7605,13 @@ static int __sched_setscheduler(struct task_struct *p,
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -412,7 +412,7 @@ index 5237639786b7..5d6a283a4da9 100644
|
||||
if (pi)
|
||||
cpuset_read_lock();
|
||||
|
||||
@@ -7620,6 +7651,9 @@ static int __sched_setscheduler(struct task_struct *p,
|
||||
@@ -7620,6 +7646,9 @@ static int __sched_setscheduler(struct task_struct *p,
|
||||
goto change;
|
||||
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
|
||||
goto change;
|
||||
@ -422,7 +422,7 @@ index 5237639786b7..5d6a283a4da9 100644
|
||||
|
||||
p->sched_reset_on_fork = reset_on_fork;
|
||||
retval = 0;
|
||||
@@ -7708,6 +7742,7 @@ static int __sched_setscheduler(struct task_struct *p,
|
||||
@@ -7708,6 +7737,7 @@ static int __sched_setscheduler(struct task_struct *p,
|
||||
__setscheduler_params(p, attr);
|
||||
__setscheduler_prio(p, newprio);
|
||||
}
|
||||
@ -430,7 +430,7 @@ index 5237639786b7..5d6a283a4da9 100644
|
||||
__setscheduler_uclamp(p, attr);
|
||||
|
||||
if (queued) {
|
||||
@@ -7918,6 +7953,9 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
|
||||
@@ -7918,6 +7948,9 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
|
||||
size < SCHED_ATTR_SIZE_VER1)
|
||||
return -EINVAL;
|
||||
|
||||
@ -440,7 +440,7 @@ index 5237639786b7..5d6a283a4da9 100644
|
||||
/*
|
||||
* XXX: Do we want to be lenient like existing syscalls; or do we want
|
||||
* to be strict and return an error on out-of-bounds values?
|
||||
@@ -8155,6 +8193,8 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
|
||||
@@ -8155,6 +8188,8 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
|
||||
get_params(p, &kattr);
|
||||
kattr.sched_flags &= SCHED_FLAG_ALL;
|
||||
|
||||
@ -449,7 +449,7 @@ index 5237639786b7..5d6a283a4da9 100644
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
/*
|
||||
* This could race with another potential updater, but this is fine
|
||||
@@ -11027,6 +11067,47 @@ static int cpu_idle_write_s64(struct cgroup_subsys_state *css,
|
||||
@@ -11027,6 +11062,25 @@ static int cpu_idle_write_s64(struct cgroup_subsys_state *css,
|
||||
{
|
||||
return sched_group_set_idle(css_tg(css), idle);
|
||||
}
|
||||
@ -457,47 +457,25 @@ index 5237639786b7..5d6a283a4da9 100644
|
||||
+static s64 cpu_latency_nice_read_s64(struct cgroup_subsys_state *css,
|
||||
+ struct cftype *cft)
|
||||
+{
|
||||
+ int prio, delta, last_delta = INT_MAX;
|
||||
+ s64 weight;
|
||||
+
|
||||
+ weight = css_tg(css)->latency_offset * NICE_LATENCY_WEIGHT_MAX;
|
||||
+ weight = div_s64(weight, get_sleep_latency(false));
|
||||
+
|
||||
+ /* Find the closest nice value to the current weight */
|
||||
+ for (prio = 0; prio < ARRAY_SIZE(sched_latency_to_weight); prio++) {
|
||||
+ delta = abs(sched_latency_to_weight[prio] - weight);
|
||||
+ if (delta >= last_delta)
|
||||
+ break;
|
||||
+ last_delta = delta;
|
||||
+ }
|
||||
+
|
||||
+ return LATENCY_TO_NICE(prio-1);
|
||||
+ return LATENCY_TO_NICE(css_tg(css)->latency_prio);
|
||||
+}
|
||||
+
|
||||
+static int cpu_latency_nice_write_s64(struct cgroup_subsys_state *css,
|
||||
+ struct cftype *cft, s64 nice)
|
||||
+{
|
||||
+ s64 latency_offset;
|
||||
+ long weight;
|
||||
+ int idx;
|
||||
+ int prio;
|
||||
+
|
||||
+ if (nice < MIN_LATENCY_NICE || nice > MAX_LATENCY_NICE)
|
||||
+ return -ERANGE;
|
||||
+
|
||||
+ idx = NICE_TO_LATENCY(nice);
|
||||
+ idx = array_index_nospec(idx, LATENCY_NICE_WIDTH);
|
||||
+ weight = sched_latency_to_weight[idx];
|
||||
+ prio = NICE_TO_LATENCY(nice);
|
||||
+
|
||||
+ latency_offset = weight * get_sleep_latency(false);
|
||||
+ latency_offset = div_s64(latency_offset, NICE_LATENCY_WEIGHT_MAX);
|
||||
+
|
||||
+ return sched_group_set_latency(css_tg(css), latency_offset);
|
||||
+ return sched_group_set_latency(css_tg(css), prio);
|
||||
+}
|
||||
+
|
||||
#endif
|
||||
|
||||
static struct cftype cpu_legacy_files[] = {
|
||||
@@ -11041,6 +11122,11 @@ static struct cftype cpu_legacy_files[] = {
|
||||
@@ -11041,6 +11095,11 @@ static struct cftype cpu_legacy_files[] = {
|
||||
.read_s64 = cpu_idle_read_s64,
|
||||
.write_s64 = cpu_idle_write_s64,
|
||||
},
|
||||
@ -509,7 +487,7 @@ index 5237639786b7..5d6a283a4da9 100644
|
||||
#endif
|
||||
#ifdef CONFIG_CFS_BANDWIDTH
|
||||
{
|
||||
@@ -11258,6 +11344,12 @@ static struct cftype cpu_files[] = {
|
||||
@@ -11258,6 +11317,12 @@ static struct cftype cpu_files[] = {
|
||||
.read_s64 = cpu_idle_read_s64,
|
||||
.write_s64 = cpu_idle_write_s64,
|
||||
},
|
||||
@ -522,27 +500,6 @@ index 5237639786b7..5d6a283a4da9 100644
|
||||
#endif
|
||||
#ifdef CONFIG_CFS_BANDWIDTH
|
||||
{
|
||||
@@ -11368,6 +11460,20 @@ const u32 sched_prio_to_wmult[40] = {
|
||||
/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
|
||||
};
|
||||
|
||||
+/*
|
||||
+ * latency weight for wakeup preemption
|
||||
+ */
|
||||
+const int sched_latency_to_weight[40] = {
|
||||
+ /* -20 */ -1024, -973, -922, -870, -819,
|
||||
+ /* -15 */ -768, -717, -666, -614, -563,
|
||||
+ /* -10 */ -512, -461, -410, -358, -307,
|
||||
+ /* -5 */ -256, -205, -154, -102, -51,
|
||||
+ /* 0 */ 0, 51, 102, 154, 205,
|
||||
+ /* 5 */ 256, 307, 358, 410, 461,
|
||||
+ /* 10 */ 512, 563, 614, 666, 717,
|
||||
+ /* 15 */ 768, 819, 870, 922, 973,
|
||||
+};
|
||||
+
|
||||
void call_trace_sched_update_nr_running(struct rq *rq, int count)
|
||||
{
|
||||
trace_sched_update_nr_running_tp(rq, count);
|
||||
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
|
||||
index 8d64fba16cfe..177934290ec4 100644
|
||||
--- a/kernel/sched/debug.c
|
||||
@ -556,10 +513,10 @@ index 8d64fba16cfe..177934290ec4 100644
|
||||
P(dl.runtime);
|
||||
P(dl.deadline);
|
||||
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
||||
index b38a1ce1be49..5ef893ce5734 100644
|
||||
index b38a1ce1be49..e0a5049f6b80 100644
|
||||
--- a/kernel/sched/fair.c
|
||||
+++ b/kernel/sched/fair.c
|
||||
@@ -698,7 +698,76 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
|
||||
@@ -698,7 +698,85 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
|
||||
|
||||
return __node_2_se(last);
|
||||
}
|
||||
@ -598,10 +555,12 @@ index b38a1ce1be49..5ef893ce5734 100644
|
||||
+ return;
|
||||
+
|
||||
+ /*
|
||||
+ * An execution time less than sysctl_sched_min_granularity means that
|
||||
+ * The entity is always added to the latency list at wakeup.
|
||||
+ * Then, a not waking up entity that is put back in the list after an
|
||||
+ * execution time less than sysctl_sched_min_granularity, means that
|
||||
+ * the entity has been preempted by a higher sched class or an entity
|
||||
+ * with higher latency constraint.
|
||||
+ * Put it back in the list so it gets a chance to run 1st during the
|
||||
+ * with higher latency constraint. In this case, the entity is also put
|
||||
+ * back in the latency list so it gets a chance to run 1st during the
|
||||
+ * next slice.
|
||||
+ */
|
||||
+ if (!(flags & ENQUEUE_WAKEUP)) {
|
||||
@ -610,16 +569,23 @@ index b38a1ce1be49..5ef893ce5734 100644
|
||||
+ if (delta_exec >= sysctl_sched_min_granularity)
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
|
||||
+ rb_add_cached(&se->latency_node, &cfs_rq->latency_timeline, __latency_less);
|
||||
+}
|
||||
+
|
||||
+static void __dequeue_latency(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
+/*
|
||||
+ * Dequeue an entity from the latency rb-tree and return true if it was really
|
||||
+ * part of the rb-tree:
|
||||
+ */
|
||||
+static bool __dequeue_latency(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
+{
|
||||
+ if (!RB_EMPTY_NODE(&se->latency_node)) {
|
||||
+ rb_erase_cached(&se->latency_node, &cfs_rq->latency_timeline);
|
||||
+ RB_CLEAR_NODE(&se->latency_node);
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static struct sched_entity *__pick_first_latency(struct cfs_rq *cfs_rq)
|
||||
@ -631,12 +597,12 @@ index b38a1ce1be49..5ef893ce5734 100644
|
||||
+
|
||||
+ return __latency_node_2_se(left);
|
||||
+}
|
||||
|
||||
+
|
||||
+#ifdef CONFIG_SCHED_DEBUG
|
||||
/**************************************************************
|
||||
* Scheduling class statistics methods:
|
||||
*/
|
||||
@@ -4672,33 +4741,17 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
|
||||
@@ -4672,33 +4750,17 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
|
||||
u64 vruntime = cfs_rq->min_vruntime;
|
||||
u64 sleep_time;
|
||||
|
||||
@ -679,7 +645,7 @@ index b38a1ce1be49..5ef893ce5734 100644
|
||||
|
||||
/*
|
||||
* Pull vruntime of the entity being placed to the base level of
|
||||
@@ -4792,8 +4845,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
@@ -4792,8 +4854,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
check_schedstat_required();
|
||||
update_stats_enqueue_fair(cfs_rq, se, flags);
|
||||
check_spread(cfs_rq, se);
|
||||
@ -691,7 +657,7 @@ index b38a1ce1be49..5ef893ce5734 100644
|
||||
se->on_rq = 1;
|
||||
|
||||
if (cfs_rq->nr_running == 1) {
|
||||
@@ -4879,8 +4934,10 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
@@ -4879,8 +4943,10 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
|
||||
clear_buddies(cfs_rq, se);
|
||||
|
||||
@ -703,7 +669,7 @@ index b38a1ce1be49..5ef893ce5734 100644
|
||||
se->on_rq = 0;
|
||||
account_entity_dequeue(cfs_rq, se);
|
||||
|
||||
@@ -4911,6 +4968,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
@@ -4911,6 +4977,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
update_idle_cfs_rq_clock_pelt(cfs_rq);
|
||||
}
|
||||
|
||||
@ -712,7 +678,7 @@ index b38a1ce1be49..5ef893ce5734 100644
|
||||
/*
|
||||
* Preempt the current task with a newly woken task if needed:
|
||||
*/
|
||||
@@ -4919,7 +4978,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
|
||||
@@ -4919,7 +4987,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
|
||||
{
|
||||
unsigned long ideal_runtime, delta_exec;
|
||||
struct sched_entity *se;
|
||||
@ -721,7 +687,7 @@ index b38a1ce1be49..5ef893ce5734 100644
|
||||
|
||||
/*
|
||||
* When many tasks blow up the sched_period; it is possible that
|
||||
@@ -4950,10 +5009,12 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
|
||||
@@ -4950,10 +5018,12 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
|
||||
se = __pick_first_entity(cfs_rq);
|
||||
delta = curr->vruntime - se->vruntime;
|
||||
|
||||
@ -736,7 +702,7 @@ index b38a1ce1be49..5ef893ce5734 100644
|
||||
resched_curr(rq_of(cfs_rq));
|
||||
}
|
||||
|
||||
@@ -4971,6 +5032,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
@@ -4971,6 +5041,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
*/
|
||||
update_stats_wait_end_fair(cfs_rq, se);
|
||||
__dequeue_entity(cfs_rq, se);
|
||||
@ -744,7 +710,7 @@ index b38a1ce1be49..5ef893ce5734 100644
|
||||
update_load_avg(cfs_rq, se, UPDATE_TG);
|
||||
}
|
||||
|
||||
@@ -5009,7 +5071,7 @@ static struct sched_entity *
|
||||
@@ -5009,7 +5080,7 @@ static struct sched_entity *
|
||||
pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
|
||||
{
|
||||
struct sched_entity *left = __pick_first_entity(cfs_rq);
|
||||
@ -753,7 +719,7 @@ index b38a1ce1be49..5ef893ce5734 100644
|
||||
|
||||
/*
|
||||
* If curr is set we have to see if its left of the leftmost entity
|
||||
@@ -5051,6 +5113,12 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
|
||||
@@ -5051,6 +5122,12 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
|
||||
se = cfs_rq->last;
|
||||
}
|
||||
|
||||
@ -766,7 +732,7 @@ index b38a1ce1be49..5ef893ce5734 100644
|
||||
return se;
|
||||
}
|
||||
|
||||
@@ -5074,6 +5142,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
|
||||
@@ -5074,6 +5151,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
|
||||
update_stats_wait_start_fair(cfs_rq, prev);
|
||||
/* Put 'current' back into the tree. */
|
||||
__enqueue_entity(cfs_rq, prev);
|
||||
@ -774,7 +740,7 @@ index b38a1ce1be49..5ef893ce5734 100644
|
||||
/* in !on_rq case, update occurred at dequeue */
|
||||
update_load_avg(cfs_rq, prev, 0);
|
||||
}
|
||||
@@ -7735,6 +7804,23 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
|
||||
@@ -7735,6 +7813,23 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
@ -798,7 +764,7 @@ index b38a1ce1be49..5ef893ce5734 100644
|
||||
static unsigned long wakeup_gran(struct sched_entity *se)
|
||||
{
|
||||
unsigned long gran = sysctl_sched_wakeup_granularity;
|
||||
@@ -7773,11 +7859,24 @@ static int
|
||||
@@ -7773,11 +7868,24 @@ static int
|
||||
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
|
||||
{
|
||||
s64 gran, vdiff = curr->vruntime - se->vruntime;
|
||||
@ -825,17 +791,17 @@ index b38a1ce1be49..5ef893ce5734 100644
|
||||
if (vdiff > gran)
|
||||
return 1;
|
||||
|
||||
@@ -11995,6 +12094,9 @@ bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool in_fi)
|
||||
@@ -11995,6 +12103,9 @@ bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool in_fi)
|
||||
delta = (s64)(sea->vruntime - seb->vruntime) +
|
||||
(s64)(cfs_rqb->min_vruntime_fi - cfs_rqa->min_vruntime_fi);
|
||||
|
||||
+ /* Take into account latency prio */
|
||||
+ /* Take into account latency offset */
|
||||
+ delta -= wakeup_latency_gran(sea, seb);
|
||||
+
|
||||
return delta > 0;
|
||||
}
|
||||
#else
|
||||
@@ -12265,6 +12367,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
|
||||
@@ -12265,6 +12376,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
|
||||
void init_cfs_rq(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
cfs_rq->tasks_timeline = RB_ROOT_CACHED;
|
||||
@ -843,56 +809,60 @@ index b38a1ce1be49..5ef893ce5734 100644
|
||||
u64_u32_store(cfs_rq->min_vruntime, (u64)(-(1LL << 20)));
|
||||
#ifdef CONFIG_SMP
|
||||
raw_spin_lock_init(&cfs_rq->removed.lock);
|
||||
@@ -12320,6 +12423,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
|
||||
@@ -12320,6 +12432,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
|
||||
goto err;
|
||||
|
||||
tg->shares = NICE_0_LOAD;
|
||||
+ tg->latency_offset = 0;
|
||||
+ tg->latency_prio = DEFAULT_LATENCY_PRIO;
|
||||
|
||||
init_cfs_bandwidth(tg_cfs_bandwidth(tg));
|
||||
|
||||
@@ -12418,6 +12522,9 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
|
||||
@@ -12418,6 +12531,10 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
|
||||
}
|
||||
|
||||
se->my_q = cfs_rq;
|
||||
+
|
||||
+ se->latency_offset = tg->latency_offset;
|
||||
+ se->latency_offset = calc_latency_offset(tg->latency_prio);
|
||||
+ RB_CLEAR_NODE(&se->latency_node);
|
||||
+
|
||||
/* guarantee group entities always have weight */
|
||||
update_load_set(&se->load, NICE_0_LOAD);
|
||||
se->parent = parent;
|
||||
@@ -12548,6 +12655,42 @@ int sched_group_set_idle(struct task_group *tg, long idle)
|
||||
@@ -12548,6 +12665,45 @@ int sched_group_set_idle(struct task_group *tg, long idle)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+int sched_group_set_latency(struct task_group *tg, s64 latency)
|
||||
+int sched_group_set_latency(struct task_group *tg, int prio)
|
||||
+{
|
||||
+ long latency_offset;
|
||||
+ int i;
|
||||
+
|
||||
+ if (tg == &root_task_group)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ if (abs(latency) > sysctl_sched_latency)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ mutex_lock(&shares_mutex);
|
||||
+
|
||||
+ if (tg->latency_offset == latency) {
|
||||
+ if (tg->latency_prio == prio) {
|
||||
+ mutex_unlock(&shares_mutex);
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ tg->latency_offset = latency;
|
||||
+ tg->latency_prio = prio;
|
||||
+ latency_offset = calc_latency_offset(prio);
|
||||
+
|
||||
+ for_each_possible_cpu(i) {
|
||||
+ struct sched_entity *se = tg->se[i];
|
||||
+ struct rq *rq = cpu_rq(i);
|
||||
+ struct rq_flags rf;
|
||||
+ bool queued;
|
||||
+
|
||||
+ rq_lock_irqsave(rq, &rf);
|
||||
+
|
||||
+ __dequeue_latency(se->cfs_rq, se);
|
||||
+ WRITE_ONCE(se->latency_offset, latency);
|
||||
+ queued = __dequeue_latency(se->cfs_rq, se);
|
||||
+ WRITE_ONCE(se->latency_offset, latency_offset);
|
||||
+ if (queued)
|
||||
+ __enqueue_latency(se->cfs_rq, se, ENQUEUE_WAKEUP);
|
||||
+
|
||||
+
|
||||
+ rq_unlock_irqrestore(rq, &rf);
|
||||
+ }
|
||||
@ -905,40 +875,28 @@ index b38a1ce1be49..5ef893ce5734 100644
|
||||
|
||||
void free_fair_sched_group(struct task_group *tg) { }
|
||||
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
|
||||
index 9e8bb6278604..c47198dbf740 100644
|
||||
index 9e8bb6278604..a9fedf20c869 100644
|
||||
--- a/kernel/sched/sched.h
|
||||
+++ b/kernel/sched/sched.h
|
||||
@@ -125,6 +125,11 @@ extern int sched_rr_timeslice;
|
||||
*/
|
||||
#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
|
||||
|
||||
+/* Maximum nice latency weight used to scale the latency_offset */
|
||||
+
|
||||
+#define NICE_LATENCY_SHIFT (SCHED_FIXEDPOINT_SHIFT)
|
||||
+#define NICE_LATENCY_WEIGHT_MAX (1L << NICE_LATENCY_SHIFT)
|
||||
+
|
||||
/*
|
||||
* Increase resolution of nice-level calculations for 64-bit architectures.
|
||||
* The extra resolution improves shares distribution and load balancing of
|
||||
@@ -378,6 +383,8 @@ struct task_group {
|
||||
@@ -378,6 +378,8 @@ struct task_group {
|
||||
|
||||
/* A positive value indicates that this is a SCHED_IDLE group. */
|
||||
int idle;
|
||||
+ /* latency constraint of the group. */
|
||||
+ int latency_offset;
|
||||
+ /* latency priority of the group. */
|
||||
+ int latency_prio;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
@@ -488,6 +495,8 @@ extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
|
||||
@@ -488,6 +490,8 @@ extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
|
||||
|
||||
extern int sched_group_set_idle(struct task_group *tg, long idle);
|
||||
|
||||
+extern int sched_group_set_latency(struct task_group *tg, s64 latency);
|
||||
+extern int sched_group_set_latency(struct task_group *tg, int prio);
|
||||
+
|
||||
#ifdef CONFIG_SMP
|
||||
extern void set_task_rq_fair(struct sched_entity *se,
|
||||
struct cfs_rq *prev, struct cfs_rq *next);
|
||||
@@ -566,6 +575,7 @@ struct cfs_rq {
|
||||
@@ -566,6 +570,7 @@ struct cfs_rq {
|
||||
#endif
|
||||
|
||||
struct rb_root_cached tasks_timeline;
|
||||
@ -946,15 +904,7 @@ index 9e8bb6278604..c47198dbf740 100644
|
||||
|
||||
/*
|
||||
* 'curr' points to currently running entity on this cfs_rq.
|
||||
@@ -2123,6 +2133,7 @@ static_assert(WF_TTWU == SD_BALANCE_WAKE);
|
||||
|
||||
extern const int sched_prio_to_weight[40];
|
||||
extern const u32 sched_prio_to_wmult[40];
|
||||
+extern const int sched_latency_to_weight[40];
|
||||
|
||||
/*
|
||||
* {de,en}queue flags:
|
||||
@@ -2461,9 +2472,9 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
|
||||
@@ -2461,9 +2466,9 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
|
||||
extern const_debug unsigned int sysctl_sched_nr_migrate;
|
||||
extern const_debug unsigned int sysctl_sched_migration_cost;
|
||||
|
||||
@ -965,7 +915,7 @@ index 9e8bb6278604..c47198dbf740 100644
|
||||
extern unsigned int sysctl_sched_idle_min_granularity;
|
||||
extern unsigned int sysctl_sched_wakeup_granularity;
|
||||
extern int sysctl_resched_latency_warn_ms;
|
||||
@@ -2478,6 +2489,38 @@ extern unsigned int sysctl_numa_balancing_scan_size;
|
||||
@@ -2478,6 +2483,49 @@ extern unsigned int sysctl_numa_balancing_scan_size;
|
||||
extern unsigned int sysctl_numa_balancing_hot_threshold;
|
||||
#endif
|
||||
|
||||
@ -988,6 +938,17 @@ index 9e8bb6278604..c47198dbf740 100644
|
||||
+ return thresh;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * Calculate the latency offset for a priority level.
|
||||
+ * We use a linear mapping of the priority in the range:
|
||||
+ * [-sysctl_sched_latency:sysctl_sched_latency]
|
||||
+ */
|
||||
+static inline long calc_latency_offset(int prio)
|
||||
+{
|
||||
+ return (long)get_sleep_latency(false) * LATENCY_TO_NICE(prio) /
|
||||
+ (LATENCY_NICE_WIDTH/2);
|
||||
+}
|
||||
+
|
||||
+static inline unsigned long get_latency_max(void)
|
||||
+{
|
||||
+ unsigned long thresh = get_sleep_latency(false);
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
echo "Pika Kernel - Getting source"
|
||||
|
||||
wget -nv https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.2.tar.gz
|
||||
tar -zxf ./linux-6.2.tar.gz
|
||||
wget -nv https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.2.1.tar.gz
|
||||
tar -zxf ./linux-6.2.1.tar.gz
|
||||
|
||||
cd linux-6.2
|
||||
cd linux-6.2.1
|
Loading…
Reference in New Issue
Block a user