ferrreo 2023-08-22 18:16:01 +01:00
parent 60dc452fb5
commit ac1b61d8bd
6 changed files with 1054 additions and 959 deletions

config

@@ -1,15 +1,15 @@
 #
 # Automatically generated file; DO NOT EDIT.
-# Linux/x86 6.5.0-rc1 Kernel Configuration
+# Linux/x86 6.5.0-rc7 Kernel Configuration
 #
-CONFIG_CC_VERSION_TEXT="gcc (GCC) 13.1.1 20230525"
+CONFIG_CC_VERSION_TEXT="gcc (GCC) 13.2.1 20230730"
 CONFIG_CC_IS_GCC=y
-CONFIG_GCC_VERSION=130101
+CONFIG_GCC_VERSION=130201
 CONFIG_CLANG_VERSION=0
 CONFIG_AS_IS_GNU=y
-CONFIG_AS_VERSION=24000
+CONFIG_AS_VERSION=24100
 CONFIG_LD_IS_BFD=y
-CONFIG_LD_VERSION=24000
+CONFIG_LD_VERSION=24100
 CONFIG_LLD_VERSION=0
 CONFIG_RUST_IS_AVAILABLE=y
 CONFIG_CC_CAN_LINK=y

File diff suppressed because it is too large


@@ -1,7 +1,7 @@
-From 218c51e49185b75b4e36c8f11b5c77686f955a0a Mon Sep 17 00:00:00 2001
+From 6d15f875cb0c7fd65fc422c0545d57fc2e124f7c Mon Sep 17 00:00:00 2001
 From: Peter Jung <admin@ptr1337.dev>
-Date: Sun, 30 Jul 2023 09:38:51 +0200
-Subject: [PATCH] EEVDF
+Date: Sun, 20 Aug 2023 15:56:13 +0200
+Subject: [PATCH] EEVDF-cachy
 
 Signed-off-by: Peter Jung <admin@ptr1337.dev>
 ---
@@ -13,14 +13,14 @@ Signed-off-by: Peter Jung <admin@ptr1337.dev>
 init/init_task.c | 3 +-
 kernel/sched/core.c | 65 +-
 kernel/sched/debug.c | 49 +-
-kernel/sched/fair.c | 1138 +++++++++++------------
-kernel/sched/features.h | 23 +-
+kernel/sched/fair.c | 1150 +++++++++++------------
+kernel/sched/features.h | 24 +-
 kernel/sched/sched.h | 21 +-
 tools/include/uapi/linux/sched.h | 4 +-
-12 files changed, 702 insertions(+), 668 deletions(-)
+12 files changed, 715 insertions(+), 668 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 4ef890191196..3a8d3e1e5591 100644 index 4ef8901911961..3a8d3e1e55910 100644
--- a/Documentation/admin-guide/cgroup-v2.rst --- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1121,6 +1121,16 @@ All time durations are in microseconds. @@ -1121,6 +1121,16 @@ All time durations are in microseconds.
@ -41,7 +41,7 @@ index 4ef890191196..3a8d3e1e5591 100644
Memory Memory
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 7ee7ed5de722..6dbc5a1bf6a8 100644 index 7ee7ed5de7227..6dbc5a1bf6a8c 100644
--- a/include/linux/rbtree_augmented.h --- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h
@@ -60,6 +60,32 @@ rb_insert_augmented_cached(struct rb_node *node, @@ -60,6 +60,32 @@ rb_insert_augmented_cached(struct rb_node *node,
@ -78,7 +78,7 @@ index 7ee7ed5de722..6dbc5a1bf6a8 100644
* Template for declaring augmented rbtree callbacks (generic case) * Template for declaring augmented rbtree callbacks (generic case)
* *
diff --git a/include/linux/sched.h b/include/linux/sched.h diff --git a/include/linux/sched.h b/include/linux/sched.h
index 609bde814cb0..c940c4dc8304 100644 index 609bde814cb06..c940c4dc83048 100644
--- a/include/linux/sched.h --- a/include/linux/sched.h
+++ b/include/linux/sched.h +++ b/include/linux/sched.h
@@ -549,13 +549,18 @@ struct sched_entity { @@ -549,13 +549,18 @@ struct sched_entity {
@ -110,7 +110,7 @@ index 609bde814cb0..c940c4dc8304 100644
struct sched_entity se; struct sched_entity se;
struct sched_rt_entity rt; struct sched_rt_entity rt;
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 3bac0a8ceab2..b2e932c25be6 100644 index 3bac0a8ceab26..b2e932c25be62 100644
--- a/include/uapi/linux/sched.h --- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h
@@ -132,6 +132,7 @@ struct clone_args { @@ -132,6 +132,7 @@ struct clone_args {
@ -131,7 +131,7 @@ index 3bac0a8ceab2..b2e932c25be6 100644
#endif /* _UAPI_LINUX_SCHED_H */ #endif /* _UAPI_LINUX_SCHED_H */
diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h
index f2c4589d4dbf..db1e8199e8c8 100644 index f2c4589d4dbfe..db1e8199e8c80 100644
--- a/include/uapi/linux/sched/types.h --- a/include/uapi/linux/sched/types.h
+++ b/include/uapi/linux/sched/types.h +++ b/include/uapi/linux/sched/types.h
@@ -10,6 +10,7 @@ struct sched_param { @@ -10,6 +10,7 @@ struct sched_param {
@ -175,7 +175,7 @@ index f2c4589d4dbf..db1e8199e8c8 100644
#endif /* _UAPI_LINUX_SCHED_TYPES_H */ #endif /* _UAPI_LINUX_SCHED_TYPES_H */
diff --git a/init/init_task.c b/init/init_task.c diff --git a/init/init_task.c b/init/init_task.c
index ff6c4b9bfe6b..511cbcf3510d 100644 index ff6c4b9bfe6b1..511cbcf3510dc 100644
--- a/init/init_task.c --- a/init/init_task.c
+++ b/init/init_task.c +++ b/init/init_task.c
@@ -78,6 +78,7 @@ struct task_struct init_task @@ -78,6 +78,7 @@ struct task_struct init_task
@ -196,7 +196,7 @@ index ff6c4b9bfe6b..511cbcf3510d 100644
.rt = { .rt = {
.run_list = LIST_HEAD_INIT(init_task.rt.run_list), .run_list = LIST_HEAD_INIT(init_task.rt.run_list),
diff --git a/kernel/sched/core.c b/kernel/sched/core.c diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c52c2eba7c73..aff81e12460e 100644 index c52c2eba7c739..aff81e12460ed 100644
--- a/kernel/sched/core.c --- a/kernel/sched/core.c
+++ b/kernel/sched/core.c +++ b/kernel/sched/core.c
@@ -1305,6 +1305,12 @@ static void set_load_weight(struct task_struct *p, bool update_load) @@ -1305,6 +1305,12 @@ static void set_load_weight(struct task_struct *p, bool update_load)
@ -358,7 +358,7 @@ index c52c2eba7c73..aff81e12460e 100644
#ifdef CONFIG_CFS_BANDWIDTH #ifdef CONFIG_CFS_BANDWIDTH
{ {
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 066ff1c8ae4e..e7e83181fbb6 100644 index 066ff1c8ae4eb..e7e83181fbb6c 100644
--- a/kernel/sched/debug.c --- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c +++ b/kernel/sched/debug.c
@@ -347,10 +347,7 @@ static __init int sched_init_debug(void) @@ -347,10 +347,7 @@ static __init int sched_init_debug(void)
@ -462,7 +462,7 @@ index 066ff1c8ae4e..e7e83181fbb6 100644
P(dl.runtime); P(dl.runtime);
P(dl.deadline); P(dl.deadline);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2c335df30171..461409c0eac7 100644 index 2c335df301718..e0a4c13dab04f 100644
--- a/kernel/sched/fair.c --- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c +++ b/kernel/sched/fair.c
@@ -47,6 +47,7 @@ @@ -47,6 +47,7 @@
@ -868,7 +868,7 @@ index 2c335df30171..461409c0eac7 100644
} }
struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
@@ -678,14 +845,81 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) @@ -678,14 +845,88 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
return __node_2_se(left); return __node_2_se(left);
} }
@ -904,6 +904,13 @@ index 2c335df30171..461409c0eac7 100644
+ if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr))) + if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr)))
+ curr = NULL; + curr = NULL;
+ +
+ /*
+ * Once selected, run a task until it either becomes non-eligible or
+ * until it gets a new slice. See the HACK in set_next_entity().
+ */
+ if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline)
+ return curr;
+
+ while (node) { + while (node) {
+ struct sched_entity *se = __node_2_se(node); + struct sched_entity *se = __node_2_se(node);
+ +
@ -938,8 +945,7 @@ index 2c335df30171..461409c0eac7 100644
+ +
+ node = node->rb_right; + node = node->rb_right;
+ } + }
+
- return __node_2_se(next);
+ if (!best || (curr && deadline_gt(deadline, best, curr))) + if (!best || (curr && deadline_gt(deadline, best, curr)))
+ best = curr; + best = curr;
+ +
@ -950,12 +956,13 @@ index 2c335df30171..461409c0eac7 100644
+ return left; + return left;
+ } + }
+ } + }
+
- return __node_2_se(next);
+ return best; + return best;
} }
#ifdef CONFIG_SCHED_DEBUG #ifdef CONFIG_SCHED_DEBUG
@@ -707,104 +941,53 @@ int sched_update_scaling(void) @@ -707,104 +948,53 @@ int sched_update_scaling(void)
{ {
unsigned int factor = get_update_sysctl_factor(); unsigned int factor = get_update_sysctl_factor();
@ -982,12 +989,12 @@ index 2c335df30171..461409c0eac7 100644
{ {
- if (unlikely(se->load.weight != NICE_0_LOAD)) - if (unlikely(se->load.weight != NICE_0_LOAD))
- delta = __calc_delta(delta, NICE_0_LOAD, &se->load); - delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
-
- return delta;
-}
+ u32 weight = sched_prio_to_weight[prio]; + u32 weight = sched_prio_to_weight[prio];
+ u64 base = sysctl_sched_base_slice; + u64 base = sysctl_sched_base_slice;
- return delta;
-}
-
-/* -/*
- * The idea is to set a period in which each task runs once. - * The idea is to set a period in which each task runs once.
- * - *
@ -1088,7 +1095,7 @@ index 2c335df30171..461409c0eac7 100644
} }
#include "pelt.h" #include "pelt.h"
@@ -939,6 +1122,7 @@ static void update_curr(struct cfs_rq *cfs_rq) @@ -939,6 +1129,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
schedstat_add(cfs_rq->exec_clock, delta_exec); schedstat_add(cfs_rq->exec_clock, delta_exec);
curr->vruntime += calc_delta_fair(delta_exec, curr); curr->vruntime += calc_delta_fair(delta_exec, curr);
@ -1096,7 +1103,7 @@ index 2c335df30171..461409c0eac7 100644
update_min_vruntime(cfs_rq); update_min_vruntime(cfs_rq);
if (entity_is_task(curr)) { if (entity_is_task(curr)) {
@@ -3393,16 +3577,36 @@ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } @@ -3393,16 +3584,36 @@ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
unsigned long weight) unsigned long weight)
{ {
@ -1133,7 +1140,7 @@ index 2c335df30171..461409c0eac7 100644
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
do { do {
u32 divider = get_pelt_divider(&se->avg); u32 divider = get_pelt_divider(&se->avg);
@@ -3412,9 +3616,11 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, @@ -3412,9 +3623,11 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
#endif #endif
enqueue_load_avg(cfs_rq, se); enqueue_load_avg(cfs_rq, se);
@ -1147,7 +1154,7 @@ index 2c335df30171..461409c0eac7 100644
} }
void reweight_task(struct task_struct *p, int prio) void reweight_task(struct task_struct *p, int prio)
@@ -4710,158 +4916,123 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {} @@ -4710,158 +4923,123 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
@ -1170,15 +1177,15 @@ index 2c335df30171..461409c0eac7 100644
{ {
- struct cfs_rq *cfs_rq; - struct cfs_rq *cfs_rq;
- u64 sleep_time; - u64 sleep_time;
-
- if (se->exec_start == 0)
- return false;
-
- cfs_rq = cfs_rq_of(se);
+ u64 vslice = calc_delta_fair(se->slice, se); + u64 vslice = calc_delta_fair(se->slice, se);
+ u64 vruntime = avg_vruntime(cfs_rq); + u64 vruntime = avg_vruntime(cfs_rq);
+ s64 lag = 0; + s64 lag = 0;
- if (se->exec_start == 0)
- return false;
-
- cfs_rq = cfs_rq_of(se);
-
- sleep_time = rq_clock_task(rq_of(cfs_rq)); - sleep_time = rq_clock_task(rq_of(cfs_rq));
+ /* + /*
+ * Due to how V is constructed as the weighted average of entities, + * Due to how V is constructed as the weighted average of entities,
@ -1395,7 +1402,7 @@ index 2c335df30171..461409c0eac7 100644
/* /*
* When enqueuing a sched_entity, we must: * When enqueuing a sched_entity, we must:
* - Update loads to have both entity and cfs_rq synced with now. * - Update loads to have both entity and cfs_rq synced with now.
@@ -4873,18 +5044,28 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) @@ -4873,18 +5051,28 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*/ */
update_load_avg(cfs_rq, se, UPDATE_TG | DO_ATTACH); update_load_avg(cfs_rq, se, UPDATE_TG | DO_ATTACH);
se_update_runnable(se); se_update_runnable(se);
@ -1427,7 +1434,7 @@ index 2c335df30171..461409c0eac7 100644
if (!curr) if (!curr)
__enqueue_entity(cfs_rq, se); __enqueue_entity(cfs_rq, se);
se->on_rq = 1; se->on_rq = 1;
@@ -4896,17 +5077,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) @@ -4896,17 +5084,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
} }
} }
@ -1445,7 +1452,7 @@ index 2c335df30171..461409c0eac7 100644
static void __clear_buddies_next(struct sched_entity *se) static void __clear_buddies_next(struct sched_entity *se)
{ {
for_each_sched_entity(se) { for_each_sched_entity(se) {
@@ -4918,27 +5088,10 @@ static void __clear_buddies_next(struct sched_entity *se) @@ -4918,27 +5095,10 @@ static void __clear_buddies_next(struct sched_entity *se)
} }
} }
@ -1473,7 +1480,7 @@ index 2c335df30171..461409c0eac7 100644
} }
static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
@@ -4972,20 +5125,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) @@ -4972,20 +5132,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
clear_buddies(cfs_rq, se); clear_buddies(cfs_rq, se);
@ -1495,7 +1502,7 @@ index 2c335df30171..461409c0eac7 100644
/* return excess runtime on last dequeue */ /* return excess runtime on last dequeue */
return_cfs_rq_runtime(cfs_rq); return_cfs_rq_runtime(cfs_rq);
@@ -5004,52 +5149,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) @@ -5004,52 +5156,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
update_idle_cfs_rq_clock_pelt(cfs_rq); update_idle_cfs_rq_clock_pelt(cfs_rq);
} }
@ -1548,7 +1555,19 @@ index 2c335df30171..461409c0eac7 100644
static void static void
set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{ {
@@ -5088,9 +5187,6 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -5065,6 +5171,11 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
update_stats_wait_end_fair(cfs_rq, se);
__dequeue_entity(cfs_rq, se);
update_load_avg(cfs_rq, se, UPDATE_TG);
+ /*
+ * HACK, stash a copy of deadline at the point of pick in vlag,
+ * which isn't used until dequeue.
+ */
+ se->vlag = se->deadline;
}
update_stats_curr_start(cfs_rq, se);
@@ -5088,9 +5199,6 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
se->prev_sum_exec_runtime = se->sum_exec_runtime; se->prev_sum_exec_runtime = se->sum_exec_runtime;
} }
@ -1558,30 +1577,33 @@ index 2c335df30171..461409c0eac7 100644
/* /*
* Pick the next process, keeping these things in mind, in this order: * Pick the next process, keeping these things in mind, in this order:
* 1) keep things fair between processes/task groups * 1) keep things fair between processes/task groups
@@ -5101,50 +5197,14 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); @@ -5101,50 +5209,14 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
static struct sched_entity * static struct sched_entity *
pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{ {
- struct sched_entity *left = __pick_first_entity(cfs_rq); - struct sched_entity *left = __pick_first_entity(cfs_rq);
- struct sched_entity *se; - struct sched_entity *se;
- -
/* - /*
- * If curr is set we have to see if its left of the leftmost entity - * If curr is set we have to see if its left of the leftmost entity
- * still in the tree, provided there was anything in the tree at all. - * still in the tree, provided there was anything in the tree at all.
+ * Enabling NEXT_BUDDY will affect latency but not fairness. - */
*/
- if (!left || (curr && entity_before(curr, left))) - if (!left || (curr && entity_before(curr, left)))
- left = curr; - left = curr;
- -
- se = left; /* ideally we run the leftmost entity */ - se = left; /* ideally we run the leftmost entity */
- -
- /* /*
- * Avoid running the skip buddy, if running something else can - * Avoid running the skip buddy, if running something else can
- * be done without getting too unfair. - * be done without getting too unfair.
- */ + * Enabling NEXT_BUDDY will affect latency but not fairness.
*/
- if (cfs_rq->skip && cfs_rq->skip == se) { - if (cfs_rq->skip && cfs_rq->skip == se) {
- struct sched_entity *second; - struct sched_entity *second;
- + if (sched_feat(NEXT_BUDDY) &&
+ cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next))
+ return cfs_rq->next;
- if (se == curr) { - if (se == curr) {
- second = __pick_first_entity(cfs_rq); - second = __pick_first_entity(cfs_rq);
- } else { - } else {
@ -1589,10 +1611,7 @@ index 2c335df30171..461409c0eac7 100644
- if (!second || (curr && entity_before(curr, second))) - if (!second || (curr && entity_before(curr, second)))
- second = curr; - second = curr;
- } - }
+ if (sched_feat(NEXT_BUDDY) && -
+ cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next))
+ return cfs_rq->next;
- if (second && wakeup_preempt_entity(second, left) < 1) - if (second && wakeup_preempt_entity(second, left) < 1)
- se = second; - se = second;
- } - }
@ -1614,7 +1633,7 @@ index 2c335df30171..461409c0eac7 100644
} }
static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq); static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
@@ -5161,8 +5221,6 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) @@ -5161,8 +5233,6 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
/* throttle cfs_rqs exceeding runtime */ /* throttle cfs_rqs exceeding runtime */
check_cfs_rq_runtime(cfs_rq); check_cfs_rq_runtime(cfs_rq);
@ -1623,7 +1642,7 @@ index 2c335df30171..461409c0eac7 100644
if (prev->on_rq) { if (prev->on_rq) {
update_stats_wait_start_fair(cfs_rq, prev); update_stats_wait_start_fair(cfs_rq, prev);
/* Put 'current' back into the tree. */ /* Put 'current' back into the tree. */
@@ -5203,9 +5261,6 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) @@ -5203,9 +5273,6 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
hrtimer_active(&rq_of(cfs_rq)->hrtick_timer)) hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
return; return;
#endif #endif
@ -1633,7 +1652,7 @@ index 2c335df30171..461409c0eac7 100644
} }
@@ -6228,13 +6283,12 @@ static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {} @@ -6228,13 +6295,12 @@ static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}
static void hrtick_start_fair(struct rq *rq, struct task_struct *p) static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
{ {
struct sched_entity *se = &p->se; struct sched_entity *se = &p->se;
@ -1648,7 +1667,7 @@ index 2c335df30171..461409c0eac7 100644
s64 delta = slice - ran; s64 delta = slice - ran;
if (delta < 0) { if (delta < 0) {
@@ -6258,8 +6312,7 @@ static void hrtick_update(struct rq *rq) @@ -6258,8 +6324,7 @@ static void hrtick_update(struct rq *rq)
if (!hrtick_enabled_fair(rq) || curr->sched_class != &fair_sched_class) if (!hrtick_enabled_fair(rq) || curr->sched_class != &fair_sched_class)
return; return;
@ -1658,7 +1677,7 @@ index 2c335df30171..461409c0eac7 100644
} }
#else /* !CONFIG_SCHED_HRTICK */ #else /* !CONFIG_SCHED_HRTICK */
static inline void static inline void
@@ -6300,17 +6353,6 @@ static int sched_idle_rq(struct rq *rq) @@ -6300,17 +6365,6 @@ static int sched_idle_rq(struct rq *rq)
rq->nr_running); rq->nr_running);
} }
@ -1676,7 +1695,7 @@ index 2c335df30171..461409c0eac7 100644
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
static int sched_idle_cpu(int cpu) static int sched_idle_cpu(int cpu)
{ {
@@ -7816,18 +7858,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) @@ -7816,18 +7870,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
{ {
struct sched_entity *se = &p->se; struct sched_entity *se = &p->se;
@ -1695,7 +1714,7 @@ index 2c335df30171..461409c0eac7 100644
if (!task_on_rq_migrating(p)) { if (!task_on_rq_migrating(p)) {
remove_entity_load_avg(se); remove_entity_load_avg(se);
@@ -7865,66 +7895,6 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) @@ -7865,66 +7907,6 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
} }
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
@ -1762,7 +1781,7 @@ index 2c335df30171..461409c0eac7 100644
static void set_next_buddy(struct sched_entity *se) static void set_next_buddy(struct sched_entity *se)
{ {
for_each_sched_entity(se) { for_each_sched_entity(se) {
@@ -7936,12 +7906,6 @@ static void set_next_buddy(struct sched_entity *se) @@ -7936,12 +7918,6 @@ static void set_next_buddy(struct sched_entity *se)
} }
} }
@ -1775,7 +1794,7 @@ index 2c335df30171..461409c0eac7 100644
/* /*
* Preempt the current task with a newly woken task if needed: * Preempt the current task with a newly woken task if needed:
*/ */
@@ -7950,7 +7914,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ @@ -7950,7 +7926,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
struct task_struct *curr = rq->curr; struct task_struct *curr = rq->curr;
struct sched_entity *se = &curr->se, *pse = &p->se; struct sched_entity *se = &curr->se, *pse = &p->se;
struct cfs_rq *cfs_rq = task_cfs_rq(curr); struct cfs_rq *cfs_rq = task_cfs_rq(curr);
@ -1783,7 +1802,7 @@ index 2c335df30171..461409c0eac7 100644
int next_buddy_marked = 0; int next_buddy_marked = 0;
int cse_is_idle, pse_is_idle; int cse_is_idle, pse_is_idle;
@@ -7966,7 +7929,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ @@ -7966,7 +7941,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
if (unlikely(throttled_hierarchy(cfs_rq_of(pse)))) if (unlikely(throttled_hierarchy(cfs_rq_of(pse))))
return; return;
@ -1792,7 +1811,7 @@ index 2c335df30171..461409c0eac7 100644
set_next_buddy(pse); set_next_buddy(pse);
next_buddy_marked = 1; next_buddy_marked = 1;
} }
@@ -8011,35 +7974,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ @@ -8011,35 +7986,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
if (cse_is_idle != pse_is_idle) if (cse_is_idle != pse_is_idle)
return; return;
@ -1835,7 +1854,7 @@ index 2c335df30171..461409c0eac7 100644
} }
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
@@ -8240,8 +8187,6 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev) @@ -8240,8 +8199,6 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
/* /*
* sched_yield() is very simple * sched_yield() is very simple
@ -1844,7 +1863,7 @@ index 2c335df30171..461409c0eac7 100644
*/ */
static void yield_task_fair(struct rq *rq) static void yield_task_fair(struct rq *rq)
{ {
@@ -8257,21 +8202,19 @@ static void yield_task_fair(struct rq *rq) @@ -8257,21 +8214,19 @@ static void yield_task_fair(struct rq *rq)
clear_buddies(cfs_rq, se); clear_buddies(cfs_rq, se);
@ -1878,7 +1897,7 @@ index 2c335df30171..461409c0eac7 100644
} }
static bool yield_to_task_fair(struct rq *rq, struct task_struct *p) static bool yield_to_task_fair(struct rq *rq, struct task_struct *p)
@@ -8514,8 +8457,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env) @@ -8514,8 +8469,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
* Buddy candidates are cache hot: * Buddy candidates are cache hot:
*/ */
if (sched_feat(CACHE_HOT_BUDDY) && env->dst_rq->nr_running && if (sched_feat(CACHE_HOT_BUDDY) && env->dst_rq->nr_running &&
@ -1888,7 +1907,7 @@ index 2c335df30171..461409c0eac7 100644
return 1; return 1;
if (sysctl_sched_migration_cost == -1) if (sysctl_sched_migration_cost == -1)
@@ -12025,8 +11967,8 @@ static void rq_offline_fair(struct rq *rq) @@ -12025,8 +11979,8 @@ static void rq_offline_fair(struct rq *rq)
static inline bool static inline bool
__entity_slice_used(struct sched_entity *se, int min_nr_tasks) __entity_slice_used(struct sched_entity *se, int min_nr_tasks)
{ {
@ -1898,7 +1917,7 @@ index 2c335df30171..461409c0eac7 100644
return (rtime * min_nr_tasks > slice); return (rtime * min_nr_tasks > slice);
} }
@@ -12182,8 +12124,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) @@ -12182,8 +12136,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
*/ */
static void task_fork_fair(struct task_struct *p) static void task_fork_fair(struct task_struct *p)
{ {
@ -1908,7 +1927,7 @@ index 2c335df30171..461409c0eac7 100644
struct rq *rq = this_rq(); struct rq *rq = this_rq();
struct rq_flags rf; struct rq_flags rf;
@@ -12192,22 +12134,9 @@ static void task_fork_fair(struct task_struct *p) @@ -12192,22 +12146,9 @@ static void task_fork_fair(struct task_struct *p)
cfs_rq = task_cfs_rq(current); cfs_rq = task_cfs_rq(current);
curr = cfs_rq->curr; curr = cfs_rq->curr;
@ -1933,7 +1952,7 @@ index 2c335df30171..461409c0eac7 100644
rq_unlock(rq, &rf); rq_unlock(rq, &rf);
} }
@@ -12236,34 +12165,6 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) @@ -12236,34 +12177,6 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
check_preempt_curr(rq, p, 0); check_preempt_curr(rq, p, 0);
} }
@ -1968,7 +1987,7 @@ index 2c335df30171..461409c0eac7 100644
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
/* /*
* Propagate the changes of the sched_entity across the tg tree to make it * Propagate the changes of the sched_entity across the tg tree to make it
@@ -12334,16 +12235,6 @@ static void attach_entity_cfs_rq(struct sched_entity *se) @@ -12334,16 +12247,6 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
static void detach_task_cfs_rq(struct task_struct *p) static void detach_task_cfs_rq(struct task_struct *p)
{ {
struct sched_entity *se = &p->se; struct sched_entity *se = &p->se;
@ -1985,7 +2004,7 @@ index 2c335df30171..461409c0eac7 100644
detach_entity_cfs_rq(se); detach_entity_cfs_rq(se);
} }
@@ -12351,12 +12242,8 @@ static void detach_task_cfs_rq(struct task_struct *p) @@ -12351,12 +12254,8 @@ static void detach_task_cfs_rq(struct task_struct *p)
static void attach_task_cfs_rq(struct task_struct *p) static void attach_task_cfs_rq(struct task_struct *p)
{ {
struct sched_entity *se = &p->se; struct sched_entity *se = &p->se;
@ -1998,7 +2017,7 @@ index 2c335df30171..461409c0eac7 100644
} }
static void switched_from_fair(struct rq *rq, struct task_struct *p) static void switched_from_fair(struct rq *rq, struct task_struct *p)
@@ -12467,6 +12354,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) @@ -12467,6 +12366,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
goto err; goto err;
tg->shares = NICE_0_LOAD; tg->shares = NICE_0_LOAD;
@ -2006,7 +2025,7 @@ index 2c335df30171..461409c0eac7 100644
init_cfs_bandwidth(tg_cfs_bandwidth(tg)); init_cfs_bandwidth(tg_cfs_bandwidth(tg));
@@ -12565,6 +12453,9 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, @@ -12565,6 +12465,9 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
} }
se->my_q = cfs_rq; se->my_q = cfs_rq;
@ -2016,7 +2035,7 @@ index 2c335df30171..461409c0eac7 100644
/* guarantee group entities always have weight */ /* guarantee group entities always have weight */
update_load_set(&se->load, NICE_0_LOAD); update_load_set(&se->load, NICE_0_LOAD);
se->parent = parent; se->parent = parent;
@@ -12695,6 +12586,29 @@ int sched_group_set_idle(struct task_group *tg, long idle) @@ -12695,6 +12598,29 @@ int sched_group_set_idle(struct task_group *tg, long idle)
return 0; return 0;
} }
@ -2046,7 +2065,7 @@ index 2c335df30171..461409c0eac7 100644
#else /* CONFIG_FAIR_GROUP_SCHED */ #else /* CONFIG_FAIR_GROUP_SCHED */
void free_fair_sched_group(struct task_group *tg) { } void free_fair_sched_group(struct task_group *tg) { }
@@ -12721,7 +12635,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task @@ -12721,7 +12647,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
* idle runqueue: * idle runqueue:
*/ */
if (rq->cfs.load.weight) if (rq->cfs.load.weight)
@ -2056,10 +2075,10 @@ index 2c335df30171..461409c0eac7 100644
return rr_interval; return rr_interval;
} }
diff --git a/kernel/sched/features.h b/kernel/sched/features.h diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index ee7f23c76bd3..54334ca5c5c6 100644 index ee7f23c76bd33..546d212ef40d8 100644
--- a/kernel/sched/features.h --- a/kernel/sched/features.h
+++ b/kernel/sched/features.h +++ b/kernel/sched/features.h
@@ -1,16 +1,11 @@ @@ -1,16 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */ /* SPDX-License-Identifier: GPL-2.0 */
-/* -/*
- * Only give sleepers 50% of their service deficit. This allows - * Only give sleepers 50% of their service deficit. This allows
@ -2077,10 +2096,11 @@ index ee7f23c76bd3..54334ca5c5c6 100644
-SCHED_FEAT(START_DEBIT, true) -SCHED_FEAT(START_DEBIT, true)
+SCHED_FEAT(PLACE_LAG, true) +SCHED_FEAT(PLACE_LAG, true)
+SCHED_FEAT(PLACE_DEADLINE_INITIAL, true) +SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
+SCHED_FEAT(RUN_TO_PARITY, true)
/* /*
* Prefer to schedule the task we woke last (assuming it failed * Prefer to schedule the task we woke last (assuming it failed
@@ -19,13 +14,6 @@ SCHED_FEAT(START_DEBIT, true) @@ -19,13 +15,6 @@ SCHED_FEAT(START_DEBIT, true)
*/ */
SCHED_FEAT(NEXT_BUDDY, false) SCHED_FEAT(NEXT_BUDDY, false)
@ -2094,7 +2114,7 @@ index ee7f23c76bd3..54334ca5c5c6 100644
/* /*
* Consider buddies to be cache hot, decreases the likeliness of a * Consider buddies to be cache hot, decreases the likeliness of a
* cache buddy being migrated away, increases cache locality. * cache buddy being migrated away, increases cache locality.
@@ -98,6 +86,3 @@ SCHED_FEAT(UTIL_EST, true) @@ -98,6 +87,3 @@ SCHED_FEAT(UTIL_EST, true)
SCHED_FEAT(UTIL_EST_FASTUP, true) SCHED_FEAT(UTIL_EST_FASTUP, true)
SCHED_FEAT(LATENCY_WARN, false) SCHED_FEAT(LATENCY_WARN, false)
@ -2102,7 +2122,7 @@ index ee7f23c76bd3..54334ca5c5c6 100644
-SCHED_FEAT(ALT_PERIOD, true) -SCHED_FEAT(ALT_PERIOD, true)
-SCHED_FEAT(BASE_SLICE, true) -SCHED_FEAT(BASE_SLICE, true)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e93e006a942b..67cd7e1fd501 100644 index e93e006a942b9..67cd7e1fd5016 100644
--- a/kernel/sched/sched.h --- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h +++ b/kernel/sched/sched.h
@@ -372,6 +372,8 @@ struct task_group { @@ -372,6 +372,8 @@ struct task_group {
@ -2182,7 +2202,7 @@ index e93e006a942b..67cd7e1fd501 100644
+ +
#endif /* _KERNEL_SCHED_SCHED_H */ #endif /* _KERNEL_SCHED_SCHED_H */
diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h
index 3bac0a8ceab2..b2e932c25be6 100644 index 3bac0a8ceab26..b2e932c25be62 100644
--- a/tools/include/uapi/linux/sched.h --- a/tools/include/uapi/linux/sched.h
+++ b/tools/include/uapi/linux/sched.h +++ b/tools/include/uapi/linux/sched.h
@@ -132,6 +132,7 @@ struct clone_args { @@ -132,6 +132,7 @@ struct clone_args {


@@ -1,51 +1,48 @@
-From 377657f92d256b364813e3f8b2a58edfc9833815 Mon Sep 17 00:00:00 2001
-From: Peter Jung <admin@ptr1337.dev>
-Date: Sun, 30 Jul 2023 09:43:51 +0200
+From f353b9eb23586e55b99a6bfe7da9563be5fcca29 Mon Sep 17 00:00:00 2001
+From: Piotr Gorski <lucjan.lucjanov@gmail.com>
+Date: Sat, 12 Aug 2023 21:05:20 +0200
 Subject: [PATCH] bore-eevdf
 
-Signed-off-by: Peter Jung <admin@ptr1337.dev>
+Signed-off-by: Piotr Gorski <lucjan.lucjanov@gmail.com>
 ---
-include/linux/sched.h | 30 ++++++
+include/linux/sched.h | 29 ++++++
 init/Kconfig | 20 ++++
-kernel/sched/core.c | 118 +++++++++++++++++++++
+kernel/sched/core.c | 122 +++++++++++++++++++++++
 kernel/sched/debug.c | 4 +
-kernel/sched/fair.c | 228 ++++++++++++++++++++++++++++++++++++++--
-kernel/sched/features.h | 4 +
+kernel/sched/fair.c | 219 +++++++++++++++++++++++++++++++++++++---
 kernel/sched/sched.h | 1 +
-7 files changed, 397 insertions(+), 8 deletions(-)
+6 files changed, 384 insertions(+), 11 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h diff --git a/include/linux/sched.h b/include/linux/sched.h
index c940c4dc8304..8663c0813f81 100644 index c940c4dc8..984931de0 100644
--- a/include/linux/sched.h --- a/include/linux/sched.h
+++ b/include/linux/sched.h +++ b/include/linux/sched.h
@@ -545,6 +545,26 @@ struct sched_statistics { @@ -545,6 +545,24 @@ struct sched_statistics {
#endif /* CONFIG_SCHEDSTATS */ #endif /* CONFIG_SCHEDSTATS */
} ____cacheline_aligned; } ____cacheline_aligned;
+#ifdef CONFIG_SCHED_BORE +#ifdef CONFIG_SCHED_BORE
+union union16 { +typedef union {
+ u16 u16; + u16 u16;
+ s16 s16; + s16 s16;
+ u8 u8[2]; + u8 u8[2];
+ s8 s8[2]; + s8 s8[2];
+}; +} x16;
+typedef union union16 x16;
+ +
+union union32 { +typedef union {
+ u32 u32; + u32 u32;
+ s32 s32; + s32 s32;
+ u16 u16[2]; + u16 u16[2];
+ s16 s16[2]; + s16 s16[2];
+ u8 u8[4]; + u8 u8[4];
+ s8 s8[4]; + s8 s8[4];
+}; +} x32;
+typedef union union32 x32;
+#endif // CONFIG_SCHED_BORE +#endif // CONFIG_SCHED_BORE
+ +
struct sched_entity { struct sched_entity {
/* For load-balancing: */ /* For load-balancing: */
struct load_weight load; struct load_weight load;
@@ -559,6 +579,12 @@ struct sched_entity { @@ -559,6 +577,12 @@ struct sched_entity {
u64 sum_exec_runtime; u64 sum_exec_runtime;
u64 prev_sum_exec_runtime; u64 prev_sum_exec_runtime;
u64 vruntime; u64 vruntime;
@ -58,19 +55,20 @@ index c940c4dc8304..8663c0813f81 100644
s64 vlag; s64 vlag;
u64 slice; u64 slice;
@@ -990,6 +1016,10 @@ struct task_struct { @@ -990,6 +1014,11 @@ struct task_struct {
struct list_head children; struct list_head children;
struct list_head sibling; struct list_head sibling;
struct task_struct *group_leader; struct task_struct *group_leader;
+#ifdef CONFIG_SCHED_BORE +#ifdef CONFIG_SCHED_BORE
+ u16 child_burst_cache; + u16 child_burst_cache;
+ u16 child_burst_count_cache;
+ u64 child_burst_last_cached; + u64 child_burst_last_cached;
+#endif // CONFIG_SCHED_BORE +#endif // CONFIG_SCHED_BORE
/* /*
* 'ptraced' is the list of tasks this task is using ptrace() on. * 'ptraced' is the list of tasks this task is using ptrace() on.
diff --git a/init/Kconfig b/init/Kconfig diff --git a/init/Kconfig b/init/Kconfig
index 71755cc8ed3e..c697be79e594 100644 index 71755cc8e..c697be79e 100644
--- a/init/Kconfig --- a/init/Kconfig
+++ b/init/Kconfig +++ b/init/Kconfig
@@ -1277,6 +1277,26 @@ config CHECKPOINT_RESTORE @@ -1277,6 +1277,26 @@ config CHECKPOINT_RESTORE
@ -101,19 +99,21 @@ index 71755cc8ed3e..c697be79e594 100644
bool "Automatic process group scheduling" bool "Automatic process group scheduling"
select CGROUPS select CGROUPS
diff --git a/kernel/sched/core.c b/kernel/sched/core.c diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index aff81e12460e..839605620f63 100644 index aff81e124..a4eba9e47 100644
--- a/kernel/sched/core.c --- a/kernel/sched/core.c
+++ b/kernel/sched/core.c +++ b/kernel/sched/core.c
@@ -4491,6 +4491,113 @@ int wake_up_state(struct task_struct *p, unsigned int state) @@ -4491,6 +4491,117 @@ int wake_up_state(struct task_struct *p, unsigned int state)
return try_to_wake_up(p, state, 0); return try_to_wake_up(p, state, 0);
} }
+#ifdef CONFIG_SCHED_BORE +#ifdef CONFIG_SCHED_BORE
+extern unsigned int sched_burst_cache_lifetime; +extern unsigned int sched_burst_cache_lifetime;
+extern unsigned int sched_bore;
+extern unsigned int sched_burst_fork_atavistic; +extern unsigned int sched_burst_fork_atavistic;
+ +
+void __init sched_init_bore(void) { +void __init sched_init_bore(void) {
+ init_task.child_burst_cache = 0; + init_task.child_burst_cache = 0;
+ init_task.child_burst_count_cache = 0;
+ init_task.child_burst_last_cached = 0; + init_task.child_burst_last_cached = 0;
+ init_task.se.burst_time = 0; + init_task.se.burst_time = 0;
+ init_task.se.prev_burst_penalty = 0; + init_task.se.prev_burst_penalty = 0;
@ -123,6 +123,7 @@ index aff81e12460e..839605620f63 100644
+ +
+void inline sched_fork_bore(struct task_struct *p) { +void inline sched_fork_bore(struct task_struct *p) {
+ p->child_burst_cache = 0; + p->child_burst_cache = 0;
+ p->child_burst_count_cache = 0;
+ p->child_burst_last_cached = 0; + p->child_burst_last_cached = 0;
+ p->se.burst_time = 0; + p->se.burst_time = 0;
+ p->se.curr_burst_penalty = 0; + p->se.curr_burst_penalty = 0;
@ -144,6 +145,7 @@ index aff81e12460e..839605620f63 100644
+ u16 avg = 0; + u16 avg = 0;
+ if (cnt) avg = DIV_ROUND_CLOSEST(sum, cnt); + if (cnt) avg = DIV_ROUND_CLOSEST(sum, cnt);
+ p->child_burst_cache = max(avg, p->se.burst_penalty); + p->child_burst_cache = max(avg, p->se.burst_penalty);
+ p->child_burst_count_cache = cnt;
+ p->child_burst_last_cached = now; + p->child_burst_last_cached = now;
+} +}
+ +
@ -178,8 +180,8 @@ index aff81e12460e..839605620f63 100644
+ if (child_burst_cache_expired(dec, now)) + if (child_burst_cache_expired(dec, now))
+ update_child_burst_cache_atavistic(dec, now, depth - 1, &cnt, &sum); + update_child_burst_cache_atavistic(dec, now, depth - 1, &cnt, &sum);
+ else { + else {
+ cnt += dcnt; + cnt += dec->child_burst_count_cache;
+ sum += (dec->child_burst_cache) * dcnt; + sum += (u32)dec->child_burst_cache * dec->child_burst_count_cache;
+ } + }
+ } + }
+ } + }
@ -198,7 +200,7 @@ index aff81e12460e..839605620f63 100644
+ +
+ read_lock(&tasklist_lock); + read_lock(&tasklist_lock);
+ +
+ if (sched_burst_fork_atavistic) { + if (likely(sched_bore) && likely(sched_burst_fork_atavistic)) {
+ while ((anc->real_parent != anc) && (count_child_tasks(anc) == 1)) + while ((anc->real_parent != anc) && (count_child_tasks(anc) == 1))
+ anc = anc->real_parent; + anc = anc->real_parent;
+ if (child_burst_cache_expired(anc, now)) + if (child_burst_cache_expired(anc, now))
@ -218,7 +220,7 @@ index aff81e12460e..839605620f63 100644
/* /*
* Perform scheduler related setup for a newly forked process p. * Perform scheduler related setup for a newly forked process p.
* p is forked by current. * p is forked by current.
@@ -4507,6 +4614,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) @@ -4507,6 +4618,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->se.prev_sum_exec_runtime = 0; p->se.prev_sum_exec_runtime = 0;
p->se.nr_migrations = 0; p->se.nr_migrations = 0;
p->se.vruntime = 0; p->se.vruntime = 0;
@ -228,7 +230,7 @@ index aff81e12460e..839605620f63 100644
p->se.vlag = 0; p->se.vlag = 0;
INIT_LIST_HEAD(&p->se.group_node); INIT_LIST_HEAD(&p->se.group_node);
@@ -4828,6 +4938,9 @@ void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs) @@ -4828,6 +4942,9 @@ void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
void sched_post_fork(struct task_struct *p) void sched_post_fork(struct task_struct *p)
{ {
@ -238,20 +240,20 @@ index aff81e12460e..839605620f63 100644
uclamp_post_fork(p); uclamp_post_fork(p);
} }
@@ -9954,6 +10067,11 @@ void __init sched_init(void) @@ -9954,6 +10071,11 @@ void __init sched_init(void)
BUG_ON(&dl_sched_class != &stop_sched_class + 1); BUG_ON(&dl_sched_class != &stop_sched_class + 1);
#endif #endif
+#ifdef CONFIG_SCHED_BORE +#ifdef CONFIG_SCHED_BORE
+ sched_init_bore(); + sched_init_bore();
+ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 3.0 Beta2 by Masahito Suzuki"); + printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 3.1.2 by Masahito Suzuki");
+#endif // CONFIG_SCHED_BORE +#endif // CONFIG_SCHED_BORE
+ +
wait_bit_init(); wait_bit_init();
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index e7e83181fbb6..ff41a524c1ee 100644 index e7e83181f..ff41a524c 100644
--- a/kernel/sched/debug.c --- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c +++ b/kernel/sched/debug.c
@@ -348,6 +348,7 @@ static __init int sched_init_debug(void) @@ -348,6 +348,7 @@ static __init int sched_init_debug(void)
@ -273,7 +275,7 @@ index e7e83181fbb6..ff41a524c1ee 100644
SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
#endif #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 461409c0eac7..90ce27fb0a3f 100644 index 461409c0e..1293fe037 100644
--- a/kernel/sched/fair.c --- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c +++ b/kernel/sched/fair.c
@@ -19,6 +19,9 @@ @@ -19,6 +19,9 @@
@ -309,7 +311,7 @@ index 461409c0eac7..90ce27fb0a3f 100644
/* /*
* After fork, child runs first. If set to 0 (default) then * After fork, child runs first. If set to 0 (default) then
@@ -84,8 +87,93 @@ static unsigned int normalized_sysctl_sched_base_slice = 750000ULL; @@ -84,8 +87,85 @@ static unsigned int normalized_sysctl_sched_base_slice = 750000ULL;
*/ */
unsigned int sysctl_sched_child_runs_first __read_mostly; unsigned int sysctl_sched_child_runs_first __read_mostly;
@ -330,8 +332,8 @@ index 461409c0eac7..90ce27fb0a3f 100644
+#ifdef CONFIG_SCHED_BORE +#ifdef CONFIG_SCHED_BORE
+unsigned int __read_mostly sched_bore = 1; +unsigned int __read_mostly sched_bore = 1;
+unsigned int __read_mostly sched_burst_cache_lifetime = 60000000; +unsigned int __read_mostly sched_burst_cache_lifetime = 60000000;
+unsigned int __read_mostly sched_burst_penalty_offset = 18; +unsigned int __read_mostly sched_burst_penalty_offset = 22;
+unsigned int __read_mostly sched_burst_penalty_scale = 1292; +unsigned int __read_mostly sched_burst_penalty_scale = 1366;
+unsigned int __read_mostly sched_burst_smoothness_up = 1; +unsigned int __read_mostly sched_burst_smoothness_up = 1;
+unsigned int __read_mostly sched_burst_smoothness_down = 0; +unsigned int __read_mostly sched_burst_smoothness_down = 0;
+unsigned int __read_mostly sched_burst_fork_atavistic = 2; +unsigned int __read_mostly sched_burst_fork_atavistic = 2;
@ -339,27 +341,22 @@ index 461409c0eac7..90ce27fb0a3f 100644
+static int sixty_four = 64; +static int sixty_four = 64;
+static int maxval_12_bits = 4095; +static int maxval_12_bits = 4095;
+ +
+#define MAX_BURST_PENALTY ((u32)(40UL << 8) - 1) +#define MAX_BURST_PENALTY ((40U << 8) - 1)
+ +
+static inline u32 log2plus1_u64_u32f8(u64 v) { +static inline u32 log2plus1_u64_u32f8(u64 v) {
+ x32 result; + x32 result;
+ int msb = fls64(v); + int msb = fls64(v);
+ result.u8[0] = v << (64 - msb) >> 55; + int excess_bits = msb - 9;
+ result.u8[0] = (0 <= excess_bits)? v >> excess_bits: v << -excess_bits;
+ result.u8[1] = msb; + result.u8[1] = msb;
+ return result.u32; + return result.u32;
+} +}
+ +
+static inline u32 u8h_u32(u8 v) { +static inline u32 calc_burst_penalty(u64 burst_time) {
+ x32 result;
+ result.u8[1] = v;
+ return result.u32;
+}
+
+static inline u32 calc_burst_penalty(struct sched_entity *se) {
+ u32 greed, tolerance, penalty, scaled_penalty; + u32 greed, tolerance, penalty, scaled_penalty;
+ +
+ greed = log2plus1_u64_u32f8(se->burst_time); + greed = log2plus1_u64_u32f8(burst_time);
+ tolerance = u8h_u32(sched_burst_penalty_offset); + tolerance = sched_burst_penalty_offset << 8;
+ penalty = max(0, (s32)greed - (s32)tolerance); + penalty = max(0, (s32)greed - (s32)tolerance);
+ scaled_penalty = penalty * sched_burst_penalty_scale >> 10; + scaled_penalty = penalty * sched_burst_penalty_scale >> 10;
+ +
@ -367,19 +364,21 @@ index 461409c0eac7..90ce27fb0a3f 100644
+} +}
+ +
+static void update_burst_penalty(struct sched_entity *se) { +static void update_burst_penalty(struct sched_entity *se) {
+ se->curr_burst_penalty = calc_burst_penalty(se); + se->curr_burst_penalty = calc_burst_penalty(se->burst_time);
+ se->burst_penalty = max(se->prev_burst_penalty, se->curr_burst_penalty); + se->burst_penalty = max(se->prev_burst_penalty, se->curr_burst_penalty);
+} +}
+ +
+static inline u64 penalty_scale(u64 delta, struct sched_entity *se) { +static inline u64 penalty_scale(u64 delta, struct sched_entity *se, bool half) {
+ u8 score = ((x16*)&se->burst_penalty)->u8[1]; + u32 score = ((x16*)&se->burst_penalty)->u8[1];
+ if (half) score >>= 1;
+ return mul_u64_u32_shr(delta, sched_prio_to_wmult[score], 22); + return mul_u64_u32_shr(delta, sched_prio_to_wmult[score], 22);
+} +}
+ +
+static inline u32 binary_smooth(u32 new, u32 old) { +static inline u32 binary_smooth(u32 new, u32 old) {
+ return (new >= old)? + int increment = new - old;
+ old + ((new - old) >> sched_burst_smoothness_up): + return (0 <= increment)?
+ old - ((old - new) >> sched_burst_smoothness_down); + old + ( increment >> sched_burst_smoothness_up):
+ old - (-increment >> sched_burst_smoothness_down);
+} +}
+ +
+static void restart_burst(struct sched_entity *se) { +static void restart_burst(struct sched_entity *se) {
@ -389,21 +388,16 @@ index 461409c0eac7..90ce27fb0a3f 100644
+ se->burst_time = 0; + se->burst_time = 0;
+} +}
+ +
+#define calc_delta_fair(delta, se) __calc_delta_fair(delta, se, true) +static inline void vruntime_backstep(s64 *vdiff, struct sched_entity *se) {
+#define calc_delta_fair_unscaled(delta, se) __calc_delta_fair(delta, se, false) + u64 delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
+static inline u64 + *vdiff += delta_exec - penalty_scale(delta_exec, se, false);
+__calc_delta_fair(u64 delta, struct sched_entity *se, bool bscale);
+
+static s64 wakeup_preempt_backstep_delta(u64 rtime, struct sched_entity *se) {
+ u64 delta = calc_delta_fair_unscaled(rtime, se);
+ return delta - penalty_scale(delta, se);
+} +}
+#endif // CONFIG_SCHED_BORE +#endif // CONFIG_SCHED_BORE
+ +
int sched_thermal_decay_shift; int sched_thermal_decay_shift;
static int __init setup_sched_thermal_decay_shift(char *str) static int __init setup_sched_thermal_decay_shift(char *str)
{ {
@@ -145,6 +233,69 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536; @@ -145,6 +225,69 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536;
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
static struct ctl_table sched_fair_sysctls[] = { static struct ctl_table sched_fair_sysctls[] = {
@ -473,7 +467,7 @@ index 461409c0eac7..90ce27fb0a3f 100644
{ {
.procname = "sched_child_runs_first", .procname = "sched_child_runs_first",
.data = &sysctl_sched_child_runs_first, .data = &sysctl_sched_child_runs_first,
@@ -238,6 +389,7 @@ static void update_sysctl(void) @@ -238,6 +381,7 @@ static void update_sysctl(void)
#define SET_SYSCTL(name) \ #define SET_SYSCTL(name) \
(sysctl_##name = (factor) * normalized_sysctl_##name) (sysctl_##name = (factor) * normalized_sysctl_##name)
SET_SYSCTL(sched_base_slice); SET_SYSCTL(sched_base_slice);
@ -481,13 +475,14 @@ index 461409c0eac7..90ce27fb0a3f 100644
#undef SET_SYSCTL #undef SET_SYSCTL
} }
@@ -308,11 +460,19 @@ static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight @@ -308,11 +452,20 @@ static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight
/* /*
* delta /= w * delta /= w
*/ */
+#ifdef CONFIG_SCHED_BORE +#ifdef CONFIG_SCHED_BORE
+static inline u64 +#define calc_delta_fair_half(delta, se) __calc_delta_fair(delta, se, true)
+__calc_delta_fair(u64 delta, struct sched_entity *se, bool bscale) +#define calc_delta_fair(delta, se) __calc_delta_fair(delta, se, false)
+static inline u64 __calc_delta_fair(u64 delta, struct sched_entity *se, bool half)
+#else // CONFIG_SCHED_BORE +#else // CONFIG_SCHED_BORE
static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se) static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
+#endif // CONFIG_SCHED_BORE +#endif // CONFIG_SCHED_BORE
@ -496,24 +491,12 @@ index 461409c0eac7..90ce27fb0a3f 100644
delta = __calc_delta(delta, NICE_0_LOAD, &se->load); delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
+#ifdef CONFIG_SCHED_BORE +#ifdef CONFIG_SCHED_BORE
+ if (bscale && likely(sched_bore)) delta = penalty_scale(delta, se); + if (likely(sched_bore)) delta = penalty_scale(delta, se, half);
+#endif // CONFIG_SCHED_BORE +#endif // CONFIG_SCHED_BORE
return delta; return delta;
} }
@@ -706,7 +866,11 @@ void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -944,6 +1097,7 @@ int sched_update_scaling(void)
SCHED_WARN_ON(!se->on_rq);
lag = avg_vruntime(cfs_rq) - se->vruntime;
+#ifdef CONFIG_SCHED_BORE
+ limit = calc_delta_fair_unscaled(max_t(u64, 2*se->slice, TICK_NSEC), se);
+#else // CONFIG_SCHED_BORE
limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
+#endif // CONFIG_SCHED_BORE
se->vlag = clamp(lag, -limit, limit);
}
@@ -944,6 +1108,7 @@ int sched_update_scaling(void)
#define WRT_SYSCTL(name) \ #define WRT_SYSCTL(name) \
(normalized_sysctl_##name = sysctl_##name / (factor)) (normalized_sysctl_##name = sysctl_##name / (factor))
WRT_SYSCTL(sched_base_slice); WRT_SYSCTL(sched_base_slice);
@ -521,18 +504,29 @@ index 461409c0eac7..90ce27fb0a3f 100644
#undef WRT_SYSCTL #undef WRT_SYSCTL
return 0; return 0;
@@ -1121,6 +1286,10 @@ static void update_curr(struct cfs_rq *cfs_rq) @@ -1121,7 +1275,11 @@ static void update_curr(struct cfs_rq *cfs_rq)
curr->sum_exec_runtime += delta_exec; curr->sum_exec_runtime += delta_exec;
schedstat_add(cfs_rq->exec_clock, delta_exec); schedstat_add(cfs_rq->exec_clock, delta_exec);
- curr->vruntime += calc_delta_fair(delta_exec, curr);
+#ifdef CONFIG_SCHED_BORE +#ifdef CONFIG_SCHED_BORE
+ curr->burst_time += delta_exec; + curr->burst_time += delta_exec;
+ update_burst_penalty(curr); + update_burst_penalty(curr);
+#endif // CONFIG_SCHED_BORE +#endif // CONFIG_SCHED_BORE
curr->vruntime += calc_delta_fair(delta_exec, curr); + curr->vruntime += max(1ULL, calc_delta_fair(delta_exec, curr));
update_deadline(cfs_rq, curr); update_deadline(cfs_rq, curr);
update_min_vruntime(cfs_rq); update_min_vruntime(cfs_rq);
@@ -5187,6 +5356,9 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -4919,7 +5077,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
static void
place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
- u64 vslice = calc_delta_fair(se->slice, se);
+ u64 vslice = calc_delta_fair_half(se->slice, se);
u64 vruntime = avg_vruntime(cfs_rq);
s64 lag = 0;
@@ -5187,6 +5345,9 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
se->prev_sum_exec_runtime = se->sum_exec_runtime; se->prev_sum_exec_runtime = se->sum_exec_runtime;
} }
@ -542,7 +536,7 @@ index 461409c0eac7..90ce27fb0a3f 100644
/* /*
* Pick the next process, keeping these things in mind, in this order: * Pick the next process, keeping these things in mind, in this order:
* 1) keep things fair between processes/task groups * 1) keep things fair between processes/task groups
@@ -5197,14 +5369,16 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -5197,14 +5358,16 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
static struct sched_entity * static struct sched_entity *
pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{ {
@ -561,18 +555,14 @@ index 461409c0eac7..90ce27fb0a3f 100644
} }
static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq); static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
@@ -6452,6 +6626,38 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) @@ -6452,6 +6615,30 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
hrtick_update(rq); hrtick_update(rq);
} }
+static unsigned long wakeup_gran(struct sched_entity *se) +static unsigned long wakeup_gran(struct sched_entity *se)
+{ +{
+ unsigned long gran = sysctl_sched_wakeup_granularity; + unsigned long gran = sysctl_sched_wakeup_granularity;
+#ifdef CONFIG_SCHED_BORE
+ return calc_delta_fair_unscaled(gran, se);
+#else // CONFIG_SCHED_BORE
+ return calc_delta_fair(gran, se); + return calc_delta_fair(gran, se);
+#endif // CONFIG_SCHED_BORE
+} +}
+ +
+static int +static int
@ -580,11 +570,7 @@ index 461409c0eac7..90ce27fb0a3f 100644
+{ +{
+ s64 gran, vdiff = curr->vruntime - se->vruntime; + s64 gran, vdiff = curr->vruntime - se->vruntime;
+#ifdef CONFIG_SCHED_BORE +#ifdef CONFIG_SCHED_BORE
+ if (likely(sched_bore)) { + if (likely(sched_bore)) vruntime_backstep(&vdiff, curr);
+ u64 rtime = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
+ vdiff += wakeup_preempt_backstep_delta(rtime, curr)
+ - wakeup_preempt_backstep_delta(rtime, se);
+ }
+#endif // CONFIG_SCHED_BORE +#endif // CONFIG_SCHED_BORE
+ +
+ if (vdiff <= 0) + if (vdiff <= 0)
@ -600,7 +586,7 @@ index 461409c0eac7..90ce27fb0a3f 100644
static void set_next_buddy(struct sched_entity *se); static void set_next_buddy(struct sched_entity *se);
/* /*
@@ -6470,6 +6676,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) @@ -6470,6 +6657,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
util_est_dequeue(&rq->cfs, p); util_est_dequeue(&rq->cfs, p);
for_each_sched_entity(se) { for_each_sched_entity(se) {
@ -610,7 +596,7 @@ index 461409c0eac7..90ce27fb0a3f 100644
cfs_rq = cfs_rq_of(se); cfs_rq = cfs_rq_of(se);
dequeue_entity(cfs_rq, se, flags); dequeue_entity(cfs_rq, se, flags);
@@ -7980,7 +8189,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ @@ -7980,7 +8170,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
/* /*
* XXX pick_eevdf(cfs_rq) != se ? * XXX pick_eevdf(cfs_rq) != se ?
*/ */
@ -619,34 +605,32 @@ index 461409c0eac7..90ce27fb0a3f 100644
goto preempt; goto preempt;
return; return;
@@ -8193,6 +8402,9 @@ static void yield_task_fair(struct rq *rq) @@ -8197,8 +8387,12 @@ static void yield_task_fair(struct rq *rq)
struct task_struct *curr = rq->curr; /*
struct cfs_rq *cfs_rq = task_cfs_rq(curr); * Are we the only task in the tree?
struct sched_entity *se = &curr->se; */
- if (unlikely(rq->nr_running == 1))
+ if (unlikely(rq->nr_running == 1)) {
+#ifdef CONFIG_SCHED_BORE
+ restart_burst(se);
+#endif // CONFIG_SCHED_BORE
return;
+ }
clear_buddies(cfs_rq, se);
@@ -8207,6 +8401,9 @@ static void yield_task_fair(struct rq *rq)
* Update run-time statistics of the 'current'.
*/
update_curr(cfs_rq);
+#ifdef CONFIG_SCHED_BORE +#ifdef CONFIG_SCHED_BORE
+ restart_burst(se); + restart_burst(se);
+#endif // CONFIG_SCHED_BORE +#endif // CONFIG_SCHED_BORE
/* /*
* Are we the only task in the tree? * Tell update_rq_clock() that we've just updated,
diff --git a/kernel/sched/features.h b/kernel/sched/features.h * so we don't do microscopic update in schedule()
index 54334ca5c5c6..416ec4bcdb0f 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -12,7 +12,11 @@ SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
* wakeup-preemption), since its likely going to consume data we
* touched, increases cache locality.
*/
+#ifdef CONFIG_SCHED_BORE
+SCHED_FEAT(NEXT_BUDDY, true)
+#else // CONFIG_SCHED_BORE
SCHED_FEAT(NEXT_BUDDY, false)
+#endif // CONFIG_SCHED_BORE
/*
* Consider buddies to be cache hot, decreases the likeliness of a
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 67cd7e1fd501..04d065015d6c 100644 index 67cd7e1fd..04d065015 100644
--- a/kernel/sched/sched.h --- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h +++ b/kernel/sched/sched.h
@@ -2506,6 +2506,7 @@ extern const_debug unsigned int sysctl_sched_nr_migrate; @@ -2506,6 +2506,7 @@ extern const_debug unsigned int sysctl_sched_nr_migrate;
@ -658,4 +642,4 @@ index 67cd7e1fd501..04d065015d6c 100644
#ifdef CONFIG_SCHED_DEBUG #ifdef CONFIG_SCHED_DEBUG
extern int sysctl_resched_latency_warn_ms; extern int sysctl_resched_latency_warn_ms;
-- --
2.41.0 2.42.0.rc0.25.ga82fb66fed


@ -1,573 +0,0 @@
From ab6268d199fa749e274a48b00c443538ae492b16 Mon Sep 17 00:00:00 2001
From: Piotr Gorski <lucjan.lucjanov@gmail.com>
Date: Wed, 9 Aug 2023 14:07:31 +0200
Subject: [PATCH] amd-6.5: merge changes from dev tree
Signed-off-by: Piotr Gorski <lucjan.lucjanov@gmail.com>
---
.../admin-guide/kernel-parameters.txt | 5 +
Documentation/admin-guide/pm/amd-pstate.rst | 55 +++++
drivers/acpi/cppc_acpi.c | 13 ++
drivers/acpi/processor_driver.c | 6 +
drivers/cpufreq/amd-pstate.c | 191 ++++++++++++++++--
drivers/cpufreq/cpufreq.c | 13 ++
include/acpi/cppc_acpi.h | 5 +
include/linux/amd-pstate.h | 1 +
include/linux/cpufreq.h | 4 +
9 files changed, 272 insertions(+), 21 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1457995f..1f53c395a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -363,6 +363,11 @@
selects a performance level in this range and appropriate
to the current workload.
+ amd_prefcore=
+ [X86]
+ enable
+ Enable AMD Pstate Preferred Core.
+
amijoy.map= [HW,JOY] Amiga joystick support
Map of devices attached to JOY0DAT and JOY1DAT
Format: <a>,<b>
diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
index 1cf40f692..4a30cf235 100644
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -353,6 +353,49 @@ is activated. In this mode, driver requests minimum and maximum performance
level and the platform autonomously selects a performance level in this range
and appropriate to the current workload.
+AMD Pstate Preferred Core
+=================================
+
+Core frequency is subject to process variation in semiconductors, so not all
+cores can reach the maximum frequency within the infrastructure limits.
+Consequently, AMD has redefined the concept of a part's maximum frequency:
+only a fraction of cores can reach it. To choose the best scheduling policy
+for a given scenario, the OS needs to know the core ordering that the platform
+reports through the highest performance capability register of the CPPC
+interface.
+
+``AMD Pstate Preferred Core`` uses the ITMT architecture's functions and data
+structures to let the scheduler favor cores that can reach a higher frequency
+at lower voltage. The preferred core can also change dynamically based on
+workload and platform conditions, accounting for thermals and aging.
+
+The priority metric will be initialized by the AMD Pstate driver. The AMD Pstate
+driver will also determine whether or not ``AMD Pstate Preferred Core`` is
+supported by the platform.
+
+The AMD Pstate driver provides an initial core ordering when the system boots.
+The platform uses the CPPC interfaces to communicate the core ranking to the
+operating system and scheduler, so that the OS schedules processes on the
+highest-performance cores first. When the AMD Pstate driver receives a
+notification that the highest performance has changed, it updates the core
+ranking and sets the CPU's priority accordingly.
+
+AMD Preferred Core Switch
+=================================
+Kernel Parameters
+-----------------
+
+``AMD Pstate Preferred Core`` has two states: enabled and disabled.
+The state is selected with a kernel parameter and defaults to disabled.
+
+``amd_prefcore=enable``
+
+If ``amd_prefcore=enable`` is passed on the kernel command line,
+``AMD Pstate Preferred Core`` is enabled, provided the processor and power
+firmware support the preferred core feature.
+
User Space Interface in ``sysfs`` - General
===========================================
@@ -385,6 +428,18 @@ control its functionality at the system level. They are located in the
to the operation mode represented by that string - or to be
unregistered in the "disable" case.
+``prefcore_state``
+ Preferred Core state of the driver: "enabled" or "disabled".
+
+ "enabled"
+ AMD Preferred Core is enabled.
+
+ "disabled"
+ AMD Preferred Core is disabled.
+
+
+ This attribute is read-only and reports the current Preferred Core state.
+
``cpupower`` tool support for ``amd-pstate``
===============================================
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 7ff269a78..ad388a0e8 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -1154,6 +1154,19 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf);
}
+/**
+ * cppc_get_highest_perf - Get the highest performance register value.
+ * @cpunum: CPU from which to get highest performance.
+ * @highest_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
+{
+ return cppc_get_perf(cpunum, HIGHEST_PERF, highest_perf);
+}
+EXPORT_SYMBOL_GPL(cppc_get_highest_perf);
+
/**
* cppc_get_epp_perf - Get the epp register value.
* @cpunum: CPU from which to get epp preference value.
diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
index 4bd16b3f0..29b2fb68a 100644
--- a/drivers/acpi/processor_driver.c
+++ b/drivers/acpi/processor_driver.c
@@ -27,6 +27,7 @@
#define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80
#define ACPI_PROCESSOR_NOTIFY_POWER 0x81
#define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82
+#define ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED 0x85
MODULE_AUTHOR("Paul Diefenbaugh");
MODULE_DESCRIPTION("ACPI Processor Driver");
@@ -83,6 +84,11 @@ static void acpi_processor_notify(acpi_handle handle, u32 event, void *data)
acpi_bus_generate_netlink_event(device->pnp.device_class,
dev_name(&device->dev), event, 0);
break;
+ case ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED:
+ cpufreq_update_highest_perf(pr->id);
+ acpi_bus_generate_netlink_event(device->pnp.device_class,
+ dev_name(&device->dev), event, 0);
+ break;
default:
acpi_handle_debug(handle, "Unsupported event [0x%x]\n", event);
break;
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 81fba0dcb..ba10aa971 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -37,6 +37,7 @@
#include <linux/uaccess.h>
#include <linux/static_call.h>
#include <linux/amd-pstate.h>
+#include <linux/topology.h>
#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>
@@ -49,6 +50,8 @@
#define AMD_PSTATE_TRANSITION_LATENCY 20000
#define AMD_PSTATE_TRANSITION_DELAY 1000
+#define AMD_PSTATE_PREFCORE_THRESHOLD 166
+#define AMD_PSTATE_MAX_CPPC_PERF 255
/*
* TODO: We need more time to fine tune processors with shared memory solution
@@ -65,6 +68,14 @@ static struct cpufreq_driver amd_pstate_epp_driver;
static int cppc_state = AMD_PSTATE_UNDEFINED;
static bool cppc_enabled;
+/*
+ * CPPC Preferred Core feature is supported by power firmware
+ */
+static bool prefcore_enabled = false;
+
+/* Disable AMD Pstate Preferred Core loading */
+static bool no_prefcore __read_mostly = true;
+
/*
* AMD Energy Preference Performance (EPP)
* The EPP is used in the CCLK DPM controller to drive
@@ -290,27 +301,26 @@ static inline int amd_pstate_enable(bool enable)
static int pstate_init_perf(struct amd_cpudata *cpudata)
{
u64 cap1;
- u32 highest_perf;
int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
&cap1);
if (ret)
return ret;
- /*
- * TODO: Introduce AMD specific power feature.
- *
- * CPPC entry doesn't indicate the highest performance in some ASICs.
+ /* For platforms that do not support the preferred core feature,
+ * highest_perf may be configured as 166 or 255. To avoid the max
+ * frequency being calculated wrongly, we take the
+ * AMD_CPPC_HIGHEST_PERF(cap1) value as the default max perf.
*/
- highest_perf = amd_get_highest_perf();
- if (highest_perf > AMD_CPPC_HIGHEST_PERF(cap1))
- highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
-
- WRITE_ONCE(cpudata->highest_perf, highest_perf);
+ if (!prefcore_enabled)
+ WRITE_ONCE(cpudata->highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
+ else
+ WRITE_ONCE(cpudata->highest_perf, AMD_PSTATE_PREFCORE_THRESHOLD);
WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
+ WRITE_ONCE(cpudata->prefcore_highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
return 0;
}
@@ -318,22 +328,21 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
static int cppc_init_perf(struct amd_cpudata *cpudata)
{
struct cppc_perf_caps cppc_perf;
- u32 highest_perf;
int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
if (ret)
return ret;
- highest_perf = amd_get_highest_perf();
- if (highest_perf > cppc_perf.highest_perf)
- highest_perf = cppc_perf.highest_perf;
-
- WRITE_ONCE(cpudata->highest_perf, highest_perf);
+ if (!prefcore_enabled)
+ WRITE_ONCE(cpudata->highest_perf, cppc_perf.highest_perf);
+ else
+ WRITE_ONCE(cpudata->highest_perf, AMD_PSTATE_PREFCORE_THRESHOLD);
WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
WRITE_ONCE(cpudata->lowest_nonlinear_perf,
cppc_perf.lowest_nonlinear_perf);
WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
+ WRITE_ONCE(cpudata->prefcore_highest_perf, cppc_perf.highest_perf);
if (cppc_state == AMD_PSTATE_ACTIVE)
return 0;
@@ -676,6 +685,118 @@ static void amd_perf_ctl_reset(unsigned int cpu)
wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
}
+/*
+ * Enabling AMD Pstate Preferred Core can't be done directly from cpufreq
+ * callbacks due to locking, so queue the work for later.
+ */
+static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
+{
+ sched_set_itmt_support();
+}
+static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
+
+/**
+ * amd_pstate_get_highest_perf - Get the highest performance register value.
+ * @cpu: CPU from which to get highest performance.
+ * @highest_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+static int amd_pstate_get_highest_perf(int cpu, u64 *highest_perf)
+{
+ int ret;
+
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
+ u64 cap1;
+
+ ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
+ if (ret)
+ return ret;
+ WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
+ } else {
+ ret = cppc_get_highest_perf(cpu, highest_perf);
+ }
+
+ return (ret);
+}
+
+static void amd_pstate_init_prefcore(void)
+{
+ int cpu, ret;
+ u64 highest_perf;
+
+ if (no_prefcore)
+ return;
+
+ for_each_possible_cpu(cpu) {
+ ret = amd_pstate_get_highest_perf(cpu, &highest_perf);
+ if (ret)
+ break;
+
+ sched_set_itmt_core_prio(highest_perf, cpu);
+ }
+
+ /*
+ * This code can be run during CPU online under the
+ * CPU hotplug locks, so sched_set_itmt_support()
+ * cannot be called from here. Queue up a work item
+ * to invoke it.
+ */
+ schedule_work(&sched_prefcore_work);
+}
+
+static void amd_pstate_update_highest_perf(unsigned int cpu)
+{
+ struct cpufreq_policy *policy;
+ struct amd_cpudata *cpudata;
+ u32 prev_high = 0, cur_high = 0;
+ u64 highest_perf;
+ int ret;
+
+ if (!prefcore_enabled)
+ return;
+
+ ret = amd_pstate_get_highest_perf(cpu, &highest_perf);
+ if (ret)
+ return;
+
+ policy = cpufreq_cpu_get(cpu);
+ cpudata = policy->driver_data;
+ cur_high = highest_perf;
+ prev_high = READ_ONCE(cpudata->prefcore_highest_perf);
+
+ if (prev_high != cur_high) {
+ WRITE_ONCE(cpudata->prefcore_highest_perf, cur_high);
+ sched_set_itmt_core_prio(cur_high, cpu);
+ }
+
+ cpufreq_cpu_put(policy);
+}
+
+/*
+ * Check if the AMD Pstate Preferred Core feature is supported and enabled.
+ * 1) no_prefcore lets the user enable or disable loading of AMD Pstate
+ * Preferred Core; without amd_prefcore=enable it stays disabled even if
+ * the processor and power firmware support the feature.
+ * 2) prefcore_enabled indicates whether the CPPC preferred core is enabled.
+ */
+static void check_prefcore_supported(int cpu)
+{
+ u64 highest_perf;
+ int ret;
+
+ if (no_prefcore)
+ return;
+
+ ret = amd_pstate_get_highest_perf(cpu, &highest_perf);
+ if (ret)
+ return;
+
+ if (highest_perf < AMD_PSTATE_MAX_CPPC_PERF)
+ prefcore_enabled = true;
+}
+
static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
@@ -697,6 +818,9 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
cpudata->cpu = policy->cpu;
+ /* check if CPPC preferred core feature is enabled */
+ check_prefcore_supported(policy->cpu);
+
ret = amd_pstate_init_perf(cpudata);
if (ret)
goto free_cpudata1;
@@ -1012,8 +1136,8 @@ static int amd_pstate_update_status(const char *buf, size_t size)
return 0;
}
-static ssize_t show_status(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
+static ssize_t status_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
ssize_t ret;
@@ -1024,7 +1148,7 @@ static ssize_t show_status(struct kobject *kobj,
return ret;
}
-static ssize_t store_status(struct kobject *a, struct kobj_attribute *b,
+static ssize_t status_store(struct device *a, struct device_attribute *b,
const char *buf, size_t count)
{
char *p = memchr(buf, '\n', count);
@@ -1037,13 +1161,20 @@ static ssize_t store_status(struct kobject *a, struct kobj_attribute *b,
return ret < 0 ? ret : count;
}
+static ssize_t prefcore_state_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%s\n", prefcore_enabled ? "enabled" : "disabled");
+}
+
cpufreq_freq_attr_ro(amd_pstate_max_freq);
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
cpufreq_freq_attr_ro(amd_pstate_highest_perf);
cpufreq_freq_attr_rw(energy_performance_preference);
cpufreq_freq_attr_ro(energy_performance_available_preferences);
-define_one_global_rw(status);
+static DEVICE_ATTR_RW(status);
+static DEVICE_ATTR_RO(prefcore_state);
static struct freq_attr *amd_pstate_attr[] = {
&amd_pstate_max_freq,
@@ -1062,7 +1193,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
};
static struct attribute *pstate_global_attributes[] = {
- &status.attr,
+ &dev_attr_status.attr,
+ &dev_attr_prefcore_state.attr,
NULL
};
@@ -1114,6 +1246,9 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
cpudata->cpu = policy->cpu;
cpudata->epp_policy = 0;
+ /* check if CPPC preferred core feature is supported */
+ check_prefcore_supported(policy->cpu);
+
ret = amd_pstate_init_perf(cpudata);
if (ret)
goto free_cpudata1;
@@ -1392,6 +1527,7 @@ static struct cpufreq_driver amd_pstate_driver = {
.suspend = amd_pstate_cpu_suspend,
.resume = amd_pstate_cpu_resume,
.set_boost = amd_pstate_set_boost,
+ .update_highest_perf = amd_pstate_update_highest_perf,
.name = "amd-pstate",
.attr = amd_pstate_attr,
};
@@ -1406,6 +1542,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = {
.online = amd_pstate_epp_cpu_online,
.suspend = amd_pstate_epp_suspend,
.resume = amd_pstate_epp_resume,
+ .update_highest_perf = amd_pstate_update_highest_perf,
.name = "amd-pstate-epp",
.attr = amd_pstate_epp_attr,
};
@@ -1506,6 +1643,8 @@ static int __init amd_pstate_init(void)
}
}
+ amd_pstate_init_prefcore();
+
return ret;
global_attr_free:
@@ -1527,7 +1666,17 @@ static int __init amd_pstate_param(char *str)
return amd_pstate_set_driver(mode_idx);
}
+
+static int __init amd_prefcore_param(char *str)
+{
+ if (!strcmp(str, "enable"))
+ no_prefcore = false;
+
+ return 0;
+}
+
early_param("amd_pstate", amd_pstate_param);
+early_param("amd_prefcore", amd_prefcore_param);
MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 50bbc969f..842357abf 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2675,6 +2675,19 @@ void cpufreq_update_limits(unsigned int cpu)
}
EXPORT_SYMBOL_GPL(cpufreq_update_limits);
+/**
+ * cpufreq_update_highest_perf - Update highest performance for a given CPU.
+ * @cpu: CPU to update the highest performance for.
+ *
+ * Invoke the driver's ->update_highest_perf callback if present
+ */
+void cpufreq_update_highest_perf(unsigned int cpu)
+{
+ if (cpufreq_driver->update_highest_perf)
+ cpufreq_driver->update_highest_perf(cpu);
+}
+EXPORT_SYMBOL_GPL(cpufreq_update_highest_perf);
+
/*********************************************************************
* BOOST *
*********************************************************************/
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index 6126c977e..c0b69ffe7 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -139,6 +139,7 @@ struct cppc_cpudata {
#ifdef CONFIG_ACPI_CPPC_LIB
extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf);
extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf);
+extern int cppc_get_highest_perf(int cpunum, u64 *highest_perf);
extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
extern int cppc_set_enable(int cpu, bool enable);
@@ -165,6 +166,10 @@ static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
{
return -ENOTSUPP;
}
+static inline int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
+{
+ return -ENOTSUPP;
+}
static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
{
return -ENOTSUPP;
diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
index 446394f84..fa86bc953 100644
--- a/include/linux/amd-pstate.h
+++ b/include/linux/amd-pstate.h
@@ -70,6 +70,7 @@ struct amd_cpudata {
u32 nominal_perf;
u32 lowest_nonlinear_perf;
u32 lowest_perf;
+ u32 prefcore_highest_perf;
u32 max_freq;
u32 min_freq;
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 172ff51c1..766c83a4f 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -231,6 +231,7 @@ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu);
void refresh_frequency_limits(struct cpufreq_policy *policy);
void cpufreq_update_policy(unsigned int cpu);
void cpufreq_update_limits(unsigned int cpu);
+void cpufreq_update_highest_perf(unsigned int cpu);
bool have_governor_per_policy(void);
bool cpufreq_supports_freq_invariance(void);
struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy);
@@ -376,6 +377,9 @@ struct cpufreq_driver {
/* Called to update policy limits on firmware notifications. */
void (*update_limits)(unsigned int cpu);
+ /* Called to update highest performance on firmware notifications. */
+ void (*update_highest_perf)(unsigned int cpu);
+
/* optional */
int (*bios_limit)(int cpu, unsigned int *limit);
--
2.42.0.rc0.25.ga82fb66fed
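The deleted patch above wires firmware "highest performance changed" notifications (ACPI notify 0x85) through cpufreq_update_highest_perf() and the driver's ->update_highest_perf() callback into the scheduler's ITMT ranking. A condensed sketch of that update step follows, reusing the names the patch introduces; prefcore_update_sketch itself is a hypothetical wrapper, not code from the patch.

/*
 * Condensed flow: ACPI notify 0x85 -> cpufreq_update_highest_perf(cpu)
 *   -> driver ->update_highest_perf(cpu) -> sched_set_itmt_core_prio().
 */
static void prefcore_update_sketch(unsigned int cpu, struct amd_cpudata *cpudata)
{
	u64 highest_perf;

	if (amd_pstate_get_highest_perf(cpu, &highest_perf))
		return;	/* leave the ranking alone if the read fails */

	/* only touch the ITMT ranking when the firmware value actually moved */
	if (READ_ONCE(cpudata->prefcore_highest_perf) != highest_perf) {
		WRITE_ONCE(cpudata->prefcore_highest_perf, (u32)highest_perf);
		sched_set_itmt_core_prio((int)highest_perf, cpu);
	}
}

At boot, amd_pstate_init_prefcore() seeds the same ranking for every possible CPU, and the feature only arms itself when amd_prefcore=enable is on the kernel command line and the reported highest perf is below 255 (AMD_PSTATE_MAX_CPPC_PERF).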


@ -16,5 +16,3 @@ patch -Np1 < "../patches/0002-eevdfbore.patch"
patch -Np1 < "../patches/0004-Allow-to-set-custom-USB-pollrate-for-specific-device.patch" patch -Np1 < "../patches/0004-Allow-to-set-custom-USB-pollrate-for-specific-device.patch"
# Allow pre polaris cards to use the amdgpu kernel module # Allow pre polaris cards to use the amdgpu kernel module
patch -Np1 < "../patches/0005-amdgpu-si-cik-default.patch" patch -Np1 < "../patches/0005-amdgpu-si-cik-default.patch"
# AMD Patch for CPPC
patch -Np1 < "../patches/0006-AMD-cppc.patch"