6.5RC5
This commit is contained in:
parent
9ec4cbb0e4
commit
60dc452fb5
11
config
11
config
@ -594,7 +594,9 @@ CONFIG_CALL_DEPTH_TRACKING=y
|
||||
# CONFIG_CALL_THUNKS_DEBUG is not set
|
||||
CONFIG_CPU_IBPB_ENTRY=y
|
||||
CONFIG_CPU_IBRS_ENTRY=y
|
||||
CONFIG_CPU_SRSO=y
|
||||
CONFIG_SLS=y
|
||||
# CONFIG_GDS_FORCE_MITIGATION is not set
|
||||
CONFIG_ARCH_HAS_ADD_PAGES=y
|
||||
CONFIG_ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE=y
|
||||
|
||||
@ -1333,11 +1335,10 @@ CONFIG_TCP_CONG_YEAH=m
|
||||
CONFIG_TCP_CONG_ILLINOIS=m
|
||||
CONFIG_TCP_CONG_DCTCP=m
|
||||
CONFIG_TCP_CONG_CDG=m
|
||||
CONFIG_TCP_CONG_BBR=m
|
||||
CONFIG_TCP_CONG_BBR2=y
|
||||
CONFIG_DEFAULT_BBR2=y
|
||||
CONFIG_TCP_CONG_BBR=y
|
||||
CONFIG_DEFAULT_BBR=y
|
||||
# CONFIG_DEFAULT_RENO is not set
|
||||
CONFIG_DEFAULT_TCP_CONG="bbr2"
|
||||
CONFIG_DEFAULT_TCP_CONG="bbr"
|
||||
CONFIG_TCP_MD5SIG=y
|
||||
CONFIG_IPV6=y
|
||||
CONFIG_IPV6_ROUTER_PREF=y
|
||||
@ -2613,7 +2614,7 @@ CONFIG_ZRAM_DEF_COMP_ZSTD=y
|
||||
# CONFIG_ZRAM_DEF_COMP_842 is not set
|
||||
CONFIG_ZRAM_DEF_COMP="zstd"
|
||||
CONFIG_ZRAM_WRITEBACK=y
|
||||
# CONFIG_ZRAM_MEMORY_TRACKING is not set
|
||||
CONFIG_ZRAM_MEMORY_TRACKING=y
|
||||
CONFIG_ZRAM_MULTI_COMP=y
|
||||
CONFIG_BLK_DEV_LOOP=m
|
||||
CONFIG_BLK_DEV_LOOP_MIN_COUNT=0
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,7 +1,7 @@
|
||||
From 0af97bb369de3bfe15d724e9bb0e3c971c6f9f20 Mon Sep 17 00:00:00 2001
|
||||
From 218c51e49185b75b4e36c8f11b5c77686f955a0a Mon Sep 17 00:00:00 2001
|
||||
From: Peter Jung <admin@ptr1337.dev>
|
||||
Date: Mon, 10 Jul 2023 17:12:45 +0200
|
||||
Subject: [PATCH] EEVDF-cachy
|
||||
Date: Sun, 30 Jul 2023 09:38:51 +0200
|
||||
Subject: [PATCH] EEVDF
|
||||
|
||||
Signed-off-by: Peter Jung <admin@ptr1337.dev>
|
||||
---
|
||||
@ -13,11 +13,11 @@ Signed-off-by: Peter Jung <admin@ptr1337.dev>
|
||||
init/init_task.c | 3 +-
|
||||
kernel/sched/core.c | 65 +-
|
||||
kernel/sched/debug.c | 49 +-
|
||||
kernel/sched/fair.c | 1157 +++++++++++------------
|
||||
kernel/sched/features.h | 24 +-
|
||||
kernel/sched/sched.h | 22 +-
|
||||
kernel/sched/fair.c | 1138 +++++++++++------------
|
||||
kernel/sched/features.h | 23 +-
|
||||
kernel/sched/sched.h | 21 +-
|
||||
tools/include/uapi/linux/sched.h | 4 +-
|
||||
12 files changed, 733 insertions(+), 658 deletions(-)
|
||||
12 files changed, 702 insertions(+), 668 deletions(-)
|
||||
|
||||
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
|
||||
index 4ef890191196..3a8d3e1e5591 100644
|
||||
@ -78,7 +78,7 @@ index 7ee7ed5de722..6dbc5a1bf6a8 100644
|
||||
* Template for declaring augmented rbtree callbacks (generic case)
|
||||
*
|
||||
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
||||
index efc9f4bdc4ca..e99a9aa6a972 100644
|
||||
index 609bde814cb0..c940c4dc8304 100644
|
||||
--- a/include/linux/sched.h
|
||||
+++ b/include/linux/sched.h
|
||||
@@ -549,13 +549,18 @@ struct sched_entity {
|
||||
@ -196,7 +196,7 @@ index ff6c4b9bfe6b..511cbcf3510d 100644
|
||||
.rt = {
|
||||
.run_list = LIST_HEAD_INIT(init_task.rt.run_list),
|
||||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
|
||||
index 83e36547af17..8a541fe2d462 100644
|
||||
index c52c2eba7c73..aff81e12460e 100644
|
||||
--- a/kernel/sched/core.c
|
||||
+++ b/kernel/sched/core.c
|
||||
@@ -1305,6 +1305,12 @@ static void set_load_weight(struct task_struct *p, bool update_load)
|
||||
@ -232,7 +232,7 @@ index 83e36547af17..8a541fe2d462 100644
|
||||
|
||||
/*
|
||||
* We don't need the reset flag anymore after the fork. It has
|
||||
@@ -7529,7 +7539,7 @@ static struct task_struct *find_process_by_pid(pid_t pid)
|
||||
@@ -7516,7 +7526,7 @@ static struct task_struct *find_process_by_pid(pid_t pid)
|
||||
#define SETPARAM_POLICY -1
|
||||
|
||||
static void __setscheduler_params(struct task_struct *p,
|
||||
@ -241,7 +241,7 @@ index 83e36547af17..8a541fe2d462 100644
|
||||
{
|
||||
int policy = attr->sched_policy;
|
||||
|
||||
@@ -7553,6 +7563,13 @@ static void __setscheduler_params(struct task_struct *p,
|
||||
@@ -7540,6 +7550,13 @@ static void __setscheduler_params(struct task_struct *p,
|
||||
set_load_weight(p, true);
|
||||
}
|
||||
|
||||
@ -255,7 +255,7 @@ index 83e36547af17..8a541fe2d462 100644
|
||||
/*
|
||||
* Check the target process has a UID that matches the current process's:
|
||||
*/
|
||||
@@ -7687,6 +7704,13 @@ static int __sched_setscheduler(struct task_struct *p,
|
||||
@@ -7674,6 +7691,13 @@ static int __sched_setscheduler(struct task_struct *p,
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -269,7 +269,7 @@ index 83e36547af17..8a541fe2d462 100644
|
||||
/* Update task specific "requested" clamps */
|
||||
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) {
|
||||
retval = uclamp_validate(p, attr);
|
||||
@@ -7734,6 +7758,9 @@ static int __sched_setscheduler(struct task_struct *p,
|
||||
@@ -7721,6 +7745,9 @@ static int __sched_setscheduler(struct task_struct *p,
|
||||
goto change;
|
||||
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
|
||||
goto change;
|
||||
@ -279,7 +279,7 @@ index 83e36547af17..8a541fe2d462 100644
|
||||
|
||||
p->sched_reset_on_fork = reset_on_fork;
|
||||
retval = 0;
|
||||
@@ -7822,6 +7849,7 @@ static int __sched_setscheduler(struct task_struct *p,
|
||||
@@ -7809,6 +7836,7 @@ static int __sched_setscheduler(struct task_struct *p,
|
||||
__setscheduler_params(p, attr);
|
||||
__setscheduler_prio(p, newprio);
|
||||
}
|
||||
@ -287,7 +287,7 @@ index 83e36547af17..8a541fe2d462 100644
|
||||
__setscheduler_uclamp(p, attr);
|
||||
|
||||
if (queued) {
|
||||
@@ -8033,6 +8061,9 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
|
||||
@@ -8020,6 +8048,9 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
|
||||
size < SCHED_ATTR_SIZE_VER1)
|
||||
return -EINVAL;
|
||||
|
||||
@ -297,7 +297,7 @@ index 83e36547af17..8a541fe2d462 100644
|
||||
/*
|
||||
* XXX: Do we want to be lenient like existing syscalls; or do we want
|
||||
* to be strict and return an error on out-of-bounds values?
|
||||
@@ -8270,6 +8301,8 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
|
||||
@@ -8257,6 +8288,8 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
|
||||
get_params(p, &kattr);
|
||||
kattr.sched_flags &= SCHED_FLAG_ALL;
|
||||
|
||||
@ -306,7 +306,7 @@ index 83e36547af17..8a541fe2d462 100644
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
/*
|
||||
* This could race with another potential updater, but this is fine
|
||||
@@ -11214,6 +11247,25 @@ static int cpu_idle_write_s64(struct cgroup_subsys_state *css,
|
||||
@@ -11180,6 +11213,25 @@ static int cpu_idle_write_s64(struct cgroup_subsys_state *css,
|
||||
{
|
||||
return sched_group_set_idle(css_tg(css), idle);
|
||||
}
|
||||
@ -332,7 +332,7 @@ index 83e36547af17..8a541fe2d462 100644
|
||||
#endif
|
||||
|
||||
static struct cftype cpu_legacy_files[] = {
|
||||
@@ -11228,6 +11280,11 @@ static struct cftype cpu_legacy_files[] = {
|
||||
@@ -11194,6 +11246,11 @@ static struct cftype cpu_legacy_files[] = {
|
||||
.read_s64 = cpu_idle_read_s64,
|
||||
.write_s64 = cpu_idle_write_s64,
|
||||
},
|
||||
@ -344,7 +344,7 @@ index 83e36547af17..8a541fe2d462 100644
|
||||
#endif
|
||||
#ifdef CONFIG_CFS_BANDWIDTH
|
||||
{
|
||||
@@ -11467,6 +11524,12 @@ static struct cftype cpu_files[] = {
|
||||
@@ -11411,6 +11468,12 @@ static struct cftype cpu_files[] = {
|
||||
.read_s64 = cpu_idle_read_s64,
|
||||
.write_s64 = cpu_idle_write_s64,
|
||||
},
|
||||
@ -358,7 +358,7 @@ index 83e36547af17..8a541fe2d462 100644
|
||||
#ifdef CONFIG_CFS_BANDWIDTH
|
||||
{
|
||||
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
|
||||
index aeeba46a096b..5c743bcb340d 100644
|
||||
index 066ff1c8ae4e..e7e83181fbb6 100644
|
||||
--- a/kernel/sched/debug.c
|
||||
+++ b/kernel/sched/debug.c
|
||||
@@ -347,10 +347,7 @@ static __init int sched_init_debug(void)
|
||||
@ -373,7 +373,7 @@ index aeeba46a096b..5c743bcb340d 100644
|
||||
|
||||
debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
|
||||
debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
|
||||
@@ -582,9 +579,13 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
|
||||
@@ -581,9 +578,13 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
|
||||
else
|
||||
SEQ_printf(m, " %c", task_state_to_char(p));
|
||||
|
||||
@ -388,7 +388,7 @@ index aeeba46a096b..5c743bcb340d 100644
|
||||
(long long)(p->nvcsw + p->nivcsw),
|
||||
p->prio);
|
||||
|
||||
@@ -627,10 +628,9 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
|
||||
@@ -626,10 +627,9 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
|
||||
|
||||
void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
|
||||
{
|
||||
@ -401,7 +401,7 @@ index aeeba46a096b..5c743bcb340d 100644
|
||||
unsigned long flags;
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
@@ -644,26 +644,25 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
|
||||
@@ -643,26 +643,25 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
|
||||
SPLIT_NS(cfs_rq->exec_clock));
|
||||
|
||||
raw_spin_rq_lock_irqsave(rq, flags);
|
||||
@ -441,7 +441,7 @@ index aeeba46a096b..5c743bcb340d 100644
|
||||
SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over",
|
||||
cfs_rq->nr_spread_over);
|
||||
SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
|
||||
@@ -864,10 +863,7 @@ static void sched_debug_header(struct seq_file *m)
|
||||
@@ -863,10 +862,7 @@ static void sched_debug_header(struct seq_file *m)
|
||||
SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x))
|
||||
#define PN(x) \
|
||||
SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
|
||||
@ -453,7 +453,7 @@ index aeeba46a096b..5c743bcb340d 100644
|
||||
P(sysctl_sched_child_runs_first);
|
||||
P(sysctl_sched_features);
|
||||
#undef PN
|
||||
@@ -1090,6 +1086,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
|
||||
@@ -1089,6 +1085,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
|
||||
#endif
|
||||
P(policy);
|
||||
P(prio);
|
||||
@ -462,7 +462,7 @@ index aeeba46a096b..5c743bcb340d 100644
|
||||
P(dl.runtime);
|
||||
P(dl.deadline);
|
||||
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
||||
index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
index 2c335df30171..461409c0eac7 100644
|
||||
--- a/kernel/sched/fair.c
|
||||
+++ b/kernel/sched/fair.c
|
||||
@@ -47,6 +47,7 @@
|
||||
@ -594,7 +594,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
|
||||
const struct sched_class fair_sched_class;
|
||||
|
||||
@@ -619,13 +569,200 @@ static inline bool entity_before(const struct sched_entity *a,
|
||||
@@ -619,13 +569,198 @@ static inline bool entity_before(const struct sched_entity *a,
|
||||
return (s64)(a->vruntime - b->vruntime) < 0;
|
||||
}
|
||||
|
||||
@ -671,7 +671,6 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
+ s64 key = entity_key(cfs_rq, se);
|
||||
+
|
||||
+ cfs_rq->avg_vruntime += key * weight;
|
||||
+ cfs_rq->avg_slice += se->slice * weight;
|
||||
+ cfs_rq->avg_load += weight;
|
||||
+}
|
||||
+
|
||||
@ -682,7 +681,6 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
+ s64 key = entity_key(cfs_rq, se);
|
||||
+
|
||||
+ cfs_rq->avg_vruntime -= key * weight;
|
||||
+ cfs_rq->avg_slice -= se->slice * weight;
|
||||
+ cfs_rq->avg_load -= weight;
|
||||
+}
|
||||
+
|
||||
@ -796,7 +794,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
|
||||
u64 vruntime = cfs_rq->min_vruntime;
|
||||
|
||||
@@ -636,9 +773,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
|
||||
@@ -636,9 +771,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
|
||||
curr = NULL;
|
||||
}
|
||||
|
||||
@ -807,7 +805,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
if (!curr)
|
||||
vruntime = se->vruntime;
|
||||
else
|
||||
@@ -647,7 +782,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
|
||||
@@ -647,7 +780,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
|
||||
|
||||
/* ensure we never gain time by being placed backwards. */
|
||||
u64_u32_store(cfs_rq->min_vruntime,
|
||||
@ -816,7 +814,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
}
|
||||
|
||||
static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
|
||||
@@ -655,17 +790,51 @@ static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
|
||||
@@ -655,17 +788,51 @@ static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
|
||||
return entity_before(__node_2_se(a), __node_2_se(b));
|
||||
}
|
||||
|
||||
@ -870,7 +868,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
}
|
||||
|
||||
struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
|
||||
@@ -678,14 +847,81 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
|
||||
@@ -678,14 +845,81 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
|
||||
return __node_2_se(left);
|
||||
}
|
||||
|
||||
@ -927,8 +925,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
+ if (best->deadline == best->min_deadline)
|
||||
+ break;
|
||||
+ }
|
||||
|
||||
- return __node_2_se(next);
|
||||
+
|
||||
+ /*
|
||||
+ * If the earlest deadline in this subtree is in the fully
|
||||
+ * eligible left half of our space, go there.
|
||||
@ -941,7 +938,8 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
+
|
||||
+ node = node->rb_right;
|
||||
+ }
|
||||
+
|
||||
|
||||
- return __node_2_se(next);
|
||||
+ if (!best || (curr && deadline_gt(deadline, best, curr)))
|
||||
+ best = curr;
|
||||
+
|
||||
@ -957,7 +955,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
@@ -707,104 +943,53 @@ int sched_update_scaling(void)
|
||||
@@ -707,104 +941,53 @@ int sched_update_scaling(void)
|
||||
{
|
||||
unsigned int factor = get_update_sysctl_factor();
|
||||
|
||||
@ -1090,7 +1088,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
}
|
||||
|
||||
#include "pelt.h"
|
||||
@@ -939,6 +1124,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
|
||||
@@ -939,6 +1122,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
|
||||
schedstat_add(cfs_rq->exec_clock, delta_exec);
|
||||
|
||||
curr->vruntime += calc_delta_fair(delta_exec, curr);
|
||||
@ -1098,7 +1096,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
update_min_vruntime(cfs_rq);
|
||||
|
||||
if (entity_is_task(curr)) {
|
||||
@@ -3393,16 +3579,36 @@ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
|
||||
@@ -3393,16 +3577,36 @@ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
|
||||
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
|
||||
unsigned long weight)
|
||||
{
|
||||
@ -1135,7 +1133,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
#ifdef CONFIG_SMP
|
||||
do {
|
||||
u32 divider = get_pelt_divider(&se->avg);
|
||||
@@ -3412,9 +3618,11 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
|
||||
@@ -3412,9 +3616,11 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
|
||||
#endif
|
||||
|
||||
enqueue_load_avg(cfs_rq, se);
|
||||
@ -1149,7 +1147,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
}
|
||||
|
||||
void reweight_task(struct task_struct *p, int prio)
|
||||
@@ -4710,98 +4918,140 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
|
||||
@@ -4710,158 +4916,123 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
@ -1167,94 +1165,42 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
-}
|
||||
-
|
||||
-static inline bool entity_is_long_sleeper(struct sched_entity *se)
|
||||
+static inline bool
|
||||
+entity_has_slept(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 vslice, int flags)
|
||||
+static void
|
||||
+place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
{
|
||||
- struct cfs_rq *cfs_rq;
|
||||
- u64 sleep_time;
|
||||
+ u64 now, vdelta;
|
||||
+ s64 delta;
|
||||
|
||||
- if (se->exec_start == 0)
|
||||
+ if (!(flags & ENQUEUE_WAKEUP))
|
||||
return false;
|
||||
|
||||
- cfs_rq = cfs_rq_of(se);
|
||||
-
|
||||
- sleep_time = rq_clock_task(rq_of(cfs_rq));
|
||||
+ if (flags & ENQUEUE_MIGRATED)
|
||||
+ return true;
|
||||
|
||||
- /* Happen while migrating because of clock task divergence */
|
||||
- if (sleep_time <= se->exec_start)
|
||||
+ now = rq_clock_task(rq_of(cfs_rq));
|
||||
+ delta = now - se->exec_start;
|
||||
+ if (delta < 0)
|
||||
return false;
|
||||
|
||||
- sleep_time -= se->exec_start;
|
||||
- if (sleep_time > ((1ULL << 63) / scale_load_down(NICE_0_LOAD)))
|
||||
- return true;
|
||||
+ vdelta = __calc_delta(delta, NICE_0_LOAD, &cfs_rq->load);
|
||||
+ if (vdelta < vslice)
|
||||
+ return false;
|
||||
|
||||
- if (se->exec_start == 0)
|
||||
- return false;
|
||||
+ return true;
|
||||
}
|
||||
|
||||
static void
|
||||
-place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
|
||||
+place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
{
|
||||
- u64 vruntime = cfs_rq->min_vruntime;
|
||||
-
|
||||
- cfs_rq = cfs_rq_of(se);
|
||||
+ u64 vslice = calc_delta_fair(se->slice, se);
|
||||
+ u64 vruntime = avg_vruntime(cfs_rq);
|
||||
+ s64 lag = 0;
|
||||
|
||||
/*
|
||||
- * The 'current' period is already promised to the current tasks,
|
||||
- * however the extra weight of the new task will slow them down a
|
||||
- * little, place the new task so that it fits in the slot that
|
||||
- * stays open at the end.
|
||||
- sleep_time = rq_clock_task(rq_of(cfs_rq));
|
||||
+ /*
|
||||
+ * Due to how V is constructed as the weighted average of entities,
|
||||
+ * adding tasks with positive lag, or removing tasks with negative lag
|
||||
+ * will move 'time' backwards, this can screw around with the lag of
|
||||
+ * other tasks.
|
||||
+ *
|
||||
+ * EEVDF: placement strategy #1 / #2
|
||||
*/
|
||||
- if (initial && sched_feat(START_DEBIT))
|
||||
- vruntime += sched_vslice(cfs_rq, se);
|
||||
+ */
|
||||
+ if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) {
|
||||
+ struct sched_entity *curr = cfs_rq->curr;
|
||||
+ unsigned long load;
|
||||
|
||||
- /* sleeps up to a single latency don't count. */
|
||||
- if (!initial) {
|
||||
- unsigned long thresh;
|
||||
- /* Happen while migrating because of clock task divergence */
|
||||
- if (sleep_time <= se->exec_start)
|
||||
- return false;
|
||||
+ lag = se->vlag;
|
||||
|
||||
- if (se_is_idle(se))
|
||||
- thresh = sysctl_sched_min_granularity;
|
||||
- else
|
||||
- thresh = sysctl_sched_latency;
|
||||
- sleep_time -= se->exec_start;
|
||||
- if (sleep_time > ((1ULL << 63) / scale_load_down(NICE_0_LOAD)))
|
||||
- return true;
|
||||
+ /*
|
||||
+ * For latency sensitive tasks; those that have a shorter than
|
||||
+ * average slice and do not fully consume the slice, transition
|
||||
+ * to EEVDF placement strategy #2.
|
||||
+ */
|
||||
+ if (sched_feat(PLACE_FUDGE) &&
|
||||
+ (cfs_rq->avg_slice > se->slice * cfs_rq->avg_load) &&
|
||||
+ entity_has_slept(cfs_rq, se, vslice, flags)) {
|
||||
+ lag += vslice;
|
||||
+ if (lag > 0)
|
||||
+ lag = 0;
|
||||
+ }
|
||||
|
||||
/*
|
||||
- * Halve their sleep time's effect, to allow
|
||||
- * for a gentler effect of sleepers:
|
||||
+ * If we want to place a task and preserve lag, we have to
|
||||
+ * consider the effect of the new entity on the weighted
|
||||
+ * average and compensate for this, otherwise lag can quickly
|
||||
@ -1305,7 +1251,52 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
+ * = W*vl_i
|
||||
+ *
|
||||
+ * vl_i = (W + w_i)*vl'_i / W
|
||||
+ */
|
||||
+ load = cfs_rq->avg_load;
|
||||
+ if (curr && curr->on_rq)
|
||||
+ load += scale_load_down(curr->load.weight);
|
||||
|
||||
- return false;
|
||||
-}
|
||||
+ lag *= load + scale_load_down(se->load.weight);
|
||||
+ if (WARN_ON_ONCE(!load))
|
||||
+ load = 1;
|
||||
+ lag = div_s64(lag, load);
|
||||
+ }
|
||||
|
||||
-static void
|
||||
-place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
|
||||
-{
|
||||
- u64 vruntime = cfs_rq->min_vruntime;
|
||||
+ se->vruntime = vruntime - lag;
|
||||
|
||||
/*
|
||||
- * The 'current' period is already promised to the current tasks,
|
||||
- * however the extra weight of the new task will slow them down a
|
||||
- * little, place the new task so that it fits in the slot that
|
||||
- * stays open at the end.
|
||||
+ * When joining the competition; the existing tasks will be,
|
||||
+ * on average, halfway through their slice, as such start tasks
|
||||
+ * off with half a slice to ease into the competition.
|
||||
*/
|
||||
- if (initial && sched_feat(START_DEBIT))
|
||||
- vruntime += sched_vslice(cfs_rq, se);
|
||||
-
|
||||
- /* sleeps up to a single latency don't count. */
|
||||
- if (!initial) {
|
||||
- unsigned long thresh;
|
||||
-
|
||||
- if (se_is_idle(se))
|
||||
- thresh = sysctl_sched_min_granularity;
|
||||
- else
|
||||
- thresh = sysctl_sched_latency;
|
||||
+ if (sched_feat(PLACE_DEADLINE_INITIAL) && (flags & ENQUEUE_INITIAL))
|
||||
+ vslice /= 2;
|
||||
|
||||
- /*
|
||||
- * Halve their sleep time's effect, to allow
|
||||
- * for a gentler effect of sleepers:
|
||||
- */
|
||||
- if (sched_feat(GENTLE_FAIR_SLEEPERS))
|
||||
- thresh >>= 1;
|
||||
-
|
||||
@ -1335,26 +1326,6 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
- se->vruntime = vruntime;
|
||||
- else
|
||||
- se->vruntime = max_vruntime(se->vruntime, vruntime);
|
||||
+ load = cfs_rq->avg_load;
|
||||
+ if (curr && curr->on_rq)
|
||||
+ load += scale_load_down(curr->load.weight);
|
||||
+
|
||||
+ lag *= load + scale_load_down(se->load.weight);
|
||||
+ if (WARN_ON_ONCE(!load))
|
||||
+ load = 1;
|
||||
+ lag = div_s64(lag, load);
|
||||
+ }
|
||||
+
|
||||
+ se->vruntime = vruntime - lag;
|
||||
+
|
||||
+ /*
|
||||
+ * When joining the competition; the existing tasks will be,
|
||||
+ * on average, halfway through their slice, as such start tasks
|
||||
+ * off with half a slice to ease into the competition.
|
||||
+ */
|
||||
+ if (sched_feat(PLACE_DEADLINE_INITIAL) && (flags & ENQUEUE_INITIAL))
|
||||
+ vslice /= 2;
|
||||
+
|
||||
+ /*
|
||||
+ * EEVDF: vd_i = ve_i + r_i/w_i
|
||||
+ */
|
||||
@ -1362,7 +1333,6 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
}
|
||||
|
||||
static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
|
||||
@@ -4809,60 +5059,20 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq);
|
||||
|
||||
static inline bool cfs_bandwidth_used(void);
|
||||
|
||||
@ -1425,7 +1395,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
/*
|
||||
* When enqueuing a sched_entity, we must:
|
||||
* - Update loads to have both entity and cfs_rq synced with now.
|
||||
@@ -4874,18 +5084,28 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
@@ -4873,18 +5044,28 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
*/
|
||||
update_load_avg(cfs_rq, se, UPDATE_TG | DO_ATTACH);
|
||||
se_update_runnable(se);
|
||||
@ -1457,7 +1427,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
if (!curr)
|
||||
__enqueue_entity(cfs_rq, se);
|
||||
se->on_rq = 1;
|
||||
@@ -4907,17 +5127,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
@@ -4896,17 +5077,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
}
|
||||
}
|
||||
|
||||
@ -1475,7 +1445,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
static void __clear_buddies_next(struct sched_entity *se)
|
||||
{
|
||||
for_each_sched_entity(se) {
|
||||
@@ -4929,27 +5138,10 @@ static void __clear_buddies_next(struct sched_entity *se)
|
||||
@@ -4918,27 +5088,10 @@ static void __clear_buddies_next(struct sched_entity *se)
|
||||
}
|
||||
}
|
||||
|
||||
@ -1503,7 +1473,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
}
|
||||
|
||||
static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
|
||||
@@ -4983,20 +5175,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
@@ -4972,20 +5125,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
|
||||
clear_buddies(cfs_rq, se);
|
||||
|
||||
@ -1525,7 +1495,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
/* return excess runtime on last dequeue */
|
||||
return_cfs_rq_runtime(cfs_rq);
|
||||
|
||||
@@ -5015,52 +5199,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
@@ -5004,52 +5149,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
update_idle_cfs_rq_clock_pelt(cfs_rq);
|
||||
}
|
||||
|
||||
@ -1578,7 +1548,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
static void
|
||||
set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
@@ -5099,9 +5237,6 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
@@ -5088,9 +5187,6 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
se->prev_sum_exec_runtime = se->sum_exec_runtime;
|
||||
}
|
||||
|
||||
@ -1588,7 +1558,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
/*
|
||||
* Pick the next process, keeping these things in mind, in this order:
|
||||
* 1) keep things fair between processes/task groups
|
||||
@@ -5112,50 +5247,14 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
|
||||
@@ -5101,50 +5197,14 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
|
||||
static struct sched_entity *
|
||||
pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
|
||||
{
|
||||
@ -1644,7 +1614,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
}
|
||||
|
||||
static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
|
||||
@@ -5172,8 +5271,6 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
|
||||
@@ -5161,8 +5221,6 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
|
||||
/* throttle cfs_rqs exceeding runtime */
|
||||
check_cfs_rq_runtime(cfs_rq);
|
||||
|
||||
@ -1653,7 +1623,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
if (prev->on_rq) {
|
||||
update_stats_wait_start_fair(cfs_rq, prev);
|
||||
/* Put 'current' back into the tree. */
|
||||
@@ -5214,9 +5311,6 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
|
||||
@@ -5203,9 +5261,6 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
|
||||
hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
|
||||
return;
|
||||
#endif
|
||||
@ -1663,7 +1633,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
}
|
||||
|
||||
|
||||
@@ -6259,13 +6353,12 @@ static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}
|
||||
@@ -6228,13 +6283,12 @@ static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}
|
||||
static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
struct sched_entity *se = &p->se;
|
||||
@ -1678,7 +1648,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
s64 delta = slice - ran;
|
||||
|
||||
if (delta < 0) {
|
||||
@@ -6289,8 +6382,7 @@ static void hrtick_update(struct rq *rq)
|
||||
@@ -6258,8 +6312,7 @@ static void hrtick_update(struct rq *rq)
|
||||
if (!hrtick_enabled_fair(rq) || curr->sched_class != &fair_sched_class)
|
||||
return;
|
||||
|
||||
@ -1688,7 +1658,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
}
|
||||
#else /* !CONFIG_SCHED_HRTICK */
|
||||
static inline void
|
||||
@@ -6331,17 +6423,6 @@ static int sched_idle_rq(struct rq *rq)
|
||||
@@ -6300,17 +6353,6 @@ static int sched_idle_rq(struct rq *rq)
|
||||
rq->nr_running);
|
||||
}
|
||||
|
||||
@ -1706,7 +1676,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
#ifdef CONFIG_SMP
|
||||
static int sched_idle_cpu(int cpu)
|
||||
{
|
||||
@@ -7844,18 +7925,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
|
||||
@@ -7816,18 +7858,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
|
||||
{
|
||||
struct sched_entity *se = &p->se;
|
||||
|
||||
@ -1725,7 +1695,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
if (!task_on_rq_migrating(p)) {
|
||||
remove_entity_load_avg(se);
|
||||
|
||||
@@ -7893,66 +7962,6 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
|
||||
@@ -7865,66 +7895,6 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
@ -1792,7 +1762,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
static void set_next_buddy(struct sched_entity *se)
|
||||
{
|
||||
for_each_sched_entity(se) {
|
||||
@@ -7964,12 +7973,6 @@ static void set_next_buddy(struct sched_entity *se)
|
||||
@@ -7936,12 +7906,6 @@ static void set_next_buddy(struct sched_entity *se)
|
||||
}
|
||||
}
|
||||
|
||||
@ -1805,7 +1775,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
/*
|
||||
* Preempt the current task with a newly woken task if needed:
|
||||
*/
|
||||
@@ -7978,7 +7981,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
|
||||
@@ -7950,7 +7914,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
|
||||
struct task_struct *curr = rq->curr;
|
||||
struct sched_entity *se = &curr->se, *pse = &p->se;
|
||||
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
|
||||
@ -1813,7 +1783,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
int next_buddy_marked = 0;
|
||||
int cse_is_idle, pse_is_idle;
|
||||
|
||||
@@ -7994,7 +7996,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
|
||||
@@ -7966,7 +7929,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
|
||||
if (unlikely(throttled_hierarchy(cfs_rq_of(pse))))
|
||||
return;
|
||||
|
||||
@ -1822,7 +1792,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
set_next_buddy(pse);
|
||||
next_buddy_marked = 1;
|
||||
}
|
||||
@@ -8039,35 +8041,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
|
||||
@@ -8011,35 +7974,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
|
||||
if (cse_is_idle != pse_is_idle)
|
||||
return;
|
||||
|
||||
@ -1865,7 +1835,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
@@ -8268,8 +8254,6 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
|
||||
@@ -8240,8 +8187,6 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
|
||||
|
||||
/*
|
||||
* sched_yield() is very simple
|
||||
@ -1874,7 +1844,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
*/
|
||||
static void yield_task_fair(struct rq *rq)
|
||||
{
|
||||
@@ -8285,21 +8269,19 @@ static void yield_task_fair(struct rq *rq)
|
||||
@@ -8257,21 +8202,19 @@ static void yield_task_fair(struct rq *rq)
|
||||
|
||||
clear_buddies(cfs_rq, se);
|
||||
|
||||
@ -1908,7 +1878,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
}
|
||||
|
||||
static bool yield_to_task_fair(struct rq *rq, struct task_struct *p)
|
||||
@@ -8547,8 +8529,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
|
||||
@@ -8514,8 +8457,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
|
||||
* Buddy candidates are cache hot:
|
||||
*/
|
||||
if (sched_feat(CACHE_HOT_BUDDY) && env->dst_rq->nr_running &&
|
||||
@ -1918,7 +1888,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
return 1;
|
||||
|
||||
if (sysctl_sched_migration_cost == -1)
|
||||
@@ -12174,8 +12155,8 @@ static void rq_offline_fair(struct rq *rq)
|
||||
@@ -12025,8 +11967,8 @@ static void rq_offline_fair(struct rq *rq)
|
||||
static inline bool
|
||||
__entity_slice_used(struct sched_entity *se, int min_nr_tasks)
|
||||
{
|
||||
@ -1928,7 +1898,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
|
||||
return (rtime * min_nr_tasks > slice);
|
||||
}
|
||||
@@ -12331,8 +12312,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
|
||||
@@ -12182,8 +12124,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
|
||||
*/
|
||||
static void task_fork_fair(struct task_struct *p)
|
||||
{
|
||||
@ -1938,7 +1908,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
struct rq *rq = this_rq();
|
||||
struct rq_flags rf;
|
||||
|
||||
@@ -12341,22 +12322,9 @@ static void task_fork_fair(struct task_struct *p)
|
||||
@@ -12192,22 +12134,9 @@ static void task_fork_fair(struct task_struct *p)
|
||||
|
||||
cfs_rq = task_cfs_rq(current);
|
||||
curr = cfs_rq->curr;
|
||||
@ -1963,7 +1933,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
rq_unlock(rq, &rf);
|
||||
}
|
||||
|
||||
@@ -12385,34 +12353,6 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
|
||||
@@ -12236,34 +12165,6 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
|
||||
check_preempt_curr(rq, p, 0);
|
||||
}
|
||||
|
||||
@ -1998,7 +1968,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
/*
|
||||
* Propagate the changes of the sched_entity across the tg tree to make it
|
||||
@@ -12483,16 +12423,6 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
|
||||
@@ -12334,16 +12235,6 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
|
||||
static void detach_task_cfs_rq(struct task_struct *p)
|
||||
{
|
||||
struct sched_entity *se = &p->se;
|
||||
@ -2015,7 +1985,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
|
||||
detach_entity_cfs_rq(se);
|
||||
}
|
||||
@@ -12500,12 +12430,8 @@ static void detach_task_cfs_rq(struct task_struct *p)
|
||||
@@ -12351,12 +12242,8 @@ static void detach_task_cfs_rq(struct task_struct *p)
|
||||
static void attach_task_cfs_rq(struct task_struct *p)
|
||||
{
|
||||
struct sched_entity *se = &p->se;
|
||||
@ -2028,7 +1998,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
}
|
||||
|
||||
static void switched_from_fair(struct rq *rq, struct task_struct *p)
|
||||
@@ -12616,6 +12542,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
|
||||
@@ -12467,6 +12354,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
|
||||
goto err;
|
||||
|
||||
tg->shares = NICE_0_LOAD;
|
||||
@ -2036,7 +2006,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
|
||||
init_cfs_bandwidth(tg_cfs_bandwidth(tg));
|
||||
|
||||
@@ -12714,6 +12641,9 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
|
||||
@@ -12565,6 +12453,9 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
|
||||
}
|
||||
|
||||
se->my_q = cfs_rq;
|
||||
@ -2046,7 +2016,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
/* guarantee group entities always have weight */
|
||||
update_load_set(&se->load, NICE_0_LOAD);
|
||||
se->parent = parent;
|
||||
@@ -12844,6 +12774,29 @@ int sched_group_set_idle(struct task_group *tg, long idle)
|
||||
@@ -12695,6 +12586,29 @@ int sched_group_set_idle(struct task_group *tg, long idle)
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2076,7 +2046,7 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
#else /* CONFIG_FAIR_GROUP_SCHED */
|
||||
|
||||
void free_fair_sched_group(struct task_group *tg) { }
|
||||
@@ -12870,7 +12823,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
|
||||
@@ -12721,7 +12635,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
|
||||
* idle runqueue:
|
||||
*/
|
||||
if (rq->cfs.load.weight)
|
||||
@ -2086,10 +2056,10 @@ index 4039ff46fcb3..0fbb8fb24a50 100644
|
||||
return rr_interval;
|
||||
}
|
||||
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
|
||||
index ee7f23c76bd3..7d65b40299d9 100644
|
||||
index ee7f23c76bd3..54334ca5c5c6 100644
|
||||
--- a/kernel/sched/features.h
|
||||
+++ b/kernel/sched/features.h
|
||||
@@ -1,16 +1,12 @@
|
||||
@@ -1,16 +1,11 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
-/*
|
||||
- * Only give sleepers 50% of their service deficit. This allows
|
||||
@ -2106,12 +2076,11 @@ index ee7f23c76bd3..7d65b40299d9 100644
|
||||
*/
|
||||
-SCHED_FEAT(START_DEBIT, true)
|
||||
+SCHED_FEAT(PLACE_LAG, true)
|
||||
+SCHED_FEAT(PLACE_FUDGE, true)
|
||||
+SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
|
||||
|
||||
/*
|
||||
* Prefer to schedule the task we woke last (assuming it failed
|
||||
@@ -19,13 +15,6 @@ SCHED_FEAT(START_DEBIT, true)
|
||||
@@ -19,13 +14,6 @@ SCHED_FEAT(START_DEBIT, true)
|
||||
*/
|
||||
SCHED_FEAT(NEXT_BUDDY, false)
|
||||
|
||||
@ -2125,7 +2094,7 @@ index ee7f23c76bd3..7d65b40299d9 100644
|
||||
/*
|
||||
* Consider buddies to be cache hot, decreases the likeliness of a
|
||||
* cache buddy being migrated away, increases cache locality.
|
||||
@@ -98,6 +87,3 @@ SCHED_FEAT(UTIL_EST, true)
|
||||
@@ -98,6 +86,3 @@ SCHED_FEAT(UTIL_EST, true)
|
||||
SCHED_FEAT(UTIL_EST_FASTUP, true)
|
||||
|
||||
SCHED_FEAT(LATENCY_WARN, false)
|
||||
@ -2133,7 +2102,7 @@ index ee7f23c76bd3..7d65b40299d9 100644
|
||||
-SCHED_FEAT(ALT_PERIOD, true)
|
||||
-SCHED_FEAT(BASE_SLICE, true)
|
||||
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
|
||||
index 9baeb1a2dfdd..4236c4c893aa 100644
|
||||
index e93e006a942b..67cd7e1fd501 100644
|
||||
--- a/kernel/sched/sched.h
|
||||
+++ b/kernel/sched/sched.h
|
||||
@@ -372,6 +372,8 @@ struct task_group {
|
||||
@ -2154,18 +2123,17 @@ index 9baeb1a2dfdd..4236c4c893aa 100644
|
||||
#ifdef CONFIG_SMP
|
||||
extern void set_task_rq_fair(struct sched_entity *se,
|
||||
struct cfs_rq *prev, struct cfs_rq *next);
|
||||
@@ -548,6 +552,10 @@ struct cfs_rq {
|
||||
@@ -548,6 +552,9 @@ struct cfs_rq {
|
||||
unsigned int idle_nr_running; /* SCHED_IDLE */
|
||||
unsigned int idle_h_nr_running; /* SCHED_IDLE */
|
||||
|
||||
+ s64 avg_vruntime;
|
||||
+ u64 avg_slice;
|
||||
+ u64 avg_load;
|
||||
+
|
||||
u64 exec_clock;
|
||||
u64 min_vruntime;
|
||||
#ifdef CONFIG_SCHED_CORE
|
||||
@@ -567,8 +575,6 @@ struct cfs_rq {
|
||||
@@ -567,8 +574,6 @@ struct cfs_rq {
|
||||
*/
|
||||
struct sched_entity *curr;
|
||||
struct sched_entity *next;
|
||||
@ -2174,7 +2142,7 @@ index 9baeb1a2dfdd..4236c4c893aa 100644
|
||||
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
unsigned int nr_spread_over;
|
||||
@@ -2198,6 +2204,7 @@ extern const u32 sched_prio_to_wmult[40];
|
||||
@@ -2195,6 +2200,7 @@ extern const u32 sched_prio_to_wmult[40];
|
||||
#else
|
||||
#define ENQUEUE_MIGRATED 0x00
|
||||
#endif
|
||||
@ -2182,7 +2150,7 @@ index 9baeb1a2dfdd..4236c4c893aa 100644
|
||||
|
||||
#define RETRY_TASK ((void *)-1UL)
|
||||
|
||||
@@ -2502,11 +2509,9 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
|
||||
@@ -2499,11 +2505,9 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
|
||||
extern const_debug unsigned int sysctl_sched_nr_migrate;
|
||||
extern const_debug unsigned int sysctl_sched_migration_cost;
|
||||
|
||||
@ -2196,7 +2164,7 @@ index 9baeb1a2dfdd..4236c4c893aa 100644
|
||||
extern int sysctl_resched_latency_warn_ms;
|
||||
extern int sysctl_resched_latency_warn_once;
|
||||
|
||||
@@ -2519,6 +2524,8 @@ extern unsigned int sysctl_numa_balancing_scan_size;
|
||||
@@ -2516,6 +2520,8 @@ extern unsigned int sysctl_numa_balancing_scan_size;
|
||||
extern unsigned int sysctl_numa_balancing_hot_threshold;
|
||||
#endif
|
||||
|
||||
@ -2205,7 +2173,7 @@ index 9baeb1a2dfdd..4236c4c893aa 100644
|
||||
#ifdef CONFIG_SCHED_HRTICK
|
||||
|
||||
/*
|
||||
@@ -3483,4 +3490,7 @@ static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) { }
|
||||
@@ -3480,4 +3486,7 @@ static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) { }
|
||||
static inline void init_sched_mm_cid(struct task_struct *t) { }
|
||||
#endif
|
||||
|
||||
|
@ -1,49 +1,76 @@
|
||||
From e6e251fb3f3927c18ac4f2a22a43c6c198133d19 Mon Sep 17 00:00:00 2001
|
||||
From: Piotr Gorski <lucjan.lucjanov@gmail.com>
|
||||
Date: Sun, 23 Jul 2023 09:46:42 +0200
|
||||
From 377657f92d256b364813e3f8b2a58edfc9833815 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Jung <admin@ptr1337.dev>
|
||||
Date: Sun, 30 Jul 2023 09:43:51 +0200
|
||||
Subject: [PATCH] bore-eevdf
|
||||
|
||||
Signed-off-by: Piotr Gorski <lucjan.lucjanov@gmail.com>
|
||||
Signed-off-by: Peter Jung <admin@ptr1337.dev>
|
||||
---
|
||||
include/linux/sched.h | 10 ++
|
||||
include/linux/sched.h | 30 ++++++
|
||||
init/Kconfig | 20 ++++
|
||||
kernel/sched/core.c | 117 +++++++++++++++++++++++
|
||||
kernel/sched/core.c | 118 +++++++++++++++++++++
|
||||
kernel/sched/debug.c | 4 +
|
||||
kernel/sched/fair.c | 203 ++++++++++++++++++++++++++++++++++++++--
|
||||
kernel/sched/fair.c | 228 ++++++++++++++++++++++++++++++++++++++--
|
||||
kernel/sched/features.h | 4 +
|
||||
kernel/sched/sched.h | 1 +
|
||||
7 files changed, 351 insertions(+), 8 deletions(-)
|
||||
7 files changed, 397 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
||||
index e99a9aa6a..14a1ce058 100644
|
||||
index c940c4dc8304..8663c0813f81 100644
|
||||
--- a/include/linux/sched.h
|
||||
+++ b/include/linux/sched.h
|
||||
@@ -559,6 +559,12 @@ struct sched_entity {
|
||||
@@ -545,6 +545,26 @@ struct sched_statistics {
|
||||
#endif /* CONFIG_SCHEDSTATS */
|
||||
} ____cacheline_aligned;
|
||||
|
||||
+#ifdef CONFIG_SCHED_BORE
|
||||
+union union16 {
|
||||
+ u16 u16;
|
||||
+ s16 s16;
|
||||
+ u8 u8[2];
|
||||
+ s8 s8[2];
|
||||
+};
|
||||
+typedef union union16 x16;
|
||||
+
|
||||
+union union32 {
|
||||
+ u32 u32;
|
||||
+ s32 s32;
|
||||
+ u16 u16[2];
|
||||
+ s16 s16[2];
|
||||
+ u8 u8[4];
|
||||
+ s8 s8[4];
|
||||
+};
|
||||
+typedef union union32 x32;
|
||||
+#endif // CONFIG_SCHED_BORE
|
||||
+
|
||||
struct sched_entity {
|
||||
/* For load-balancing: */
|
||||
struct load_weight load;
|
||||
@@ -559,6 +579,12 @@ struct sched_entity {
|
||||
u64 sum_exec_runtime;
|
||||
u64 prev_sum_exec_runtime;
|
||||
u64 vruntime;
|
||||
+#ifdef CONFIG_SCHED_BORE
|
||||
+ u64 prev_burst_time;
|
||||
+ u64 burst_time;
|
||||
+ u64 max_burst_time;
|
||||
+ u8 penalty_score;
|
||||
+ u16 prev_burst_penalty;
|
||||
+ u16 curr_burst_penalty;
|
||||
+ u16 burst_penalty;
|
||||
+#endif // CONFIG_SCHED_BORE
|
||||
s64 vlag;
|
||||
u64 slice;
|
||||
|
||||
@@ -990,6 +996,10 @@ struct task_struct {
|
||||
@@ -990,6 +1016,10 @@ struct task_struct {
|
||||
struct list_head children;
|
||||
struct list_head sibling;
|
||||
struct task_struct *group_leader;
|
||||
+#ifdef CONFIG_SCHED_BORE
|
||||
+ u64 child_burst_cache;
|
||||
+ u16 child_burst_cache;
|
||||
+ u64 child_burst_last_cached;
|
||||
+#endif // CONFIG_SCHED_BORE
|
||||
|
||||
/*
|
||||
* 'ptraced' is the list of tasks this task is using ptrace() on.
|
||||
diff --git a/init/Kconfig b/init/Kconfig
|
||||
index 71755cc8e..c697be79e 100644
|
||||
index 71755cc8ed3e..c697be79e594 100644
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -1277,6 +1277,26 @@ config CHECKPOINT_RESTORE
|
||||
@ -74,30 +101,31 @@ index 71755cc8e..c697be79e 100644
|
||||
bool "Automatic process group scheduling"
|
||||
select CGROUPS
|
||||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
|
||||
index 8a541fe2d..13969a3a3 100644
|
||||
index aff81e12460e..839605620f63 100644
|
||||
--- a/kernel/sched/core.c
|
||||
+++ b/kernel/sched/core.c
|
||||
@@ -4491,6 +4491,112 @@ int wake_up_state(struct task_struct *p, unsigned int state)
|
||||
@@ -4491,6 +4491,113 @@ int wake_up_state(struct task_struct *p, unsigned int state)
|
||||
return try_to_wake_up(p, state, 0);
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_SCHED_BORE
|
||||
+#define CHILD_BURST_CUTOFF_BITS 9
|
||||
+extern unsigned int sched_burst_cache_lifetime;
|
||||
+extern unsigned int sched_burst_fork_atavistic;
|
||||
+
|
||||
+void __init sched_init_bore(void) {
|
||||
+ init_task.child_burst_cache = 0;
|
||||
+ init_task.child_burst_last_cached = 0;
|
||||
+ init_task.se.prev_burst_time = 0;
|
||||
+ init_task.se.burst_time = 0;
|
||||
+ init_task.se.max_burst_time = 0;
|
||||
+ init_task.se.prev_burst_penalty = 0;
|
||||
+ init_task.se.curr_burst_penalty = 0;
|
||||
+ init_task.se.burst_penalty = 0;
|
||||
+}
|
||||
+
|
||||
+void inline sched_fork_bore(struct task_struct *p) {
|
||||
+ p->child_burst_cache = 0;
|
||||
+ p->child_burst_last_cached = 0;
|
||||
+ p->se.burst_time = 0;
|
||||
+ p->se.curr_burst_penalty = 0;
|
||||
+}
|
||||
+
|
||||
+static u32 count_child_tasks(struct task_struct *p) {
|
||||
@ -112,31 +140,31 @@ index 8a541fe2d..13969a3a3 100644
|
||||
+}
|
||||
+
|
||||
+static void __update_child_burst_cache(
|
||||
+ struct task_struct *p, u32 cnt, u64 sum, u64 now) {
|
||||
+ u64 avg = 0;
|
||||
+ if (cnt) avg = div_u64(sum, cnt) << CHILD_BURST_CUTOFF_BITS;
|
||||
+ p->child_burst_cache = max(avg, p->se.max_burst_time);
|
||||
+ struct task_struct *p, u32 cnt, u32 sum, u64 now) {
|
||||
+ u16 avg = 0;
|
||||
+ if (cnt) avg = DIV_ROUND_CLOSEST(sum, cnt);
|
||||
+ p->child_burst_cache = max(avg, p->se.burst_penalty);
|
||||
+ p->child_burst_last_cached = now;
|
||||
+}
|
||||
+
|
||||
+static void update_child_burst_cache(struct task_struct *p, u64 now) {
|
||||
+ struct task_struct *child;
|
||||
+ u32 cnt = 0;
|
||||
+ u64 sum = 0;
|
||||
+ u32 sum = 0;
|
||||
+
|
||||
+ list_for_each_entry(child, &p->children, sibling) {
|
||||
+ cnt++;
|
||||
+ sum += child->se.max_burst_time >> CHILD_BURST_CUTOFF_BITS;
|
||||
+ sum += child->se.burst_penalty;
|
||||
+ }
|
||||
+
|
||||
+ __update_child_burst_cache(p, cnt, sum, now);
|
||||
+}
|
||||
+
|
||||
+static void update_child_burst_cache_atavistic(
|
||||
+ struct task_struct *p, u64 now, u32 depth, u32 *acnt, u64 *asum) {
|
||||
+ struct task_struct *p, u64 now, u32 depth, u32 *acnt, u32 *asum) {
|
||||
+ struct task_struct *child, *dec;
|
||||
+ u32 cnt = 0, dcnt = 0;
|
||||
+ u64 sum = 0;
|
||||
+ u32 sum = 0;
|
||||
+
|
||||
+ list_for_each_entry(child, &p->children, sibling) {
|
||||
+ dec = child;
|
||||
@ -145,13 +173,13 @@ index 8a541fe2d..13969a3a3 100644
|
||||
+
|
||||
+ if (!dcnt || !depth) {
|
||||
+ cnt++;
|
||||
+ sum += dec->se.max_burst_time >> CHILD_BURST_CUTOFF_BITS;
|
||||
+ sum += dec->se.burst_penalty;
|
||||
+ } else {
|
||||
+ if (child_burst_cache_expired(dec, now))
|
||||
+ update_child_burst_cache_atavistic(dec, now, depth - 1, &cnt, &sum);
|
||||
+ else {
|
||||
+ cnt += dcnt;
|
||||
+ sum += (dec->child_burst_cache >> CHILD_BURST_CUTOFF_BITS) * dcnt;
|
||||
+ sum += (dec->child_burst_cache) * dcnt;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
@ -161,12 +189,12 @@ index 8a541fe2d..13969a3a3 100644
|
||||
+ *asum += sum;
|
||||
+}
|
||||
+
|
||||
+static void update_task_initial_burst_time(struct task_struct *p) {
|
||||
+static void fork_burst_penalty(struct task_struct *p) {
|
||||
+ struct sched_entity *se = &p->se;
|
||||
+ struct task_struct *anc = p->real_parent;
|
||||
+ u64 now = ktime_get_ns();
|
||||
+ u32 cnt = 0;
|
||||
+ u64 sum = 0;
|
||||
+ u32 sum = 0;
|
||||
+
|
||||
+ read_lock(&tasklist_lock);
|
||||
+
|
||||
@ -182,15 +210,15 @@ index 8a541fe2d..13969a3a3 100644
|
||||
+
|
||||
+ read_unlock(&tasklist_lock);
|
||||
+
|
||||
+ se->max_burst_time = se->prev_burst_time =
|
||||
+ max(se->prev_burst_time, anc->child_burst_cache);
|
||||
+ se->burst_penalty = se->prev_burst_penalty =
|
||||
+ max(se->prev_burst_penalty, anc->child_burst_cache);
|
||||
+}
|
||||
+#endif // CONFIG_SCHED_BORE
|
||||
+
|
||||
/*
|
||||
* Perform scheduler related setup for a newly forked process p.
|
||||
* p is forked by current.
|
||||
@@ -4507,6 +4613,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
|
||||
@@ -4507,6 +4614,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
|
||||
p->se.prev_sum_exec_runtime = 0;
|
||||
p->se.nr_migrations = 0;
|
||||
p->se.vruntime = 0;
|
||||
@ -200,30 +228,30 @@ index 8a541fe2d..13969a3a3 100644
|
||||
p->se.vlag = 0;
|
||||
INIT_LIST_HEAD(&p->se.group_node);
|
||||
|
||||
@@ -4828,6 +4937,9 @@ void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
|
||||
@@ -4828,6 +4938,9 @@ void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
|
||||
|
||||
void sched_post_fork(struct task_struct *p)
|
||||
{
|
||||
+#ifdef CONFIG_SCHED_BORE
|
||||
+ update_task_initial_burst_time(p);
|
||||
+ fork_burst_penalty(p);
|
||||
+#endif // CONFIG_SCHED_BORE
|
||||
uclamp_post_fork(p);
|
||||
}
|
||||
|
||||
@@ -9967,6 +10079,11 @@ void __init sched_init(void)
|
||||
@@ -9954,6 +10067,11 @@ void __init sched_init(void)
|
||||
BUG_ON(&dl_sched_class != &stop_sched_class + 1);
|
||||
#endif
|
||||
|
||||
+#ifdef CONFIG_SCHED_BORE
|
||||
+ sched_init_bore();
|
||||
+ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 2.5.3 by Masahito Suzuki");
|
||||
+ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 3.0 Beta2 by Masahito Suzuki");
|
||||
+#endif // CONFIG_SCHED_BORE
|
||||
+
|
||||
wait_bit_init();
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
|
||||
index 5c743bcb3..755ef4c8d 100644
|
||||
index e7e83181fbb6..ff41a524c1ee 100644
|
||||
--- a/kernel/sched/debug.c
|
||||
+++ b/kernel/sched/debug.c
|
||||
@@ -348,6 +348,7 @@ static __init int sched_init_debug(void)
|
||||
@ -234,18 +262,18 @@ index 5c743bcb3..755ef4c8d 100644
|
||||
|
||||
debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
|
||||
debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
|
||||
@@ -595,6 +596,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
|
||||
@@ -594,6 +595,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
|
||||
SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
|
||||
SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));
|
||||
|
||||
+#ifdef CONFIG_SCHED_BORE
|
||||
+ SEQ_printf(m, " %2d", p->se.penalty_score);
|
||||
+ SEQ_printf(m, " %2d", ((x16*)&p->se.burst_penalty)->u8[1]);
|
||||
+#endif
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
|
||||
#endif
|
||||
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
||||
index d6042543c..e52c14232 100644
|
||||
index 461409c0eac7..90ce27fb0a3f 100644
|
||||
--- a/kernel/sched/fair.c
|
||||
+++ b/kernel/sched/fair.c
|
||||
@@ -19,6 +19,9 @@
|
||||
@ -281,7 +309,7 @@ index d6042543c..e52c14232 100644
|
||||
|
||||
/*
|
||||
* After fork, child runs first. If set to 0 (default) then
|
||||
@@ -84,8 +87,76 @@ static unsigned int normalized_sysctl_sched_base_slice = 750000ULL;
|
||||
@@ -84,8 +87,93 @@ static unsigned int normalized_sysctl_sched_base_slice = 750000ULL;
|
||||
*/
|
||||
unsigned int sysctl_sched_child_runs_first __read_mostly;
|
||||
|
||||
@ -292,55 +320,72 @@ index d6042543c..e52c14232 100644
|
||||
+ * and reduces their over-scheduling. Synchronous workloads will still
|
||||
+ * have immediate wakeup/sleep latencies.
|
||||
+ *
|
||||
+ * (default: 3.2 msec * 1, units: nanoseconds)
|
||||
+ * (default: 1.6 msec * 1, units: nanoseconds)
|
||||
+ */
|
||||
+unsigned int sysctl_sched_wakeup_granularity = 3200000UL;
|
||||
+static unsigned int normalized_sysctl_sched_wakeup_granularity = 3200000UL;
|
||||
+unsigned int sysctl_sched_wakeup_granularity = 1600000UL;
|
||||
+static unsigned int normalized_sysctl_sched_wakeup_granularity = 1600000UL;
|
||||
+
|
||||
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
|
||||
|
||||
+#ifdef CONFIG_SCHED_BORE
|
||||
+unsigned int __read_mostly sched_bore = 1;
|
||||
+unsigned int __read_mostly sched_burst_cache_lifetime = 60000000;
|
||||
+unsigned int __read_mostly sched_burst_penalty_offset = 12;
|
||||
+unsigned int __read_mostly sched_burst_penalty_offset = 18;
|
||||
+unsigned int __read_mostly sched_burst_penalty_scale = 1292;
|
||||
+unsigned int __read_mostly sched_burst_smoothness = 2;
|
||||
+unsigned int __read_mostly sched_burst_smoothness_up = 1;
|
||||
+unsigned int __read_mostly sched_burst_smoothness_down = 0;
|
||||
+unsigned int __read_mostly sched_burst_fork_atavistic = 2;
|
||||
+static int three = 3;
|
||||
+static int sixty_four = 64;
|
||||
+static int maxval_12_bits = 4095;
|
||||
+
|
||||
+#define FIXED_SHIFT 10
|
||||
+#define FIXED_ONE (1 << FIXED_SHIFT)
|
||||
+typedef u32 fixed;
|
||||
+#define MAX_BURST_PENALTY ((u32)(40UL << 8) - 1)
|
||||
+
|
||||
+static void update_burst_score(struct sched_entity *se) {
|
||||
+ u64 burst_time = se->max_burst_time;
|
||||
+static inline u32 log2plus1_u64_u32f8(u64 v) {
|
||||
+ x32 result;
|
||||
+ int msb = fls64(v);
|
||||
+ result.u8[0] = v << (64 - msb) >> 55;
|
||||
+ result.u8[1] = msb;
|
||||
+ return result.u32;
|
||||
+}
|
||||
+
|
||||
+ int msb = fls64(burst_time);
|
||||
+ fixed integer_part = msb << FIXED_SHIFT;
|
||||
+ fixed fractional_part = burst_time << (64 - msb) << 1 >> (64 - FIXED_SHIFT);
|
||||
+ fixed greed = integer_part | fractional_part;
|
||||
+static inline u32 u8h_u32(u8 v) {
|
||||
+ x32 result;
|
||||
+ result.u8[1] = v;
|
||||
+ return result.u32;
|
||||
+}
|
||||
+
|
||||
+ fixed tolerance = sched_burst_penalty_offset << FIXED_SHIFT;
|
||||
+ fixed penalty = max(0, (s32)greed - (s32)tolerance);
|
||||
+ fixed scaled_penalty = penalty * sched_burst_penalty_scale >> 10;
|
||||
+static inline u32 calc_burst_penalty(struct sched_entity *se) {
|
||||
+ u32 greed, tolerance, penalty, scaled_penalty;
|
||||
+
|
||||
+ u8 score = min(39U, scaled_penalty >> FIXED_SHIFT);
|
||||
+ se->penalty_score = score;
|
||||
+ greed = log2plus1_u64_u32f8(se->burst_time);
|
||||
+ tolerance = u8h_u32(sched_burst_penalty_offset);
|
||||
+ penalty = max(0, (s32)greed - (s32)tolerance);
|
||||
+ scaled_penalty = penalty * sched_burst_penalty_scale >> 10;
|
||||
+
|
||||
+ return min(MAX_BURST_PENALTY, scaled_penalty);
|
||||
+}
|
||||
+
|
||||
+static void update_burst_penalty(struct sched_entity *se) {
|
||||
+ se->curr_burst_penalty = calc_burst_penalty(se);
|
||||
+ se->burst_penalty = max(se->prev_burst_penalty, se->curr_burst_penalty);
|
||||
+}
|
||||
+
|
||||
+static inline u64 penalty_scale(u64 delta, struct sched_entity *se) {
|
||||
+ return mul_u64_u32_shr(delta, sched_prio_to_wmult[se->penalty_score], 22);
|
||||
+ u8 score = ((x16*)&se->burst_penalty)->u8[1];
|
||||
+ return mul_u64_u32_shr(delta, sched_prio_to_wmult[score], 22);
|
||||
+}
|
||||
+
|
||||
+static inline u64 __binary_smooth(u64 new, u64 old, unsigned int smoothness) {
|
||||
+ return (new <= old)? new: (new + old * ((1 << smoothness) - 1)) >> smoothness;
|
||||
+static inline u32 binary_smooth(u32 new, u32 old) {
|
||||
+ return (new >= old)?
|
||||
+ old + ((new - old) >> sched_burst_smoothness_up):
|
||||
+ old - ((old - new) >> sched_burst_smoothness_down);
|
||||
+}
|
||||
+
|
||||
+void restart_burst(struct sched_entity *se) {
|
||||
+ se->max_burst_time = se->prev_burst_time = __binary_smooth(
|
||||
+ se->burst_time, se->prev_burst_time, sched_burst_smoothness);
|
||||
+static void restart_burst(struct sched_entity *se) {
|
||||
+ se->burst_penalty = se->prev_burst_penalty =
|
||||
+ binary_smooth(se->curr_burst_penalty, se->prev_burst_penalty);
|
||||
+ se->curr_burst_penalty = 0;
|
||||
+ se->burst_time = 0;
|
||||
+}
|
||||
+
|
||||
@ -358,7 +403,7 @@ index d6042543c..e52c14232 100644
|
||||
int sched_thermal_decay_shift;
|
||||
static int __init setup_sched_thermal_decay_shift(char *str)
|
||||
{
|
||||
@@ -145,6 +216,60 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536;
|
||||
@@ -145,6 +233,69 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536;
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
static struct ctl_table sched_fair_sysctls[] = {
|
||||
@ -407,8 +452,17 @@ index d6042543c..e52c14232 100644
|
||||
+ .extra2 = &maxval_12_bits,
|
||||
+ },
|
||||
+ {
|
||||
+ .procname = "sched_burst_smoothness",
|
||||
+ .data = &sched_burst_smoothness,
|
||||
+ .procname = "sched_burst_smoothness_down",
|
||||
+ .data = &sched_burst_smoothness_down,
|
||||
+ .maxlen = sizeof(unsigned int),
|
||||
+ .mode = 0644,
|
||||
+ .proc_handler = &proc_dointvec_minmax,
|
||||
+ .extra1 = SYSCTL_ZERO,
|
||||
+ .extra2 = &three,
|
||||
+ },
|
||||
+ {
|
||||
+ .procname = "sched_burst_smoothness_up",
|
||||
+ .data = &sched_burst_smoothness_up,
|
||||
+ .maxlen = sizeof(unsigned int),
|
||||
+ .mode = 0644,
|
||||
+ .proc_handler = &proc_dointvec_minmax,
|
||||
@ -419,7 +473,7 @@ index d6042543c..e52c14232 100644
|
||||
{
|
||||
.procname = "sched_child_runs_first",
|
||||
.data = &sysctl_sched_child_runs_first,
|
||||
@@ -238,6 +363,7 @@ static void update_sysctl(void)
|
||||
@@ -238,6 +389,7 @@ static void update_sysctl(void)
|
||||
#define SET_SYSCTL(name) \
|
||||
(sysctl_##name = (factor) * normalized_sysctl_##name)
|
||||
SET_SYSCTL(sched_base_slice);
|
||||
@ -427,7 +481,7 @@ index d6042543c..e52c14232 100644
|
||||
#undef SET_SYSCTL
|
||||
}
|
||||
|
||||
@@ -308,11 +434,19 @@ static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight
|
||||
@@ -308,11 +460,19 @@ static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight
|
||||
/*
|
||||
* delta /= w
|
||||
*/
|
||||
@ -447,7 +501,7 @@ index d6042543c..e52c14232 100644
|
||||
return delta;
|
||||
}
|
||||
|
||||
@@ -708,7 +842,11 @@ void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
@@ -706,7 +866,11 @@ void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
SCHED_WARN_ON(!se->on_rq);
|
||||
lag = avg_vruntime(cfs_rq) - se->vruntime;
|
||||
|
||||
@ -459,7 +513,7 @@ index d6042543c..e52c14232 100644
|
||||
se->vlag = clamp(lag, -limit, limit);
|
||||
}
|
||||
|
||||
@@ -946,6 +1084,7 @@ int sched_update_scaling(void)
|
||||
@@ -944,6 +1108,7 @@ int sched_update_scaling(void)
|
||||
#define WRT_SYSCTL(name) \
|
||||
(normalized_sysctl_##name = sysctl_##name / (factor))
|
||||
WRT_SYSCTL(sched_base_slice);
|
||||
@ -467,19 +521,18 @@ index d6042543c..e52c14232 100644
|
||||
#undef WRT_SYSCTL
|
||||
|
||||
return 0;
|
||||
@@ -1123,6 +1262,11 @@ static void update_curr(struct cfs_rq *cfs_rq)
|
||||
@@ -1121,6 +1286,10 @@ static void update_curr(struct cfs_rq *cfs_rq)
|
||||
curr->sum_exec_runtime += delta_exec;
|
||||
schedstat_add(cfs_rq->exec_clock, delta_exec);
|
||||
|
||||
+#ifdef CONFIG_SCHED_BORE
|
||||
+ curr->burst_time += delta_exec;
|
||||
+ curr->max_burst_time = max(curr->max_burst_time, curr->burst_time);
|
||||
+ update_burst_score(curr);
|
||||
+ update_burst_penalty(curr);
|
||||
+#endif // CONFIG_SCHED_BORE
|
||||
curr->vruntime += calc_delta_fair(delta_exec, curr);
|
||||
update_deadline(cfs_rq, curr);
|
||||
update_min_vruntime(cfs_rq);
|
||||
@@ -5237,6 +5381,9 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
@@ -5187,6 +5356,9 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
se->prev_sum_exec_runtime = se->sum_exec_runtime;
|
||||
}
|
||||
|
||||
@ -489,7 +542,7 @@ index d6042543c..e52c14232 100644
|
||||
/*
|
||||
* Pick the next process, keeping these things in mind, in this order:
|
||||
* 1) keep things fair between processes/task groups
|
||||
@@ -5247,14 +5394,16 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
@@ -5197,14 +5369,16 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
static struct sched_entity *
|
||||
pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
|
||||
{
|
||||
@ -508,7 +561,7 @@ index d6042543c..e52c14232 100644
|
||||
}
|
||||
|
||||
static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
|
||||
@@ -6522,6 +6671,38 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
||||
@@ -6452,6 +6626,38 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
||||
hrtick_update(rq);
|
||||
}
|
||||
|
||||
@ -547,7 +600,7 @@ index d6042543c..e52c14232 100644
|
||||
static void set_next_buddy(struct sched_entity *se);
|
||||
|
||||
/*
|
||||
@@ -6540,6 +6721,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
||||
@@ -6470,6 +6676,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
||||
util_est_dequeue(&rq->cfs, p);
|
||||
|
||||
for_each_sched_entity(se) {
|
||||
@ -557,7 +610,7 @@ index d6042543c..e52c14232 100644
|
||||
cfs_rq = cfs_rq_of(se);
|
||||
dequeue_entity(cfs_rq, se, flags);
|
||||
|
||||
@@ -8047,7 +8231,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
|
||||
@@ -7980,7 +8189,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
|
||||
/*
|
||||
* XXX pick_eevdf(cfs_rq) != se ?
|
||||
*/
|
||||
@ -566,7 +619,7 @@ index d6042543c..e52c14232 100644
|
||||
goto preempt;
|
||||
|
||||
return;
|
||||
@@ -8260,6 +8444,9 @@ static void yield_task_fair(struct rq *rq)
|
||||
@@ -8193,6 +8402,9 @@ static void yield_task_fair(struct rq *rq)
|
||||
struct task_struct *curr = rq->curr;
|
||||
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
|
||||
struct sched_entity *se = &curr->se;
|
||||
@ -577,10 +630,10 @@ index d6042543c..e52c14232 100644
|
||||
/*
|
||||
* Are we the only task in the tree?
|
||||
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
|
||||
index 7d65b4029..bd274f7c7 100644
|
||||
index 54334ca5c5c6..416ec4bcdb0f 100644
|
||||
--- a/kernel/sched/features.h
|
||||
+++ b/kernel/sched/features.h
|
||||
@@ -13,7 +13,11 @@ SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
|
||||
@@ -12,7 +12,11 @@ SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
|
||||
* wakeup-preemption), since its likely going to consume data we
|
||||
* touched, increases cache locality.
|
||||
*/
|
||||
@ -593,10 +646,10 @@ index 7d65b4029..bd274f7c7 100644
|
||||
/*
|
||||
* Consider buddies to be cache hot, decreases the likeliness of a
|
||||
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
|
||||
index 4236c4c89..714cc6ad9 100644
|
||||
index 67cd7e1fd501..04d065015d6c 100644
|
||||
--- a/kernel/sched/sched.h
|
||||
+++ b/kernel/sched/sched.h
|
||||
@@ -2510,6 +2510,7 @@ extern const_debug unsigned int sysctl_sched_nr_migrate;
|
||||
@@ -2506,6 +2506,7 @@ extern const_debug unsigned int sysctl_sched_nr_migrate;
|
||||
extern const_debug unsigned int sysctl_sched_migration_cost;
|
||||
|
||||
extern unsigned int sysctl_sched_base_slice;
|
||||
@ -605,4 +658,4 @@ index 4236c4c89..714cc6ad9 100644
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
extern int sysctl_resched_latency_warn_ms;
|
||||
--
|
||||
2.41.0.159.g0bfa463d37
|
||||
2.41.0
|
||||
|
573
patches/0006-AMD-cppc.patch
Normal file
573
patches/0006-AMD-cppc.patch
Normal file
@ -0,0 +1,573 @@
|
||||
From ab6268d199fa749e274a48b00c443538ae492b16 Mon Sep 17 00:00:00 2001
|
||||
From: Piotr Gorski <lucjan.lucjanov@gmail.com>
|
||||
Date: Wed, 9 Aug 2023 14:07:31 +0200
|
||||
Subject: [PATCH] amd-6.5: merge changes from dev tree
|
||||
|
||||
Signed-off-by: Piotr Gorski <lucjan.lucjanov@gmail.com>
|
||||
---
|
||||
.../admin-guide/kernel-parameters.txt | 5 +
|
||||
Documentation/admin-guide/pm/amd-pstate.rst | 55 +++++
|
||||
drivers/acpi/cppc_acpi.c | 13 ++
|
||||
drivers/acpi/processor_driver.c | 6 +
|
||||
drivers/cpufreq/amd-pstate.c | 191 ++++++++++++++++--
|
||||
drivers/cpufreq/cpufreq.c | 13 ++
|
||||
include/acpi/cppc_acpi.h | 5 +
|
||||
include/linux/amd-pstate.h | 1 +
|
||||
include/linux/cpufreq.h | 4 +
|
||||
9 files changed, 272 insertions(+), 21 deletions(-)
|
||||
|
||||
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
|
||||
index a1457995f..1f53c395a 100644
|
||||
--- a/Documentation/admin-guide/kernel-parameters.txt
|
||||
+++ b/Documentation/admin-guide/kernel-parameters.txt
|
||||
@@ -363,6 +363,11 @@
|
||||
selects a performance level in this range and appropriate
|
||||
to the current workload.
|
||||
|
||||
+ amd_prefcore=
|
||||
+ [X86]
|
||||
+ enable
|
||||
+ Enable AMD Pstate Preferred Core.
|
||||
+
|
||||
amijoy.map= [HW,JOY] Amiga joystick support
|
||||
Map of devices attached to JOY0DAT and JOY1DAT
|
||||
Format: <a>,<b>
|
||||
diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
|
||||
index 1cf40f692..4a30cf235 100644
|
||||
--- a/Documentation/admin-guide/pm/amd-pstate.rst
|
||||
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
|
||||
@@ -353,6 +353,49 @@ is activated. In this mode, driver requests minimum and maximum performance
|
||||
level and the platform autonomously selects a performance level in this range
|
||||
and appropriate to the current workload.
|
||||
|
||||
+AMD Pstate Preferred Core
|
||||
+=================================
|
||||
+
|
||||
+The core frequency is subject to process variation in semiconductors.
|
||||
+Not all cores are able to reach the maximum frequency respecting the
|
||||
+infrastructure limits. Consequently, AMD has redefined the concept of
|
||||
+maximum frequency of a part. This means that a fraction of cores can reach
|
||||
+maximum frequency. To find the best process scheduling policy for a given
|
||||
+scenario, OS needs to know the core ordering informed by the platform through
|
||||
+highest performance capability register of the CPPC interface.
|
||||
+
|
||||
+``AMD Pstate Preferred Core`` uses the ITMT arch, which provides functions and data structures
|
||||
+for enabling the scheduler to favor scheduling on cores that can reach a higher frequency
|
||||
+with lower voltage under preferred core. And it has the ability to dynamically
|
||||
+change the preferred core based on the workload and platform conditions and
|
||||
+accounting for thermals and aging.
|
||||
+
|
||||
+The priority metric will be initialized by the AMD Pstate driver. The AMD Pstate
|
||||
+driver will also determine whether or not ``AMD Pstate Preferred Core`` is
|
||||
+supported by the platform.
|
||||
+
|
||||
+AMD Pstate driver will provide an initial core ordering when the system boots.
|
||||
+The platform uses the CPPC interfaces to communicate the core ranking to the
|
||||
+operating system and scheduler to make sure that OS is choosing the cores
|
||||
+with the highest performance first when scheduling a process. When the AMD Pstate
|
||||
+driver receives a message with the highest performance change, it will
|
||||
+update the core ranking and set the cpu's priority.
|
||||
+
|
||||
+AMD Preferred Core Switch
|
||||
+=================================
|
||||
+Kernel Parameters
|
||||
+-----------------
|
||||
+
|
||||
+``AMD Pstate Preferred Core`` has two states: enable and disable.
|
||||
+Enable/disable states can be chosen by different kernel parameters.
|
||||
+``AMD Pstate Preferred Core`` is disabled by default.
|
||||
+
|
||||
+``amd_prefcore=enable``
|
||||
+
|
||||
+If ``amd_prefcore=enable`` is passed on the kernel command line,
|
||||
+``AMD Pstate Preferred Core`` is enabled if the processor and power
|
||||
+firmware support the preferred core feature.
|
||||
+
|
||||
User Space Interface in ``sysfs`` - General
|
||||
===========================================
|
||||
|
||||
@@ -385,6 +428,18 @@ control its functionality at the system level. They are located in the
|
||||
to the operation mode represented by that string - or to be
|
||||
unregistered in the "disable" case.
|
||||
|
||||
+``prefcore_state``
|
||||
+ Preferred Core state of the driver: "enabled" or "disabled".
|
||||
+
|
||||
+ "enabled"
|
||||
+ Enable the AMD Preferred Core.
|
||||
+
|
||||
+ "disabled"
|
||||
+ Disable the AMD Preferred Core
|
||||
+
|
||||
+
|
||||
+ This attribute is read-only to check the state of Preferred Core.
|
||||
+
|
||||
``cpupower`` tool support for ``amd-pstate``
|
||||
===============================================
|
||||
|
||||
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
|
||||
index 7ff269a78..ad388a0e8 100644
|
||||
--- a/drivers/acpi/cppc_acpi.c
|
||||
+++ b/drivers/acpi/cppc_acpi.c
|
||||
@@ -1154,6 +1154,19 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
|
||||
return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf);
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * cppc_get_highest_perf - Get the highest performance register value.
|
||||
+ * @cpunum: CPU from which to get highest performance.
|
||||
+ * @highest_perf: Return address.
|
||||
+ *
|
||||
+ * Return: 0 for success, -EIO otherwise.
|
||||
+ */
|
||||
+int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
|
||||
+{
|
||||
+ return cppc_get_perf(cpunum, HIGHEST_PERF, highest_perf);
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(cppc_get_highest_perf);
|
||||
+
|
||||
/**
|
||||
* cppc_get_epp_perf - Get the epp register value.
|
||||
* @cpunum: CPU from which to get epp preference value.
|
||||
diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
|
||||
index 4bd16b3f0..29b2fb68a 100644
|
||||
--- a/drivers/acpi/processor_driver.c
|
||||
+++ b/drivers/acpi/processor_driver.c
|
||||
@@ -27,6 +27,7 @@
|
||||
#define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80
|
||||
#define ACPI_PROCESSOR_NOTIFY_POWER 0x81
|
||||
#define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82
|
||||
+#define ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED 0x85
|
||||
|
||||
MODULE_AUTHOR("Paul Diefenbaugh");
|
||||
MODULE_DESCRIPTION("ACPI Processor Driver");
|
||||
@@ -83,6 +84,11 @@ static void acpi_processor_notify(acpi_handle handle, u32 event, void *data)
|
||||
acpi_bus_generate_netlink_event(device->pnp.device_class,
|
||||
dev_name(&device->dev), event, 0);
|
||||
break;
|
||||
+ case ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED:
|
||||
+ cpufreq_update_highest_perf(pr->id);
|
||||
+ acpi_bus_generate_netlink_event(device->pnp.device_class,
|
||||
+ dev_name(&device->dev), event, 0);
|
||||
+ break;
|
||||
default:
|
||||
acpi_handle_debug(handle, "Unsupported event [0x%x]\n", event);
|
||||
break;
|
||||
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
|
||||
index 81fba0dcb..ba10aa971 100644
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -37,6 +37,7 @@
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/static_call.h>
|
||||
#include <linux/amd-pstate.h>
|
||||
+#include <linux/topology.h>
|
||||
|
||||
#include <acpi/processor.h>
|
||||
#include <acpi/cppc_acpi.h>
|
||||
@@ -49,6 +50,8 @@
|
||||
|
||||
#define AMD_PSTATE_TRANSITION_LATENCY 20000
|
||||
#define AMD_PSTATE_TRANSITION_DELAY 1000
|
||||
+#define AMD_PSTATE_PREFCORE_THRESHOLD 166
|
||||
+#define AMD_PSTATE_MAX_CPPC_PERF 255
|
||||
|
||||
/*
|
||||
* TODO: We need more time to fine tune processors with shared memory solution
|
||||
@@ -65,6 +68,14 @@ static struct cpufreq_driver amd_pstate_epp_driver;
|
||||
static int cppc_state = AMD_PSTATE_UNDEFINED;
|
||||
static bool cppc_enabled;
|
||||
|
||||
+/*
|
||||
+ * CPPC Preferred Core feature is supported by power firmware
|
||||
+ */
|
||||
+static bool prefcore_enabled = false;
|
||||
+
|
||||
+/* Disable AMD Pstate Preferred Core loading */
|
||||
+static bool no_prefcore __read_mostly = true;
|
||||
+
|
||||
/*
|
||||
* AMD Energy Preference Performance (EPP)
|
||||
* The EPP is used in the CCLK DPM controller to drive
|
||||
@@ -290,27 +301,26 @@ static inline int amd_pstate_enable(bool enable)
|
||||
static int pstate_init_perf(struct amd_cpudata *cpudata)
|
||||
{
|
||||
u64 cap1;
|
||||
- u32 highest_perf;
|
||||
|
||||
int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
|
||||
&cap1);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
- /*
|
||||
- * TODO: Introduce AMD specific power feature.
|
||||
- *
|
||||
- * CPPC entry doesn't indicate the highest performance in some ASICs.
|
||||
+ /* For platforms that do not support the preferred core feature, the
|
||||
+ * highest_perf may be configured with 166 or 255. To avoid the max frequency
|
||||
+ * being calculated wrongly, we take the AMD_CPPC_HIGHEST_PERF(cap1) value as
|
||||
+ * the default max perf.
|
||||
*/
|
||||
- highest_perf = amd_get_highest_perf();
|
||||
- if (highest_perf > AMD_CPPC_HIGHEST_PERF(cap1))
|
||||
- highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
|
||||
-
|
||||
- WRITE_ONCE(cpudata->highest_perf, highest_perf);
|
||||
+ if (!prefcore_enabled)
|
||||
+ WRITE_ONCE(cpudata->highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
|
||||
+ else
|
||||
+ WRITE_ONCE(cpudata->highest_perf, AMD_PSTATE_PREFCORE_THRESHOLD);
|
||||
|
||||
WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
|
||||
WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
|
||||
WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
|
||||
+ WRITE_ONCE(cpudata->prefcore_highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -318,22 +328,21 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
|
||||
static int cppc_init_perf(struct amd_cpudata *cpudata)
|
||||
{
|
||||
struct cppc_perf_caps cppc_perf;
|
||||
- u32 highest_perf;
|
||||
|
||||
int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
- highest_perf = amd_get_highest_perf();
|
||||
- if (highest_perf > cppc_perf.highest_perf)
|
||||
- highest_perf = cppc_perf.highest_perf;
|
||||
-
|
||||
- WRITE_ONCE(cpudata->highest_perf, highest_perf);
|
||||
+ if (!prefcore_enabled)
|
||||
+ WRITE_ONCE(cpudata->highest_perf, cppc_perf.highest_perf);
|
||||
+ else
|
||||
+ WRITE_ONCE(cpudata->highest_perf, AMD_PSTATE_PREFCORE_THRESHOLD);
|
||||
|
||||
WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
|
||||
WRITE_ONCE(cpudata->lowest_nonlinear_perf,
|
||||
cppc_perf.lowest_nonlinear_perf);
|
||||
WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
|
||||
+ WRITE_ONCE(cpudata->prefcore_highest_perf, cppc_perf.highest_perf);
|
||||
|
||||
if (cppc_state == AMD_PSTATE_ACTIVE)
|
||||
return 0;
|
||||
@@ -676,6 +685,118 @@ static void amd_perf_ctl_reset(unsigned int cpu)
|
||||
wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Enabling AMD Pstate Preferred Core can't be done directly from cpufreq callbacks
|
||||
+ * due to locking, so queue the work for later.
|
||||
+ */
|
||||
+static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
|
||||
+{
|
||||
+ sched_set_itmt_support();
|
||||
+}
|
||||
+static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
|
||||
+
|
||||
+/**
|
||||
+ * amd_pstate_get_highest_perf - Get the highest performance register value.
|
||||
+ * @cpu: CPU from which to get highest performance.
|
||||
+ * @highest_perf: Return address.
|
||||
+ *
|
||||
+ * Return: 0 for success, the error code of the underlying read otherwise.
|
||||
+ */
|
||||
+static int amd_pstate_get_highest_perf(int cpu, u64 *highest_perf)
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
|
||||
+ u64 cap1;
|
||||
+
|
||||
+ ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+ WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
|
||||
+ } else {
|
||||
+ ret = cppc_get_highest_perf(cpu, highest_perf);
|
||||
+ }
|
||||
+
|
||||
+ return (ret);
|
||||
+}
|
||||
+
|
||||
+static void amd_pstate_init_prefcore(void)
|
||||
+{
|
||||
+ int cpu, ret;
|
||||
+ u64 highest_perf;
|
||||
+
|
||||
+ if (no_prefcore)
|
||||
+ return;
|
||||
+
|
||||
+ for_each_possible_cpu(cpu) {
|
||||
+ ret = amd_pstate_get_highest_perf(cpu, &highest_perf);
|
||||
+ if (ret)
|
||||
+ break;
|
||||
+
|
||||
+ sched_set_itmt_core_prio(highest_perf, cpu);
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * This code can be run during CPU online under the
|
||||
+ * CPU hotplug locks, so sched_set_itmt_support()
|
||||
+ * cannot be called from here. Queue up a work item
|
||||
+ * to invoke it.
|
||||
+ */
|
||||
+ schedule_work(&sched_prefcore_work);
|
||||
+}
|
||||
+
|
||||
+static void amd_pstate_update_highest_perf(unsigned int cpu)
|
||||
+{
|
||||
+ struct cpufreq_policy *policy;
|
||||
+ struct amd_cpudata *cpudata;
|
||||
+ u32 prev_high = 0, cur_high = 0;
|
||||
+ u64 highest_perf;
|
||||
+ int ret;
|
||||
+
|
||||
+ if (!prefcore_enabled)
|
||||
+ return;
|
||||
+
|
||||
+ ret = amd_pstate_get_highest_perf(cpu, &highest_perf);
|
||||
+ if (ret)
|
||||
+ return;
|
||||
+
|
||||
+ policy = cpufreq_cpu_get(cpu);
|
||||
+ cpudata = policy->driver_data;
|
||||
+ cur_high = highest_perf;
|
||||
+ prev_high = READ_ONCE(cpudata->prefcore_highest_perf);
|
||||
+
|
||||
+ if (prev_high != cur_high) {
|
||||
+ WRITE_ONCE(cpudata->prefcore_highest_perf, cur_high);
|
||||
+ sched_set_itmt_core_prio(cur_high, cpu);
|
||||
+ }
|
||||
+
|
||||
+ cpufreq_cpu_put(policy);
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * Check if AMD Pstate Preferred core feature is supported and enabled
|
||||
+ * 1) no_prefcore is used to enable or disable AMD Pstate Preferred Core
|
||||
+ * loading when user would like to enable or disable it. Without that,
|
||||
+ * AMD Pstate Preferred Core stays disabled by default, even if the processor
|
||||
+ * and power firmware support the preferred core feature.
|
||||
+ * 2) prefcore_enabled is used to indicate whether CPPC preferred core is enabled.
|
||||
+ */
|
||||
+static void check_prefcore_supported(int cpu)
|
||||
+{
|
||||
+ u64 highest_perf;
|
||||
+ int ret;
|
||||
+
|
||||
+ if (no_prefcore)
|
||||
+ return;
|
||||
+
|
||||
+ ret = amd_pstate_get_highest_perf(cpu, &highest_perf);
|
||||
+ if (ret)
|
||||
+ return;
|
||||
+
|
||||
+ if(highest_perf < AMD_PSTATE_MAX_CPPC_PERF)
|
||||
+ prefcore_enabled = true;
|
||||
+}
|
||||
+
|
||||
static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
|
||||
{
|
||||
int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
|
||||
@@ -697,6 +818,9 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
|
||||
|
||||
cpudata->cpu = policy->cpu;
|
||||
|
||||
+ /* check if CPPC preferred core feature is enabled*/
|
||||
+ check_prefcore_supported(policy->cpu);
|
||||
+
|
||||
ret = amd_pstate_init_perf(cpudata);
|
||||
if (ret)
|
||||
goto free_cpudata1;
|
||||
@@ -1012,8 +1136,8 @@ static int amd_pstate_update_status(const char *buf, size_t size)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static ssize_t show_status(struct kobject *kobj,
|
||||
- struct kobj_attribute *attr, char *buf)
|
||||
+static ssize_t status_show(struct device *dev,
|
||||
+ struct device_attribute *attr, char *buf)
|
||||
{
|
||||
ssize_t ret;
|
||||
|
||||
@@ -1024,7 +1148,7 @@ static ssize_t show_status(struct kobject *kobj,
|
||||
return ret;
|
||||
}
|
||||
|
||||
-static ssize_t store_status(struct kobject *a, struct kobj_attribute *b,
|
||||
+static ssize_t status_store(struct device *a, struct device_attribute *b,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
char *p = memchr(buf, '\n', count);
|
||||
@@ -1037,13 +1161,20 @@ static ssize_t store_status(struct kobject *a, struct kobj_attribute *b,
|
||||
return ret < 0 ? ret : count;
|
||||
}
|
||||
|
||||
+static ssize_t prefcore_state_show(struct device *dev,
|
||||
+ struct device_attribute *attr, char *buf)
|
||||
+{
|
||||
+ return sysfs_emit(buf, "%s\n", prefcore_enabled ? "enabled" : "disabled");
|
||||
+}
|
||||
+
|
||||
cpufreq_freq_attr_ro(amd_pstate_max_freq);
|
||||
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
|
||||
|
||||
cpufreq_freq_attr_ro(amd_pstate_highest_perf);
|
||||
cpufreq_freq_attr_rw(energy_performance_preference);
|
||||
cpufreq_freq_attr_ro(energy_performance_available_preferences);
|
||||
-define_one_global_rw(status);
|
||||
+static DEVICE_ATTR_RW(status);
|
||||
+static DEVICE_ATTR_RO(prefcore_state);
|
||||
|
||||
static struct freq_attr *amd_pstate_attr[] = {
|
||||
&amd_pstate_max_freq,
|
||||
@@ -1062,7 +1193,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
|
||||
};
|
||||
|
||||
static struct attribute *pstate_global_attributes[] = {
|
||||
- &status.attr,
|
||||
+ &dev_attr_status.attr,
|
||||
+ &dev_attr_prefcore_state.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
@@ -1114,6 +1246,9 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
|
||||
cpudata->cpu = policy->cpu;
|
||||
cpudata->epp_policy = 0;
|
||||
|
||||
+ /* check if CPPC preferred core feature is supported*/
|
||||
+ check_prefcore_supported(policy->cpu);
|
||||
+
|
||||
ret = amd_pstate_init_perf(cpudata);
|
||||
if (ret)
|
||||
goto free_cpudata1;
|
||||
@@ -1392,6 +1527,7 @@ static struct cpufreq_driver amd_pstate_driver = {
|
||||
.suspend = amd_pstate_cpu_suspend,
|
||||
.resume = amd_pstate_cpu_resume,
|
||||
.set_boost = amd_pstate_set_boost,
|
||||
+ .update_highest_perf = amd_pstate_update_highest_perf,
|
||||
.name = "amd-pstate",
|
||||
.attr = amd_pstate_attr,
|
||||
};
|
||||
@@ -1406,6 +1542,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = {
|
||||
.online = amd_pstate_epp_cpu_online,
|
||||
.suspend = amd_pstate_epp_suspend,
|
||||
.resume = amd_pstate_epp_resume,
|
||||
+ .update_highest_perf = amd_pstate_update_highest_perf,
|
||||
.name = "amd-pstate-epp",
|
||||
.attr = amd_pstate_epp_attr,
|
||||
};
|
||||
@@ -1506,6 +1643,8 @@ static int __init amd_pstate_init(void)
|
||||
}
|
||||
}
|
||||
|
||||
+ amd_pstate_init_prefcore();
|
||||
+
|
||||
return ret;
|
||||
|
||||
global_attr_free:
|
||||
@@ -1527,7 +1666,17 @@ static int __init amd_pstate_param(char *str)
|
||||
|
||||
return amd_pstate_set_driver(mode_idx);
|
||||
}
|
||||
+
|
||||
+static int __init amd_prefcore_param(char *str)
|
||||
+{
|
||||
+ if (!strcmp(str, "enable"))
|
||||
+ no_prefcore = false;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
early_param("amd_pstate", amd_pstate_param);
|
||||
+early_param("amd_prefcore", amd_prefcore_param);
|
||||
|
||||
MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
|
||||
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
|
||||
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
|
||||
index 50bbc969f..842357abf 100644
|
||||
--- a/drivers/cpufreq/cpufreq.c
|
||||
+++ b/drivers/cpufreq/cpufreq.c
|
||||
@@ -2675,6 +2675,19 @@ void cpufreq_update_limits(unsigned int cpu)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cpufreq_update_limits);
|
||||
|
||||
+/**
|
||||
+ * cpufreq_update_highest_perf - Update highest performance for a given CPU.
|
||||
+ * @cpu: CPU to update the highest performance for.
|
||||
+ *
|
||||
+ * Invoke the driver's ->update_highest_perf callback if present
|
||||
+ */
|
||||
+void cpufreq_update_highest_perf(unsigned int cpu)
|
||||
+{
|
||||
+ if (cpufreq_driver->update_highest_perf)
|
||||
+ cpufreq_driver->update_highest_perf(cpu);
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(cpufreq_update_highest_perf);
|
||||
+
|
||||
/*********************************************************************
|
||||
* BOOST *
|
||||
*********************************************************************/
|
||||
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
|
||||
index 6126c977e..c0b69ffe7 100644
|
||||
--- a/include/acpi/cppc_acpi.h
|
||||
+++ b/include/acpi/cppc_acpi.h
|
||||
@@ -139,6 +139,7 @@ struct cppc_cpudata {
|
||||
#ifdef CONFIG_ACPI_CPPC_LIB
|
||||
extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf);
|
||||
extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf);
|
||||
+extern int cppc_get_highest_perf(int cpunum, u64 *highest_perf);
|
||||
extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
|
||||
extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
|
||||
extern int cppc_set_enable(int cpu, bool enable);
|
||||
@@ -165,6 +166,10 @@ static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
|
||||
{
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
+static inline int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
|
||||
+{
|
||||
+ return -ENOTSUPP;
|
||||
+}
|
||||
static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
|
||||
{
|
||||
return -ENOTSUPP;
|
||||
diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
|
||||
index 446394f84..fa86bc953 100644
|
||||
--- a/include/linux/amd-pstate.h
|
||||
+++ b/include/linux/amd-pstate.h
|
||||
@@ -70,6 +70,7 @@ struct amd_cpudata {
|
||||
u32 nominal_perf;
|
||||
u32 lowest_nonlinear_perf;
|
||||
u32 lowest_perf;
|
||||
+ u32 prefcore_highest_perf;
|
||||
|
||||
u32 max_freq;
|
||||
u32 min_freq;
|
||||
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
|
||||
index 172ff51c1..766c83a4f 100644
|
||||
--- a/include/linux/cpufreq.h
|
||||
+++ b/include/linux/cpufreq.h
|
||||
@@ -231,6 +231,7 @@ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu);
|
||||
void refresh_frequency_limits(struct cpufreq_policy *policy);
|
||||
void cpufreq_update_policy(unsigned int cpu);
|
||||
void cpufreq_update_limits(unsigned int cpu);
|
||||
+void cpufreq_update_highest_perf(unsigned int cpu);
|
||||
bool have_governor_per_policy(void);
|
||||
bool cpufreq_supports_freq_invariance(void);
|
||||
struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy);
|
||||
@@ -376,6 +377,9 @@ struct cpufreq_driver {
|
||||
/* Called to update policy limits on firmware notifications. */
|
||||
void (*update_limits)(unsigned int cpu);
|
||||
|
||||
+ /* Called to update highest performance on firmware notifications. */
|
||||
+ void (*update_highest_perf)(unsigned int cpu);
|
||||
+
|
||||
/* optional */
|
||||
int (*bios_limit)(int cpu, unsigned int *limit);
|
||||
|
||||
--
|
||||
2.42.0.rc0.25.ga82fb66fed
|
@ -16,3 +16,5 @@ patch -Np1 < "../patches/0002-eevdfbore.patch"
|
||||
patch -Np1 < "../patches/0004-Allow-to-set-custom-USB-pollrate-for-specific-device.patch"
|
||||
# Allow pre polaris cards to use the amdgpu kernel module
|
||||
patch -Np1 < "../patches/0005-amdgpu-si-cik-default.patch"
|
||||
# AMD Patch for CPPC
|
||||
patch -Np1 < "../patches/0006-AMD-cppc.patch"
|
@ -2,7 +2,7 @@
|
||||
|
||||
echo "Pika Kernel - Getting source"
|
||||
|
||||
wget -nv https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/snapshot/linux-6.5-rc3.tar.gz
|
||||
tar -xf ./linux-6.5-rc3.tar.gz
|
||||
wget -nv https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/snapshot/linux-6.5-rc5.tar.gz
|
||||
tar -xf ./linux-6.5-rc5.tar.gz
|
||||
|
||||
cd linux-6.5-rc3
|
||||
cd linux-6.5-rc5
|
||||
|
Loading…
Reference in New Issue
Block a user