diff --git a/patches/0002-bore-cachy.patch b/patches/0002-bore-cachy.patch
index faa3a41..a50263f 100644
--- a/patches/0002-bore-cachy.patch
+++ b/patches/0002-bore-cachy.patch
@@ -1,30 +1,30 @@
-From 651d6a962b139ff8f2ae5362eafe97597e775c12 Mon Sep 17 00:00:00 2001
-From: Peter Jung
-Date: Fri, 27 Dec 2024 18:37:16 +0100
+From 2485f3af3e13d470a6bf3b928725a50b54cb3f55 Mon Sep 17 00:00:00 2001
+From: Eric Naim
+Date: Tue, 7 Jan 2025 17:26:29 +0700
 Subject: [PATCH] bore-cachy

-Signed-off-by: Peter Jung
+Signed-off-by: Eric Naim
 ---
- include/linux/sched.h      |  17 ++
+ include/linux/sched.h      |  18 ++
  include/linux/sched/bore.h |  40 ++++
  init/Kconfig               |  17 ++
  kernel/Kconfig.hz          |  17 ++
- kernel/fork.c              |   5 +
+ kernel/fork.c              |   6 +
  kernel/sched/Makefile      |   1 +
- kernel/sched/bore.c        | 424 +++++++++++++++++++++++++++++++++++++
+ kernel/sched/bore.c        | 446 +++++++++++++++++++++++++++++++++++++
  kernel/sched/core.c        |   6 +
- kernel/sched/debug.c       |  61 +++++-
+ kernel/sched/debug.c       |  61 ++++-
  kernel/sched/fair.c        |  89 ++++++--
  kernel/sched/sched.h       |   9 +
- 11 files changed, 668 insertions(+), 18 deletions(-)
+ 11 files changed, 692 insertions(+), 18 deletions(-)
  create mode 100644 include/linux/sched/bore.h
  create mode 100644 kernel/sched/bore.c

 diff --git a/include/linux/sched.h b/include/linux/sched.h
-index c14446c6164d..83e35dfbbc50 100644
+index 02eaf84c8626..c76461bd57f3 100644
 --- a/include/linux/sched.h
 +++ b/include/linux/sched.h
-@@ -538,6 +538,14 @@ struct sched_statistics {
+@@ -538,6 +538,15 @@ struct sched_statistics {
  #endif /* CONFIG_SCHEDSTATS */
  } ____cacheline_aligned;

@@ -33,13 +33,14 @@ index c14446c6164d..83e35dfbbc50 100644
 +	u8	score;
 +	u32	count;
 +	u64	timestamp;
++	spinlock_t lock;
 +};
 +#endif // CONFIG_SCHED_BORE
 +
  struct sched_entity {
  	/* For load-balancing: */
  	struct load_weight		load;
-@@ -557,6 +565,15 @@ struct sched_entity {
+@@ -557,6 +566,15 @@ struct sched_entity {
  	u64				sum_exec_runtime;
  	u64				prev_sum_exec_runtime;
  	u64				vruntime;
@@ -57,7 +58,7 @@
 diff --git a/include/linux/sched/bore.h b/include/linux/sched/bore.h
 new file mode 100644
-index 000000000000..653b918d36c0
+index 000000000000..a36947e12c2f
 --- /dev/null
 +++ b/include/linux/sched/bore.h
 @@ -0,0 +1,40 @@
@@ -67,7 +68,7 @@
 +#ifndef _LINUX_SCHED_BORE_H
 +#define _LINUX_SCHED_BORE_H
-+#define SCHED_BORE_VERSION "5.7.15"
++#define SCHED_BORE_VERSION "5.9.5"
 +
 +#ifdef CONFIG_SCHED_BORE
 +extern u8 __read_mostly sched_bore;
@@ -92,9 +93,9 @@
 +	void __user *buffer, size_t *lenp, loff_t *ppos);
 +
 +extern void sched_clone_bore(
-+	struct task_struct *p, struct task_struct *parent, u64 clone_flags);
++	struct task_struct *p, struct task_struct *parent, u64 clone_flags, u64 now);
 +
-+extern void init_task_bore(struct task_struct *p);
++extern void reset_task_bore(struct task_struct *p);
 +extern void sched_bore_init(void);
 +
 +extern void reweight_entity(
@@ -157,7 +158,7 @@ index 0f78364efd4f..83a6b919ab29 100644
 config SCHED_HRTICK
 	def_bool HIGH_RES_TIMERS
 diff --git a/kernel/fork.c b/kernel/fork.c
-index e97e527cec69..d40105f4b177 100644
+index d27b8f5582df..86adb9321e2d 100644
 --- a/kernel/fork.c
 +++ b/kernel/fork.c
@@ -169,16 +170,17 @@
 @@ -117,6 +117,8 @@
  #include
 +#include
 +
  #include
  #define CREATE_TRACE_POINTS
-@@ -2372,6 +2374,9 @@ __latent_entropy struct task_struct *copy_process(
- 	retval = sched_fork(clone_flags, p);
- 	if (retval)
- 		goto bad_fork_cleanup_policy;
-+#ifdef CONFIG_SCHED_BORE
-+	sched_clone_bore(p, current, clone_flags);
-+#endif // CONFIG_SCHED_BORE
- 	retval = perf_event_init_task(p, clone_flags);
- 	if (retval)
+@@ -2522,6 +2524,10 @@ __latent_entropy struct task_struct *copy_process(
+ 	p->start_time = ktime_get_ns();
+ 	p->start_boottime = ktime_get_boottime_ns();
+
++#ifdef CONFIG_SCHED_BORE
++	if (likely(p->pid))
++		sched_clone_bore(p, current, clone_flags, p->start_time);
++#endif // CONFIG_SCHED_BORE
+
+ 	/*
+ 	 * Make it visible to the rest of the system, but dont wake it up yet.
+ 	 * Need tasklist lock for parent etc handling!
@@ -190,10 +192,10 @@
 diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
 index 976092b7bd45..293aad675444 100644
 --- a/kernel/sched/Makefile
 +++ b/kernel/sched/Makefile
 +obj-y += bore.o
 diff --git a/kernel/sched/bore.c b/kernel/sched/bore.c
 new file mode 100644
-index 000000000000..46d1e86f1e4e
+index 000000000000..d55cd32b34ea
 --- /dev/null
 +++ b/kernel/sched/bore.c
-@@ -0,0 +1,424 @@
+@@ -0,0 +1,446 @@
 +/*
 + * Burst-Oriented Response Enhancer (BORE) CPU Scheduler
 + * Copyright (C) 2021-2024 Masahito Suzuki
@@ -350,11 +352,14 @@
 +#define for_each_child(p, t) \
 +	list_for_each_entry(t, &(p)->children, sibling)
 +
-+static u32 count_children_max2(struct task_struct *p) {
-+	u32 cnt = 0;
-+	struct task_struct *child;
-+	for_each_child(p, child) {if (2 <= ++cnt) break;}
-+	return cnt;
++static u32 count_entries_upto2(struct list_head *head) {
++	struct list_head *next = head->next;
++	return (next != head) + (next->next != head);
++}
++
++static inline void init_task_burst_cache_lock(struct task_struct *p) {
++	spin_lock_init(&p->se.child_burst.lock);
++	spin_lock_init(&p->se.group_burst.lock);
 +}
 +
 +static inline bool burst_cache_expired(struct sched_burst_cache *bc, u64 now)
@@ -384,24 +389,29 @@
 +static inline u8 inherit_burst_direct(
 +	struct task_struct *p, u64 now, u64 clone_flags) {
 +	struct task_struct *parent = p;
++	struct sched_burst_cache *bc;
 +
 +	if (clone_flags & CLONE_PARENT)
 +		parent = parent->real_parent;
 +
-+	if (burst_cache_expired(&parent->se.child_burst, now))
++	bc = &parent->se.child_burst;
++	spin_lock(&bc->lock);
++	if (burst_cache_expired(bc, now))
 +		update_child_burst_direct(parent, now);
++	spin_unlock(&bc->lock);
 +
-+	return parent->se.child_burst.score;
++	return bc->score;
 +}
 +
 +static void update_child_burst_topological(
 +	struct task_struct *p, u64 now, u32 depth, u32 *acnt, u32 *asum) {
 +	u32 cnt = 0, dcnt = 0, sum = 0;
 +	struct task_struct *child, *dec;
++	struct sched_burst_cache *bc __maybe_unused;
 +
@@ -410,13 +420,20 @@
 +	for_each_child(p, child) {
 +		dec = child;
-+		while ((dcnt = count_children_max2(dec)) == 1)
++		while ((dcnt = count_entries_upto2(&dec->children)) == 1)
 +			dec = list_first_entry(&dec->children, struct task_struct, sibling);
 +
 +		if (!dcnt || !depth) {
 +			cnt += dcnt;
 +			sum += dec->se.burst_penalty;
 +			continue;
 +		}
-+		if (!burst_cache_expired(&dec->se.child_burst, now)) {
-+			cnt += dec->se.child_burst.count;
-+			sum += (u32)dec->se.child_burst.score * dec->se.child_burst.count;
-+			if (sched_burst_cache_stop_count <= cnt) break;
++		bc = &dec->se.child_burst;
++		spin_lock(&bc->lock);
++		if (!burst_cache_expired(bc, now)) {
++			cnt += bc->count;
++			sum += (u32)bc->score * bc->count;
++			if (sched_burst_cache_stop_count <= cnt) {
++				spin_unlock(&bc->lock);
++				break;
++			}
++			spin_unlock(&bc->lock);
 +			continue;
 +		}
 +		update_child_burst_topological(dec, now, depth - 1, &cnt, &sum);
++		spin_unlock(&bc->lock);
 +	}
 +
 +	update_burst_cache(&p->se.child_burst, p, cnt, sum, now);
@@ -427,6 +444,7 @@
 +static inline u8 inherit_burst_topological(
 +	struct task_struct *p, u64 now, u64 clone_flags) {
 +	struct task_struct *anc = p;
++	struct sched_burst_cache *bc;
 +	u32 cnt = 0, sum = 0;
 +	u32 base_child_cnt = 0;
 +
@@ -437,16 +455,19 @@
 +	for (struct task_struct *next;
 +		anc != (next = anc->real_parent) &&
-+		count_children_max2(anc) <= base_child_cnt;) {
++		count_entries_upto2(&anc->children) <= base_child_cnt;) {
 +		anc = next;
 +		base_child_cnt = 1;
 +	}
 +
-+	if (burst_cache_expired(&anc->se.child_burst, now))
++	bc = &anc->se.child_burst;
++	spin_lock(&bc->lock);
++	if (burst_cache_expired(bc, now))
 +		update_child_burst_topological(
 +			anc, now, sched_burst_fork_atavistic - 1, &cnt, &sum);
++	spin_unlock(&bc->lock);
 +
-+	return anc->se.child_burst.score;
++	return bc->score;
 +}
@@ -464,28 +485,30 @@
 +static inline void update_tg_burst(struct task_struct *p, u64 now) {
 +
 +static inline u8 inherit_burst_tg(struct task_struct *p, u64 now) {
 +	struct task_struct *parent = rcu_dereference(p->group_leader);
-+	if (burst_cache_expired(&parent->se.group_burst, now))
++	struct sched_burst_cache *bc = &parent->se.group_burst;
++	spin_lock(&bc->lock);
++	if (burst_cache_expired(bc, now))
 +		update_tg_burst(parent, now);
++	spin_unlock(&bc->lock);
 +
-+	return parent->se.group_burst.score;
++	return bc->score;
 +}
 +
-+void sched_clone_bore(
-+	struct task_struct *p, struct task_struct *parent, u64 clone_flags) {
++void sched_clone_bore(struct task_struct *p,
++	struct task_struct *parent, u64 clone_flags, u64 now) {
 +	struct sched_entity *se = &p->se;
-+	u64 now;
 +	u8 penalty;
 +
++	init_task_burst_cache_lock(p);
++
 +	if (!task_is_bore_eligible(p)) return;
 +
 +	if (clone_flags & CLONE_THREAD) {
 +		rcu_read_lock();
-+		now = jiffies_to_nsecs(jiffies);
 +		penalty = inherit_burst_tg(parent, now);
 +		rcu_read_unlock();
 +	} else {
 +		read_lock(&tasklist_lock);
-+		now = jiffies_to_nsecs(jiffies);
 +		penalty = likely(sched_burst_fork_atavistic) ?
 +			inherit_burst_topological(parent, now, clone_flags):
 +			inherit_burst_direct(parent, now, clone_flags);
 +		read_unlock(&tasklist_lock);
 +	}
 +
 +	se->burst_penalty = se->prev_burst_penalty = penalty;
 +	se->child_burst.timestamp = 0;
 +	se->group_burst.timestamp = 0;
 +}
@@ -499,7 +522,7 @@
-+void init_task_bore(struct task_struct *p) {
++void reset_task_bore(struct task_struct *p) {
 +	p->se.burst_time = 0;
 +	p->se.prev_burst_penalty = 0;
 +	p->se.curr_burst_penalty = 0;
@@ -511,7 +534,8 @@
 +
 +void __init sched_bore_init(void) {
 +	printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification %s by Masahito Suzuki", SCHED_BORE_VERSION);
-+	init_task_bore(&init_task);
++	reset_task_bore(&init_task);
++	init_task_burst_cache_lock(&init_task);
 +}
 +
 +#ifdef CONFIG_SYSCTL
@@ -692,9 +716,9 @@ index 82b165bf48c4..d2d48cb6a668 100644
 +	.llseek = seq_lseek, \
 +	.release = single_release, \
 +};
-+
-+DEFINE_SYSCTL_SCHED_FUNC(min_base_slice, min_base_slice)
++DEFINE_SYSCTL_SCHED_FUNC(min_base_slice, min_base_slice)
++
 +#undef DEFINE_SYSCTL_SCHED_FUNC
 +#else // !CONFIG_SCHED_BORE
 +static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf,
@@ -751,7 +775,7 @@
 	P(se.avg.runnable_sum);
 	P(se.avg.util_sum);
 diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
-index d06d306b7fba..da27682ab602 100644
+index d06d306b7fba..2edb57febcc5 100644
 --- a/kernel/sched/fair.c
 +++ b/kernel/sched/fair.c
 @@ -55,6 +55,8 @@
@@ -980,7 +1004,7 @@
 {
 +	p->se.rel_deadline = 0;
 +#ifdef CONFIG_SCHED_BORE
-+	init_task_bore(p);
++	reset_task_bore(p);
 +#endif // CONFIG_SCHED_BORE
 	detach_task_cfs_rq(p);
 }
 diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
 index d6e2ca8c8cd2..f9677c5c4831 100644
@@ -1016,3 +1040,4 @@
 extern int sysctl_resched_latency_warn_ms;
 -- 
 2.47.1
+