diff --git a/patches/0005-le9uo.patch b/patches/0005-le9uo.patch deleted file mode 100644 index 080dd73..0000000 --- a/patches/0005-le9uo.patch +++ /dev/null @@ -1,482 +0,0 @@ -From 95eac316f9bb32c4e33e64616876c2ab1dfaf3f4 Mon Sep 17 00:00:00 2001 -From: Eric Naim -Date: Fri, 15 Nov 2024 15:22:08 +0800 -Subject: [PATCH] le9uo - -Signed-off-by: Eric Naim ---- - Documentation/admin-guide/sysctl/vm.rst | 72 ++++++++++++ - include/linux/mm.h | 8 ++ - kernel/sysctl.c | 34 ++++++ - mm/Kconfig | 63 +++++++++++ - mm/mm_init.c | 1 + - mm/vmscan.c | 143 +++++++++++++++++++++++- - 6 files changed, 317 insertions(+), 4 deletions(-) - -diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst -index f48eaa98d22d..fc777c14cff6 100644 ---- a/Documentation/admin-guide/sysctl/vm.rst -+++ b/Documentation/admin-guide/sysctl/vm.rst -@@ -25,6 +25,9 @@ files can be found in mm/swap.c. - Currently, these files are in /proc/sys/vm: - - - admin_reserve_kbytes -+- anon_min_ratio -+- clean_low_ratio -+- clean_min_ratio - - compact_memory - - compaction_proactiveness - - compact_unevictable_allowed -@@ -108,6 +111,67 @@ On x86_64 this is about 128MB. - Changing this takes effect whenever an application requests memory. - - -+anon_min_ratio -+============== -+ -+This knob provides *hard* protection of anonymous pages. The anonymous pages -+on the current node won't be reclaimed under any conditions when their amount -+is below vm.anon_min_ratio. -+ -+This knob may be used to prevent excessive swap thrashing when anonymous -+memory is low (for example, when memory is going to be overfilled by -+compressed data of zram module). -+ -+Setting this value too high (close to 100) can result in inability to -+swap and can lead to early OOM under memory pressure. -+ -+The unit of measurement is the percentage of the total memory of the node. -+ -+The default value is 15. 
-+ -+ -+clean_low_ratio -+================ -+ -+This knob provides *best-effort* protection of clean file pages. The file pages -+on the current node won't be reclaimed under memory pressure when the amount of -+clean file pages is below vm.clean_low_ratio *unless* we threaten to OOM. -+ -+Protection of clean file pages using this knob may be used when swapping is -+still possible to -+ - prevent disk I/O thrashing under memory pressure; -+ - improve performance in disk cache-bound tasks under memory pressure. -+ -+Setting it to a high value may result in an early eviction of anonymous pages -+into the swap space by attempting to hold the protected amount of clean file -+pages in memory. -+ -+The unit of measurement is the percentage of the total memory of the node. -+ -+The default value is 0. -+ -+ -+clean_min_ratio -+================ -+ -+This knob provides *hard* protection of clean file pages. The file pages on the -+current node won't be reclaimed under memory pressure when the amount of clean -+file pages is below vm.clean_min_ratio. -+ -+Hard protection of clean file pages using this knob may be used to -+ - prevent disk I/O thrashing under memory pressure even with no free swap space; -+ - improve performance in disk cache-bound tasks under memory pressure; -+ - avoid high latency and prevent livelock in near-OOM conditions. -+ -+Setting it to a high value may result in an early out-of-memory condition due to -+the inability to reclaim the protected amount of clean file pages when other -+types of pages cannot be reclaimed. -+ -+The unit of measurement is the percentage of the total memory of the node. -+ -+The default value is 15. -+ -+ - compact_memory - ============== - -@@ -964,6 +1028,14 @@ be 133 (x + 2x = 200, 2x = 133.33). - At 0, the kernel will not initiate swap until the amount of free and - file-backed pages is less than the high watermark in a zone. 
- -+This knob has no effect if the amount of clean file pages on the current -+node is below vm.clean_low_ratio or vm.clean_min_ratio. In this case, -+only anonymous pages can be reclaimed. -+ -+If the number of anonymous pages on the current node is below -+vm.anon_min_ratio, then only file pages can be reclaimed with -+any vm.swappiness value. -+ - - unprivileged_userfaultfd - ======================== -diff --git a/include/linux/mm.h b/include/linux/mm.h -index 61fff5d34ed5..18dc2544700b 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -205,6 +205,14 @@ static inline void __mm_zero_struct_page(struct page *page) - - extern int sysctl_max_map_count; - -+extern bool sysctl_workingset_protection; -+extern u8 sysctl_anon_min_ratio; -+extern u8 sysctl_clean_low_ratio; -+extern u8 sysctl_clean_min_ratio; -+int vm_workingset_protection_update_handler( -+ const struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos); -+ - extern unsigned long sysctl_user_reserve_kbytes; - extern unsigned long sysctl_admin_reserve_kbytes; - -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index 676e89dc38c3..4a62ae02a2c0 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -2210,6 +2210,40 @@ static struct ctl_table vm_table[] = { - .extra1 = SYSCTL_ZERO, - }, - #endif -+ { -+ .procname = "workingset_protection", -+ .data = &sysctl_workingset_protection, -+ .maxlen = sizeof(bool), -+ .mode = 0644, -+ .proc_handler = &proc_dobool, -+ }, -+ { -+ .procname = "anon_min_ratio", -+ .data = &sysctl_anon_min_ratio, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = &vm_workingset_protection_update_handler, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE_HUNDRED, -+ }, -+ { -+ .procname = "clean_low_ratio", -+ .data = &sysctl_clean_low_ratio, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = &vm_workingset_protection_update_handler, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE_HUNDRED, -+ }, -+ { -+ .procname = 
"clean_min_ratio", -+ .data = &sysctl_clean_min_ratio, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = &vm_workingset_protection_update_handler, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE_HUNDRED, -+ }, - { - .procname = "user_reserve_kbytes", - .data = &sysctl_user_reserve_kbytes, -diff --git a/mm/Kconfig b/mm/Kconfig -index 6bfea371341e..87cb63f7ca57 100644 ---- a/mm/Kconfig -+++ b/mm/Kconfig -@@ -499,6 +499,69 @@ config ARCH_WANT_OPTIMIZE_DAX_VMEMMAP - config ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP - bool - -+config ANON_MIN_RATIO -+ int "Default value for vm.anon_min_ratio" -+ depends on SYSCTL -+ range 0 100 -+ default 15 -+ help -+ This option sets the default value for vm.anon_min_ratio sysctl knob. -+ -+ The vm.anon_min_ratio sysctl knob provides *hard* protection of -+ anonymous pages. The anonymous pages on the current node won't be -+ reclaimed under any conditions when their amount is below -+ vm.anon_min_ratio. This knob may be used to prevent excessive swap -+ thrashing when anonymous memory is low (for example, when memory is -+ going to be overfilled by compressed data of zram module). -+ -+ Setting this value too high (close to MemTotal) can result in -+ inability to swap and can lead to early OOM under memory pressure. -+ -+config CLEAN_LOW_RATIO -+ int "Default value for vm.clean_low_ratio" -+ depends on SYSCTL -+ range 0 100 -+ default 0 -+ help -+ This option sets the default value for vm.clean_low_ratio sysctl knob. -+ -+ The vm.clean_low_ratio sysctl knob provides *best-effort* -+ protection of clean file pages. The file pages on the current node -+ won't be reclaimed under memory pressure when the amount of clean file -+ pages is below vm.clean_low_ratio *unless* we threaten to OOM. -+ Protection of clean file pages using this knob may be used when -+ swapping is still possible to -+ - prevent disk I/O thrashing under memory pressure; -+ - improve performance in disk cache-bound tasks under memory -+ pressure. 
-+ -+ Setting it to a high value may result in an early eviction of anonymous -+ pages into the swap space by attempting to hold the protected amount -+ of clean file pages in memory. -+ -+config CLEAN_MIN_RATIO -+ int "Default value for vm.clean_min_ratio" -+ depends on SYSCTL -+ range 0 100 -+ default 15 -+ help -+ This option sets the default value for vm.clean_min_ratio sysctl knob. -+ -+ The vm.clean_min_ratio sysctl knob provides *hard* protection of -+ clean file pages. The file pages on the current node won't be -+ reclaimed under memory pressure when the amount of clean file pages is -+ below vm.clean_min_ratio. Hard protection of clean file pages using -+ this knob may be used to -+ - prevent disk I/O thrashing under memory pressure even with no free -+ swap space; -+ - improve performance in disk cache-bound tasks under memory -+ pressure; -+ - avoid high latency and prevent livelock in near-OOM conditions. -+ -+ Setting it to a high value may result in an early out-of-memory condition -+ due to the inability to reclaim the protected amount of clean file pages -+ when other types of pages cannot be reclaimed. 
-+ - config HAVE_MEMBLOCK_PHYS_MAP - bool - -diff --git a/mm/mm_init.c b/mm/mm_init.c -index 4ba5607aaf19..0a83f8973ddf 100644 ---- a/mm/mm_init.c -+++ b/mm/mm_init.c -@@ -2628,6 +2628,7 @@ static void __init mem_init_print_info(void) - , K(totalhigh_pages()) - #endif - ); -+ printk(KERN_INFO "le9 Unofficial (le9uo) working set protection 1.7 by Masahito Suzuki (forked from hakavlad's original le9 patch)"); - } - - /* -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 99568ccfb0fd..af10b7d3407a 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -148,6 +148,15 @@ struct scan_control { - /* The file folios on the current node are dangerously low */ - unsigned int file_is_tiny:1; - -+ /* The anonymous pages on the current node are below vm.anon_min_ratio */ -+ unsigned int anon_below_min:1; -+ -+ /* The clean file pages on the current node are below vm.clean_low_ratio */ -+ unsigned int clean_below_low:1; -+ -+ /* The clean file pages on the current node are below vm.clean_min_ratio */ -+ unsigned int clean_below_min:1; -+ - /* Always discard instead of demoting to lower tier memory */ - unsigned int no_demotion:1; - -@@ -197,6 +206,15 @@ struct scan_control { - #define prefetchw_prev_lru_folio(_folio, _base, _field) do { } while (0) - #endif - -+bool sysctl_workingset_protection __read_mostly = true; -+u8 sysctl_anon_min_ratio __read_mostly = CONFIG_ANON_MIN_RATIO; -+u8 sysctl_clean_low_ratio __read_mostly = CONFIG_CLEAN_LOW_RATIO; -+u8 sysctl_clean_min_ratio __read_mostly = CONFIG_CLEAN_MIN_RATIO; -+static u64 sysctl_anon_min_ratio_kb __read_mostly = 0; -+static u64 sysctl_clean_low_ratio_kb __read_mostly = 0; -+static u64 sysctl_clean_min_ratio_kb __read_mostly = 0; -+static u64 workingset_protection_prev_totalram __read_mostly = 0; -+ - /* - * From 0 .. MAX_SWAPPINESS. Higher means more swappy. 
- */ -@@ -1094,6 +1112,10 @@ static unsigned int shrink_folio_list(struct list_head *folio_list, - folio_mapped(folio) && folio_test_referenced(folio)) - goto keep_locked; - -+ if (folio_is_file_lru(folio) ? sc->clean_below_min : -+ (sc->anon_below_min && !sc->clean_below_min)) -+ goto keep_locked; -+ - /* - * The number of dirty pages determines if a node is marked - * reclaim_congested. kswapd will stall and start writing -@@ -2419,6 +2441,15 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, - goto out; - } - -+ /* -+ * Force-scan anon if clean file pages is under vm.clean_low_ratio -+ * or vm.clean_min_ratio. -+ */ -+ if (sc->clean_below_low || sc->clean_below_min) { -+ scan_balance = SCAN_ANON; -+ goto out; -+ } -+ - /* - * If there is enough inactive page cache, we do not reclaim - * anything from the anonymous working right now. -@@ -2563,6 +2594,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, - BUG(); - } - -+ /* -+ * Hard protection of the working set. -+ * Don't reclaim anon/file pages when the amount is -+ * below the watermark of the same type. -+ */ -+ if (file ? 
sc->clean_below_min : sc->anon_below_min) -+ scan = 0; -+ - nr[lru] = scan; - } - } -@@ -3996,11 +4035,7 @@ static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc - } - - /* to protect the working set of the last N jiffies */ --#ifdef CONFIG_CACHY --static unsigned long lru_gen_min_ttl __read_mostly = 1000; --#else - static unsigned long lru_gen_min_ttl __read_mostly; --#endif - - static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) - { -@@ -4038,6 +4073,96 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) - } - } - -+int vm_workingset_protection_update_handler(const struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos) -+{ -+ int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); -+ if (ret || !write) -+ return ret; -+ -+ workingset_protection_prev_totalram = 0; -+ -+ return 0; -+} -+ -+static void prepare_workingset_protection(pg_data_t *pgdat, struct scan_control *sc) -+{ -+ unsigned long node_mem_total; -+ struct sysinfo i; -+ -+ if (!(sysctl_workingset_protection)) { -+ sc->anon_below_min = 0; -+ sc->clean_below_low = 0; -+ sc->clean_below_min = 0; -+ return; -+ } -+ -+ if (likely(sysctl_anon_min_ratio || -+ sysctl_clean_low_ratio || -+ sysctl_clean_min_ratio)) { -+#ifdef CONFIG_NUMA -+ si_meminfo_node(&i, pgdat->node_id); -+#else //CONFIG_NUMA -+ si_meminfo(&i); -+#endif //CONFIG_NUMA -+ node_mem_total = i.totalram; -+ -+ if (unlikely(workingset_protection_prev_totalram != node_mem_total)) { -+ sysctl_anon_min_ratio_kb = -+ node_mem_total * sysctl_anon_min_ratio / 100; -+ sysctl_clean_low_ratio_kb = -+ node_mem_total * sysctl_clean_low_ratio / 100; -+ sysctl_clean_min_ratio_kb = -+ node_mem_total * sysctl_clean_min_ratio / 100; -+ workingset_protection_prev_totalram = node_mem_total; -+ } -+ } -+ -+ /* -+ * Check the number of anonymous pages to protect them from -+ * reclaiming if their amount is below the specified. 
-+ */ -+ if (sysctl_anon_min_ratio) { -+ unsigned long reclaimable_anon; -+ -+ reclaimable_anon = -+ node_page_state(pgdat, NR_ACTIVE_ANON) + -+ node_page_state(pgdat, NR_INACTIVE_ANON) + -+ node_page_state(pgdat, NR_ISOLATED_ANON); -+ -+ sc->anon_below_min = reclaimable_anon < sysctl_anon_min_ratio_kb; -+ } else -+ sc->anon_below_min = 0; -+ -+ /* -+ * Check the number of clean file pages to protect them from -+ * reclaiming if their amount is below the specified. -+ */ -+ if (sysctl_clean_low_ratio || sysctl_clean_min_ratio) { -+ unsigned long reclaimable_file, dirty, clean; -+ -+ reclaimable_file = -+ node_page_state(pgdat, NR_ACTIVE_FILE) + -+ node_page_state(pgdat, NR_INACTIVE_FILE) + -+ node_page_state(pgdat, NR_ISOLATED_FILE); -+ dirty = node_page_state(pgdat, NR_FILE_DIRTY); -+ /* -+ * node_page_state() sum can go out of sync since -+ * all the values are not read at once. -+ */ -+ if (likely(reclaimable_file > dirty)) -+ clean = reclaimable_file - dirty; -+ else -+ clean = 0; -+ -+ sc->clean_below_low = clean < sysctl_clean_low_ratio_kb; -+ sc->clean_below_min = clean < sysctl_clean_min_ratio_kb; -+ } else { -+ sc->clean_below_low = 0; -+ sc->clean_below_min = 0; -+ } -+} -+ - /****************************************************************************** - * rmap/PT walk feedback - ******************************************************************************/ -@@ -4536,6 +4661,12 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw - */ - if (!swappiness) - type = LRU_GEN_FILE; -+ else if (sc->clean_below_min) -+ type = LRU_GEN_ANON; -+ else if (sc->anon_below_min) -+ type = LRU_GEN_FILE; -+ else if (sc->clean_below_low) -+ type = LRU_GEN_ANON; - else if (min_seq[LRU_GEN_ANON] < min_seq[LRU_GEN_FILE]) - type = LRU_GEN_ANON; - else if (swappiness == 1) -@@ -4815,6 +4946,8 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) - struct mem_cgroup *memcg = lruvec_memcg(lruvec); - struct pglist_data *pgdat = 
lruvec_pgdat(lruvec); - -+ prepare_workingset_protection(pgdat, sc); -+ - /* lru_gen_age_node() called mem_cgroup_calculate_protection() */ - if (mem_cgroup_below_min(NULL, memcg)) - return MEMCG_LRU_YOUNG; -@@ -5962,6 +6095,8 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) - - prepare_scan_control(pgdat, sc); - -+ prepare_workingset_protection(pgdat, sc); -+ - shrink_node_memcgs(pgdat, sc); - - flush_reclaim_state(sc); --- -2.47.0