diff --git a/patches/0001-cachyos-base-all.patch b/patches/0001-cachyos-base-all.patch index 7056ef2..f2dbe5c 100644 --- a/patches/0001-cachyos-base-all.patch +++ b/patches/0001-cachyos-base-all.patch @@ -1,7 +1,7 @@ -From 7c24828fcb4f30bdb06c9def0fdc602cb5836572 Mon Sep 17 00:00:00 2001 +From 1dba870e7ad0f4c52b9ffcd2aa68dd731a8f0761 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Fri, 4 Oct 2024 17:06:31 +0200 -Subject: [PATCH 01/10] address-masking +Date: Thu, 10 Oct 2024 12:36:17 +0200 +Subject: [PATCH 01/11] address-masking Signed-off-by: Peter Jung --- @@ -110,10 +110,10 @@ index feeb935a2299..6e489f9e90f1 100644 -- 2.47.0.rc0 -From 694e2eec893e51c71b3faa821f561b8c387b3bb7 Mon Sep 17 00:00:00 2001 +From 158906b885fd180a24f22a91b7ce6bebccd4237e Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Fri, 4 Oct 2024 17:06:44 +0200 -Subject: [PATCH 02/10] bbr3 +Date: Thu, 10 Oct 2024 12:36:51 +0200 +Subject: [PATCH 02/11] bbr3 Signed-off-by: Peter Jung --- @@ -3496,10 +3496,10 @@ index 4d40615dc8fc..f27941201ef2 100644 -- 2.47.0.rc0 -From 09659c4444b30d6a326d03b6fee478a2b76a8f28 Mon Sep 17 00:00:00 2001 +From 4b6c037745c54b45bdea3c80010809161d95484f Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Fri, 4 Oct 2024 17:06:55 +0200 -Subject: [PATCH 03/10] cachy +Date: Thu, 10 Oct 2024 12:37:04 +0200 +Subject: [PATCH 03/11] cachy Signed-off-by: Peter Jung --- @@ -3572,7 +3572,7 @@ Signed-off-by: Peter Jung create mode 100644 drivers/pci/controller/intel-nvme-remap.c diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index 09126bb8cc9f..43305af6b364 100644 +index be010fec7654..900113802ffc 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2231,6 +2231,9 @@ @@ -3602,7 +3602,7 @@ index 09126bb8cc9f..43305af6b364 100644 Safety option to keep boot IRQs enabled. This should never be necessary. diff --git a/Makefile b/Makefile -index 7bcf0c32ea5e..12f0adb96c93 100644 +index 108d314ea95b..6d817eab1d73 100644 --- a/Makefile +++ b/Makefile @@ -803,11 +803,19 @@ KBUILD_CFLAGS += -fno-delete-null-pointer-checks @@ -4510,7 +4510,7 @@ index 04fc786dd2c0..f98c9438760c 100644 EXPORT_SYMBOL_GPL(cpufreq_policy_transition_delay_us); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c -index c0278d023cfc..7d92f16a430a 100644 +index 949ead440da9..348a330678bd 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3568,6 +3568,8 @@ static int __init intel_pstate_setup(char *str) @@ -4578,10 +4578,10 @@ index df17e79c45c7..e454488c1a31 100644 + endmenu diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -index 74bb1e0e9134..1f7f4670ef55 100644 +index 1ab7cd8a6b6a..1d619a4bf1d6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -@@ -4437,7 +4437,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) +@@ -4444,7 +4444,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) return r; } @@ -4635,10 +4635,10 @@ index 99014339aaa3..222f72b4c44f 100644 #endif return 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c -index 5cb11cc2d063..7e2020b0c0e8 100644 +index a573a6639898..52e0e42e26a5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c -@@ -1568,7 +1568,7 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane, +@@ -1569,7 +1569,7 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane, drm_atomic_helper_plane_destroy_state(plane, state); } @@ -4647,7 +4647,7 @@ index 5cb11cc2d063..7e2020b0c0e8 100644 static void dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, struct drm_plane *plane) -@@ -1759,7 +1759,7 @@ static const struct drm_plane_funcs dm_plane_funcs = { +@@ -1760,7 +1760,7 @@ static const struct drm_plane_funcs dm_plane_funcs = { .atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state, .atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state, .format_mod_supported = amdgpu_dm_plane_format_mod_supported, @@ -4656,7 +4656,7 @@ index 5cb11cc2d063..7e2020b0c0e8 100644 .atomic_set_property = dm_atomic_plane_set_property, .atomic_get_property = dm_atomic_plane_get_property, #endif -@@ -1852,7 +1852,7 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, +@@ -1853,7 +1853,7 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, drm_plane_helper_add(plane, &dm_plane_helper_funcs); @@ -9871,7 +9871,7 @@ index 5d57ea27dbc4..0f7f8c737ae3 100644 }; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h -index d4d2f4d1d7cb..e0e19d9c1323 100644 +index aabec598f79a..7fe0981a7e46 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -577,12 +577,6 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, @@ -10036,7 +10036,7 @@ index 38ef6d06888e..0f78364efd4f 100644 config SCHED_HRTICK diff --git a/kernel/fork.c b/kernel/fork.c -index cc760491f201..238695afc630 100644 +index 6b97fb2ac4af..003de4829c15 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -105,6 +105,10 @@ @@ -10050,7 +10050,7 @@ index cc760491f201..238695afc630 100644 #include #include #include -@@ -2138,6 +2142,10 @@ __latent_entropy struct task_struct *copy_process( +@@ -2135,6 +2139,10 @@ __latent_entropy struct task_struct *copy_process( if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) return ERR_PTR(-EINVAL); @@ -10061,7 +10061,7 @@ index cc760491f201..238695afc630 100644 /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. -@@ -3287,6 +3295,12 @@ int ksys_unshare(unsigned long unshare_flags) +@@ -3283,6 +3291,12 @@ int ksys_unshare(unsigned long unshare_flags) if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; @@ -10237,10 +10237,10 @@ index 0b0b95418b16..c4b835b91fc0 100644 static DEFINE_MUTEX(userns_state_mutex); diff --git a/mm/Kconfig b/mm/Kconfig -index b72e7d040f78..8feaa692ec87 100644 +index 03395624bc70..676ff8d1266b 100644 --- a/mm/Kconfig +++ b/mm/Kconfig -@@ -636,7 +636,7 @@ config COMPACTION +@@ -649,7 +649,7 @@ config COMPACTION config COMPACT_UNEVICTABLE_DEFAULT int depends on COMPACTION @@ -10524,46 +10524,42 @@ index 663ce300dd06..f83493838cf9 100644 -- 2.47.0.rc0 -From b9412503b89fd2523bba89b4cd9a67398883dc30 Mon Sep 17 00:00:00 2001 +From e033c3648c593fe0bd0d0bc911cccefdb4ec3e4a Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Fri, 4 Oct 2024 22:43:33 +0200 -Subject: [PATCH 04/10] fixes +Date: Thu, 10 Oct 2024 12:37:16 +0200 +Subject: [PATCH 04/11] fixes Signed-off-by: Peter Jung --- - arch/Kconfig | 4 +- - arch/x86/include/asm/apic.h | 8 -- - arch/x86/kernel/amd_nb.c | 4 + - arch/x86/kernel/apic/apic_flat_64.c | 119 ++---------------- - arch/x86/kernel/cpu/amd.c | 3 +- - drivers/bluetooth/btusb.c | 4 + - drivers/firmware/sysfb.c | 4 +- - drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 + - drivers/gpu/drm/amd/display/dc/core/dc.c | 45 +++++-- - drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 9 +- - drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 3 +- - drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h | 2 +- - drivers/gpu/drm/amd/pm/swsmu/inc/smu_v12_0.h | 2 +- - drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- - drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h | 2 +- - .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 2 +- - .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 2 +- - .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 25 +++- - .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 19 +-- - .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 14 +-- - .../gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c | 2 +- - .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 15 +-- - .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 34 +++-- - .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 25 ++-- - .../drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c | 22 ++-- - .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 15 +-- - .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 3 +- - .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c | 36 +++--- - .../gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c | 33 +++-- - .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 3 +- - drivers/net/wireless/realtek/rtw89/pci.c | 48 +++++-- - fs/btrfs/send.c | 8 +- - 32 files changed, 274 insertions(+), 248 deletions(-) + arch/Kconfig | 4 +- + arch/x86/kernel/amd_nb.c | 4 ++ + arch/x86/kernel/cpu/amd.c | 3 +- + drivers/bluetooth/btusb.c | 4 ++ + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 ++ + drivers/gpu/drm/amd/display/dc/core/dc.c | 45 ++++++++++++----- + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 9 +++- + drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 3 +- + drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h | 2 +- + drivers/gpu/drm/amd/pm/swsmu/inc/smu_v12_0.h | 2 +- + drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- + drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h | 2 +- + .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 2 +- + .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 2 +- + .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 25 +++++++--- + .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 19 ++++---- + .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 14 +++--- + .../gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c | 2 +- + .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 15 +++--- + .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 34 +++++++++---- + .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 25 +++++----- + .../drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c | 22 +++++---- + .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 15 +++--- + .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 3 +- + .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c | 36 ++++++++------ + .../gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c | 33 +++++++++---- + .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 3 +- + drivers/net/wireless/realtek/rtw89/pci.c | 48 ++++++++++++++++--- + 28 files changed, 259 insertions(+), 124 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 975dd22a2dbd..de69b8f5b5be 100644 @@ -10587,31 +10583,6 @@ index 975dd22a2dbd..de69b8f5b5be 100644 depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS help This value can be used to select the number of bits to use to -diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h -index 9327eb00e96d..be2045a18e69 100644 ---- a/arch/x86/include/asm/apic.h -+++ b/arch/x86/include/asm/apic.h -@@ -345,20 +345,12 @@ extern struct apic *apic; - * APIC drivers are probed based on how they are listed in the .apicdrivers - * section. So the order is important and enforced by the ordering - * of different apic driver files in the Makefile. -- * -- * For the files having two apic drivers, we use apic_drivers() -- * to enforce the order with in them. - */ - #define apic_driver(sym) \ - static const struct apic *__apicdrivers_##sym __used \ - __aligned(sizeof(struct apic *)) \ - __section(".apicdrivers") = { &sym } - --#define apic_drivers(sym1, sym2) \ -- static struct apic *__apicdrivers_##sym1##sym2[2] __used \ -- __aligned(sizeof(struct apic *)) \ -- __section(".apicdrivers") = { &sym1, &sym2 } -- - extern struct apic *__apicdrivers[], *__apicdrivers_end[]; - - /* diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 059e5c16af05..e8c3d65aee60 100644 --- a/arch/x86/kernel/amd_nb.c @@ -10634,164 +10605,6 @@ index 059e5c16af05..e8c3d65aee60 100644 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI300_DF_F4) }, {} -diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c -index f37ad3392fec..e0308d8c4e6c 100644 ---- a/arch/x86/kernel/apic/apic_flat_64.c -+++ b/arch/x86/kernel/apic/apic_flat_64.c -@@ -8,129 +8,25 @@ - * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and - * James Cleverdon. - */ --#include - #include --#include - --#include - #include - - #include "local.h" - --static struct apic apic_physflat; --static struct apic apic_flat; -- --struct apic *apic __ro_after_init = &apic_flat; --EXPORT_SYMBOL_GPL(apic); -- --static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) --{ -- return 1; --} -- --static void _flat_send_IPI_mask(unsigned long mask, int vector) --{ -- unsigned long flags; -- -- local_irq_save(flags); -- __default_send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); -- local_irq_restore(flags); --} -- --static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) --{ -- unsigned long mask = cpumask_bits(cpumask)[0]; -- -- _flat_send_IPI_mask(mask, vector); --} -- --static void --flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) --{ -- unsigned long mask = cpumask_bits(cpumask)[0]; -- int cpu = smp_processor_id(); -- -- if (cpu < BITS_PER_LONG) -- __clear_bit(cpu, &mask); -- -- _flat_send_IPI_mask(mask, vector); --} -- --static u32 flat_get_apic_id(u32 x) -+static u32 physflat_get_apic_id(u32 x) - { - return (x >> 24) & 0xFF; - } - --static int flat_probe(void) -+static int physflat_probe(void) - { - return 1; - } - --static struct apic apic_flat __ro_after_init = { -- .name = "flat", -- .probe = flat_probe, -- .acpi_madt_oem_check = flat_acpi_madt_oem_check, -- -- .dest_mode_logical = true, -- -- .disable_esr = 0, -- -- .init_apic_ldr = default_init_apic_ldr, -- .cpu_present_to_apicid = default_cpu_present_to_apicid, -- -- .max_apic_id = 0xFE, -- .get_apic_id = flat_get_apic_id, -- -- .calc_dest_apicid = apic_flat_calc_apicid, -- -- .send_IPI = default_send_IPI_single, -- .send_IPI_mask = flat_send_IPI_mask, -- .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, -- .send_IPI_allbutself = default_send_IPI_allbutself, -- .send_IPI_all = default_send_IPI_all, -- .send_IPI_self = default_send_IPI_self, -- .nmi_to_offline_cpu = true, -- -- .read = native_apic_mem_read, -- .write = native_apic_mem_write, -- .eoi = native_apic_mem_eoi, -- .icr_read = native_apic_icr_read, -- .icr_write = native_apic_icr_write, -- .wait_icr_idle = apic_mem_wait_icr_idle, -- .safe_wait_icr_idle = apic_mem_wait_icr_idle_timeout, --}; -- --/* -- * Physflat mode is used when there are more than 8 CPUs on a system. -- * We cannot use logical delivery in this case because the mask -- * overflows, so use physical mode. -- */ - static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) - { --#ifdef CONFIG_ACPI -- /* -- * Quirk: some x86_64 machines can only use physical APIC mode -- * regardless of how many processors are present (x86_64 ES7000 -- * is an example). -- */ -- if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && -- (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { -- printk(KERN_DEBUG "system APIC only can use physical flat"); -- return 1; -- } -- -- if (!strncmp(oem_id, "IBM", 3) && !strncmp(oem_table_id, "EXA", 3)) { -- printk(KERN_DEBUG "IBM Summit detected, will use apic physical"); -- return 1; -- } --#endif -- -- return 0; --} -- --static int physflat_probe(void) --{ -- return apic == &apic_physflat || num_possible_cpus() > 8 || jailhouse_paravirt(); -+ return 1; - } - - static struct apic apic_physflat __ro_after_init = { -@@ -146,7 +42,7 @@ static struct apic apic_physflat __ro_after_init = { - .cpu_present_to_apicid = default_cpu_present_to_apicid, - - .max_apic_id = 0xFE, -- .get_apic_id = flat_get_apic_id, -+ .get_apic_id = physflat_get_apic_id, - - .calc_dest_apicid = apic_default_calc_apicid, - -@@ -166,8 +62,7 @@ static struct apic apic_physflat __ro_after_init = { - .wait_icr_idle = apic_mem_wait_icr_idle, - .safe_wait_icr_idle = apic_mem_wait_icr_idle_timeout, - }; -+apic_driver(apic_physflat); - --/* -- * We need to check for physflat first, so this order is important. -- */ --apic_drivers(apic_physflat, apic_flat); -+struct apic *apic __ro_after_init = &apic_physflat; -+EXPORT_SYMBOL_GPL(apic); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1e0fe5f8ab84..65b2f0c5ec2d 100644 --- a/arch/x86/kernel/cpu/amd.c @@ -10805,10 +10618,10 @@ index 1e0fe5f8ab84..65b2f0c5ec2d 100644 + on_each_cpu(zenbleed_check_cpu, NULL, 1); } diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c -index 1ec71a2fb63e..8ddd6ed219b4 100644 +index 93dbeb8b348d..e0458bcde8d1 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c -@@ -690,6 +690,10 @@ static const struct usb_device_id quirks_table[] = { +@@ -692,6 +692,10 @@ static const struct usb_device_id quirks_table[] = { { USB_DEVICE(0x0489, 0xe113), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, @@ -10819,23 +10632,6 @@ index 1ec71a2fb63e..8ddd6ed219b4 100644 { USB_DEVICE(0x13d3, 0x3602), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, -diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c -index 02a07d3d0d40..a3df782fa687 100644 ---- a/drivers/firmware/sysfb.c -+++ b/drivers/firmware/sysfb.c -@@ -67,9 +67,11 @@ static bool sysfb_unregister(void) - void sysfb_disable(struct device *dev) - { - struct screen_info *si = &screen_info; -+ struct device *parent; - - mutex_lock(&disable_lock); -- if (!dev || dev == sysfb_parent_dev(si)) { -+ parent = sysfb_parent_dev(si); -+ if (!dev || !parent || dev == parent) { - sysfb_unregister(); - disabled = true; - } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 9c3b7b027485..ad5c05ee92f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -10853,10 +10649,10 @@ index 9c3b7b027485..ad5c05ee92f3 100644 return pci_register_driver(&amdgpu_kms_pci_driver); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c -index 85a2ef82afa5..70bdc9d95f34 100644 +index 9e05d77453ac..5e12a1624124 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c -@@ -5119,11 +5119,26 @@ static bool update_planes_and_stream_v3(struct dc *dc, +@@ -5120,11 +5120,26 @@ static bool update_planes_and_stream_v3(struct dc *dc, return true; } @@ -10883,7 +10679,7 @@ index 85a2ef82afa5..70bdc9d95f34 100644 dc_exit_ips_for_hw_access(dc); /* * update planes and stream version 3 separates FULL and FAST updates -@@ -5140,10 +5155,16 @@ bool dc_update_planes_and_stream(struct dc *dc, +@@ -5141,10 +5156,16 @@ bool dc_update_planes_and_stream(struct dc *dc, * features as they are now transparent to the new sequence. */ if (dc->ctx->dce_version >= DCN_VERSION_4_01) @@ -10902,7 +10698,7 @@ index 85a2ef82afa5..70bdc9d95f34 100644 } void dc_commit_updates_for_stream(struct dc *dc, -@@ -5153,6 +5174,8 @@ void dc_commit_updates_for_stream(struct dc *dc, +@@ -5154,6 +5175,8 @@ void dc_commit_updates_for_stream(struct dc *dc, struct dc_stream_update *stream_update, struct dc_state *state) { @@ -10911,7 +10707,7 @@ index 85a2ef82afa5..70bdc9d95f34 100644 dc_exit_ips_for_hw_access(dc); /* TODO: Since change commit sequence can have a huge impact, * we decided to only enable it for DCN3x. However, as soon as -@@ -5160,17 +5183,17 @@ void dc_commit_updates_for_stream(struct dc *dc, +@@ -5161,17 +5184,17 @@ void dc_commit_updates_for_stream(struct dc *dc, * the new sequence for all ASICs. */ if (dc->ctx->dce_version >= DCN_VERSION_4_01) { @@ -11958,47 +11754,13 @@ index 02afeb3acce4..5aef7fa37878 100644 resource_len = pci_resource_len(pdev, bar_id); rtwpci->mmap = pci_iomap(pdev, bar_id, resource_len); -diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c -index 619fa0b8b3f6..f3de12a31c38 100644 ---- a/fs/btrfs/send.c -+++ b/fs/btrfs/send.c -@@ -346,8 +346,10 @@ struct name_cache_entry { - u64 parent_gen; - int ret; - int need_later_update; -+ /* Name length without NUL terminator. */ - int name_len; -- char name[] __counted_by(name_len); -+ /* Not NUL terminated. */ -+ char name[] __counted_by(name_len) __nonstring; - }; - - /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */ -@@ -2388,7 +2390,7 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, - /* - * Store the result of the lookup in the name cache. - */ -- nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL); -+ nce = kmalloc(sizeof(*nce) + fs_path_len(dest), GFP_KERNEL); - if (!nce) { - ret = -ENOMEM; - goto out; -@@ -2400,7 +2402,7 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, - nce->parent_gen = *parent_gen; - nce->name_len = fs_path_len(dest); - nce->ret = ret; -- strcpy(nce->name, dest->start); -+ memcpy(nce->name, dest->start, nce->name_len); - - if (ino < sctx->send_progress) - nce->need_later_update = 0; -- 2.47.0.rc0 -From aadc07fef91ef7043a87858eff872f608649e5ee Mon Sep 17 00:00:00 2001 +From 49a2293cad3542ec1383bff6eb8d8bbc83b6c46d Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Fri, 4 Oct 2024 17:07:08 +0200 -Subject: [PATCH 05/10] intel-pstate +Date: Thu, 10 Oct 2024 12:37:26 +0200 +Subject: [PATCH 05/11] intel-pstate Signed-off-by: Peter Jung --- @@ -12144,7 +11906,7 @@ index 0b69bfbf345d..ec07678c641b 100644 freq_scale = div64_u64(acnt, mcnt); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c -index 7d92f16a430a..86ad1fed71f1 100644 +index 348a330678bd..c11be253bfa3 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -16,6 +16,7 @@ @@ -12479,10 +12241,10 @@ index 7d92f16a430a..86ad1fed71f1 100644 -- 2.47.0.rc0 -From 21b43327f8469d705fd68b20773dac3e9b554a8e Mon Sep 17 00:00:00 2001 +From b696b1ddc76a50fca46d2f4e0f2993114042e613 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Fri, 4 Oct 2024 17:07:18 +0200 -Subject: [PATCH 06/10] ksm +Date: Thu, 10 Oct 2024 12:37:57 +0200 +Subject: [PATCH 06/11] ksm Signed-off-by: Peter Jung --- @@ -12912,10 +12674,10 @@ index 01071182763e..7394bad8178e 100644 -- 2.47.0.rc0 -From 2087698c3f9af692a9e088307a8f25da094bc7a2 Mon Sep 17 00:00:00 2001 +From 98440e16d1442aa58b0817752fab19604706bd84 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Fri, 4 Oct 2024 17:08:21 +0200 -Subject: [PATCH 07/10] ntsync +Date: Thu, 10 Oct 2024 12:38:18 +0200 +Subject: [PATCH 07/11] ntsync Signed-off-by: Peter Jung --- @@ -16001,10 +15763,10 @@ index 000000000000..5fa2c9a0768c -- 2.47.0.rc0 -From f3788bc44e2875141e8cf16b36365cb2bac541a6 Mon Sep 17 00:00:00 2001 +From a0b3cb05c95954b4055a3327b1a00553583fbdde Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Fri, 4 Oct 2024 17:08:44 +0200 -Subject: [PATCH 08/10] perf-per-core +Date: Thu, 10 Oct 2024 12:38:31 +0200 +Subject: [PATCH 08/11] perf-per-core Signed-off-by: Peter Jung --- @@ -16909,10 +16671,10 @@ index 9a6069e7133c..23722aa21e2f 100644 -- 2.47.0.rc0 -From 07f18266f273138cab1a5cb27517d6a954c5fb34 Mon Sep 17 00:00:00 2001 +From 074c7e9eebb929b35d8f128ae1250ae0d8832eec Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Fri, 4 Oct 2024 17:09:09 +0200 -Subject: [PATCH 09/10] t2 +Date: Thu, 10 Oct 2024 12:39:01 +0200 +Subject: [PATCH 09/11] t2 Signed-off-by: Peter Jung --- @@ -17173,7 +16935,7 @@ index b1be458ed4dd..28c0e76a1e88 100644 drm_fb_xrgb8888_to_argb8888(dst, dst_pitch, src, fb, clip, state); return 0; diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c -index a07aca96e551..210e20b03aab 100644 +index 5b6aabce4c32..fafc673d508e 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4640,6 +4640,7 @@ intel_ddi_init_hdmi_connector(struct intel_digital_port *dig_port) @@ -18934,7 +18696,7 @@ index 6e4ebc349e45..4e79fafeeafa 100644 } diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c -index 99812c0f830b..bb845947585f 100644 +index c4a6908bbe54..30df701af5da 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -72,6 +72,7 @@ MODULE_LICENSE("GPL"); @@ -19095,7 +18857,7 @@ index 99812c0f830b..bb845947585f 100644 if (mtclass->quirks & MT_QUIRK_FIX_CONST_CONTACT_ID) mt_fix_const_fields(hdev, HID_DG_CONTACTID); -@@ -2267,6 +2294,11 @@ static const struct hid_device_id mt_devices[] = { +@@ -2273,6 +2300,11 @@ static const struct hid_device_id mt_devices[] = { MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_CSR2) }, @@ -27324,10 +27086,824 @@ index 4427572b2477..b60c99d61882 100755 -- 2.47.0.rc0 -From cf0e4ae5c086f49c71b2a5aad50a589d8aa1799e Mon Sep 17 00:00:00 2001 +From 3346172fe2036b00ffdf8b707c1aba850d6825ca Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Fri, 4 Oct 2024 17:09:19 +0200 -Subject: [PATCH 10/10] zstd +Date: Thu, 10 Oct 2024 12:39:25 +0200 +Subject: [PATCH 10/11] thp shrinker + +Signed-off-by: Peter Jung +--- + Documentation/admin-guide/mm/transhuge.rst | 16 +++ + include/linux/huge_mm.h | 4 +- + include/linux/khugepaged.h | 1 + + include/linux/page-flags.h | 13 +- + include/linux/rmap.h | 7 +- + include/linux/vm_event_item.h | 1 + + mm/huge_memory.c | 124 ++++++++++++++++-- + mm/khugepaged.c | 3 +- + mm/memcontrol.c | 3 +- + mm/migrate.c | 75 +++++++++-- + mm/migrate_device.c | 4 +- + mm/page_alloc.c | 5 +- + mm/rmap.c | 5 +- + mm/vmscan.c | 3 +- + mm/vmstat.c | 1 + + .../selftests/mm/split_huge_page_test.c | 71 ++++++++++ + tools/testing/selftests/mm/vm_util.c | 22 ++++ + tools/testing/selftests/mm/vm_util.h | 1 + + 18 files changed, 323 insertions(+), 36 deletions(-) + +diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst +index 058485daf186..02ae7bc9efbd 100644 +--- a/Documentation/admin-guide/mm/transhuge.rst ++++ b/Documentation/admin-guide/mm/transhuge.rst +@@ -202,6 +202,16 @@ PMD-mappable transparent hugepage:: + + cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size + ++All THPs at fault and collapse time will be added to _deferred_list, ++and will therefore be split under memory presure if they are considered ++"underused". A THP is underused if the number of zero-filled pages in ++the THP is above max_ptes_none (see below). It is possible to disable ++this behaviour by writing 0 to shrink_underused, and enable it by writing ++1 to it:: ++ ++ echo 0 > /sys/kernel/mm/transparent_hugepage/shrink_underused ++ echo 1 > /sys/kernel/mm/transparent_hugepage/shrink_underused ++ + khugepaged will be automatically started when PMD-sized THP is enabled + (either of the per-size anon control or the top-level control are set + to "always" or "madvise"), and it'll be automatically shutdown when +@@ -447,6 +457,12 @@ thp_deferred_split_page + splitting it would free up some memory. Pages on split queue are + going to be split under memory pressure. + ++thp_underused_split_page ++ is incremented when a huge page on the split queue was split ++ because it was underused. A THP is underused if the number of ++ zero pages in the THP is above a certain threshold ++ (/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none). ++ + thp_split_pmd + is incremented every time a PMD split into table of PTEs. + This can happen, for instance, when application calls mprotect() or +diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h +index e25d9ebfdf89..00af84aa88ea 100644 +--- a/include/linux/huge_mm.h ++++ b/include/linux/huge_mm.h +@@ -321,7 +321,7 @@ static inline int split_huge_page(struct page *page) + { + return split_huge_page_to_list_to_order(page, NULL, 0); + } +-void deferred_split_folio(struct folio *folio); ++void deferred_split_folio(struct folio *folio, bool partially_mapped); + + void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, + unsigned long address, bool freeze, struct folio *folio); +@@ -484,7 +484,7 @@ static inline int split_huge_page(struct page *page) + { + return 0; + } +-static inline void deferred_split_folio(struct folio *folio) {} ++static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {} + #define split_huge_pmd(__vma, __pmd, __address) \ + do { } while (0) + +diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h +index f68865e19b0b..30baae91b225 100644 +--- a/include/linux/khugepaged.h ++++ b/include/linux/khugepaged.h +@@ -4,6 +4,7 @@ + + #include /* MMF_VM_HUGEPAGE */ + ++extern unsigned int khugepaged_max_ptes_none __read_mostly; + #ifdef CONFIG_TRANSPARENT_HUGEPAGE + extern struct attribute_group khugepaged_attr_group; + +diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h +index 5769fe6e4950..5e7bc8522e91 100644 +--- a/include/linux/page-flags.h ++++ b/include/linux/page-flags.h +@@ -185,6 +185,7 @@ enum pageflags { + /* At least one page in this folio has the hwpoison flag set */ + PG_has_hwpoisoned = PG_error, + PG_large_rmappable = PG_workingset, /* anon or file-backed */ ++ PG_partially_mapped = PG_reclaim, /* was identified to be partially mapped */ + }; + + #define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1) +@@ -865,8 +866,18 @@ static inline void ClearPageCompound(struct page *page) + ClearPageHead(page); + } + FOLIO_FLAG(large_rmappable, FOLIO_SECOND_PAGE) ++FOLIO_TEST_FLAG(partially_mapped, FOLIO_SECOND_PAGE) ++/* ++ * PG_partially_mapped is protected by deferred_split split_queue_lock, ++ * so its safe to use non-atomic set/clear. ++ */ ++__FOLIO_SET_FLAG(partially_mapped, FOLIO_SECOND_PAGE) ++__FOLIO_CLEAR_FLAG(partially_mapped, FOLIO_SECOND_PAGE) + #else + FOLIO_FLAG_FALSE(large_rmappable) ++FOLIO_TEST_FLAG_FALSE(partially_mapped) ++__FOLIO_SET_FLAG_NOOP(partially_mapped) ++__FOLIO_CLEAR_FLAG_NOOP(partially_mapped) + #endif + + #define PG_head_mask ((1UL << PG_head)) +@@ -1175,7 +1186,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) + */ + #define PAGE_FLAGS_SECOND \ + (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ +- 1UL << PG_large_rmappable) ++ 1UL << PG_large_rmappable | 1UL << PG_partially_mapped) + + #define PAGE_FLAGS_PRIVATE \ + (1UL << PG_private | 1UL << PG_private_2) +diff --git a/include/linux/rmap.h b/include/linux/rmap.h +index 0978c64f49d8..07854d1f9ad6 100644 +--- a/include/linux/rmap.h ++++ b/include/linux/rmap.h +@@ -745,7 +745,12 @@ int folio_mkclean(struct folio *); + int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff, + struct vm_area_struct *vma); + +-void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked); ++enum rmp_flags { ++ RMP_LOCKED = 1 << 0, ++ RMP_USE_SHARED_ZEROPAGE = 1 << 1, ++}; ++ ++void remove_migration_ptes(struct folio *src, struct folio *dst, int flags); + + /* + * rmap_walk_control: To control rmap traversing for specific needs +diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h +index 747943bc8cc2..d35e588e0ece 100644 +--- a/include/linux/vm_event_item.h ++++ b/include/linux/vm_event_item.h +@@ -104,6 +104,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, + THP_SPLIT_PAGE, + THP_SPLIT_PAGE_FAILED, + THP_DEFERRED_SPLIT_PAGE, ++ THP_UNDERUSED_SPLIT_PAGE, + THP_SPLIT_PMD, + THP_SCAN_EXCEED_NONE_PTE, + THP_SCAN_EXCEED_SWAP_PTE, +diff --git a/mm/huge_memory.c b/mm/huge_memory.c +index 4d2839fcf688..3292411ad8ed 100644 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -77,6 +77,7 @@ static unsigned long deferred_split_count(struct shrinker *shrink, + struct shrink_control *sc); + static unsigned long deferred_split_scan(struct shrinker *shrink, + struct shrink_control *sc); ++static bool split_underused_thp = true; + + static atomic_t huge_zero_refcount; + struct folio *huge_zero_folio __read_mostly; +@@ -449,6 +450,27 @@ static ssize_t hpage_pmd_size_show(struct kobject *kobj, + static struct kobj_attribute hpage_pmd_size_attr = + __ATTR_RO(hpage_pmd_size); + ++static ssize_t split_underused_thp_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sysfs_emit(buf, "%d\n", split_underused_thp); ++} ++ ++static ssize_t split_underused_thp_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, size_t count) ++{ ++ int err = kstrtobool(buf, &split_underused_thp); ++ ++ if (err < 0) ++ return err; ++ ++ return count; ++} ++ ++static struct kobj_attribute split_underused_thp_attr = __ATTR( ++ shrink_underused, 0644, split_underused_thp_show, split_underused_thp_store); ++ + static struct attribute *hugepage_attr[] = { + &enabled_attr.attr, + &defrag_attr.attr, +@@ -457,6 +479,7 @@ static struct attribute *hugepage_attr[] = { + #ifdef CONFIG_SHMEM + &shmem_enabled_attr.attr, + #endif ++ &split_underused_thp_attr.attr, + NULL, + }; + +@@ -1013,6 +1036,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, + update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); + add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR); + mm_inc_nr_ptes(vma->vm_mm); ++ deferred_split_folio(folio, false); + spin_unlock(vmf->ptl); + count_vm_event(THP_FAULT_ALLOC); + count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_ALLOC); +@@ -2784,7 +2808,7 @@ bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr, + return false; + } + +-static void remap_page(struct folio *folio, unsigned long nr) ++static void remap_page(struct folio *folio, unsigned long nr, int flags) + { + int i = 0; + +@@ -2792,7 +2816,7 @@ static void remap_page(struct folio *folio, unsigned long nr) + if (!folio_test_anon(folio)) + return; + for (;;) { +- remove_migration_ptes(folio, folio, true); ++ remove_migration_ptes(folio, folio, RMP_LOCKED | flags); + i += folio_nr_pages(folio); + if (i >= nr) + break; +@@ -3000,7 +3024,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, + + if (nr_dropped) + shmem_uncharge(folio->mapping->host, nr_dropped); +- remap_page(folio, nr); ++ remap_page(folio, nr, PageAnon(head) ? RMP_USE_SHARED_ZEROPAGE : 0); + + /* + * set page to its compound_head when split to non order-0 pages, so +@@ -3235,6 +3259,9 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, + if (folio_order(folio) > 1 && + !list_empty(&folio->_deferred_list)) { + ds_queue->split_queue_len--; ++ if (folio_test_partially_mapped(folio)) { ++ __folio_clear_partially_mapped(folio); ++ } + /* + * Reinitialize page_deferred_list after removing the + * page from the split_queue, otherwise a subsequent +@@ -3269,7 +3296,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, + if (mapping) + xas_unlock(&xas); + local_irq_enable(); +- remap_page(folio, folio_nr_pages(folio)); ++ remap_page(folio, folio_nr_pages(folio), 0); + ret = -EAGAIN; + } + +@@ -3297,12 +3324,16 @@ void __folio_undo_large_rmappable(struct folio *folio) + spin_lock_irqsave(&ds_queue->split_queue_lock, flags); + if (!list_empty(&folio->_deferred_list)) { + ds_queue->split_queue_len--; ++ if (folio_test_partially_mapped(folio)) { ++ __folio_clear_partially_mapped(folio); ++ } + list_del_init(&folio->_deferred_list); + } + spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); + } + +-void deferred_split_folio(struct folio *folio) ++/* partially_mapped=false won't clear PG_partially_mapped folio flag */ ++void deferred_split_folio(struct folio *folio, bool partially_mapped) + { + struct deferred_split *ds_queue = get_deferred_split_queue(folio); + #ifdef CONFIG_MEMCG +@@ -3317,6 +3348,9 @@ void deferred_split_folio(struct folio *folio) + if (folio_order(folio) <= 1) + return; + ++ if (!partially_mapped && !split_underused_thp) ++ return; ++ + /* + * The try_to_unmap() in page reclaim path might reach here too, + * this may cause a race condition to corrupt deferred split queue. +@@ -3330,14 +3364,20 @@ void deferred_split_folio(struct folio *folio) + if (folio_test_swapcache(folio)) + return; + +- if (!list_empty(&folio->_deferred_list)) +- return; +- + spin_lock_irqsave(&ds_queue->split_queue_lock, flags); ++ if (partially_mapped) { ++ if (!folio_test_partially_mapped(folio)) { ++ __folio_set_partially_mapped(folio); ++ if (folio_test_pmd_mappable(folio)) ++ count_vm_event(THP_DEFERRED_SPLIT_PAGE); ++ count_mthp_stat(folio_order(folio), MTHP_STAT_SPLIT_DEFERRED); ++ ++ } ++ } else { ++ /* partially mapped folios cannot become non-partially mapped */ ++ VM_WARN_ON_FOLIO(folio_test_partially_mapped(folio), folio); ++ } + if (list_empty(&folio->_deferred_list)) { +- if (folio_test_pmd_mappable(folio)) +- count_vm_event(THP_DEFERRED_SPLIT_PAGE); +- count_mthp_stat(folio_order(folio), MTHP_STAT_SPLIT_DEFERRED); + list_add_tail(&folio->_deferred_list, &ds_queue->split_queue); + ds_queue->split_queue_len++; + #ifdef CONFIG_MEMCG +@@ -3362,6 +3402,39 @@ static unsigned long deferred_split_count(struct shrinker *shrink, + return READ_ONCE(ds_queue->split_queue_len); + } + ++static bool thp_underused(struct folio *folio) ++{ ++ int num_zero_pages = 0, num_filled_pages = 0; ++ void *kaddr; ++ int i; ++ ++ if (khugepaged_max_ptes_none == HPAGE_PMD_NR - 1) ++ return false; ++ ++ for (i = 0; i < folio_nr_pages(folio); i++) { ++ kaddr = kmap_local_folio(folio, i * PAGE_SIZE); ++ if (!memchr_inv(kaddr, 0, PAGE_SIZE)) { ++ num_zero_pages++; ++ if (num_zero_pages > khugepaged_max_ptes_none) { ++ kunmap_local(kaddr); ++ return true; ++ } ++ } else { ++ /* ++ * Another path for early exit once the number ++ * of non-zero filled pages exceeds threshold. ++ */ ++ num_filled_pages++; ++ if (num_filled_pages >= HPAGE_PMD_NR - khugepaged_max_ptes_none) { ++ kunmap_local(kaddr); ++ return false; ++ } ++ } ++ kunmap_local(kaddr); ++ } ++ return false; ++} ++ + static unsigned long deferred_split_scan(struct shrinker *shrink, + struct shrink_control *sc) + { +@@ -3385,6 +3458,9 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, + list_move(&folio->_deferred_list, &list); + } else { + /* We lost race with folio_put() */ ++ if (folio_test_partially_mapped(folio)) { ++ __folio_clear_partially_mapped(folio); ++ } + list_del_init(&folio->_deferred_list); + ds_queue->split_queue_len--; + } +@@ -3394,13 +3470,35 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, + spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); + + list_for_each_entry_safe(folio, next, &list, _deferred_list) { ++ bool did_split = false; ++ bool underused = false; ++ ++ if (!folio_test_partially_mapped(folio)) { ++ underused = thp_underused(folio); ++ if (!underused) ++ goto next; ++ } + if (!folio_trylock(folio)) + goto next; +- /* split_huge_page() removes page from list on success */ +- if (!split_folio(folio)) ++ if (!split_folio(folio)) { ++ did_split = true; ++ if (underused) ++ count_vm_event(THP_UNDERUSED_SPLIT_PAGE); + split++; ++ } + folio_unlock(folio); + next: ++ /* ++ * split_folio() removes folio from list on success. ++ * Only add back to the queue if folio is partially mapped. ++ * If thp_underused returns false, or if split_folio fails ++ * in the case it was underused, then consider it used and ++ * don't add it back to split_queue. ++ */ ++ if (!did_split && !folio_test_partially_mapped(folio)) { ++ list_del_init(&folio->_deferred_list); ++ ds_queue->split_queue_len--; ++ } + folio_put(folio); + } + +diff --git a/mm/khugepaged.c b/mm/khugepaged.c +index cdd1d8655a76..02e1463e1a79 100644 +--- a/mm/khugepaged.c ++++ b/mm/khugepaged.c +@@ -85,7 +85,7 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait); + * + * Note that these are only respected if collapse was initiated by khugepaged. + */ +-static unsigned int khugepaged_max_ptes_none __read_mostly; ++unsigned int khugepaged_max_ptes_none __read_mostly; + static unsigned int khugepaged_max_ptes_swap __read_mostly; + static unsigned int khugepaged_max_ptes_shared __read_mostly; + +@@ -1235,6 +1235,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, + pgtable_trans_huge_deposit(mm, pmd, pgtable); + set_pmd_at(mm, address, pmd, _pmd); + update_mmu_cache_pmd(vma, address, pmd); ++ deferred_split_folio(folio, false); + spin_unlock(pmd_ptl); + + folio = NULL; +diff --git a/mm/memcontrol.c b/mm/memcontrol.c +index d563fb515766..c5078e86ac55 100644 +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -4606,7 +4606,8 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug) + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); + VM_BUG_ON_FOLIO(folio_order(folio) > 1 && + !folio_test_hugetlb(folio) && +- !list_empty(&folio->_deferred_list), folio); ++ !list_empty(&folio->_deferred_list) && ++ folio_test_partially_mapped(folio), folio); + + /* + * Nobody should be changing or seriously looking at +diff --git a/mm/migrate.c b/mm/migrate.c +index 368ab3878fa6..d3a66f1a621b 100644 +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -177,13 +177,57 @@ void putback_movable_pages(struct list_head *l) + } + } + ++static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw, ++ struct folio *folio, ++ unsigned long idx) ++{ ++ struct page *page = folio_page(folio, idx); ++ bool contains_data; ++ pte_t newpte; ++ void *addr; ++ ++ VM_BUG_ON_PAGE(PageCompound(page), page); ++ VM_BUG_ON_PAGE(!PageAnon(page), page); ++ VM_BUG_ON_PAGE(!PageLocked(page), page); ++ VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page); ++ ++ if (folio_test_mlocked(folio) || (pvmw->vma->vm_flags & VM_LOCKED) || ++ mm_forbids_zeropage(pvmw->vma->vm_mm)) ++ return false; ++ ++ /* ++ * The pmd entry mapping the old thp was flushed and the pte mapping ++ * this subpage has been non present. If the subpage is only zero-filled ++ * then map it to the shared zeropage. ++ */ ++ addr = kmap_local_page(page); ++ contains_data = memchr_inv(addr, 0, PAGE_SIZE); ++ kunmap_local(addr); ++ ++ if (contains_data) ++ return false; ++ ++ newpte = pte_mkspecial(pfn_pte(my_zero_pfn(pvmw->address), ++ pvmw->vma->vm_page_prot)); ++ set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte); ++ ++ dec_mm_counter(pvmw->vma->vm_mm, mm_counter(folio)); ++ return true; ++} ++ ++struct rmap_walk_arg { ++ struct folio *folio; ++ bool map_unused_to_zeropage; ++}; ++ + /* + * Restore a potential migration pte to a working pte entry + */ + static bool remove_migration_pte(struct folio *folio, +- struct vm_area_struct *vma, unsigned long addr, void *old) ++ struct vm_area_struct *vma, unsigned long addr, void *arg) + { +- DEFINE_FOLIO_VMA_WALK(pvmw, old, vma, addr, PVMW_SYNC | PVMW_MIGRATION); ++ struct rmap_walk_arg *rmap_walk_arg = arg; ++ DEFINE_FOLIO_VMA_WALK(pvmw, rmap_walk_arg->folio, vma, addr, PVMW_SYNC | PVMW_MIGRATION); + + while (page_vma_mapped_walk(&pvmw)) { + rmap_t rmap_flags = RMAP_NONE; +@@ -207,6 +251,9 @@ static bool remove_migration_pte(struct folio *folio, + continue; + } + #endif ++ if (rmap_walk_arg->map_unused_to_zeropage && ++ try_to_map_unused_to_zeropage(&pvmw, folio, idx)) ++ continue; + + folio_get(folio); + pte = mk_pte(new, READ_ONCE(vma->vm_page_prot)); +@@ -285,14 +332,21 @@ static bool remove_migration_pte(struct folio *folio, + * Get rid of all migration entries and replace them by + * references to the indicated page. + */ +-void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked) ++void remove_migration_ptes(struct folio *src, struct folio *dst, int flags) + { ++ struct rmap_walk_arg rmap_walk_arg = { ++ .folio = src, ++ .map_unused_to_zeropage = flags & RMP_USE_SHARED_ZEROPAGE, ++ }; ++ + struct rmap_walk_control rwc = { + .rmap_one = remove_migration_pte, +- .arg = src, ++ .arg = &rmap_walk_arg, + }; + +- if (locked) ++ VM_BUG_ON_FOLIO((flags & RMP_USE_SHARED_ZEROPAGE) && (src != dst), src); ++ ++ if (flags & RMP_LOCKED) + rmap_walk_locked(dst, &rwc); + else + rmap_walk(dst, &rwc); +@@ -904,7 +958,7 @@ static int writeout(struct address_space *mapping, struct folio *folio) + * At this point we know that the migration attempt cannot + * be successful. + */ +- remove_migration_ptes(folio, folio, false); ++ remove_migration_ptes(folio, folio, 0); + + rc = mapping->a_ops->writepage(&folio->page, &wbc); + +@@ -1068,7 +1122,7 @@ static void migrate_folio_undo_src(struct folio *src, + struct list_head *ret) + { + if (page_was_mapped) +- remove_migration_ptes(src, src, false); ++ remove_migration_ptes(src, src, 0); + /* Drop an anon_vma reference if we took one */ + if (anon_vma) + put_anon_vma(anon_vma); +@@ -1306,7 +1360,7 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private, + lru_add_drain(); + + if (old_page_state & PAGE_WAS_MAPPED) +- remove_migration_ptes(src, dst, false); ++ remove_migration_ptes(src, dst, 0); + + out_unlock_both: + folio_unlock(dst); +@@ -1444,7 +1498,7 @@ static int unmap_and_move_huge_page(new_folio_t get_new_folio, + + if (page_was_mapped) + remove_migration_ptes(src, +- rc == MIGRATEPAGE_SUCCESS ? dst : src, false); ++ rc == MIGRATEPAGE_SUCCESS ? dst : src, 0); + + unlock_put_anon: + folio_unlock(dst); +@@ -1682,7 +1736,8 @@ static int migrate_pages_batch(struct list_head *from, + * use _deferred_list. + */ + if (nr_pages > 2 && +- !list_empty(&folio->_deferred_list)) { ++ !list_empty(&folio->_deferred_list) && ++ folio_test_partially_mapped(folio)) { + if (!try_split_folio(folio, split_folios, mode)) { + nr_failed++; + stats->nr_thp_failed += is_thp; +diff --git a/mm/migrate_device.c b/mm/migrate_device.c +index 6d66dc1c6ffa..8f875636b35b 100644 +--- a/mm/migrate_device.c ++++ b/mm/migrate_device.c +@@ -424,7 +424,7 @@ static unsigned long migrate_device_unmap(unsigned long *src_pfns, + continue; + + folio = page_folio(page); +- remove_migration_ptes(folio, folio, false); ++ remove_migration_ptes(folio, folio, 0); + + src_pfns[i] = 0; + folio_unlock(folio); +@@ -837,7 +837,7 @@ void migrate_device_finalize(unsigned long *src_pfns, + + src = page_folio(page); + dst = page_folio(newpage); +- remove_migration_ptes(src, dst, false); ++ remove_migration_ptes(src, dst, 0); + folio_unlock(src); + + if (is_zone_device_page(page)) +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index f8b4dae35fc3..6040ed48da3e 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -962,8 +962,9 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page) + break; + case 2: + /* the second tail page: deferred_list overlaps ->mapping */ +- if (unlikely(!list_empty(&folio->_deferred_list))) { +- bad_page(page, "on deferred list"); ++ if (unlikely(!list_empty(&folio->_deferred_list) && ++ folio_test_partially_mapped(folio))) { ++ bad_page(page, "partially mapped folio on deferred list"); + goto out; + } + break; +diff --git a/mm/rmap.c b/mm/rmap.c +index 2490e727e2dc..77b5185058b4 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -1566,8 +1566,9 @@ static __always_inline void __folio_remove_rmap(struct folio *folio, + * Check partially_mapped first to ensure it is a large folio. + */ + if (folio_test_anon(folio) && partially_mapped && +- list_empty(&folio->_deferred_list)) +- deferred_split_folio(folio); ++ !folio_test_partially_mapped(folio)) ++ deferred_split_folio(folio, true); ++ + } + __folio_mod_stat(folio, -nr, -nr_pmdmapped); + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 47065ea3c47b..6fccc4c99907 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -1232,7 +1232,8 @@ static unsigned int shrink_folio_list(struct list_head *folio_list, + * Split partially mapped folios right away. + * We can free the unmapped pages without IO. + */ +- if (data_race(!list_empty(&folio->_deferred_list)) && ++ if (data_race(!list_empty(&folio->_deferred_list) && ++ folio_test_partially_mapped(folio)) && + split_folio_to_list(folio, folio_list)) + goto activate_locked; + } +diff --git a/mm/vmstat.c b/mm/vmstat.c +index e875f2a4915f..6c48f75eefa9 100644 +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -1384,6 +1384,7 @@ const char * const vmstat_text[] = { + "thp_split_page", + "thp_split_page_failed", + "thp_deferred_split_page", ++ "thp_underused_split_page", + "thp_split_pmd", + "thp_scan_exceed_none_pte", + "thp_scan_exceed_swap_pte", +diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c +index e5e8dafc9d94..eb6d1b9fc362 100644 +--- a/tools/testing/selftests/mm/split_huge_page_test.c ++++ b/tools/testing/selftests/mm/split_huge_page_test.c +@@ -84,6 +84,76 @@ static void write_debugfs(const char *fmt, ...) + write_file(SPLIT_DEBUGFS, input, ret + 1); + } + ++static char *allocate_zero_filled_hugepage(size_t len) ++{ ++ char *result; ++ size_t i; ++ ++ result = memalign(pmd_pagesize, len); ++ if (!result) { ++ printf("Fail to allocate memory\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ madvise(result, len, MADV_HUGEPAGE); ++ ++ for (i = 0; i < len; i++) ++ result[i] = (char)0; ++ ++ return result; ++} ++ ++static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hpages, size_t len) ++{ ++ unsigned long rss_anon_before, rss_anon_after; ++ size_t i; ++ ++ if (!check_huge_anon(one_page, 4, pmd_pagesize)) { ++ printf("No THP is allocated\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ rss_anon_before = rss_anon(); ++ if (!rss_anon_before) { ++ printf("No RssAnon is allocated before split\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ /* split all THPs */ ++ write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, ++ (uint64_t)one_page + len, 0); ++ ++ for (i = 0; i < len; i++) ++ if (one_page[i] != (char)0) { ++ printf("%ld byte corrupted\n", i); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (!check_huge_anon(one_page, 0, pmd_pagesize)) { ++ printf("Still AnonHugePages not split\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ rss_anon_after = rss_anon(); ++ if (rss_anon_after >= rss_anon_before) { ++ printf("Incorrect RssAnon value. Before: %ld After: %ld\n", ++ rss_anon_before, rss_anon_after); ++ exit(EXIT_FAILURE); ++ } ++} ++ ++void split_pmd_zero_pages(void) ++{ ++ char *one_page; ++ int nr_hpages = 4; ++ size_t len = nr_hpages * pmd_pagesize; ++ ++ one_page = allocate_zero_filled_hugepage(len); ++ verify_rss_anon_split_huge_page_all_zeroes(one_page, nr_hpages, len); ++ printf("Split zero filled huge pages successful\n"); ++ free(one_page); ++} ++ + void split_pmd_thp(void) + { + char *one_page; +@@ -431,6 +501,7 @@ int main(int argc, char **argv) + + fd_size = 2 * pmd_pagesize; + ++ split_pmd_zero_pages(); + split_pmd_thp(); + split_pte_mapped_thp(); + split_file_backed_thp(); +diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c +index 5a62530da3b5..d8d0cf04bb57 100644 +--- a/tools/testing/selftests/mm/vm_util.c ++++ b/tools/testing/selftests/mm/vm_util.c +@@ -12,6 +12,7 @@ + + #define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" + #define SMAP_FILE_PATH "/proc/self/smaps" ++#define STATUS_FILE_PATH "/proc/self/status" + #define MAX_LINE_LENGTH 500 + + unsigned int __page_size; +@@ -171,6 +172,27 @@ uint64_t read_pmd_pagesize(void) + return strtoul(buf, NULL, 10); + } + ++unsigned long rss_anon(void) ++{ ++ unsigned long rss_anon = 0; ++ FILE *fp; ++ char buffer[MAX_LINE_LENGTH]; ++ ++ fp = fopen(STATUS_FILE_PATH, "r"); ++ if (!fp) ++ ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, STATUS_FILE_PATH); ++ ++ if (!check_for_pattern(fp, "RssAnon:", buffer, sizeof(buffer))) ++ goto err_out; ++ ++ if (sscanf(buffer, "RssAnon:%10lu kB", &rss_anon) != 1) ++ ksft_exit_fail_msg("Reading status error\n"); ++ ++err_out: ++ fclose(fp); ++ return rss_anon; ++} ++ + bool __check_huge(void *addr, char *pattern, int nr_hpages, + uint64_t hpage_size) + { +diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h +index 9007c420d52c..2eaed8209925 100644 +--- a/tools/testing/selftests/mm/vm_util.h ++++ b/tools/testing/selftests/mm/vm_util.h +@@ -39,6 +39,7 @@ unsigned long pagemap_get_pfn(int fd, char *start); + void clear_softdirty(void); + bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len); + uint64_t read_pmd_pagesize(void); ++unsigned long rss_anon(void); + bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size); + bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size); + bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size); +-- +2.47.0.rc0 + +From e9cb25e7e2c3831c56deacdf47f9b84c25b8eac8 Mon Sep 17 00:00:00 2001 +From: Peter Jung +Date: Thu, 10 Oct 2024 12:39:34 +0200 +Subject: [PATCH 11/11] zstd Signed-off-by: Peter Jung ---