diff --git a/patches/0001-cachyos-base-all.patch b/patches/0001-cachyos-base-all.patch index 9f2f6c8..4b92bf6 100644 --- a/patches/0001-cachyos-base-all.patch +++ b/patches/0001-cachyos-base-all.patch @@ -1,6 +1,6 @@ -From 15ec398f577b2d406a028dc6310ed65a8d5de25e Mon Sep 17 00:00:00 2001 +From 77d9b13b6db0afead521713204ffc4dced7ad0f2 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 4 Nov 2024 14:46:43 +0100 +Date: Mon, 11 Nov 2024 09:17:13 +0100 Subject: [PATCH 01/13] amd-cache-optimizer Signed-off-by: Peter Jung @@ -35,7 +35,7 @@ index 000000000000..1aa6ed0c10d9 + + Format: %s. diff --git a/MAINTAINERS b/MAINTAINERS -index bdae0faf000c..0ebce432a134 100644 +index 21fdaa19229a..5dc7d5839fe9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -965,6 +965,14 @@ Q: https://patchwork.kernel.org/project/linux-rdma/list/ @@ -291,9 +291,9 @@ index 000000000000..679613d02b9a -- 2.47.0 -From 2ec608e43d8d9b73183402468f1c84375534b191 Mon Sep 17 00:00:00 2001 +From 54c4f598ee011b1f701bdc2a924e9930fbf10962 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 4 Nov 2024 14:46:55 +0100 +Date: Mon, 11 Nov 2024 09:17:26 +0100 Subject: [PATCH 02/13] amd-pstate Signed-off-by: Peter Jung @@ -373,7 +373,7 @@ index 4a686f0e5dbf..c0975815980c 100644 struct cpuinfo_x86 { diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h -index aef70336d624..9f9376db64e3 100644 +index 92f3664dd933..fd41103ad342 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -114,6 +114,12 @@ enum x86_topology_domains { @@ -400,10 +400,10 @@ index aef70336d624..9f9376db64e3 100644 { return __max_logical_packages; diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c -index 956984054bf3..59edf64ad9ed 100644 +index aab9d0570841..d745dd586303 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c -@@ -234,8 +234,10 @@ EXPORT_SYMBOL_GPL(amd_detect_prefcore); +@@ -239,8 +239,10 @@ EXPORT_SYMBOL_GPL(amd_detect_prefcore); */ int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) { @@ -414,7 +414,7 @@ index 956984054bf3..59edf64ad9ed 100644 ret = amd_detect_prefcore(&prefcore); if (ret) -@@ -261,6 +263,27 @@ int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) +@@ -266,6 +268,27 @@ int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) break; } } @@ -1137,9 +1137,9 @@ index dd4682857c12..23698d0f4bb4 100644 -- 2.47.0 -From 76091118b35aeec1bf99e555aa14a8814ed9fbca Mon Sep 17 00:00:00 2001 +From 6c34d83a13cc89085c20da699633ac1f6b612596 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 4 Nov 2024 14:53:16 +0100 +Date: Mon, 11 Nov 2024 09:17:40 +0100 Subject: [PATCH 03/13] autofdo Signed-off-by: Peter Jung @@ -1521,10 +1521,10 @@ index 000000000000..92195958e3db + + $ make LLVM=1 CLANG_AUTOFDO_PROFILE= CLANG_PROPELLER_PROFILE_PREFIX= diff --git a/MAINTAINERS b/MAINTAINERS -index 0ebce432a134..919f01186c11 100644 +index 5dc7d5839fe9..3d4709c29704 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -3673,6 +3673,13 @@ F: kernel/audit* +@@ -3674,6 +3674,13 @@ F: kernel/audit* F: lib/*audit.c K: \baudit_[a-z_0-9]\+\b @@ -1538,7 +1538,7 @@ index 0ebce432a134..919f01186c11 100644 AUXILIARY BUS DRIVER M: Greg Kroah-Hartman R: Dave Ertman -@@ -18504,6 +18511,13 @@ S: Maintained +@@ -18505,6 +18512,13 @@ S: Maintained F: include/linux/psi* F: kernel/sched/psi.c @@ -1553,7 +1553,7 @@ index 0ebce432a134..919f01186c11 100644 M: Petr Mladek R: Steven Rostedt diff --git a/Makefile b/Makefile -index b8efbfe9da94..2c3f65c68bec 100644 +index 79192a3024bf..e619df4e09b8 100644 --- a/Makefile +++ b/Makefile @@ -1018,6 +1018,8 @@ include-$(CONFIG_KMSAN) += scripts/Makefile.kmsan @@ -1883,9 +1883,9 @@ index 3d27983dc908..6f64d611faea 100644 -- 2.47.0 -From cb27311981625ec965a2795725ecabad07150096 Mon Sep 17 00:00:00 2001 +From 9f4066f41c5d80b408109ea740488da2cca89fcc Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 4 Nov 2024 14:47:06 +0100 +Date: Mon, 11 Nov 2024 09:17:53 +0100 Subject: [PATCH 04/13] bbr3 Signed-off-by: Peter Jung @@ -5269,9 +5269,9 @@ index 79064580c8c0..697270ce1ea6 100644 -- 2.47.0 -From 7b911d200df5cd539857ad49ab9e726bad8affb8 Mon Sep 17 00:00:00 2001 +From d87383343350575ce203091b2001bde085b12fc9 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 4 Nov 2024 14:47:19 +0100 +Date: Mon, 11 Nov 2024 09:18:05 +0100 Subject: [PATCH 05/13] cachy Signed-off-by: Peter Jung @@ -5333,7 +5333,7 @@ Signed-off-by: Peter Jung create mode 100644 drivers/pci/controller/intel-nvme-remap.c diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index 1518343bbe22..a1773a699743 100644 +index 1666576acc0e..5b0b02e6988a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2248,6 +2248,9 @@ @@ -5363,7 +5363,7 @@ index 1518343bbe22..a1773a699743 100644 Safety option to keep boot IRQs enabled. This should never be necessary. diff --git a/Makefile b/Makefile -index 2c3f65c68bec..969f0cfe7fbd 100644 +index e619df4e09b8..7223a0d87413 100644 --- a/Makefile +++ b/Makefile @@ -801,11 +801,19 @@ KBUILD_CFLAGS += -fno-delete-null-pointer-checks @@ -6172,10 +6172,10 @@ index 97c2d4f15d76..5a3af44d785a 100644 This driver adds a CPUFreq driver which utilizes a fine grain processor performance frequency control range instead of legacy diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c -index b0018f371ea3..23f51eb073e4 100644 +index cd2ac1ba53d2..ac3647df1431 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c -@@ -3800,6 +3800,8 @@ static int __init intel_pstate_setup(char *str) +@@ -3820,6 +3820,8 @@ static int __init intel_pstate_setup(char *str) if (!strcmp(str, "disable")) no_load = 1; @@ -6240,7 +6240,7 @@ index df17e79c45c7..e454488c1a31 100644 + endmenu diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -index 13421a58210d..fc46b3e1d140 100644 +index 07e9ce99694f..cf966e8f61fa 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4473,7 +4473,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) @@ -6342,10 +6342,10 @@ index d5d6ab484e5a..dccba7bcdf97 100644 } diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c -index 80e60ea2d11e..51dea35848f6 100644 +index ee1bcfaae3e3..3388604f222b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c -@@ -2775,7 +2775,10 @@ int smu_get_power_limit(void *handle, +@@ -2785,7 +2785,10 @@ int smu_get_power_limit(void *handle, *limit = smu->max_power_limit; break; case SMU_PPT_LIMIT_MIN: @@ -6357,7 +6357,7 @@ index 80e60ea2d11e..51dea35848f6 100644 break; default: return -EINVAL; -@@ -2799,7 +2802,14 @@ static int smu_set_power_limit(void *handle, uint32_t limit) +@@ -2809,7 +2812,14 @@ static int smu_set_power_limit(void *handle, uint32_t limit) if (smu->ppt_funcs->set_power_limit) return smu->ppt_funcs->set_power_limit(smu, limit_type, limit); @@ -10842,10 +10842,10 @@ index 68a5f1ff3301..291873a34079 100644 void page_cache_ra_unbounded(struct readahead_control *, unsigned long nr_to_read, unsigned long lookahead_count); diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h -index 3625096d5f85..a07f895b8eba 100644 +index 7183e5aca282..56573371a2f8 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h -@@ -158,6 +158,8 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns, +@@ -159,6 +159,8 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns, #ifdef CONFIG_USER_NS @@ -10854,7 +10854,7 @@ index 3625096d5f85..a07f895b8eba 100644 static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) -@@ -191,6 +193,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns); +@@ -192,6 +194,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns); struct ns_common *ns_get_owner(struct ns_common *ns); #else @@ -11235,7 +11235,7 @@ index fcd4c1439cb9..e2f7d709e819 100644 EXPORT_SYMBOL_GPL(dirty_writeback_interval); diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index 94a2ffe28008..43f8f7290bf0 100644 +index c6c7bb3ea71b..d8ba1df0b5e1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -271,7 +271,11 @@ const char * const migratetype_names[MIGRATE_TYPES] = { @@ -11251,7 +11251,7 @@ index 94a2ffe28008..43f8f7290bf0 100644 /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ diff --git a/mm/swap.c b/mm/swap.c -index 835bdf324b76..022f31d0924b 100644 +index b8e3259ea2c4..4e7d140d422c 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -1094,6 +1094,10 @@ void folio_batch_remove_exceptionals(struct folio_batch *fbatch) @@ -11288,7 +11288,7 @@ index bd5183dfd879..3a410f53a07c 100644 /* diff --git a/mm/vmscan.c b/mm/vmscan.c -index ddaaff67642e..6a8a68b2f7f2 100644 +index 28ba2b06fc7d..99568ccfb0fd 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -200,7 +200,11 @@ struct scan_control { @@ -11331,9 +11331,9 @@ index 2b698f8419fe..fd039c41d1c8 100644 -- 2.47.0 -From 26c1af5f52faff4b98337408c9e8fea43d530f54 Mon Sep 17 00:00:00 2001 +From cb33f67ae0f185239bebf9bd3491e5c671c72df0 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 4 Nov 2024 14:47:50 +0100 +Date: Mon, 11 Nov 2024 09:18:19 +0100 Subject: [PATCH 06/13] crypto Signed-off-by: Peter Jung @@ -12936,26 +12936,48 @@ index bbcff1fb78cb..752812bc4991 100644 -- 2.47.0 -From 3032e50511074fb9d365a2a57b5905b9d0437af6 Mon Sep 17 00:00:00 2001 +From 10b2f8b54a3363a982c4d021ed29a191bea5c0b3 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 4 Nov 2024 14:48:52 +0100 +Date: Mon, 11 Nov 2024 09:20:20 +0100 Subject: [PATCH 07/13] fixes Signed-off-by: Peter Jung --- - arch/Kconfig | 4 +-- - drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 +++ - drivers/gpu/drm/drm_edid.c | 47 +++++++++++++++++++++++-- - drivers/misc/lkdtm/bugs.c | 2 +- - include/linux/compiler_attributes.h | 13 ------- - include/linux/compiler_types.h | 19 ++++++++++ - init/Kconfig | 8 +++++ - lib/overflow_kunit.c | 2 +- - mm/huge_memory.c | 21 ++++++++--- - mm/memcontrol.c | 3 +- - mm/page_alloc.c | 5 ++- - scripts/package/PKGBUILD | 5 +++ - 12 files changed, 105 insertions(+), 29 deletions(-) + arch/Kconfig | 4 +- + arch/x86/kernel/cpu/amd.c | 11 ++ + arch/x86/mm/tlb.c | 22 ++-- + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 + + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 108 ++++++------------ + drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 11 +- + .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 20 ++-- + .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 20 ++-- + .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 21 ++-- + .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 17 +-- + .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 17 +-- + .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 33 +++--- + .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 21 ++-- + .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 24 ++-- + drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 8 -- + drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h | 2 - + drivers/gpu/drm/drm_edid.c | 47 +++++++- + drivers/misc/lkdtm/bugs.c | 2 +- + fs/ntfs3/attrib.c | 9 +- + fs/ntfs3/bitmap.c | 62 +++------- + fs/ntfs3/file.c | 34 +++--- + fs/ntfs3/frecord.c | 104 +++-------------- + fs/ntfs3/fsntfs.c | 2 +- + fs/ntfs3/ntfs_fs.h | 3 +- + fs/ntfs3/record.c | 16 ++- + fs/ntfs3/run.c | 40 +++++-- + include/linux/compiler_attributes.h | 13 --- + include/linux/compiler_types.h | 19 +++ + include/linux/mm_types.h | 1 + + init/Kconfig | 8 ++ + kernel/sched/core.c | 46 +++++--- + kernel/sched/sched.h | 5 + + lib/overflow_kunit.c | 2 +- + scripts/package/PKGBUILD | 5 + + 34 files changed, 376 insertions(+), 386 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 00551f340dbe..833b2344ce79 100644 @@ -12979,6 +13001,79 @@ index 00551f340dbe..833b2344ce79 100644 depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS help This value can be used to select the number of bits to use to +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index fab5caec0b72..823f44f7bc94 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -924,6 +924,17 @@ static void init_amd_zen4(struct cpuinfo_x86 *c) + { + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) + msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT); ++ ++ /* ++ * These Zen4 SoCs advertise support for virtualized VMLOAD/VMSAVE ++ * in some BIOS versions but they can lead to random host reboots. ++ */ ++ switch (c->x86_model) { ++ case 0x18 ... 0x1f: ++ case 0x60 ... 0x7f: ++ clear_cpu_cap(c, X86_FEATURE_V_VMSAVE_VMLOAD); ++ break; ++ } + } + + static void init_amd_zen5(struct cpuinfo_x86 *c) +diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c +index 86593d1b787d..1aac4fa90d3d 100644 +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -568,7 +568,7 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, + * mm_cpumask. The TLB shootdown code can figure out from + * cpu_tlbstate_shared.is_lazy whether or not to send an IPI. + */ +- if (WARN_ON_ONCE(prev != &init_mm && ++ if (IS_ENABLED(CONFIG_DEBUG_VM) && WARN_ON_ONCE(prev != &init_mm && + !cpumask_test_cpu(cpu, mm_cpumask(next)))) + cpumask_set_cpu(cpu, mm_cpumask(next)); + +@@ -606,18 +606,15 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, + cond_mitigation(tsk); + + /* +- * Stop remote flushes for the previous mm. +- * Skip kernel threads; we never send init_mm TLB flushing IPIs, +- * but the bitmap manipulation can cause cache line contention. ++ * Leave this CPU in prev's mm_cpumask. Atomic writes to ++ * mm_cpumask can be expensive under contention. The CPU ++ * will be removed lazily at TLB flush time. + */ +- if (prev != &init_mm) { +- VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, +- mm_cpumask(prev))); +- cpumask_clear_cpu(cpu, mm_cpumask(prev)); +- } ++ VM_WARN_ON_ONCE(prev != &init_mm && !cpumask_test_cpu(cpu, ++ mm_cpumask(prev))); + + /* Start receiving IPIs and then read tlb_gen (and LAM below) */ +- if (next != &init_mm) ++ if (next != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next))) + cpumask_set_cpu(cpu, mm_cpumask(next)); + next_tlb_gen = atomic64_read(&next->context.tlb_gen); + +@@ -761,8 +758,11 @@ static void flush_tlb_func(void *info) + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); + + /* Can only happen on remote CPUs */ +- if (f->mm && f->mm != loaded_mm) ++ if (f->mm && f->mm != loaded_mm) { ++ cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(f->mm)); ++ trace_tlb_flush(TLB_REMOTE_WRONG_CPU, 0); + return; ++ } + } + + if (unlikely(loaded_mm == &init_mm)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 852e6f315576..f6a6fc6a4f5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -12995,6 +13090,728 @@ index 852e6f315576..f6a6fc6a4f5c 100644 /* let modprobe override vga console setting */ return pci_register_driver(&amdgpu_kms_pci_driver); +diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +index 3388604f222b..daa870302cc3 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +@@ -1257,42 +1257,18 @@ static int smu_sw_init(void *handle) + INIT_WORK(&smu->interrupt_work, smu_interrupt_work_fn); + atomic64_set(&smu->throttle_int_counter, 0); + smu->watermarks_bitmap = 0; +- smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; +- smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; +- smu->user_dpm_profile.user_workload_mask = 0; + + atomic_set(&smu->smu_power.power_gate.vcn_gated, 1); + atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1); + atomic_set(&smu->smu_power.power_gate.vpe_gated, 1); + atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1); + +- smu->workload_priority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; +- smu->workload_priority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; +- smu->workload_priority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2; +- smu->workload_priority[PP_SMC_POWER_PROFILE_VIDEO] = 3; +- smu->workload_priority[PP_SMC_POWER_PROFILE_VR] = 4; +- smu->workload_priority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; +- smu->workload_priority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; +- + if (smu->is_apu || +- !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) { +- smu->driver_workload_mask = +- 1 << smu->workload_priority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; +- } else { +- smu->driver_workload_mask = +- 1 << smu->workload_priority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; +- smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; +- } +- +- smu->workload_mask = smu->driver_workload_mask | +- smu->user_dpm_profile.user_workload_mask; +- smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; +- smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D; +- smu->workload_setting[2] = PP_SMC_POWER_PROFILE_POWERSAVING; +- smu->workload_setting[3] = PP_SMC_POWER_PROFILE_VIDEO; +- smu->workload_setting[4] = PP_SMC_POWER_PROFILE_VR; +- smu->workload_setting[5] = PP_SMC_POWER_PROFILE_COMPUTE; +- smu->workload_setting[6] = PP_SMC_POWER_PROFILE_CUSTOM; ++ !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) ++ smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; ++ else ++ smu->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; ++ + smu->display_config = &adev->pm.pm_display_cfg; + + smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO; +@@ -2232,24 +2208,23 @@ static int smu_enable_umd_pstate(void *handle, + } + + static int smu_bump_power_profile_mode(struct smu_context *smu, +- long *param, +- uint32_t param_size) ++ long *param, ++ uint32_t param_size, ++ bool enable) + { + int ret = 0; + + if (smu->ppt_funcs->set_power_profile_mode) +- ret = smu->ppt_funcs->set_power_profile_mode(smu, param, param_size); ++ ret = smu->ppt_funcs->set_power_profile_mode(smu, param, param_size, enable); + + return ret; + } + + static int smu_adjust_power_state_dynamic(struct smu_context *smu, + enum amd_dpm_forced_level level, +- bool skip_display_settings, +- bool init) ++ bool skip_display_settings) + { + int ret = 0; +- int index = 0; + long workload[1]; + struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); + +@@ -2287,13 +2262,10 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu, + } + + if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && +- smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { +- index = fls(smu->workload_mask); +- index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; +- workload[0] = smu->workload_setting[index]; ++ smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { ++ workload[0] = smu->power_profile_mode; + +- if (init || smu->power_profile_mode != workload[0]) +- smu_bump_power_profile_mode(smu, workload, 0); ++ smu_bump_power_profile_mode(smu, workload, 0, true); + } + + return ret; +@@ -2313,13 +2285,13 @@ static int smu_handle_task(struct smu_context *smu, + ret = smu_pre_display_config_changed(smu); + if (ret) + return ret; +- ret = smu_adjust_power_state_dynamic(smu, level, false, false); ++ ret = smu_adjust_power_state_dynamic(smu, level, false); + break; + case AMD_PP_TASK_COMPLETE_INIT: +- ret = smu_adjust_power_state_dynamic(smu, level, true, true); ++ ret = smu_adjust_power_state_dynamic(smu, level, true); + break; + case AMD_PP_TASK_READJUST_POWER_STATE: +- ret = smu_adjust_power_state_dynamic(smu, level, true, false); ++ ret = smu_adjust_power_state_dynamic(smu, level, true); + break; + default: + break; +@@ -2341,12 +2313,11 @@ static int smu_handle_dpm_task(void *handle, + + static int smu_switch_power_profile(void *handle, + enum PP_SMC_POWER_PROFILE type, +- bool en) ++ bool enable) + { + struct smu_context *smu = handle; + struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); + long workload[1]; +- uint32_t index; + + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) + return -EOPNOTSUPP; +@@ -2354,24 +2325,15 @@ static int smu_switch_power_profile(void *handle, + if (!(type < PP_SMC_POWER_PROFILE_CUSTOM)) + return -EINVAL; + +- if (!en) { +- smu->driver_workload_mask &= ~(1 << smu->workload_priority[type]); +- index = fls(smu->workload_mask); +- index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; +- workload[0] = smu->workload_setting[index]; +- } else { +- smu->driver_workload_mask |= (1 << smu->workload_priority[type]); +- index = fls(smu->workload_mask); +- index = index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; +- workload[0] = smu->workload_setting[index]; +- } ++ /* don't disable the user's preference */ ++ if (!enable && type == smu->power_profile_mode) ++ return 0; + +- smu->workload_mask = smu->driver_workload_mask | +- smu->user_dpm_profile.user_workload_mask; ++ workload[0] = type; + + if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && +- smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) +- smu_bump_power_profile_mode(smu, workload, 0); ++ smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) ++ smu_bump_power_profile_mode(smu, workload, 0, enable); + + return 0; + } +@@ -3069,21 +3031,25 @@ static int smu_set_power_profile_mode(void *handle, + uint32_t param_size) + { + struct smu_context *smu = handle; +- int ret; ++ long workload[1]; ++ int ret = 0; + + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled || + !smu->ppt_funcs->set_power_profile_mode) + return -EOPNOTSUPP; + +- if (smu->user_dpm_profile.user_workload_mask & +- (1 << smu->workload_priority[param[param_size]])) +- return 0; +- +- smu->user_dpm_profile.user_workload_mask = +- (1 << smu->workload_priority[param[param_size]]); +- smu->workload_mask = smu->user_dpm_profile.user_workload_mask | +- smu->driver_workload_mask; +- ret = smu_bump_power_profile_mode(smu, param, param_size); ++ if (param[param_size] != smu->power_profile_mode) { ++ /* clear the old user preference */ ++ workload[0] = smu->power_profile_mode; ++ ret = smu_bump_power_profile_mode(smu, workload, 0, false); ++ if (ret) ++ return ret; ++ /* set the new user preference */ ++ ret = smu_bump_power_profile_mode(smu, param, param_size, true); ++ if (!ret) ++ /* store the user's preference */ ++ smu->power_profile_mode = param[param_size]; ++ } + + return ret; + } +diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +index d60d9a12a47e..fc54b2c6ede8 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h ++++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +@@ -240,7 +240,6 @@ struct smu_user_dpm_profile { + /* user clock state information */ + uint32_t clk_mask[SMU_CLK_COUNT]; + uint32_t clk_dependency; +- uint32_t user_workload_mask; + }; + + #define SMU_TABLE_INIT(tables, table_id, s, a, d) \ +@@ -557,12 +556,10 @@ struct smu_context { + uint32_t hard_min_uclk_req_from_dal; + bool disable_uclk_switch; + ++ /* backend specific workload mask */ + uint32_t workload_mask; +- uint32_t driver_workload_mask; +- uint32_t workload_priority[WORKLOAD_POLICY_MAX]; +- uint32_t workload_setting[WORKLOAD_POLICY_MAX]; ++ /* default/user workload preference */ + uint32_t power_profile_mode; +- uint32_t default_power_profile_mode; + bool pm_enabled; + bool is_apu; + +@@ -734,8 +731,10 @@ struct pptable_funcs { + * create/set custom power profile modes. + * &input: Power profile mode parameters. + * &size: Size of &input. ++ * &enable: enable/disable the profile + */ +- int (*set_power_profile_mode)(struct smu_context *smu, long *input, uint32_t size); ++ int (*set_power_profile_mode)(struct smu_context *smu, long *input, ++ uint32_t size, bool enable); + + /** + * @dpm_set_vcn_enable: Enable/disable VCN engine dynamic power +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +index 31fe512028f4..ac7fbb815644 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +@@ -1443,7 +1443,8 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, + + static int arcturus_set_power_profile_mode(struct smu_context *smu, + long *input, +- uint32_t size) ++ uint32_t size, ++ bool enable) + { + DpmActivityMonitorCoeffInt_t activity_monitor; + int workload_type = 0; +@@ -1455,8 +1456,9 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu, + return -EINVAL; + } + +- if ((profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) && +- (smu->smc_fw_version >= 0x360d00)) { ++ if (enable && ++ (profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) && ++ (smu->smc_fw_version >= 0x360d00)) { + if (size != 10) + return -EINVAL; + +@@ -1520,18 +1522,18 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu, + return -EINVAL; + } + ++ if (enable) ++ smu->workload_mask |= (1 << workload_type); ++ else ++ smu->workload_mask &= ~(1 << workload_type); + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetWorkloadMask, + smu->workload_mask, + NULL); +- if (ret) { ++ if (ret) + dev_err(smu->adev->dev, "Fail to set workload type %d\n", workload_type); +- return ret; +- } +- +- smu_cmn_assign_power_profile(smu); + +- return 0; ++ return ret; + } + + static int arcturus_set_performance_level(struct smu_context *smu, +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +index 12223f507977..656df9fce471 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +@@ -2004,19 +2004,19 @@ static int navi10_get_power_profile_mode(struct smu_context *smu, char *buf) + return size; + } + +-static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) ++static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, ++ uint32_t size, bool enable) + { + DpmActivityMonitorCoeffInt_t activity_monitor; + int workload_type, ret = 0; ++ uint32_t profile_mode = input[size]; + +- smu->power_profile_mode = input[size]; +- +- if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { +- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode); ++ if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { ++ dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode); + return -EINVAL; + } + +- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { ++ if (enable && profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { + if (size != 10) + return -EINVAL; + +@@ -2078,16 +2078,18 @@ static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, u + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ + workload_type = smu_cmn_to_asic_specific_index(smu, + CMN2ASIC_MAPPING_WORKLOAD, +- smu->power_profile_mode); ++ profile_mode); + if (workload_type < 0) + return -EINVAL; + ++ if (enable) ++ smu->workload_mask |= (1 << workload_type); ++ else ++ smu->workload_mask &= ~(1 << workload_type); + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, + smu->workload_mask, NULL); + if (ret) + dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); +- else +- smu_cmn_assign_power_profile(smu); + + return ret; + } +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +index 3b7b2ec8319a..289cba0f741e 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +@@ -1706,22 +1706,23 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char * + return size; + } + +-static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) ++static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, ++ long *input, uint32_t size, ++ bool enable) + { + + DpmActivityMonitorCoeffIntExternal_t activity_monitor_external; + DpmActivityMonitorCoeffInt_t *activity_monitor = + &(activity_monitor_external.DpmActivityMonitorCoeffInt); ++ uint32_t profile_mode = input[size]; + int workload_type, ret = 0; + +- smu->power_profile_mode = input[size]; +- +- if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { +- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode); ++ if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { ++ dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode); + return -EINVAL; + } + +- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { ++ if (enable && profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { + if (size != 10) + return -EINVAL; + +@@ -1783,16 +1784,18 @@ static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long * + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ + workload_type = smu_cmn_to_asic_specific_index(smu, + CMN2ASIC_MAPPING_WORKLOAD, +- smu->power_profile_mode); ++ profile_mode); + if (workload_type < 0) + return -EINVAL; + ++ if (enable) ++ smu->workload_mask |= (1 << workload_type); ++ else ++ smu->workload_mask &= ~(1 << workload_type); + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, + smu->workload_mask, NULL); + if (ret) + dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); +- else +- smu_cmn_assign_power_profile(smu); + + return ret; + } +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +index 952ee22cbc90..a123ae7809ec 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +@@ -1054,7 +1054,8 @@ static int vangogh_get_power_profile_mode(struct smu_context *smu, + return size; + } + +-static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) ++static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, ++ uint32_t size, bool enable) + { + int workload_type, ret; + uint32_t profile_mode = input[size]; +@@ -1065,7 +1066,7 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, + } + + if (profile_mode == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT || +- profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) ++ profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) + return 0; + + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ +@@ -1078,18 +1079,18 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, + return -EINVAL; + } + ++ if (enable) ++ smu->workload_mask |= (1 << workload_type); ++ else ++ smu->workload_mask &= ~(1 << workload_type); + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify, + smu->workload_mask, + NULL); +- if (ret) { ++ if (ret) + dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", + workload_type); +- return ret; +- } +- +- smu_cmn_assign_power_profile(smu); + +- return 0; ++ return ret; + } + + static int vangogh_set_soft_freq_limited_range(struct smu_context *smu, +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +index 62316a6707ef..25779abc5447 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +@@ -862,7 +862,8 @@ static int renoir_force_clk_levels(struct smu_context *smu, + return ret; + } + +-static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) ++static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, ++ uint32_t size, bool enable) + { + int workload_type, ret; + uint32_t profile_mode = input[size]; +@@ -873,7 +874,7 @@ static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, u + } + + if (profile_mode == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT || +- profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) ++ profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) + return 0; + + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ +@@ -889,17 +890,17 @@ static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, u + return -EINVAL; + } + ++ if (enable) ++ smu->workload_mask |= (1 << workload_type); ++ else ++ smu->workload_mask &= ~(1 << workload_type); + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify, + smu->workload_mask, + NULL); +- if (ret) { ++ if (ret) + dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", workload_type); +- return ret; +- } + +- smu_cmn_assign_power_profile(smu); +- +- return 0; ++ return ret; + } + + static int renoir_set_peak_clock_by_device(struct smu_context *smu) +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +index 5dd7ceca64fe..6861267b68fb 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +@@ -2479,22 +2479,22 @@ static int smu_v13_0_0_get_power_profile_mode(struct smu_context *smu, + + static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, + long *input, +- uint32_t size) ++ uint32_t size, ++ bool enable) + { + DpmActivityMonitorCoeffIntExternal_t activity_monitor_external; + DpmActivityMonitorCoeffInt_t *activity_monitor = + &(activity_monitor_external.DpmActivityMonitorCoeffInt); ++ uint32_t profile_mode = input[size]; + int workload_type, ret = 0; + u32 workload_mask; + +- smu->power_profile_mode = input[size]; +- +- if (smu->power_profile_mode >= PP_SMC_POWER_PROFILE_COUNT) { +- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode); ++ if (profile_mode >= PP_SMC_POWER_PROFILE_COUNT) { ++ dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode); + return -EINVAL; + } + +- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { ++ if (enable && profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { + if (size != 9) + return -EINVAL; + +@@ -2547,13 +2547,18 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ + workload_type = smu_cmn_to_asic_specific_index(smu, + CMN2ASIC_MAPPING_WORKLOAD, +- smu->power_profile_mode); ++ profile_mode); + + if (workload_type < 0) + return -EINVAL; + + workload_mask = 1 << workload_type; + ++ if (enable) ++ smu->workload_mask |= workload_mask; ++ else ++ smu->workload_mask &= ~workload_mask; ++ + /* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */ + if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && + ((smu->adev->pm.fw_version == 0x004e6601) || +@@ -2564,25 +2569,13 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, + CMN2ASIC_MAPPING_WORKLOAD, + PP_SMC_POWER_PROFILE_POWERSAVING); + if (workload_type >= 0) +- workload_mask |= 1 << workload_type; ++ smu->workload_mask |= 1 << workload_type; + } + +- smu->workload_mask |= workload_mask; + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetWorkloadMask, + smu->workload_mask, + NULL); +- if (!ret) { +- smu_cmn_assign_power_profile(smu); +- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) { +- workload_type = smu_cmn_to_asic_specific_index(smu, +- CMN2ASIC_MAPPING_WORKLOAD, +- PP_SMC_POWER_PROFILE_FULLSCREEN3D); +- smu->power_profile_mode = smu->workload_mask & (1 << workload_type) +- ? PP_SMC_POWER_PROFILE_FULLSCREEN3D +- : PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; +- } +- } + + return ret; + } +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +index 9d0b19419de0..bf1f8e63e228 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +@@ -2434,22 +2434,23 @@ do { \ + return result; + } + +-static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) ++static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, ++ long *input, uint32_t size, ++ bool enable) + { + + DpmActivityMonitorCoeffIntExternal_t activity_monitor_external; + DpmActivityMonitorCoeffInt_t *activity_monitor = + &(activity_monitor_external.DpmActivityMonitorCoeffInt); ++ uint32_t profile_mode = input[size]; + int workload_type, ret = 0; + +- smu->power_profile_mode = input[size]; +- +- if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_WINDOW3D) { +- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode); ++ if (profile_mode > PP_SMC_POWER_PROFILE_WINDOW3D) { ++ dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode); + return -EINVAL; + } + +- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { ++ if (enable && profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { + if (size != 8) + return -EINVAL; + +@@ -2496,17 +2497,19 @@ static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *inp + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ + workload_type = smu_cmn_to_asic_specific_index(smu, + CMN2ASIC_MAPPING_WORKLOAD, +- smu->power_profile_mode); ++ profile_mode); + if (workload_type < 0) + return -EINVAL; + ++ if (enable) ++ smu->workload_mask |= (1 << workload_type); ++ else ++ smu->workload_mask &= ~(1 << workload_type); + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, + smu->workload_mask, NULL); + + if (ret) + dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); +- else +- smu_cmn_assign_power_profile(smu); + + return ret; + } +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +index 1aa13d32ceb2..e9c75caaebd7 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +@@ -1731,21 +1731,22 @@ static int smu_v14_0_2_get_power_profile_mode(struct smu_context *smu, + + static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu, + long *input, +- uint32_t size) ++ uint32_t size, ++ bool enable) + { + DpmActivityMonitorCoeffIntExternal_t activity_monitor_external; + DpmActivityMonitorCoeffInt_t *activity_monitor = + &(activity_monitor_external.DpmActivityMonitorCoeffInt); ++ uint32_t profile_mode = input[size]; + int workload_type, ret = 0; + uint32_t current_profile_mode = smu->power_profile_mode; +- smu->power_profile_mode = input[size]; + +- if (smu->power_profile_mode >= PP_SMC_POWER_PROFILE_COUNT) { +- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode); ++ if (profile_mode >= PP_SMC_POWER_PROFILE_COUNT) { ++ dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode); + return -EINVAL; + } + +- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { ++ if (enable && profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { + if (size != 9) + return -EINVAL; + +@@ -1795,7 +1796,7 @@ static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu, + } + } + +- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) ++ if (profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) + smu_v14_0_deep_sleep_control(smu, false); + else if (current_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) + smu_v14_0_deep_sleep_control(smu, true); +@@ -1803,15 +1804,16 @@ static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu, + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ + workload_type = smu_cmn_to_asic_specific_index(smu, + CMN2ASIC_MAPPING_WORKLOAD, +- smu->power_profile_mode); ++ profile_mode); + if (workload_type < 0) + return -EINVAL; + ++ if (enable) ++ smu->workload_mask |= (1 << workload_type); ++ else ++ smu->workload_mask &= ~(1 << workload_type); + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, +- smu->workload_mask, NULL); +- +- if (!ret) +- smu_cmn_assign_power_profile(smu); ++ smu->workload_mask, NULL); + + return ret; + } +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +index bdfc5e617333..91ad434bcdae 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +@@ -1138,14 +1138,6 @@ int smu_cmn_set_mp1_state(struct smu_context *smu, + return ret; + } + +-void smu_cmn_assign_power_profile(struct smu_context *smu) +-{ +- uint32_t index; +- index = fls(smu->workload_mask); +- index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; +- smu->power_profile_mode = smu->workload_setting[index]; +-} +- + bool smu_cmn_is_audio_func_enabled(struct amdgpu_device *adev) + { + struct pci_dev *p = NULL; +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +index 8a801e389659..1de685defe85 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +@@ -130,8 +130,6 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev); + int smu_cmn_set_mp1_state(struct smu_context *smu, + enum pp_mp1_state mp1_state); + +-void smu_cmn_assign_power_profile(struct smu_context *smu); +- + /* + * Helper function to make sysfs_emit_at() happy. Align buf to + * the current page boundary and record the offset. diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 855beafb76ff..ad78059ee954 100644 --- a/drivers/gpu/drm/drm_edid.c @@ -13101,6 +13918,667 @@ index 62ba01525479..376047beea3d 100644 pr_warn("This is expected since this %s was built with a compiler that does not support __counted_by\n", lkdtm_kernel_info); else if (IS_ENABLED(CONFIG_UBSAN_BOUNDS)) +diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c +index 0763202d00c9..8d789b017fa9 100644 +--- a/fs/ntfs3/attrib.c ++++ b/fs/ntfs3/attrib.c +@@ -977,7 +977,7 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, + + /* Check for compressed frame. */ + err = attr_is_frame_compressed(ni, attr_b, vcn >> NTFS_LZNT_CUNIT, +- &hint); ++ &hint, run); + if (err) + goto out; + +@@ -1521,16 +1521,16 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, + * attr_is_frame_compressed - Used to detect compressed frame. + * + * attr - base (primary) attribute segment. ++ * run - run to use, usually == &ni->file.run. + * Only base segments contains valid 'attr->nres.c_unit' + */ + int attr_is_frame_compressed(struct ntfs_inode *ni, struct ATTRIB *attr, +- CLST frame, CLST *clst_data) ++ CLST frame, CLST *clst_data, struct runs_tree *run) + { + int err; + u32 clst_frame; + CLST clen, lcn, vcn, alen, slen, vcn_next; + size_t idx; +- struct runs_tree *run; + + *clst_data = 0; + +@@ -1542,7 +1542,6 @@ int attr_is_frame_compressed(struct ntfs_inode *ni, struct ATTRIB *attr, + + clst_frame = 1u << attr->nres.c_unit; + vcn = frame * clst_frame; +- run = &ni->file.run; + + if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) { + err = attr_load_runs_vcn(ni, attr->type, attr_name(attr), +@@ -1678,7 +1677,7 @@ int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size, + if (err) + goto out; + +- err = attr_is_frame_compressed(ni, attr_b, frame, &clst_data); ++ err = attr_is_frame_compressed(ni, attr_b, frame, &clst_data, run); + if (err) + goto out; + +diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c +index cf4fe21a5039..04107b950717 100644 +--- a/fs/ntfs3/bitmap.c ++++ b/fs/ntfs3/bitmap.c +@@ -710,20 +710,17 @@ int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits) + { + int err = 0; + struct super_block *sb = wnd->sb; +- size_t bits0 = bits; + u32 wbits = 8 * sb->s_blocksize; + size_t iw = bit >> (sb->s_blocksize_bits + 3); + u32 wbit = bit & (wbits - 1); + struct buffer_head *bh; ++ u32 op; + +- while (iw < wnd->nwnd && bits) { +- u32 tail, op; +- ++ for (; iw < wnd->nwnd && bits; iw++, bit += op, bits -= op, wbit = 0) { + if (iw + 1 == wnd->nwnd) + wbits = wnd->bits_last; + +- tail = wbits - wbit; +- op = min_t(u32, tail, bits); ++ op = min_t(u32, wbits - wbit, bits); + + bh = wnd_map(wnd, iw); + if (IS_ERR(bh)) { +@@ -736,20 +733,15 @@ int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits) + ntfs_bitmap_clear_le(bh->b_data, wbit, op); + + wnd->free_bits[iw] += op; ++ wnd->total_zeroes += op; + + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + unlock_buffer(bh); + put_bh(bh); + +- wnd->total_zeroes += op; +- bits -= op; +- wbit = 0; +- iw += 1; ++ wnd_add_free_ext(wnd, bit, op, false); + } +- +- wnd_add_free_ext(wnd, bit, bits0, false); +- + return err; + } + +@@ -760,20 +752,17 @@ int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) + { + int err = 0; + struct super_block *sb = wnd->sb; +- size_t bits0 = bits; + size_t iw = bit >> (sb->s_blocksize_bits + 3); + u32 wbits = 8 * sb->s_blocksize; + u32 wbit = bit & (wbits - 1); + struct buffer_head *bh; ++ u32 op; + +- while (iw < wnd->nwnd && bits) { +- u32 tail, op; +- ++ for (; iw < wnd->nwnd && bits; iw++, bit += op, bits -= op, wbit = 0) { + if (unlikely(iw + 1 == wnd->nwnd)) + wbits = wnd->bits_last; + +- tail = wbits - wbit; +- op = min_t(u32, tail, bits); ++ op = min_t(u32, wbits - wbit, bits); + + bh = wnd_map(wnd, iw); + if (IS_ERR(bh)) { +@@ -785,21 +774,16 @@ int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) + + ntfs_bitmap_set_le(bh->b_data, wbit, op); + wnd->free_bits[iw] -= op; ++ wnd->total_zeroes -= op; + + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + unlock_buffer(bh); + put_bh(bh); + +- wnd->total_zeroes -= op; +- bits -= op; +- wbit = 0; +- iw += 1; ++ if (!RB_EMPTY_ROOT(&wnd->start_tree)) ++ wnd_remove_free_ext(wnd, bit, op); + } +- +- if (!RB_EMPTY_ROOT(&wnd->start_tree)) +- wnd_remove_free_ext(wnd, bit, bits0); +- + return err; + } + +@@ -852,15 +836,13 @@ static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits) + size_t iw = bit >> (sb->s_blocksize_bits + 3); + u32 wbits = 8 * sb->s_blocksize; + u32 wbit = bit & (wbits - 1); ++ u32 op; + +- while (iw < wnd->nwnd && bits) { +- u32 tail, op; +- ++ for (; iw < wnd->nwnd && bits; iw++, bits -= op, wbit = 0) { + if (unlikely(iw + 1 == wnd->nwnd)) + wbits = wnd->bits_last; + +- tail = wbits - wbit; +- op = min_t(u32, tail, bits); ++ op = min_t(u32, wbits - wbit, bits); + + if (wbits != wnd->free_bits[iw]) { + bool ret; +@@ -875,10 +857,6 @@ static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits) + if (!ret) + return false; + } +- +- bits -= op; +- wbit = 0; +- iw += 1; + } + + return true; +@@ -928,6 +906,7 @@ bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) + size_t iw = bit >> (sb->s_blocksize_bits + 3); + u32 wbits = 8 * sb->s_blocksize; + u32 wbit = bit & (wbits - 1); ++ u32 op; + size_t end; + struct rb_node *n; + struct e_node *e; +@@ -945,14 +924,11 @@ bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) + return false; + + use_wnd: +- while (iw < wnd->nwnd && bits) { +- u32 tail, op; +- ++ for (; iw < wnd->nwnd && bits; iw++, bits -= op, wbit = 0) { + if (unlikely(iw + 1 == wnd->nwnd)) + wbits = wnd->bits_last; + +- tail = wbits - wbit; +- op = min_t(u32, tail, bits); ++ op = min_t(u32, wbits - wbit, bits); + + if (wnd->free_bits[iw]) { + bool ret; +@@ -966,10 +942,6 @@ bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) + if (!ret) + goto out; + } +- +- bits -= op; +- wbit = 0; +- iw += 1; + } + ret = true; + +diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c +index e370eaf9bfe2..3f96a11804c9 100644 +--- a/fs/ntfs3/file.c ++++ b/fs/ntfs3/file.c +@@ -182,13 +182,15 @@ static int ntfs_extend_initialized_size(struct file *file, + loff_t pos = valid; + int err; + ++ if (valid >= new_valid) ++ return 0; ++ + if (is_resident(ni)) { + ni->i_valid = new_valid; + return 0; + } + + WARN_ON(is_compressed(ni)); +- WARN_ON(valid >= new_valid); + + for (;;) { + u32 zerofrom, len; +@@ -222,7 +224,7 @@ static int ntfs_extend_initialized_size(struct file *file, + if (err) + goto out; + +- folio_zero_range(folio, zerofrom, folio_size(folio)); ++ folio_zero_range(folio, zerofrom, folio_size(folio) - zerofrom); + + err = ntfs_write_end(file, mapping, pos, len, len, folio, NULL); + if (err < 0) +@@ -987,6 +989,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) + u64 frame_vbo; + pgoff_t index; + bool frame_uptodate; ++ struct folio *folio; + + if (frame_size < PAGE_SIZE) { + /* +@@ -1041,8 +1044,9 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) + if (err) { + for (ip = 0; ip < pages_per_frame; ip++) { + page = pages[ip]; +- unlock_page(page); +- put_page(page); ++ folio = page_folio(page); ++ folio_unlock(folio); ++ folio_put(folio); + } + goto out; + } +@@ -1052,9 +1056,10 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) + off = offset_in_page(valid); + for (; ip < pages_per_frame; ip++, off = 0) { + page = pages[ip]; ++ folio = page_folio(page); + zero_user_segment(page, off, PAGE_SIZE); + flush_dcache_page(page); +- SetPageUptodate(page); ++ folio_mark_uptodate(folio); + } + + ni_lock(ni); +@@ -1063,9 +1068,10 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) + + for (ip = 0; ip < pages_per_frame; ip++) { + page = pages[ip]; +- SetPageUptodate(page); +- unlock_page(page); +- put_page(page); ++ folio = page_folio(page); ++ folio_mark_uptodate(folio); ++ folio_unlock(folio); ++ folio_put(folio); + } + + if (err) +@@ -1107,8 +1113,9 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) + for (ip = 0; ip < pages_per_frame; + ip++) { + page = pages[ip]; +- unlock_page(page); +- put_page(page); ++ folio = page_folio(page); ++ folio_unlock(folio); ++ folio_put(folio); + } + goto out; + } +@@ -1149,9 +1156,10 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) + for (ip = 0; ip < pages_per_frame; ip++) { + page = pages[ip]; + ClearPageDirty(page); +- SetPageUptodate(page); +- unlock_page(page); +- put_page(page); ++ folio = page_folio(page); ++ folio_mark_uptodate(folio); ++ folio_unlock(folio); ++ folio_put(folio); + } + + if (err) +diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c +index 41c7ffad2790..8b39d0ce5f28 100644 +--- a/fs/ntfs3/frecord.c ++++ b/fs/ntfs3/frecord.c +@@ -1900,46 +1900,6 @@ enum REPARSE_SIGN ni_parse_reparse(struct ntfs_inode *ni, struct ATTRIB *attr, + return REPARSE_LINK; + } + +-/* +- * fiemap_fill_next_extent_k - a copy of fiemap_fill_next_extent +- * but it uses 'fe_k' instead of fieinfo->fi_extents_start +- */ +-static int fiemap_fill_next_extent_k(struct fiemap_extent_info *fieinfo, +- struct fiemap_extent *fe_k, u64 logical, +- u64 phys, u64 len, u32 flags) +-{ +- struct fiemap_extent extent; +- +- /* only count the extents */ +- if (fieinfo->fi_extents_max == 0) { +- fieinfo->fi_extents_mapped++; +- return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; +- } +- +- if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max) +- return 1; +- +- if (flags & FIEMAP_EXTENT_DELALLOC) +- flags |= FIEMAP_EXTENT_UNKNOWN; +- if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED) +- flags |= FIEMAP_EXTENT_ENCODED; +- if (flags & (FIEMAP_EXTENT_DATA_TAIL | FIEMAP_EXTENT_DATA_INLINE)) +- flags |= FIEMAP_EXTENT_NOT_ALIGNED; +- +- memset(&extent, 0, sizeof(extent)); +- extent.fe_logical = logical; +- extent.fe_physical = phys; +- extent.fe_length = len; +- extent.fe_flags = flags; +- +- memcpy(fe_k + fieinfo->fi_extents_mapped, &extent, sizeof(extent)); +- +- fieinfo->fi_extents_mapped++; +- if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max) +- return 1; +- return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; +-} +- + /* + * ni_fiemap - Helper for file_fiemap(). + * +@@ -1950,11 +1910,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, + __u64 vbo, __u64 len) + { + int err = 0; +- struct fiemap_extent *fe_k = NULL; + struct ntfs_sb_info *sbi = ni->mi.sbi; + u8 cluster_bits = sbi->cluster_bits; +- struct runs_tree *run; +- struct rw_semaphore *run_lock; ++ struct runs_tree run; + struct ATTRIB *attr; + CLST vcn = vbo >> cluster_bits; + CLST lcn, clen; +@@ -1965,13 +1923,11 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, + u32 flags; + bool ok; + ++ run_init(&run); + if (S_ISDIR(ni->vfs_inode.i_mode)) { +- run = &ni->dir.alloc_run; + attr = ni_find_attr(ni, NULL, NULL, ATTR_ALLOC, I30_NAME, + ARRAY_SIZE(I30_NAME), NULL, NULL); +- run_lock = &ni->dir.run_lock; + } else { +- run = &ni->file.run; + attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, + NULL); + if (!attr) { +@@ -1986,7 +1942,6 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, + "fiemap is not supported for compressed file (cp -r)"); + goto out; + } +- run_lock = &ni->file.run_lock; + } + + if (!attr || !attr->non_res) { +@@ -1998,51 +1953,32 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, + goto out; + } + +- /* +- * To avoid lock problems replace pointer to user memory by pointer to kernel memory. +- */ +- fe_k = kmalloc_array(fieinfo->fi_extents_max, +- sizeof(struct fiemap_extent), +- GFP_NOFS | __GFP_ZERO); +- if (!fe_k) { +- err = -ENOMEM; +- goto out; +- } +- + end = vbo + len; + alloc_size = le64_to_cpu(attr->nres.alloc_size); + if (end > alloc_size) + end = alloc_size; + +- down_read(run_lock); +- + while (vbo < end) { + if (idx == -1) { +- ok = run_lookup_entry(run, vcn, &lcn, &clen, &idx); ++ ok = run_lookup_entry(&run, vcn, &lcn, &clen, &idx); + } else { + CLST vcn_next = vcn; + +- ok = run_get_entry(run, ++idx, &vcn, &lcn, &clen) && ++ ok = run_get_entry(&run, ++idx, &vcn, &lcn, &clen) && + vcn == vcn_next; + if (!ok) + vcn = vcn_next; + } + + if (!ok) { +- up_read(run_lock); +- down_write(run_lock); +- + err = attr_load_runs_vcn(ni, attr->type, + attr_name(attr), +- attr->name_len, run, vcn); +- +- up_write(run_lock); +- down_read(run_lock); ++ attr->name_len, &run, vcn); + + if (err) + break; + +- ok = run_lookup_entry(run, vcn, &lcn, &clen, &idx); ++ ok = run_lookup_entry(&run, vcn, &lcn, &clen, &idx); + + if (!ok) { + err = -EINVAL; +@@ -2067,8 +2003,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, + } else if (is_attr_compressed(attr)) { + CLST clst_data; + +- err = attr_is_frame_compressed( +- ni, attr, vcn >> attr->nres.c_unit, &clst_data); ++ err = attr_is_frame_compressed(ni, attr, ++ vcn >> attr->nres.c_unit, ++ &clst_data, &run); + if (err) + break; + if (clst_data < NTFS_LZNT_CLUSTERS) +@@ -2097,8 +2034,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, + if (vbo + dlen >= end) + flags |= FIEMAP_EXTENT_LAST; + +- err = fiemap_fill_next_extent_k(fieinfo, fe_k, vbo, lbo, +- dlen, flags); ++ err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen, ++ flags); + + if (err < 0) + break; +@@ -2119,8 +2056,7 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, + if (vbo + bytes >= end) + flags |= FIEMAP_EXTENT_LAST; + +- err = fiemap_fill_next_extent_k(fieinfo, fe_k, vbo, lbo, bytes, +- flags); ++ err = fiemap_fill_next_extent(fieinfo, vbo, lbo, bytes, flags); + if (err < 0) + break; + if (err == 1) { +@@ -2131,19 +2067,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, + vbo += bytes; + } + +- up_read(run_lock); +- +- /* +- * Copy to user memory out of lock +- */ +- if (copy_to_user(fieinfo->fi_extents_start, fe_k, +- fieinfo->fi_extents_max * +- sizeof(struct fiemap_extent))) { +- err = -EFAULT; +- } +- + out: +- kfree(fe_k); ++ run_close(&run); + return err; + } + +@@ -2672,7 +2597,8 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, + down_write(&ni->file.run_lock); + run_truncate_around(run, le64_to_cpu(attr->nres.svcn)); + frame = frame_vbo >> (cluster_bits + NTFS_LZNT_CUNIT); +- err = attr_is_frame_compressed(ni, attr, frame, &clst_data); ++ err = attr_is_frame_compressed(ni, attr, frame, &clst_data, ++ run); + up_write(&ni->file.run_lock); + if (err) + goto out1; +diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c +index 0fa636038b4e..03471bc9371c 100644 +--- a/fs/ntfs3/fsntfs.c ++++ b/fs/ntfs3/fsntfs.c +@@ -2699,4 +2699,4 @@ int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len) + out: + __putname(uni); + return err; +-} +\ No newline at end of file ++} +diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h +index 26e1e1379c04..cd8e8374bb5a 100644 +--- a/fs/ntfs3/ntfs_fs.h ++++ b/fs/ntfs3/ntfs_fs.h +@@ -446,7 +446,8 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, + struct runs_tree *run, u64 frame, u64 frames, + u8 frame_bits, u32 *ondisk_size, u64 *vbo_data); + int attr_is_frame_compressed(struct ntfs_inode *ni, struct ATTRIB *attr, +- CLST frame, CLST *clst_data); ++ CLST frame, CLST *clst_data, ++ struct runs_tree *run); + int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size, + u64 new_valid); + int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes); +diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c +index f810f0419d25..61d53d39f3b9 100644 +--- a/fs/ntfs3/record.c ++++ b/fs/ntfs3/record.c +@@ -212,7 +212,7 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) + return NULL; + + if (off >= used || off < MFTRECORD_FIXUP_OFFSET_1 || +- !IS_ALIGNED(off, 4)) { ++ !IS_ALIGNED(off, 8)) { + return NULL; + } + +@@ -236,8 +236,11 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) + off += asize; + } + +- /* Can we use the first field (attr->type). */ +- /* NOTE: this code also checks attr->size availability. */ ++ /* ++ * Can we use the first fields: ++ * attr->type, ++ * attr->size ++ */ + if (off + 8 > used) { + static_assert(ALIGN(sizeof(enum ATTR_TYPE), 8) == 8); + return NULL; +@@ -259,10 +262,17 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) + + asize = le32_to_cpu(attr->size); + ++ if (!IS_ALIGNED(asize, 8)) ++ return NULL; ++ + /* Check overflow and boundary. */ + if (off + asize < off || off + asize > used) + return NULL; + ++ /* Can we use the field attr->non_res. */ ++ if (off + 9 > used) ++ return NULL; ++ + /* Check size of attribute. */ + if (!attr->non_res) { + /* Check resident fields. */ +diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c +index 58e988cd8049..6e86d66197ef 100644 +--- a/fs/ntfs3/run.c ++++ b/fs/ntfs3/run.c +@@ -1055,8 +1055,8 @@ int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, + { + int ret, err; + CLST next_vcn, lcn, len; +- size_t index; +- bool ok; ++ size_t index, done; ++ bool ok, zone; + struct wnd_bitmap *wnd; + + ret = run_unpack(run, sbi, ino, svcn, evcn, vcn, run_buf, run_buf_size); +@@ -1087,8 +1087,9 @@ int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, + continue; + + down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); ++ zone = max(wnd->zone_bit, lcn) < min(wnd->zone_end, lcn + len); + /* Check for free blocks. */ +- ok = wnd_is_used(wnd, lcn, len); ++ ok = !zone && wnd_is_used(wnd, lcn, len); + up_read(&wnd->rw_lock); + if (ok) + continue; +@@ -1096,14 +1097,33 @@ int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, + /* Looks like volume is corrupted. */ + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + +- if (down_write_trylock(&wnd->rw_lock)) { +- /* Mark all zero bits as used in range [lcn, lcn+len). */ +- size_t done; +- err = wnd_set_used_safe(wnd, lcn, len, &done); +- up_write(&wnd->rw_lock); +- if (err) +- return err; ++ if (!down_write_trylock(&wnd->rw_lock)) ++ continue; ++ ++ if (zone) { ++ /* ++ * Range [lcn, lcn + len) intersects with zone. ++ * To avoid complex with zone just turn it off. ++ */ ++ wnd_zone_set(wnd, 0, 0); ++ } ++ ++ /* Mark all zero bits as used in range [lcn, lcn+len). */ ++ err = wnd_set_used_safe(wnd, lcn, len, &done); ++ if (zone) { ++ /* Restore zone. Lock mft run. */ ++ struct rw_semaphore *lock = ++ is_mounted(sbi) ? &sbi->mft.ni->file.run_lock : ++ NULL; ++ if (lock) ++ down_read(lock); ++ ntfs_refresh_zone(sbi); ++ if (lock) ++ up_read(lock); + } ++ up_write(&wnd->rw_lock); ++ if (err) ++ return err; + } + + return ret; diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 32284cd26d52..c16d4199bf92 100644 --- a/include/linux/compiler_attributes.h @@ -13155,6 +14633,18 @@ index 1a957ea2f4fe..639be0f30b45 100644 /* * Apply __counted_by() when the Endianness matches to increase test coverage. */ +diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h +index 6e3bdf8e38bc..6b6f05404304 100644 +--- a/include/linux/mm_types.h ++++ b/include/linux/mm_types.h +@@ -1335,6 +1335,7 @@ enum tlb_flush_reason { + TLB_LOCAL_SHOOTDOWN, + TLB_LOCAL_MM_SHOOTDOWN, + TLB_REMOTE_SEND_IPI, ++ TLB_REMOTE_WRONG_CPU, + NR_TLB_FLUSH_REASONS, + }; + diff --git a/init/Kconfig b/init/Kconfig index 38dbd16da6a9..504e8a7c4e2a 100644 --- a/init/Kconfig @@ -13174,6 +14664,83 @@ index 38dbd16da6a9..504e8a7c4e2a 100644 config PAHOLE_VERSION int default $(shell,$(srctree)/scripts/pahole-version.sh $(PAHOLE)) +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 719e0ed1e976..b35752fdbcc0 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -3734,28 +3734,38 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, + */ + static int ttwu_runnable(struct task_struct *p, int wake_flags) + { +- struct rq_flags rf; +- struct rq *rq; +- int ret = 0; ++ CLASS(__task_rq_lock, rq_guard)(p); ++ struct rq *rq = rq_guard.rq; + +- rq = __task_rq_lock(p, &rf); +- if (task_on_rq_queued(p)) { +- update_rq_clock(rq); +- if (p->se.sched_delayed) +- enqueue_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_DELAYED); +- if (!task_on_cpu(rq, p)) { +- /* +- * When on_rq && !on_cpu the task is preempted, see if +- * it should preempt the task that is current now. +- */ +- wakeup_preempt(rq, p, wake_flags); ++ if (!task_on_rq_queued(p)) ++ return 0; ++ ++ update_rq_clock(rq); ++ if (p->se.sched_delayed) { ++ int queue_flags = ENQUEUE_DELAYED | ENQUEUE_NOCLOCK; ++ ++ /* ++ * Since sched_delayed means we cannot be current anywhere, ++ * dequeue it here and have it fall through to the ++ * select_task_rq() case further along the ttwu() path. ++ */ ++ if (rq->nr_running > 1 && p->nr_cpus_allowed > 1) { ++ dequeue_task(rq, p, DEQUEUE_SLEEP | queue_flags); ++ return 0; + } +- ttwu_do_wakeup(p); +- ret = 1; ++ ++ enqueue_task(rq, p, queue_flags); + } +- __task_rq_unlock(rq, &rf); ++ if (!task_on_cpu(rq, p)) { ++ /* ++ * When on_rq && !on_cpu the task is preempted, see if ++ * it should preempt the task that is current now. ++ */ ++ wakeup_preempt(rq, p, wake_flags); ++ } ++ ttwu_do_wakeup(p); + +- return ret; ++ return 1; + } + + #ifdef CONFIG_SMP +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index f610df2e0811..e7fbb1d0f316 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -1779,6 +1779,11 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) + raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); + } + ++DEFINE_LOCK_GUARD_1(__task_rq_lock, struct task_struct, ++ _T->rq = __task_rq_lock(_T->lock, &_T->rf), ++ __task_rq_unlock(_T->rq, &_T->rf), ++ struct rq *rq; struct rq_flags rf) ++ + DEFINE_LOCK_GUARD_1(task_rq_lock, struct task_struct, + _T->rq = task_rq_lock(_T->lock, &_T->rf), + task_rq_unlock(_T->rq, _T->lock, &_T->rf), diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c index 2abc78367dd1..5222c6393f11 100644 --- a/lib/overflow_kunit.c @@ -13187,82 +14754,6 @@ index 2abc78367dd1..5222c6393f11 100644 int expected_raw_size = sizeof(struct foo); #else int expected_raw_size = sizeof(struct foo) + 2 * sizeof(s16); -diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index 2fb328880b50..a1d345f1680c 100644 ---- a/mm/huge_memory.c -+++ b/mm/huge_memory.c -@@ -3718,8 +3718,8 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, - struct deferred_split *ds_queue = &pgdata->deferred_split_queue; - unsigned long flags; - LIST_HEAD(list); -- struct folio *folio, *next; -- int split = 0; -+ struct folio *folio, *next, *prev = NULL; -+ int split = 0, removed = 0; - - #ifdef CONFIG_MEMCG - if (sc->memcg) -@@ -3775,15 +3775,28 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, - */ - if (!did_split && !folio_test_partially_mapped(folio)) { - list_del_init(&folio->_deferred_list); -- ds_queue->split_queue_len--; -+ removed++; -+ } else { -+ /* -+ * That unlocked list_del_init() above would be unsafe, -+ * unless its folio is separated from any earlier folios -+ * left on the list (which may be concurrently unqueued) -+ * by one safe folio with refcount still raised. -+ */ -+ swap(folio, prev); - } -- folio_put(folio); -+ if (folio) -+ folio_put(folio); - } - - spin_lock_irqsave(&ds_queue->split_queue_lock, flags); - list_splice_tail(&list, &ds_queue->split_queue); -+ ds_queue->split_queue_len -= removed; - spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); - -+ if (prev) -+ folio_put(prev); -+ - /* - * Stop shrinker if we didn't split any page, but the queue is empty. - * This can happen if pages were freed under us. -diff --git a/mm/memcontrol.c b/mm/memcontrol.c -index 7845c64a2c57..2703227cce88 100644 ---- a/mm/memcontrol.c -+++ b/mm/memcontrol.c -@@ -4631,8 +4631,7 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug) - VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); - VM_BUG_ON_FOLIO(folio_order(folio) > 1 && - !folio_test_hugetlb(folio) && -- !list_empty(&folio->_deferred_list) && -- folio_test_partially_mapped(folio), folio); -+ !list_empty(&folio->_deferred_list), folio); - - /* - * Nobody should be changing or seriously looking at -diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index 43f8f7290bf0..e6e51d4a6f3e 100644 ---- a/mm/page_alloc.c -+++ b/mm/page_alloc.c -@@ -965,9 +965,8 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page) - break; - case 2: - /* the second tail page: deferred_list overlaps ->mapping */ -- if (unlikely(!list_empty(&folio->_deferred_list) && -- folio_test_partially_mapped(folio))) { -- bad_page(page, "partially mapped folio on deferred list"); -+ if (unlikely(!list_empty(&folio->_deferred_list))) { -+ bad_page(page, "on deferred list"); - goto out; - } - break; diff --git a/scripts/package/PKGBUILD b/scripts/package/PKGBUILD index f83493838cf9..4010899652b8 100644 --- a/scripts/package/PKGBUILD @@ -13282,9 +14773,9 @@ index f83493838cf9..4010899652b8 100644 -- 2.47.0 -From 09b968fac790022322af9311ce41ac43e80a1d89 Mon Sep 17 00:00:00 2001 +From 9001aa3709fdcb60967ed205910b873f14eed07b Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 4 Nov 2024 14:49:08 +0100 +Date: Mon, 11 Nov 2024 09:20:32 +0100 Subject: [PATCH 08/13] ksm Signed-off-by: Peter Jung @@ -13715,9 +15206,9 @@ index 01071182763e..7394bad8178e 100644 -- 2.47.0 -From ef72e883f6dfa297aa8690dd76f1365a31cde616 Mon Sep 17 00:00:00 2001 +From fe2e45be58ad904ff3ab40356c10d73e057c18fd Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 4 Nov 2024 14:49:23 +0100 +Date: Mon, 11 Nov 2024 09:20:44 +0100 Subject: [PATCH 09/13] ntsync Signed-off-by: Peter Jung @@ -14157,10 +15648,10 @@ index 000000000000..767844637a7d + ``objs`` and in ``alert``. If this is attempted, the function fails + with ``EINVAL``. diff --git a/MAINTAINERS b/MAINTAINERS -index 919f01186c11..6113837d502a 100644 +index 3d4709c29704..3ca514d82269 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -16500,6 +16500,15 @@ T: git https://github.com/Paragon-Software-Group/linux-ntfs3.git +@@ -16501,6 +16501,15 @@ T: git https://github.com/Paragon-Software-Group/linux-ntfs3.git F: Documentation/filesystems/ntfs3.rst F: fs/ntfs3/ @@ -16804,9 +18295,9 @@ index 000000000000..5fa2c9a0768c -- 2.47.0 -From a4bf3a3e048257500b1ffca9827570c7dfd10aff Mon Sep 17 00:00:00 2001 +From b71dfd054ca2932d63b75136a3191c01e74a374c Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 4 Nov 2024 14:49:50 +0100 +Date: Mon, 11 Nov 2024 09:20:57 +0100 Subject: [PATCH 10/13] openvpn-dco Signed-off-by: Peter Jung @@ -17278,10 +18769,10 @@ index 000000000000..79339c25d607 + - + name: peers diff --git a/MAINTAINERS b/MAINTAINERS -index 6113837d502a..271b59a9c585 100644 +index 3ca514d82269..f509050e63ed 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -17361,6 +17361,17 @@ F: arch/openrisc/ +@@ -17362,6 +17362,17 @@ F: arch/openrisc/ F: drivers/irqchip/irq-ompic.c F: drivers/irqchip/irq-or1k-* @@ -26754,20 +28245,20 @@ index 000000000000..32f14bd9347a -- 2.47.0 -From f3a2622d161f49a41f8a5cd4861c1d533f338c3b Mon Sep 17 00:00:00 2001 +From df5fd664d8ea01bad5af3ff4c6575bda0917383c Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 4 Nov 2024 15:05:02 +0100 +Date: Mon, 11 Nov 2024 09:22:05 +0100 Subject: [PATCH 11/13] perf-per-core Signed-off-by: Peter Jung --- Documentation/arch/x86/topology.rst | 4 + - arch/x86/events/rapl.c | 412 ++++++++++++++++++-------- + arch/x86/events/rapl.c | 408 ++++++++++++++++++-------- arch/x86/include/asm/processor.h | 1 + arch/x86/include/asm/topology.h | 1 + arch/x86/kernel/cpu/debugfs.c | 1 + arch/x86/kernel/cpu/topology_common.c | 1 + - 6 files changed, 292 insertions(+), 128 deletions(-) + 6 files changed, 288 insertions(+), 128 deletions(-) diff --git a/Documentation/arch/x86/topology.rst b/Documentation/arch/x86/topology.rst index 7352ab89a55a..c12837e61bda 100644 @@ -26785,7 +28276,7 @@ index 7352ab89a55a..c12837e61bda 100644 System topology examples diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c -index a481a939862e..b91d194ba51b 100644 +index a481a939862e..6b405bf46781 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -39,6 +39,10 @@ @@ -26840,18 +28331,7 @@ index a481a939862e..b91d194ba51b 100644 /* * event code: LSB 8 bits, passed in attr->config * any other bit is reserved -@@ -116,6 +131,10 @@ static struct perf_pmu_events_attr event_attr_##v = { \ - (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || \ - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) - -+#define rapl_pmu_is_pkg_scope() \ -+ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || \ -+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) -+ - struct rapl_pmu { - raw_spinlock_t lock; - int n_active; -@@ -128,8 +147,9 @@ struct rapl_pmu { +@@ -128,8 +143,9 @@ struct rapl_pmu { struct rapl_pmus { struct pmu pmu; @@ -26862,7 +28342,7 @@ index a481a939862e..b91d194ba51b 100644 }; enum rapl_unit_quirk { -@@ -139,19 +159,22 @@ enum rapl_unit_quirk { +@@ -139,19 +155,22 @@ enum rapl_unit_quirk { }; struct rapl_model { @@ -26892,7 +28372,7 @@ index a481a939862e..b91d194ba51b 100644 /* * Helper functions to get the correct topology macros according to the -@@ -177,7 +200,8 @@ static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) +@@ -177,7 +196,8 @@ static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) * The unsigned check also catches the '-1' return value for non * existent mappings in the topology map. */ @@ -26902,7 +28382,7 @@ index a481a939862e..b91d194ba51b 100644 } static inline u64 rapl_read_counter(struct perf_event *event) -@@ -189,7 +213,7 @@ static inline u64 rapl_read_counter(struct perf_event *event) +@@ -189,7 +209,7 @@ static inline u64 rapl_read_counter(struct perf_event *event) static inline u64 rapl_scale(u64 v, int cfg) { @@ -26911,7 +28391,7 @@ index a481a939862e..b91d194ba51b 100644 pr_warn("Invalid domain %d, failed to scale data\n", cfg); return v; } -@@ -241,34 +265,34 @@ static void rapl_start_hrtimer(struct rapl_pmu *pmu) +@@ -241,34 +261,34 @@ static void rapl_start_hrtimer(struct rapl_pmu *pmu) static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) { @@ -26955,7 +28435,7 @@ index a481a939862e..b91d194ba51b 100644 struct perf_event *event) { if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) -@@ -276,39 +300,39 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu, +@@ -276,39 +296,39 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu, event->hw.state = 0; @@ -27009,7 +28489,7 @@ index a481a939862e..b91d194ba51b 100644 list_del(&event->active_entry); -@@ -326,23 +350,23 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) +@@ -326,23 +346,23 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) hwc->state |= PERF_HES_UPTODATE; } @@ -27038,7 +28518,7 @@ index a481a939862e..b91d194ba51b 100644 return 0; } -@@ -356,10 +380,14 @@ static int rapl_pmu_event_init(struct perf_event *event) +@@ -356,10 +376,14 @@ static int rapl_pmu_event_init(struct perf_event *event) { u64 cfg = event->attr.config & RAPL_EVENT_MASK; int bit, ret = 0; @@ -27055,7 +28535,7 @@ index a481a939862e..b91d194ba51b 100644 return -ENOENT; /* check only supported bits are set */ -@@ -369,16 +397,18 @@ static int rapl_pmu_event_init(struct perf_event *event) +@@ -369,16 +393,18 @@ static int rapl_pmu_event_init(struct perf_event *event) if (event->cpu < 0) return -EINVAL; @@ -27078,7 +28558,7 @@ index a481a939862e..b91d194ba51b 100644 return -EINVAL; /* unsupported modes and filters */ -@@ -386,12 +416,18 @@ static int rapl_pmu_event_init(struct perf_event *event) +@@ -386,12 +412,18 @@ static int rapl_pmu_event_init(struct perf_event *event) return -EINVAL; /* must be done before validate_group */ @@ -27102,7 +28582,7 @@ index a481a939862e..b91d194ba51b 100644 event->hw.config = cfg; event->hw.idx = bit; -@@ -406,7 +442,7 @@ static void rapl_pmu_event_read(struct perf_event *event) +@@ -406,7 +438,7 @@ static void rapl_pmu_event_read(struct perf_event *event) static ssize_t rapl_get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf) { @@ -27111,7 +28591,7 @@ index a481a939862e..b91d194ba51b 100644 } static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL); -@@ -420,17 +456,38 @@ static struct attribute_group rapl_pmu_attr_group = { +@@ -420,17 +452,38 @@ static struct attribute_group rapl_pmu_attr_group = { .attrs = rapl_pmu_attrs, }; @@ -27150,7 +28630,7 @@ index a481a939862e..b91d194ba51b 100644 /* * we compute in 0.23 nJ increments regardless of MSR -@@ -440,6 +497,7 @@ RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890 +@@ -440,6 +493,7 @@ RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890 RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10"); RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10"); RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10"); @@ -27158,7 +28638,7 @@ index a481a939862e..b91d194ba51b 100644 /* * There are no default events, but we need to create -@@ -473,6 +531,13 @@ static const struct attribute_group *rapl_attr_groups[] = { +@@ -473,6 +527,13 @@ static const struct attribute_group *rapl_attr_groups[] = { NULL, }; @@ -27172,7 +28652,7 @@ index a481a939862e..b91d194ba51b 100644 static struct attribute *rapl_events_cores[] = { EVENT_PTR(rapl_cores), EVENT_PTR(rapl_cores_unit), -@@ -533,6 +598,18 @@ static struct attribute_group rapl_events_psys_group = { +@@ -533,6 +594,18 @@ static struct attribute_group rapl_events_psys_group = { .attrs = rapl_events_psys, }; @@ -27191,7 +28671,7 @@ index a481a939862e..b91d194ba51b 100644 static bool test_msr(int idx, void *data) { return test_bit(idx, (unsigned long *) data); -@@ -558,11 +635,11 @@ static struct perf_msr intel_rapl_spr_msrs[] = { +@@ -558,11 +631,11 @@ static struct perf_msr intel_rapl_spr_msrs[] = { }; /* @@ -27206,7 +28686,7 @@ index a481a939862e..b91d194ba51b 100644 [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, NULL, false, 0 }, [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK }, [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, NULL, false, 0 }, -@@ -570,77 +647,104 @@ static struct perf_msr amd_rapl_msrs[] = { +@@ -570,77 +643,104 @@ static struct perf_msr amd_rapl_msrs[] = { [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 }, }; @@ -27342,26 +28822,26 @@ index a481a939862e..b91d194ba51b 100644 /* * DRAM domain on HSW server and KNL has fixed energy unit which can be * different than the unit from power unit MSR. See -@@ -679,22 +783,29 @@ static void __init rapl_advertise(void) +@@ -679,22 +779,29 @@ static void __init rapl_advertise(void) int i; pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n", - hweight32(rapl_cntr_mask), rapl_timer_ms); + hweight32(rapl_pkg_cntr_mask) + hweight32(rapl_core_cntr_mask), rapl_timer_ms); - -- for (i = 0; i < NR_RAPL_DOMAINS; i++) { -- if (rapl_cntr_mask & (1 << i)) { ++ + for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) { + if (rapl_pkg_cntr_mask & (1 << i)) { - pr_info("hw unit of domain %s 2^-%d Joules\n", -- rapl_domain_names[i], rapl_hw_unit[i]); ++ pr_info("hw unit of domain %s 2^-%d Joules\n", + rapl_pkg_domain_names[i], rapl_hw_unit[i]); + } + } -+ + +- for (i = 0; i < NR_RAPL_DOMAINS; i++) { +- if (rapl_cntr_mask & (1 << i)) { + for (i = 0; i < NR_RAPL_CORE_DOMAINS; i++) { + if (rapl_core_cntr_mask & (1 << i)) { -+ pr_info("hw unit of domain %s 2^-%d Joules\n", + pr_info("hw unit of domain %s 2^-%d Joules\n", +- rapl_domain_names[i], rapl_hw_unit[i]); + rapl_core_domain_names[i], rapl_hw_unit[i]); } } @@ -27378,7 +28858,7 @@ index a481a939862e..b91d194ba51b 100644 kfree(rapl_pmus); } -@@ -707,14 +818,17 @@ static const struct attribute_group *rapl_attr_update[] = { +@@ -707,14 +814,17 @@ static const struct attribute_group *rapl_attr_update[] = { NULL, }; @@ -27402,7 +28882,7 @@ index a481a939862e..b91d194ba51b 100644 if (!rapl_pmus) return -ENOMEM; -@@ -730,75 +844,80 @@ static int __init init_rapl_pmus(void) +@@ -730,75 +840,80 @@ static int __init init_rapl_pmus(void) rapl_pmus->pmu.read = rapl_pmu_event_read; rapl_pmus->pmu.module = THIS_MODULE; rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; @@ -27499,7 +28979,7 @@ index a481a939862e..b91d194ba51b 100644 }; static const struct x86_cpu_id rapl_model_match[] __initconst = { -@@ -854,28 +973,47 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); +@@ -854,28 +969,47 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); static int __init rapl_pmu_init(void) { const struct x86_cpu_id *id; @@ -27516,15 +28996,15 @@ index a481a939862e..b91d194ba51b 100644 return -ENODEV; - rm = (struct rapl_model *) id->driver_data; -- -- rapl_msrs = rm->rapl_msrs; + rapl_model = (struct rapl_model *) id->driver_data; -- rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX, -- false, (void *) &rm->events); +- rapl_msrs = rm->rapl_msrs; + rapl_pkg_cntr_mask = perf_msr_probe(rapl_model->rapl_pkg_msrs, PERF_RAPL_PKG_EVENTS_MAX, + false, (void *) &rapl_model->pkg_events); +- rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX, +- false, (void *) &rm->events); +- - ret = rapl_check_hw_unit(rm); + ret = rapl_check_hw_unit(); if (ret) @@ -27555,7 +29035,7 @@ index a481a939862e..b91d194ba51b 100644 /* * Install callbacks. Core will call them for each online cpu. */ -@@ -885,10 +1023,24 @@ static int __init rapl_pmu_init(void) +@@ -885,10 +1019,24 @@ static int __init rapl_pmu_init(void) if (ret) goto out; @@ -27581,7 +29061,7 @@ index a481a939862e..b91d194ba51b 100644 rapl_advertise(); return 0; -@@ -896,7 +1048,7 @@ static int __init rapl_pmu_init(void) +@@ -896,7 +1044,7 @@ static int __init rapl_pmu_init(void) cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); out: pr_warn("Initialization failed (%d), disabled\n", ret); @@ -27590,7 +29070,7 @@ index a481a939862e..b91d194ba51b 100644 return ret; } module_init(rapl_pmu_init); -@@ -904,7 +1056,11 @@ module_init(rapl_pmu_init); +@@ -904,7 +1052,11 @@ module_init(rapl_pmu_init); static void __exit intel_rapl_exit(void) { cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE); @@ -27617,7 +29097,7 @@ index c0975815980c..cfd8a5591421 100644 // AMD Node ID and Nodes per Package info u32 amd_node_id; diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h -index 9f9376db64e3..1c55229efb1e 100644 +index fd41103ad342..3973cb9bb2e6 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -143,6 +143,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); @@ -27655,9 +29135,9 @@ index 8277c64f88db..b5a5e1411469 100644 -- 2.47.0 -From ec1991f4667217d8f154c08c365cedb721bcf4eb Mon Sep 17 00:00:00 2001 +From 063003cdcfa118a0e75173a1c02094c2978bc532 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 4 Nov 2024 14:52:03 +0100 +Date: Mon, 11 Nov 2024 09:22:19 +0100 Subject: [PATCH 12/13] t2 Signed-off-by: Peter Jung @@ -27813,10 +29293,10 @@ index 14e093da3ccd..ccd7bd29a6d6 100644 ---- diff --git a/MAINTAINERS b/MAINTAINERS -index 271b59a9c585..50764f9b3bb9 100644 +index f509050e63ed..a3bbf3d5fb9e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -7011,6 +7011,12 @@ S: Supported +@@ -7013,6 +7013,12 @@ S: Supported T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: drivers/gpu/drm/sun4i/sun8i* @@ -37904,9 +39384,9 @@ index 4427572b2477..b60c99d61882 100755 -- 2.47.0 -From 42c5adef2d2bb73955bbbbee8b713a87fdd025f8 Mon Sep 17 00:00:00 2001 +From 126ef40989e28bba3ff5a4bb41333942de1c9dbf Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 4 Nov 2024 14:52:12 +0100 +Date: Mon, 11 Nov 2024 09:22:31 +0100 Subject: [PATCH 13/13] zstd Signed-off-by: Peter Jung @@ -56555,4 +58035,3 @@ index 469fc3059be0..0ae819f0c927 100644 -- 2.47.0 -