From a1a480f0e572761cf589415d3ca382485fbd0d79 Mon Sep 17 00:00:00 2001 From: ferreo Date: Fri, 18 Oct 2024 17:03:54 +0200 Subject: [PATCH] Update patches/0001-cachyos-base-all.patch --- patches/0001-cachyos-base-all.patch | 2655 +++++++++++++++++++++++++-- 1 file changed, 2483 insertions(+), 172 deletions(-) diff --git a/patches/0001-cachyos-base-all.patch b/patches/0001-cachyos-base-all.patch index f2dbe5c..ec70352 100644 --- a/patches/0001-cachyos-base-all.patch +++ b/patches/0001-cachyos-base-all.patch @@ -1,7 +1,7 @@ -From 1dba870e7ad0f4c52b9ffcd2aa68dd731a8f0761 Mon Sep 17 00:00:00 2001 +From d19107dae94b5d3d63245e58e7af1e849285cf7b Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 10 Oct 2024 12:36:17 +0200 -Subject: [PATCH 01/11] address-masking +Date: Thu, 17 Oct 2024 16:02:20 +0200 +Subject: [PATCH 01/14] address-masking Signed-off-by: Peter Jung --- @@ -110,10 +110,2342 @@ index feeb935a2299..6e489f9e90f1 100644 -- 2.47.0.rc0 -From 158906b885fd180a24f22a91b7ce6bebccd4237e Mon Sep 17 00:00:00 2001 +From 00a3bac7b52e5a69c2b26d3eb6d78c65b4b63f83 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 10 Oct 2024 12:36:51 +0200 -Subject: [PATCH 02/11] bbr3 +Date: Thu, 17 Oct 2024 16:02:32 +0200 +Subject: [PATCH 02/14] amd-cache-optimizer + +Signed-off-by: Peter Jung +--- + .../sysfs-bus-platform-drivers-amd_x3d_vcache | 14 ++ + MAINTAINERS | 8 + + drivers/platform/x86/amd/Kconfig | 12 ++ + drivers/platform/x86/amd/Makefile | 2 + + drivers/platform/x86/amd/x3d_vcache.c | 193 ++++++++++++++++++ + 5 files changed, 229 insertions(+) + create mode 100644 Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache + create mode 100644 drivers/platform/x86/amd/x3d_vcache.c + +diff --git a/Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache b/Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache +new file mode 100644 +index 000000000000..1aa6ed0c10d9 +--- /dev/null ++++ b/Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache +@@ -0,0 +1,14 @@ ++What: /sys/bus/platform/drivers/amd_x3d_vcache/AMDI0101\:00/amd_x3d_mode ++Date: October 2024 ++KernelVersion: 6.13 ++Contact: Basavaraj Natikar ++Description: (RW) AMD 3D V-Cache optimizer allows users to switch CPU core ++ rankings dynamically. ++ ++ This file switches between these two modes: ++ - "frequency" cores within the faster CCD are prioritized before ++ those in the slower CCD. ++ - "cache" cores within the larger L3 CCD are prioritized before ++ those in the smaller L3 CCD. ++ ++ Format: %s. 
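As an illustrative aside (not part of the applied diff), a minimal userspace
sketch of driving this attribute; the AMDI0101:00 instance name is taken from
the ABI path above, and writing requires root:

	/* Switch the optimizer to "cache" mode; "frequency" is the default. */
	#include <stdio.h>

	int main(void)
	{
		const char *path = "/sys/bus/platform/drivers/amd_x3d_vcache/"
				   "AMDI0101:00/amd_x3d_mode";
		FILE *f = fopen(path, "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		fputs("cache", f);
		return fclose(f) ? 1 : 0;
	}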
+diff --git a/MAINTAINERS b/MAINTAINERS +index cc40a9d9b8cd..2ba00c0cd701 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -948,6 +948,14 @@ Q: https://patchwork.kernel.org/project/linux-rdma/list/ + F: drivers/infiniband/hw/efa/ + F: include/uapi/rdma/efa-abi.h + ++AMD 3D V-CACHE PERFORMANCE OPTIMIZER DRIVER ++M: Basavaraj Natikar ++R: Mario Limonciello ++L: platform-driver-x86@vger.kernel.org ++S: Supported ++F: Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache ++F: drivers/platform/x86/amd/x3d_vcache.c ++ + AMD ADDRESS TRANSLATION LIBRARY (ATL) + M: Yazen Ghannam + L: linux-edac@vger.kernel.org +diff --git a/drivers/platform/x86/amd/Kconfig b/drivers/platform/x86/amd/Kconfig +index f88682d36447..d73f691020d0 100644 +--- a/drivers/platform/x86/amd/Kconfig ++++ b/drivers/platform/x86/amd/Kconfig +@@ -6,6 +6,18 @@ + source "drivers/platform/x86/amd/pmf/Kconfig" + source "drivers/platform/x86/amd/pmc/Kconfig" + ++config AMD_3D_VCACHE ++ tristate "AMD 3D V-Cache Performance Optimizer Driver" ++ depends on X86_64 && ACPI ++ help ++ The driver provides a sysfs interface, enabling the setting of a bias ++ that alters CPU core reordering. This bias prefers cores with higher ++ frequencies or larger L3 caches on processors supporting AMD 3D V-Cache ++ technology. ++ ++ If you choose to compile this driver as a module the module will be ++ called amd_3d_vcache. ++ + config AMD_HSMP + tristate "AMD HSMP Driver" + depends on AMD_NB && X86_64 && ACPI +diff --git a/drivers/platform/x86/amd/Makefile b/drivers/platform/x86/amd/Makefile +index dcec0a46f8af..16e4cce02242 100644 +--- a/drivers/platform/x86/amd/Makefile ++++ b/drivers/platform/x86/amd/Makefile +@@ -4,6 +4,8 @@ + # AMD x86 Platform-Specific Drivers + # + ++obj-$(CONFIG_AMD_3D_VCACHE) += amd_3d_vcache.o ++amd_3d_vcache-objs := x3d_vcache.o + obj-$(CONFIG_AMD_PMC) += pmc/ + amd_hsmp-y := hsmp.o + obj-$(CONFIG_AMD_HSMP) += amd_hsmp.o +diff --git a/drivers/platform/x86/amd/x3d_vcache.c b/drivers/platform/x86/amd/x3d_vcache.c +new file mode 100644 +index 000000000000..679613d02b9a +--- /dev/null ++++ b/drivers/platform/x86/amd/x3d_vcache.c +@@ -0,0 +1,193 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later ++/* ++ * AMD 3D V-Cache Performance Optimizer Driver ++ * ++ * Copyright (c) 2024, Advanced Micro Devices, Inc. ++ * All Rights Reserved. 
++ * ++ * Authors: Basavaraj Natikar ++ * Perry Yuan ++ * Mario Limonciello ++ * ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static char *x3d_mode = "frequency"; ++module_param(x3d_mode, charp, 0444); ++MODULE_PARM_DESC(x3d_mode, "Initial 3D-VCache mode; 'frequency' (default) or 'cache'"); ++ ++#define DSM_REVISION_ID 0 ++#define DSM_GET_FUNCS_SUPPORTED 0 ++#define DSM_SET_X3D_MODE 1 ++ ++static guid_t x3d_guid = GUID_INIT(0xdff8e55f, 0xbcfd, 0x46fb, 0xba, 0x0a, ++ 0xef, 0xd0, 0x45, 0x0f, 0x34, 0xee); ++ ++enum amd_x3d_mode_type { ++ MODE_INDEX_FREQ, ++ MODE_INDEX_CACHE, ++}; ++ ++static const char * const amd_x3d_mode_strings[] = { ++ [MODE_INDEX_FREQ] = "frequency", ++ [MODE_INDEX_CACHE] = "cache", ++}; ++ ++struct amd_x3d_dev { ++ struct device *dev; ++ acpi_handle ahandle; ++ /* To protect x3d mode setting */ ++ struct mutex lock; ++ enum amd_x3d_mode_type curr_mode; ++}; ++ ++static int amd_x3d_mode_switch(struct amd_x3d_dev *data, int new_state) ++{ ++ union acpi_object *out, argv; ++ ++ guard(mutex)(&data->lock); ++ argv.type = ACPI_TYPE_INTEGER; ++ argv.integer.value = new_state; ++ ++ out = acpi_evaluate_dsm(data->ahandle, &x3d_guid, DSM_REVISION_ID, DSM_SET_X3D_MODE, ++ &argv); ++ if (!out) { ++ dev_err(data->dev, "failed to evaluate _DSM\n"); ++ return -EINVAL; ++ } ++ ++ data->curr_mode = new_state; ++ ++ ACPI_FREE(out); ++ ++ return 0; ++} ++ ++static ssize_t amd_x3d_mode_store(struct device *dev, struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ struct amd_x3d_dev *data = dev_get_drvdata(dev); ++ int ret; ++ ++ ret = sysfs_match_string(amd_x3d_mode_strings, buf); ++ if (ret < 0) { ++ dev_err(dev, "no matching mode to set %s\n", buf); ++ return ret; ++ } ++ ++ ret = amd_x3d_mode_switch(data, ret); ++ ++ return ret ? 
ret : count; ++} ++ ++static ssize_t amd_x3d_mode_show(struct device *dev, struct device_attribute *attr, char *buf) ++{ ++ struct amd_x3d_dev *data = dev_get_drvdata(dev); ++ ++ if (data->curr_mode > MODE_INDEX_CACHE || data->curr_mode < MODE_INDEX_FREQ) ++ return -EINVAL; ++ ++ return sysfs_emit(buf, "%s\n", amd_x3d_mode_strings[data->curr_mode]); ++} ++static DEVICE_ATTR_RW(amd_x3d_mode); ++ ++static struct attribute *amd_x3d_attrs[] = { ++ &dev_attr_amd_x3d_mode.attr, ++ NULL ++}; ++ATTRIBUTE_GROUPS(amd_x3d); ++ ++static int amd_x3d_supported(struct amd_x3d_dev *data) ++{ ++ union acpi_object *out; ++ ++ out = acpi_evaluate_dsm(data->ahandle, &x3d_guid, DSM_REVISION_ID, ++ DSM_GET_FUNCS_SUPPORTED, NULL); ++ if (!out) { ++ dev_err(data->dev, "failed to evaluate _DSM\n"); ++ return -ENODEV; ++ } ++ ++ if (out->type != ACPI_TYPE_BUFFER) { ++ dev_err(data->dev, "invalid type %d\n", out->type); ++ ACPI_FREE(out); ++ return -EINVAL; ++ } ++ ++ ACPI_FREE(out); ++ return 0; ++} ++ ++static const struct acpi_device_id amd_x3d_acpi_ids[] = { ++ {"AMDI0101"}, ++ { }, ++}; ++MODULE_DEVICE_TABLE(acpi, amd_x3d_acpi_ids); ++ ++static void amd_x3d_remove(void *context) ++{ ++ struct amd_x3d_dev *data = context; ++ ++ mutex_destroy(&data->lock); ++} ++ ++static int amd_x3d_probe(struct platform_device *pdev) ++{ ++ const struct acpi_device_id *id; ++ struct amd_x3d_dev *data; ++ acpi_handle handle; ++ int ret; ++ ++ handle = ACPI_HANDLE(&pdev->dev); ++ if (!handle) ++ return -ENODEV; ++ ++ id = acpi_match_device(amd_x3d_acpi_ids, &pdev->dev); ++ if (!id) ++ dev_err_probe(&pdev->dev, -ENODEV, "unable to match ACPI ID and data\n"); ++ ++ data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; ++ ++ data->dev = &pdev->dev; ++ data->ahandle = handle; ++ platform_set_drvdata(pdev, data); ++ ++ ret = amd_x3d_supported(data); ++ if (ret) ++ dev_err_probe(&pdev->dev, ret, "not supported on this platform\n"); ++ ++ ret = match_string(amd_x3d_mode_strings, ARRAY_SIZE(amd_x3d_mode_strings), x3d_mode); ++ if (ret < 0) ++ return dev_err_probe(&pdev->dev, -EINVAL, "invalid mode %s\n", x3d_mode); ++ ++ mutex_init(&data->lock); ++ ++ ret = amd_x3d_mode_switch(data, ret); ++ if (ret < 0) ++ return ret; ++ ++ return devm_add_action_or_reset(&pdev->dev, amd_x3d_remove, data); ++} ++ ++static struct platform_driver amd_3d_vcache_driver = { ++ .driver = { ++ .name = "amd_x3d_vcache", ++ .dev_groups = amd_x3d_groups, ++ .acpi_match_table = amd_x3d_acpi_ids, ++ }, ++ .probe = amd_x3d_probe, ++}; ++module_platform_driver(amd_3d_vcache_driver); ++ ++MODULE_DESCRIPTION("AMD 3D V-Cache Performance Optimizer Driver"); ++MODULE_LICENSE("GPL"); +-- +2.47.0.rc0 + +From 943001c18b149dc43e5bf57f4556c184e8122b48 Mon Sep 17 00:00:00 2001 +From: Peter Jung +Date: Thu, 17 Oct 2024 16:02:42 +0200 +Subject: [PATCH 03/14] amd-pstate + +Signed-off-by: Peter Jung +--- + Documentation/admin-guide/pm/amd-pstate.rst | 15 +- + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/processor.h | 16 +- + arch/x86/kernel/acpi/cppc.c | 202 ++++++++++++++- + arch/x86/kernel/cpu/amd.c | 45 ++-- + arch/x86/kernel/cpu/scattered.c | 1 + + drivers/cpufreq/acpi-cpufreq.c | 12 +- + drivers/cpufreq/amd-pstate.c | 263 +++++++------------- + include/acpi/cppc_acpi.h | 41 ++- + 9 files changed, 372 insertions(+), 224 deletions(-) + +diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst +index d0324d44f548..210a808b74ec 100644 +--- 
a/Documentation/admin-guide/pm/amd-pstate.rst ++++ b/Documentation/admin-guide/pm/amd-pstate.rst +@@ -251,7 +251,9 @@ performance supported in `AMD CPPC Performance Capability `_). + In some ASICs, the highest CPPC performance is not the one in the ``_CPC`` + table, so we need to expose it to sysfs. If boost is not active, but + still supported, this maximum frequency will be larger than the one in +-``cpuinfo``. ++``cpuinfo``. On systems that support preferred core, the driver will have ++different values for some cores than others and this will reflect the values ++advertised by the platform at bootup. + This attribute is read-only. + + ``amd_pstate_lowest_nonlinear_freq`` +@@ -262,6 +264,17 @@ lowest non-linear performance in `AMD CPPC Performance Capability + `_.) + This attribute is read-only. + ++``amd_pstate_hw_prefcore`` ++ ++Whether the platform supports the preferred core feature and it has been ++enabled. This attribute is read-only. ++ ++``amd_pstate_prefcore_ranking`` ++ ++The performance ranking of the core. This number doesn't have any unit, but ++larger numbers are preferred at the time of reading. This can change at ++runtime based on platform conditions. This attribute is read-only. ++ + ``energy_performance_available_preferences`` + + A list of all the supported EPP preferences that could be used for +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index dd4682857c12..cea1ed82aeb4 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -473,6 +473,7 @@ + #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ + #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ + #define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ ++#define X86_FEATURE_HETERO_CORE_TOPOLOGY (21*32 + 6) /* Heterogeneous Core Topology */ + + /* + * BUG word(s) +diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h +index a75a07f4931f..279edfd36fed 100644 +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -690,9 +690,15 @@ static inline u32 per_cpu_l2c_id(unsigned int cpu) + return per_cpu(cpu_info.topo.l2c_id, cpu); + } + +-#ifdef CONFIG_CPU_SUP_AMD +-extern u32 amd_get_highest_perf(void); ++/* defined by CPUID_Fn80000026_EBX BIT [31:28] */ ++enum amd_core_type { ++ CPU_CORE_TYPE_NO_HETERO_SUP = -1, ++ CPU_CORE_TYPE_PERFORMANCE = 0, ++ CPU_CORE_TYPE_EFFICIENCY = 1, ++ CPU_CORE_TYPE_UNDEFINED = 2, ++}; + ++#ifdef CONFIG_CPU_SUP_AMD + /* + * Issue a DIV 0/1 insn to clear any division data from previous DIV + * operations. 
+@@ -704,10 +710,14 @@ static __always_inline void amd_clear_divider(void) + } + + extern void amd_check_microcode(void); ++extern enum amd_core_type amd_get_core_type(void); + #else +-static inline u32 amd_get_highest_perf(void) { return 0; } + static inline void amd_clear_divider(void) { } + static inline void amd_check_microcode(void) { } ++static inline enum amd_core_type amd_get_core_type(void) ++{ ++ return CPU_CORE_TYPE_NO_HETERO_SUP; ++} + #endif + + extern unsigned long arch_align_stack(unsigned long sp); +diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c +index ff8f25faca3d..ca289e6ec82c 100644 +--- a/arch/x86/kernel/acpi/cppc.c ++++ b/arch/x86/kernel/acpi/cppc.c +@@ -9,6 +9,17 @@ + #include + #include + ++#define CPPC_HIGHEST_PERF_PERFORMANCE 196 ++#define CPPC_HIGHEST_PERF_PREFCORE 166 ++ ++enum amd_pref_core { ++ AMD_PREF_CORE_UNKNOWN = 0, ++ AMD_PREF_CORE_SUPPORTED, ++ AMD_PREF_CORE_UNSUPPORTED, ++}; ++static enum amd_pref_core amd_pref_core_detected; ++static u64 boost_numerator; ++ + /* Refer to drivers/acpi/cppc_acpi.c for the description of functions */ + + bool cpc_supported_by_cpu(void) +@@ -69,31 +80,30 @@ int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) + static void amd_set_max_freq_ratio(void) + { + struct cppc_perf_caps perf_caps; +- u64 highest_perf, nominal_perf; ++ u64 numerator, nominal_perf; + u64 perf_ratio; + int rc; + + rc = cppc_get_perf_caps(0, &perf_caps); + if (rc) { +- pr_debug("Could not retrieve perf counters (%d)\n", rc); ++ pr_warn("Could not retrieve perf counters (%d)\n", rc); + return; + } + +- highest_perf = amd_get_highest_perf(); ++ rc = amd_get_boost_ratio_numerator(0, &numerator); ++ if (rc) { ++ pr_warn("Could not retrieve highest performance (%d)\n", rc); ++ return; ++ } + nominal_perf = perf_caps.nominal_perf; + +- if (!highest_perf || !nominal_perf) { +- pr_debug("Could not retrieve highest or nominal performance\n"); ++ if (!nominal_perf) { ++ pr_warn("Could not retrieve nominal performance\n"); + return; + } + +- perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf); + /* midpoint between max_boost and max_P */ +- perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1; +- if (!perf_ratio) { +- pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n"); +- return; +- } ++ perf_ratio = (div_u64(numerator * SCHED_CAPACITY_SCALE, nominal_perf) + SCHED_CAPACITY_SCALE) >> 1; + + freq_invariance_set_perf_ratio(perf_ratio, false); + } +@@ -116,3 +126,173 @@ void init_freq_invariance_cppc(void) + init_done = true; + mutex_unlock(&freq_invariance_lock); + } ++ ++/* ++ * Get the highest performance register value. ++ * @cpu: CPU from which to get highest performance. ++ * @highest_perf: Return address for highest performance value. ++ * ++ * Return: 0 for success, negative error code otherwise. ++ */ ++int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf) ++{ ++ u64 val; ++ int ret; ++ ++ if (cpu_feature_enabled(X86_FEATURE_CPPC)) { ++ ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &val); ++ if (ret) ++ goto out; ++ ++ val = AMD_CPPC_HIGHEST_PERF(val); ++ } else { ++ ret = cppc_get_highest_perf(cpu, &val); ++ if (ret) ++ goto out; ++ } ++ ++ WRITE_ONCE(*highest_perf, (u32)val); ++out: ++ return ret; ++} ++EXPORT_SYMBOL_GPL(amd_get_highest_perf); ++ ++/** ++ * amd_detect_prefcore: Detect if CPUs in the system support preferred cores ++ * @detected: Output variable for the result of the detection. ++ * ++ * Determine whether CPUs in the system support preferred cores. 
On systems ++ * that support preferred cores, different highest perf values will be found ++ * on different cores. On other systems, the highest perf value will be the ++ * same on all cores. ++ * ++ * The result of the detection will be stored in the 'detected' parameter. ++ * ++ * Return: 0 for success, negative error code otherwise ++ */ ++int amd_detect_prefcore(bool *detected) ++{ ++ int cpu, count = 0; ++ u64 highest_perf[2] = {0}; ++ ++ if (WARN_ON(!detected)) ++ return -EINVAL; ++ ++ switch (amd_pref_core_detected) { ++ case AMD_PREF_CORE_SUPPORTED: ++ *detected = true; ++ return 0; ++ case AMD_PREF_CORE_UNSUPPORTED: ++ *detected = false; ++ return 0; ++ default: ++ break; ++ } ++ ++ for_each_present_cpu(cpu) { ++ u32 tmp; ++ int ret; ++ ++ ret = amd_get_highest_perf(cpu, &tmp); ++ if (ret) ++ return ret; ++ ++ if (!count || (count == 1 && tmp != highest_perf[0])) ++ highest_perf[count++] = tmp; ++ ++ if (count == 2) ++ break; ++ } ++ ++ *detected = (count == 2); ++ boost_numerator = highest_perf[0]; ++ ++ amd_pref_core_detected = *detected ? AMD_PREF_CORE_SUPPORTED : ++ AMD_PREF_CORE_UNSUPPORTED; ++ ++ pr_debug("AMD CPPC preferred core is %ssupported (highest perf: 0x%llx)\n", ++ *detected ? "" : "un", highest_perf[0]); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(amd_detect_prefcore); ++ ++static void amd_do_get_core_type(void *data) ++{ ++ enum amd_core_type *core_type = data; ++ *core_type = amd_get_core_type(); ++} ++ ++/** ++ * amd_get_boost_ratio_numerator: Get the numerator to use for boost ratio calculation ++ * @cpu: CPU to get numerator for. ++ * @numerator: Output variable for numerator. ++ * ++ * Determine the numerator to use for calculating the boost ratio on ++ * a CPU. On systems that support preferred cores, this will be a hardcoded ++ * value. On other systems this will the highest performance register value. ++ * ++ * If booting the system with amd-pstate enabled but preferred cores disabled then ++ * the correct boost numerator will be returned to match hardware capabilities ++ * even if the preferred cores scheduling hints are not enabled. ++ * ++ * Return: 0 for success, negative error code otherwise. ++ */ ++int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) ++{ ++ enum amd_core_type core_type; ++ bool prefcore; ++ u32 tmp; ++ int ret; ++ ++ ret = amd_detect_prefcore(&prefcore); ++ if (ret) ++ return ret; ++ ++ /* without preferred cores, return the highest perf register value */ ++ if (!prefcore) { ++ *numerator = boost_numerator; ++ return 0; ++ } ++ ++ /* ++ * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f, ++ * the highest performance level is set to 196. ++ * https://bugzilla.kernel.org/show_bug.cgi?id=218759 ++ */ ++ if (cpu_feature_enabled(X86_FEATURE_ZEN4)) { ++ switch (boot_cpu_data.x86_model) { ++ case 0x70 ... 
0x7f: ++ *numerator = CPPC_HIGHEST_PERF_PERFORMANCE; ++ return 0; ++ default: ++ break; ++ } ++ } ++ ++ /* detect if running on heterogeneous design */ ++ smp_call_function_single(cpu, amd_do_get_core_type, &core_type, 1); ++ switch (core_type) { ++ case CPU_CORE_TYPE_NO_HETERO_SUP: ++ break; ++ case CPU_CORE_TYPE_PERFORMANCE: ++ /* use the max scale for performance cores */ ++ *numerator = CPPC_HIGHEST_PERF_PERFORMANCE; ++ return 0; ++ case CPU_CORE_TYPE_EFFICIENCY: ++ /* use the highest perf value for efficiency cores */ ++ ret = amd_get_highest_perf(cpu, &tmp); ++ if (ret) ++ return ret; ++ *numerator = tmp; ++ return 0; ++ default: ++ pr_warn("WARNING: Undefined core type %d found\n", core_type); ++ break; ++ } ++ ++ *numerator = CPPC_HIGHEST_PERF_PREFCORE; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(amd_get_boost_ratio_numerator); +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index 1e0fe5f8ab84..8ad5f1385f0e 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -1190,22 +1190,6 @@ unsigned long amd_get_dr_addr_mask(unsigned int dr) + } + EXPORT_SYMBOL_GPL(amd_get_dr_addr_mask); + +-u32 amd_get_highest_perf(void) +-{ +- struct cpuinfo_x86 *c = &boot_cpu_data; +- +- if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) || +- (c->x86_model >= 0x70 && c->x86_model < 0x80))) +- return 166; +- +- if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) || +- (c->x86_model >= 0x40 && c->x86_model < 0x70))) +- return 166; +- +- return 255; +-} +-EXPORT_SYMBOL_GPL(amd_get_highest_perf); +- + static void zenbleed_check_cpu(void *unused) + { + struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); +@@ -1220,3 +1204,32 @@ void amd_check_microcode(void) + + on_each_cpu(zenbleed_check_cpu, NULL, 1); + } ++ ++/** ++ * amd_get_core_type - Heterogeneous core type identification ++ * ++ * Returns the CPU type [31:28] (i.e., performance or efficient) of ++ * a CPU in the processor. ++ * ++ * If the processor has no core type support, returns ++ * CPU_CORE_TYPE_NO_HETERO_SUP. 
++ */ ++enum amd_core_type amd_get_core_type(void) ++{ ++ struct { ++ u32 num_processors :16, ++ power_efficiency_ranking :8, ++ native_model_id :4, ++ core_type :4; ++ } props; ++ ++ if (!cpu_feature_enabled(X86_FEATURE_HETERO_CORE_TOPOLOGY)) ++ return CPU_CORE_TYPE_NO_HETERO_SUP; ++ ++ cpuid_leaf_reg(0x80000026, CPUID_EBX, &props); ++ if (props.core_type >= CPU_CORE_TYPE_UNDEFINED) ++ return CPU_CORE_TYPE_UNDEFINED; ++ ++ return props.core_type; ++} ++EXPORT_SYMBOL_GPL(amd_get_core_type); +diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c +index c84c30188fdf..3bba55323163 100644 +--- a/arch/x86/kernel/cpu/scattered.c ++++ b/arch/x86/kernel/cpu/scattered.c +@@ -52,6 +52,7 @@ static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, + { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, + { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, ++ { X86_FEATURE_HETERO_CORE_TOPOLOGY, CPUID_EAX, 30, 0x80000026, 0 }, + { 0, 0, 0, 0, 0 } + }; + +diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c +index a8ca625a98b8..0f04feb6cafa 100644 +--- a/drivers/cpufreq/acpi-cpufreq.c ++++ b/drivers/cpufreq/acpi-cpufreq.c +@@ -642,10 +642,16 @@ static u64 get_max_boost_ratio(unsigned int cpu) + return 0; + } + +- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) +- highest_perf = amd_get_highest_perf(); +- else ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { ++ ret = amd_get_boost_ratio_numerator(cpu, &highest_perf); ++ if (ret) { ++ pr_debug("CPU%d: Unable to get boost ratio numerator (%d)\n", ++ cpu, ret); ++ return 0; ++ } ++ } else { + highest_perf = perf_caps.highest_perf; ++ } + + nominal_perf = perf_caps.nominal_perf; + +diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c +index 589fde37ccd7..41965c49ec96 100644 +--- a/drivers/cpufreq/amd-pstate.c ++++ b/drivers/cpufreq/amd-pstate.c +@@ -52,8 +52,6 @@ + #define AMD_PSTATE_TRANSITION_LATENCY 20000 + #define AMD_PSTATE_TRANSITION_DELAY 1000 + #define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY 600 +-#define CPPC_HIGHEST_PERF_PERFORMANCE 196 +-#define CPPC_HIGHEST_PERF_DEFAULT 166 + + #define AMD_CPPC_EPP_PERFORMANCE 0x00 + #define AMD_CPPC_EPP_BALANCE_PERFORMANCE 0x80 +@@ -239,7 +237,7 @@ static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata) + return index; + } + +-static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, ++static void msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, + u32 des_perf, u32 max_perf, bool fast_switch) + { + if (fast_switch) +@@ -249,7 +247,7 @@ static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, + READ_ONCE(cpudata->cppc_req_cached)); + } + +-DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf); ++DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf); + + static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, + u32 min_perf, u32 des_perf, +@@ -312,7 +310,7 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, + return ret; + } + +-static inline int pstate_enable(bool enable) ++static inline int msr_enable(bool enable) + { + int ret, cpu; + unsigned long logical_proc_id_mask = 0; +@@ -338,7 +336,7 @@ static inline int pstate_enable(bool enable) + return 0; + } + +-static int cppc_enable(bool enable) ++static int shmem_enable(bool enable) + { + int cpu, ret = 0; + struct cppc_perf_ctrls perf_ctrls; +@@ -365,50 +363,24 @@ static int cppc_enable(bool enable) + return ret; + } 
+ +-DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable); ++DEFINE_STATIC_CALL(amd_pstate_enable, msr_enable); + + static inline int amd_pstate_enable(bool enable) + { + return static_call(amd_pstate_enable)(enable); + } + +-static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata) +-{ +- struct cpuinfo_x86 *c = &cpu_data(0); +- +- /* +- * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f, +- * the highest performance level is set to 196. +- * https://bugzilla.kernel.org/show_bug.cgi?id=218759 +- */ +- if (c->x86 == 0x19 && (c->x86_model >= 0x70 && c->x86_model <= 0x7f)) +- return CPPC_HIGHEST_PERF_PERFORMANCE; +- +- return CPPC_HIGHEST_PERF_DEFAULT; +-} +- +-static int pstate_init_perf(struct amd_cpudata *cpudata) ++static int msr_init_perf(struct amd_cpudata *cpudata) + { + u64 cap1; +- u32 highest_perf; + + int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, + &cap1); + if (ret) + return ret; + +- /* For platforms that do not support the preferred core feature, the +- * highest_pef may be configured with 166 or 255, to avoid max frequency +- * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as +- * the default max perf. +- */ +- if (cpudata->hw_prefcore) +- highest_perf = amd_pstate_highest_perf_set(cpudata); +- else +- highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); +- +- WRITE_ONCE(cpudata->highest_perf, highest_perf); +- WRITE_ONCE(cpudata->max_limit_perf, highest_perf); ++ WRITE_ONCE(cpudata->highest_perf, AMD_CPPC_HIGHEST_PERF(cap1)); ++ WRITE_ONCE(cpudata->max_limit_perf, AMD_CPPC_HIGHEST_PERF(cap1)); + WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); + WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); + WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); +@@ -417,22 +389,16 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) + return 0; + } + +-static int cppc_init_perf(struct amd_cpudata *cpudata) ++static int shmem_init_perf(struct amd_cpudata *cpudata) + { + struct cppc_perf_caps cppc_perf; +- u32 highest_perf; + + int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); + if (ret) + return ret; + +- if (cpudata->hw_prefcore) +- highest_perf = amd_pstate_highest_perf_set(cpudata); +- else +- highest_perf = cppc_perf.highest_perf; +- +- WRITE_ONCE(cpudata->highest_perf, highest_perf); +- WRITE_ONCE(cpudata->max_limit_perf, highest_perf); ++ WRITE_ONCE(cpudata->highest_perf, cppc_perf.highest_perf); ++ WRITE_ONCE(cpudata->max_limit_perf, cppc_perf.highest_perf); + WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf); + WRITE_ONCE(cpudata->lowest_nonlinear_perf, + cppc_perf.lowest_nonlinear_perf); +@@ -458,14 +424,14 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) + return ret; + } + +-DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf); ++DEFINE_STATIC_CALL(amd_pstate_init_perf, msr_init_perf); + + static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) + { + return static_call(amd_pstate_init_perf)(cpudata); + } + +-static void cppc_update_perf(struct amd_cpudata *cpudata, ++static void shmem_update_perf(struct amd_cpudata *cpudata, + u32 min_perf, u32 des_perf, + u32 max_perf, bool fast_switch) + { +@@ -565,20 +531,44 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, + cpufreq_cpu_put(policy); + } + +-static int amd_pstate_verify(struct cpufreq_policy_data *policy) ++static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) + { +- cpufreq_verify_within_cpu_limits(policy); ++ /* ++ * Initialize lower 
frequency limit (i.e.policy->min) with ++ * lowest_nonlinear_frequency which is the most energy efficient ++ * frequency. Override the initial value set by cpufreq core and ++ * amd-pstate qos_requests. ++ */ ++ if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) { ++ struct cpufreq_policy *policy = cpufreq_cpu_get(policy_data->cpu); ++ struct amd_cpudata *cpudata; ++ ++ if (!policy) ++ return -EINVAL; ++ ++ cpudata = policy->driver_data; ++ policy_data->min = cpudata->lowest_nonlinear_freq; ++ cpufreq_cpu_put(policy); ++ } ++ ++ cpufreq_verify_within_cpu_limits(policy_data); ++ pr_debug("policy_max =%d, policy_min=%d\n", policy_data->max, policy_data->min); + + return 0; + } + + static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) + { +- u32 max_limit_perf, min_limit_perf, lowest_perf; ++ u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf; + struct amd_cpudata *cpudata = policy->driver_data; + +- max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); +- min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); ++ if (cpudata->boost_supported && !policy->boost_enabled) ++ max_perf = READ_ONCE(cpudata->nominal_perf); ++ else ++ max_perf = READ_ONCE(cpudata->highest_perf); ++ ++ max_limit_perf = div_u64(policy->max * max_perf, policy->cpuinfo.max_freq); ++ min_limit_perf = div_u64(policy->min * max_perf, policy->cpuinfo.max_freq); + + lowest_perf = READ_ONCE(cpudata->lowest_perf); + if (min_limit_perf < lowest_perf) +@@ -659,12 +649,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, + unsigned long max_perf, min_perf, des_perf, + cap_perf, lowest_nonlinear_perf; + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); +- struct amd_cpudata *cpudata; +- +- if (!policy) +- return; +- +- cpudata = policy->driver_data; ++ struct amd_cpudata *cpudata = policy->driver_data; + + if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) + amd_pstate_update_min_max_limit(policy); +@@ -698,34 +683,12 @@ static void amd_pstate_adjust_perf(unsigned int cpu, + static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) + { + struct amd_cpudata *cpudata = policy->driver_data; +- struct cppc_perf_ctrls perf_ctrls; +- u32 highest_perf, nominal_perf, nominal_freq, max_freq; ++ u32 nominal_freq, max_freq; + int ret = 0; + +- highest_perf = READ_ONCE(cpudata->highest_perf); +- nominal_perf = READ_ONCE(cpudata->nominal_perf); + nominal_freq = READ_ONCE(cpudata->nominal_freq); + max_freq = READ_ONCE(cpudata->max_freq); + +- if (boot_cpu_has(X86_FEATURE_CPPC)) { +- u64 value = READ_ONCE(cpudata->cppc_req_cached); +- +- value &= ~GENMASK_ULL(7, 0); +- value |= on ? highest_perf : nominal_perf; +- WRITE_ONCE(cpudata->cppc_req_cached, value); +- +- wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); +- } else { +- perf_ctrls.max_perf = on ? highest_perf : nominal_perf; +- ret = cppc_set_perf(cpudata->cpu, &perf_ctrls); +- if (ret) { +- cpufreq_cpu_release(policy); +- pr_debug("Failed to set max perf on CPU:%d. ret:%d\n", +- cpudata->cpu, ret); +- return ret; +- } +- } +- + if (on) + policy->cpuinfo.max_freq = max_freq; + else if (policy->cpuinfo.max_freq > nominal_freq * 1000) +@@ -811,66 +774,22 @@ static void amd_pstste_sched_prefcore_workfn(struct work_struct *work) + } + static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn); + +-/* +- * Get the highest performance register value. +- * @cpu: CPU from which to get highest performance. 
+- * @highest_perf: Return address. +- * +- * Return: 0 for success, -EIO otherwise. +- */ +-static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf) +-{ +- int ret; +- +- if (cpu_feature_enabled(X86_FEATURE_CPPC)) { +- u64 cap1; +- +- ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1); +- if (ret) +- return ret; +- WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1)); +- } else { +- u64 cppc_highest_perf; +- +- ret = cppc_get_highest_perf(cpu, &cppc_highest_perf); +- if (ret) +- return ret; +- WRITE_ONCE(*highest_perf, cppc_highest_perf); +- } +- +- return (ret); +-} +- + #define CPPC_MAX_PERF U8_MAX + + static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) + { +- int ret, prio; +- u32 highest_perf; +- +- ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf); +- if (ret) ++ /* user disabled or not detected */ ++ if (!amd_pstate_prefcore) + return; + + cpudata->hw_prefcore = true; +- /* check if CPPC preferred core feature is enabled*/ +- if (highest_perf < CPPC_MAX_PERF) +- prio = (int)highest_perf; +- else { +- pr_debug("AMD CPPC preferred core is unsupported!\n"); +- cpudata->hw_prefcore = false; +- return; +- } +- +- if (!amd_pstate_prefcore) +- return; + + /* + * The priorities can be set regardless of whether or not + * sched_set_itmt_support(true) has been called and it is valid to + * update them at any time after it has been called. + */ +- sched_set_itmt_core_prio(prio, cpudata->cpu); ++ sched_set_itmt_core_prio((int)READ_ONCE(cpudata->highest_perf), cpudata->cpu); + + schedule_work(&sched_prefcore_work); + } +@@ -878,27 +797,22 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) + static void amd_pstate_update_limits(unsigned int cpu) + { + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); +- struct amd_cpudata *cpudata; ++ struct amd_cpudata *cpudata = policy->driver_data; + u32 prev_high = 0, cur_high = 0; + int ret; + bool highest_perf_changed = false; + +- if (!policy) ++ if (!amd_pstate_prefcore) + return; + +- cpudata = policy->driver_data; +- + mutex_lock(&amd_pstate_driver_lock); +- if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore)) +- goto free_cpufreq_put; +- +- ret = amd_pstate_get_highest_perf(cpu, &cur_high); ++ ret = amd_get_highest_perf(cpu, &cur_high); + if (ret) + goto free_cpufreq_put; + + prev_high = READ_ONCE(cpudata->prefcore_ranking); +- if (prev_high != cur_high) { +- highest_perf_changed = true; ++ highest_perf_changed = (prev_high != cur_high); ++ if (highest_perf_changed) { + WRITE_ONCE(cpudata->prefcore_ranking, cur_high); + + if (cur_high < CPPC_MAX_PERF) +@@ -962,8 +876,8 @@ static u32 amd_pstate_get_transition_latency(unsigned int cpu) + static int amd_pstate_init_freq(struct amd_cpudata *cpudata) + { + int ret; +- u32 min_freq; +- u32 highest_perf, max_freq; ++ u32 min_freq, max_freq; ++ u64 numerator; + u32 nominal_perf, nominal_freq; + u32 lowest_nonlinear_perf, lowest_nonlinear_freq; + u32 boost_ratio, lowest_nonlinear_ratio; +@@ -985,8 +899,10 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) + + nominal_perf = READ_ONCE(cpudata->nominal_perf); + +- highest_perf = READ_ONCE(cpudata->highest_perf); +- boost_ratio = div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf); ++ ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator); ++ if (ret) ++ return ret; ++ boost_ratio = div_u64(numerator << SCHED_CAPACITY_SHIFT, nominal_perf); + max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000; + + lowest_nonlinear_perf = 
READ_ONCE(cpudata->lowest_nonlinear_perf); +@@ -1041,12 +957,12 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) + + cpudata->cpu = policy->cpu; + +- amd_pstate_init_prefcore(cpudata); +- + ret = amd_pstate_init_perf(cpudata); + if (ret) + goto free_cpudata1; + ++ amd_pstate_init_prefcore(cpudata); ++ + ret = amd_pstate_init_freq(cpudata); + if (ret) + goto free_cpudata1; +@@ -1076,7 +992,7 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) + policy->fast_switch_possible = true; + + ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0], +- FREQ_QOS_MIN, policy->cpuinfo.min_freq); ++ FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE); + if (ret < 0) { + dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret); + goto free_cpudata1; +@@ -1281,11 +1197,21 @@ static int amd_pstate_register_driver(int mode) + return -EINVAL; + + cppc_state = mode; ++ ++ ret = amd_pstate_enable(true); ++ if (ret) { ++ pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n", ++ ret); ++ amd_pstate_driver_cleanup(); ++ return ret; ++ } ++ + ret = cpufreq_register_driver(current_pstate_driver); + if (ret) { + amd_pstate_driver_cleanup(); + return ret; + } ++ + return 0; + } + +@@ -1496,12 +1422,12 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) + cpudata->cpu = policy->cpu; + cpudata->epp_policy = 0; + +- amd_pstate_init_prefcore(cpudata); +- + ret = amd_pstate_init_perf(cpudata); + if (ret) + goto free_cpudata1; + ++ amd_pstate_init_prefcore(cpudata); ++ + ret = amd_pstate_init_freq(cpudata); + if (ret) + goto free_cpudata1; +@@ -1571,23 +1497,13 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) + static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) + { + struct amd_cpudata *cpudata = policy->driver_data; +- u32 max_perf, min_perf, min_limit_perf, max_limit_perf; ++ u32 max_perf, min_perf; + u64 value; + s16 epp; + + max_perf = READ_ONCE(cpudata->highest_perf); + min_perf = READ_ONCE(cpudata->lowest_perf); +- max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); +- min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); +- +- if (min_limit_perf < min_perf) +- min_limit_perf = min_perf; +- +- if (max_limit_perf < min_limit_perf) +- max_limit_perf = min_limit_perf; +- +- WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); +- WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); ++ amd_pstate_update_min_max_limit(policy); + + max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, + cpudata->max_limit_perf); +@@ -1624,12 +1540,6 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) + epp = 0; + +- /* Set initial EPP value */ +- if (cpu_feature_enabled(X86_FEATURE_CPPC)) { +- value &= ~GENMASK_ULL(31, 24); +- value |= (u64)epp << 24; +- } +- + WRITE_ONCE(cpudata->cppc_req_cached, value); + amd_pstate_set_epp(cpudata, epp); + } +@@ -1737,13 +1647,6 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) + return 0; + } + +-static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy) +-{ +- cpufreq_verify_within_cpu_limits(policy); +- pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min); +- return 0; +-} +- + static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) + { + struct amd_cpudata *cpudata = policy->driver_data; +@@ -1799,7 +1702,7 @@ static struct cpufreq_driver amd_pstate_driver = { + + static 
struct cpufreq_driver amd_pstate_epp_driver = { + .flags = CPUFREQ_CONST_LOOPS, +- .verify = amd_pstate_epp_verify_policy, ++ .verify = amd_pstate_verify, + .setpolicy = amd_pstate_epp_set_policy, + .init = amd_pstate_epp_cpu_init, + .exit = amd_pstate_epp_cpu_exit, +@@ -1832,7 +1735,7 @@ static int __init amd_pstate_set_driver(int mode_idx) + return -EINVAL; + } + +-/** ++/* + * CPPC function is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F. + * show the debug message that helps to check if the CPU has CPPC support for loading issue. + */ +@@ -1955,9 +1858,15 @@ static int __init amd_pstate_init(void) + current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; + } else { + pr_debug("AMD CPPC shared memory based functionality is supported\n"); +- static_call_update(amd_pstate_enable, cppc_enable); +- static_call_update(amd_pstate_init_perf, cppc_init_perf); +- static_call_update(amd_pstate_update_perf, cppc_update_perf); ++ static_call_update(amd_pstate_enable, shmem_enable); ++ static_call_update(amd_pstate_init_perf, shmem_init_perf); ++ static_call_update(amd_pstate_update_perf, shmem_update_perf); ++ } ++ ++ if (amd_pstate_prefcore) { ++ ret = amd_detect_prefcore(&amd_pstate_prefcore); ++ if (ret) ++ return ret; + } + + /* enable amd pstate feature */ +diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h +index e1720d930666..76e44e102780 100644 +--- a/include/acpi/cppc_acpi.h ++++ b/include/acpi/cppc_acpi.h +@@ -161,34 +161,37 @@ extern int cppc_get_epp_perf(int cpunum, u64 *epp_perf); + extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable); + extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps); + extern int cppc_set_auto_sel(int cpu, bool enable); ++extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf); ++extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator); ++extern int amd_detect_prefcore(bool *detected); + #else /* !CONFIG_ACPI_CPPC_LIB */ + static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) + { +- return -ENOTSUPP; ++ return -EOPNOTSUPP; + } + static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) + { +- return -ENOTSUPP; ++ return -EOPNOTSUPP; + } + static inline int cppc_get_highest_perf(int cpunum, u64 *highest_perf) + { +- return -ENOTSUPP; ++ return -EOPNOTSUPP; + } + static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs) + { +- return -ENOTSUPP; ++ return -EOPNOTSUPP; + } + static inline int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) + { +- return -ENOTSUPP; ++ return -EOPNOTSUPP; + } + static inline int cppc_set_enable(int cpu, bool enable) + { +- return -ENOTSUPP; ++ return -EOPNOTSUPP; + } + static inline int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps) + { +- return -ENOTSUPP; ++ return -EOPNOTSUPP; + } + static inline bool cppc_perf_ctrs_in_pcc(void) + { +@@ -212,27 +215,39 @@ static inline bool cpc_ffh_supported(void) + } + static inline int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val) + { +- return -ENOTSUPP; ++ return -EOPNOTSUPP; + } + static inline int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) + { +- return -ENOTSUPP; ++ return -EOPNOTSUPP; + } + static inline int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable) + { +- return -ENOTSUPP; ++ return -EOPNOTSUPP; + } + static inline int cppc_get_epp_perf(int cpunum, u64 *epp_perf) + { +- return -ENOTSUPP; ++ return -EOPNOTSUPP; + } + 
static inline int cppc_set_auto_sel(int cpu, bool enable) + { +- return -ENOTSUPP; ++ return -EOPNOTSUPP; + } + static inline int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps) + { +- return -ENOTSUPP; ++ return -EOPNOTSUPP; ++} ++static inline int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf) ++{ ++ return -ENODEV; ++} ++static inline int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) ++{ ++ return -EOPNOTSUPP; ++} ++static inline int amd_detect_prefcore(bool *detected) ++{ ++ return -ENODEV; + } + #endif /* !CONFIG_ACPI_CPPC_LIB */ + +-- +2.47.0.rc0 + +From d26165ef11bbc42603a0530ec1c50ad19222f875 Mon Sep 17 00:00:00 2001 +From: Peter Jung +Date: Thu, 17 Oct 2024 16:03:33 +0200 +Subject: [PATCH 04/14] amd-hfi + +Signed-off-by: Peter Jung +--- + Documentation/arch/x86/amd-hfi.rst | 115 ++++++ + Documentation/arch/x86/index.rst | 1 + + MAINTAINERS | 9 + + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/hreset.h | 6 + + arch/x86/include/asm/msr-index.h | 5 + + arch/x86/kernel/cpu/common.c | 15 + + arch/x86/kernel/cpu/scattered.c | 1 + + arch/x86/kernel/process_32.c | 3 + + arch/x86/kernel/process_64.c | 3 + + arch/x86/kernel/smpboot.c | 5 +- + drivers/cpufreq/amd-pstate.c | 6 + + drivers/platform/x86/amd/Kconfig | 1 + + drivers/platform/x86/amd/Makefile | 1 + + drivers/platform/x86/amd/hfi/Kconfig | 21 + + drivers/platform/x86/amd/hfi/Makefile | 7 + + drivers/platform/x86/amd/hfi/hfi.c | 552 ++++++++++++++++++++++++++ + 17 files changed, 750 insertions(+), 2 deletions(-) + create mode 100644 Documentation/arch/x86/amd-hfi.rst + create mode 100644 arch/x86/include/asm/hreset.h + create mode 100644 drivers/platform/x86/amd/hfi/Kconfig + create mode 100644 drivers/platform/x86/amd/hfi/Makefile + create mode 100644 drivers/platform/x86/amd/hfi/hfi.c + +diff --git a/Documentation/arch/x86/amd-hfi.rst b/Documentation/arch/x86/amd-hfi.rst +new file mode 100644 +index 000000000000..2f0d493135c1 +--- /dev/null ++++ b/Documentation/arch/x86/amd-hfi.rst +@@ -0,0 +1,115 @@ ++.. SPDX-License-Identifier: GPL-2.0 ++ ++====================================================================== ++Hardware Feedback Interface For Hetero Core Scheduling On AMD Platform ++====================================================================== ++ ++:Copyright: 2024 Advanced Micro Devices, Inc. All Rights Reserved. ++ ++:Author: Perry Yuan ++ ++Overview ++-------- ++ ++AMD Heterogeneous Core implementations are comprised of more than one ++architectural class and CPUs are comprised of cores of various efficiency and ++power capabilities: performance-oriented *classic cores* and power-efficient ++*dense cores*. As such, power management strategies must be designed to ++accommodate the complexities introduced by incorporating different core types. ++Heterogeneous systems can also extend to more than two architectural classes as ++well. The purpose of the scheduling feedback mechanism is to provide ++information to the operating system scheduler in real time such that the ++scheduler can direct threads to the optimal core. ++ ++The goal of AMD's heterogeneous architecture is to attain power benefit by sending ++background thread to the dense cores while sending high priority threads to the classic ++cores. From a performance perspective, sending background threads to dense cores can free ++up power headroom and allow the classic cores to optimally service demanding threads. 
++Furthermore, the area optimized nature of the dense cores allows for an increasing
++number of physical cores. This improved core density will have a positive multithreaded
++performance impact.
++
++AMD Heterogeneous Core Driver
++-----------------------------
++
++The ``amd_hfi`` driver delivers performance and energy efficiency capability
++data for each CPU in the system to the operating system. The scheduler can use
++the ranking data from the HFI driver to make task placement decisions.
++
++Thread Classification and Ranking Table Interaction
++----------------------------------------------------
++
++The thread classification is used to index into a ranking table that describes
++an efficiency and performance ranking for each classification.
++
++Threads are classified during runtime into enumerated classes. The classes represent
++thread performance/power characteristics that may benefit from special scheduling behaviors.
++The table below depicts an example of thread classification and a preference for where a given
++thread should be scheduled based on its thread class. The real time thread classification is
++consumed by the operating system and is used to inform the scheduler of where the thread should be placed.
++
++Thread Classification Example Table
++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+++----------+----------------+-------------------------------+---------------------+---------+
++| class ID | Classification | Preferred scheduling behavior | Preemption priority | Counter |
+++----------+----------------+-------------------------------+---------------------+---------+
++| 0        | Default        | Performant                    | Highest             |         |
+++----------+----------------+-------------------------------+---------------------+---------+
++| 1        | Non-scalable   | Efficient                     | Lowest              | PMCx1A1 |
+++----------+----------------+-------------------------------+---------------------+---------+
++| 2        | I/O bound      | Efficient                     | Lowest              | PMCx044 |
+++----------+----------------+-------------------------------+---------------------+---------+
++
++
++AMD Hardware Feedback Interface
++--------------------------------
++
++The Hardware Feedback Interface provides the operating system with information
++about the performance and energy efficiency of each CPU in the system. Each
++capability is given as a unit-less quantity in the range [0-255]. A higher
++performance value indicates higher performance capability, and a higher
++efficiency value indicates more efficiency. Energy efficiency and performance
++are reported as separate capabilities in the shared memory based ranking table.
++
++These capabilities may change at runtime as a result of changes in the
++operating conditions of the system or the action of external factors.
++Power Management FW is responsible for detecting events that would require
++a reordering of the performance and efficiency ranking. Table updates would
++happen relatively infrequently and occur on the time scale of seconds or more.
++
++The following events trigger a table update:
++ * Thermal Stress Events
++ * Silent Compute
++ * Extreme Low Battery Scenarios
++
++The kernel or a userspace policy daemon can use these capabilities to modify
++task placement decisions. For instance, if either the performance or energy
++capability of a given logical processor becomes zero, it is an indication that
++the hardware recommends to the operating system not to schedule any tasks on
++that processor for performance or energy efficiency reasons, respectively.
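As an illustrative aside (not part of the applied diff), a sketch of how a
policy agent might pick a CPU from such a ranking table; the {perf, eff} pair
per CPU mirrors the driver's struct amd_hfi_classes below, and the zero test
implements the "do not schedule there" convention described above:

	struct caps { unsigned int perf; unsigned int eff; };

	/*
	 * Highest capability wins; CPUs reporting zero are skipped entirely.
	 * Returns -1 when every CPU reports zero for the requested capability.
	 */
	static int pick_cpu(const struct caps *tbl, unsigned int n, int want_perf)
	{
		unsigned int i, v, best_val = 0;
		int best = -1;

		for (i = 0; i < n; i++) {
			v = want_perf ? tbl[i].perf : tbl[i].eff;
			if (v > best_val) {
				best_val = v;
				best = (int)i;
			}
		}
		return best;
	}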
++
++Implementation details for Linux
++--------------------------------
++
++The implementation of thread scheduling consists of the following steps:
++
++1. A thread is spawned and scheduled to the ideal core using the default
++   heterogeneous scheduling policy.
++2. The processor profiles thread execution and assigns an enumerated classification ID.
++   This classification is communicated to the OS via a logical processor scope MSR.
++3. During thread context switch out, the operating system consumes the workload (WL)
++   classification, which resides in a logical processor scope MSR.
++4. The OS triggers the hardware to clear its history by writing to an MSR,
++   after consuming the WL classification and before switching in the new thread.
++5. If, due to the classification, ranking table, and processor availability,
++   the thread is not on its ideal processor, the OS will then consider scheduling
++   the thread on its ideal processor (if available).
++
++Ranking Table update
++--------------------
++The power management firmware issues a platform interrupt after updating the ranking
++table to signal that it is ready for the operating system to consume. CPUs receive the
++interrupt and read the new ranking table from the shared memory described by the PCCT
++table; the ``amd_hfi`` driver then parses the table into fresh data for scheduling decisions.
++
++
+diff --git a/Documentation/arch/x86/index.rst b/Documentation/arch/x86/index.rst
+index 8ac64d7de4dc..56f2923f5259 100644
+--- a/Documentation/arch/x86/index.rst
++++ b/Documentation/arch/x86/index.rst
+@@ -43,3 +43,4 @@ x86-specific Documentation
+    features
+    elf_auxvec
+    xstate
++   amd-hfi
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 2ba00c0cd701..c27b4e8f2129 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -1060,6 +1060,15 @@ F: arch/x86/include/asm/amd_hsmp.h
+ F: arch/x86/include/uapi/asm/amd_hsmp.h
+ F: drivers/platform/x86/amd/hsmp.c
+ 
++AMD HETERO CORE HARDWARE FEEDBACK DRIVER
++M: Mario Limonciello 
++R: Perry Yuan 
++L: platform-driver-x86@vger.kernel.org
++S: Supported
++B: https://gitlab.freedesktop.org/drm/amd/-/issues
++F: Documentation/arch/x86/amd-hfi.rst
++F: drivers/platform/x86/amd/hfi/
++
+ AMD IOMMU (AMD-VI)
+ M: Joerg Roedel 
+ R: Suravee Suthikulpanit 
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index cea1ed82aeb4..3e8e67b8ec7a 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -474,6 +474,7 @@
+ #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
+ #define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */
+ #define X86_FEATURE_HETERO_CORE_TOPOLOGY (21*32 + 6) /* Heterogeneous Core Topology */
++#define X86_FEATURE_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */
+ 
+ /*
+  * BUG word(s)
+diff --git a/arch/x86/include/asm/hreset.h b/arch/x86/include/asm/hreset.h
+new file mode 100644
+index 000000000000..ae1f72602bbd
+--- /dev/null
++++ b/arch/x86/include/asm/hreset.h
+@@ -0,0 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _ASM_X86_HRESET_H
++#define _ASM_X86_HRESET_H
++void reset_hardware_history_hetero(void);
++
++#endif /* _ASM_X86_HRESET_H */
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index 82c6a4d350e0..a70c1475725a 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -690,6 +690,11 @@
+ #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
+ #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
+ ++/* AMD Hardware Feedback Support MSRs */ ++#define AMD_WORKLOAD_CLASS_CONFIG 0xc0000500 ++#define AMD_WORKLOAD_CLASS_ID 0xc0000501 ++#define AMD_WORKLOAD_HRST 0xc0000502 ++ + /* AMD Last Branch Record MSRs */ + #define MSR_AMD64_LBR_SELECT 0xc000010e + +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index be307c9ef263..59907fc6849f 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -57,6 +57,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -403,6 +404,7 @@ static const unsigned long cr4_pinned_mask = X86_CR4_SMEP | X86_CR4_SMAP | X86_C + X86_CR4_FSGSBASE | X86_CR4_CET | X86_CR4_FRED; + static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); + static unsigned long cr4_pinned_bits __ro_after_init; ++static DEFINE_STATIC_KEY_FALSE_RO(hardware_history_features); + + void native_write_cr0(unsigned long val) + { +@@ -481,6 +483,12 @@ void cr4_init(void) + this_cpu_write(cpu_tlbstate.cr4, cr4); + } + ++static void __init setup_hreset(struct cpuinfo_x86 *c) ++{ ++ if (cpu_feature_enabled(X86_FEATURE_WORKLOAD_CLASS)) ++ static_key_enable_cpuslocked(&hardware_history_features.key); ++} ++ + /* + * Once CPU feature detection is finished (and boot params have been + * parsed), record any of the sensitive CR bits that are set, and +@@ -1839,6 +1847,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) + setup_smep(c); + setup_smap(c); + setup_umip(c); ++ setup_hreset(c); + + /* Enable FSGSBASE instructions if available. */ + if (cpu_has(c, X86_FEATURE_FSGSBASE)) { +@@ -2392,3 +2401,9 @@ void __init arch_cpu_finalize_init(void) + */ + mem_encrypt_init(); + } ++ ++__always_inline void reset_hardware_history_hetero(void) ++{ ++ if (static_branch_unlikely(&hardware_history_features)) ++ wrmsrl(AMD_WORKLOAD_HRST, 0x1); ++} +diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c +index 3bba55323163..b6547bab9e23 100644 +--- a/arch/x86/kernel/cpu/scattered.c ++++ b/arch/x86/kernel/cpu/scattered.c +@@ -49,6 +49,7 @@ static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, + { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, + { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, ++ { X86_FEATURE_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 }, + { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, + { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, + { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, +diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c +index 0917c7f25720..6a3a1339f7a7 100644 +--- a/arch/x86/kernel/process_32.c ++++ b/arch/x86/kernel/process_32.c +@@ -52,6 +52,7 @@ + #include + #include + #include ++#include + #include + + #include "process.h" +@@ -213,6 +214,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) + /* Load the Intel cache allocation PQR MSR. */ + resctrl_sched_in(next_p); + ++ reset_hardware_history_hetero(); ++ + return prev_p; + } + +diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c +index d8d582b750d4..53e3756384d6 100644 +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -54,6 +54,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -709,6 +710,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) + /* Load the Intel cache allocation PQR MSR. 
*/ + resctrl_sched_in(next_p); + ++ reset_hardware_history_hetero(); ++ + return prev_p; + } + +diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c +index 390e4fe7433e..9963ac376523 100644 +--- a/arch/x86/kernel/smpboot.c ++++ b/arch/x86/kernel/smpboot.c +@@ -497,8 +497,9 @@ static int x86_cluster_flags(void) + + static int x86_die_flags(void) + { +- if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) +- return x86_sched_itmt_flags(); ++ if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU) || ++ cpu_feature_enabled(X86_FEATURE_HETERO_CORE_TOPOLOGY)) ++ return x86_sched_itmt_flags(); + + return 0; + } +diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c +index 41965c49ec96..0f9743ba10ab 100644 +--- a/drivers/cpufreq/amd-pstate.c ++++ b/drivers/cpufreq/amd-pstate.c +@@ -781,6 +781,12 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) + /* user disabled or not detected */ + if (!amd_pstate_prefcore) + return; ++ /* should use amd-hfi instead */ ++ if (boot_cpu_has(X86_FEATURE_WORKLOAD_CLASS) && ++ IS_ENABLED(CONFIG_AMD_HFI)) { ++ amd_pstate_prefcore = false; ++ return; ++ } + + cpudata->hw_prefcore = true; + +diff --git a/drivers/platform/x86/amd/Kconfig b/drivers/platform/x86/amd/Kconfig +index d73f691020d0..0e5eb064116d 100644 +--- a/drivers/platform/x86/amd/Kconfig ++++ b/drivers/platform/x86/amd/Kconfig +@@ -5,6 +5,7 @@ + + source "drivers/platform/x86/amd/pmf/Kconfig" + source "drivers/platform/x86/amd/pmc/Kconfig" ++source "drivers/platform/x86/amd/hfi/Kconfig" + + config AMD_3D_VCACHE + tristate "AMD 3D V-Cache Performance Optimizer Driver" +diff --git a/drivers/platform/x86/amd/Makefile b/drivers/platform/x86/amd/Makefile +index 16e4cce02242..4a6ddcda8325 100644 +--- a/drivers/platform/x86/amd/Makefile ++++ b/drivers/platform/x86/amd/Makefile +@@ -11,3 +11,4 @@ amd_hsmp-y := hsmp.o + obj-$(CONFIG_AMD_HSMP) += amd_hsmp.o + obj-$(CONFIG_AMD_PMF) += pmf/ + obj-$(CONFIG_AMD_WBRF) += wbrf.o ++obj-$(CONFIG_AMD_HFI) += hfi/ +diff --git a/drivers/platform/x86/amd/hfi/Kconfig b/drivers/platform/x86/amd/hfi/Kconfig +new file mode 100644 +index 000000000000..4dfa7641b35b +--- /dev/null ++++ b/drivers/platform/x86/amd/hfi/Kconfig +@@ -0,0 +1,21 @@ ++# SPDX-License-Identifier: GPL-2.0-only ++# ++# AMD Hardware Feedback Interface Driver ++# ++ ++config AMD_HFI ++ bool "AMD Hetero Core Hardware Feedback Driver" ++ depends on ACPI ++ depends on CPU_SUP_AMD ++ depends on SCHED_MC_PRIO ++ help ++ Select this option to enable the AMD Heterogeneous Core Hardware Feedback Interface. If ++ selected, hardware provides runtime thread classification guidance to the operating system ++ on the performance and energy efficiency capabilities of each heterogeneous CPU core. ++ These capabilities may vary due to the inherent differences in the core types and can ++ also change as a result of variations in the operating conditions of the system such ++ as power and thermal limits. If selected, the kernel relays updates in heterogeneous ++ CPUs' capabilities to userspace, allowing for more optimal task scheduling and ++ resource allocation, leveraging the diverse set of cores available. 
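Assuming the dependencies above are satisfied, enabling the driver in a kernel
.config would look roughly like this (illustrative fragment; AMD_HFI is a bool,
so there is no module form):

	CONFIG_ACPI=y
	CONFIG_CPU_SUP_AMD=y
	CONFIG_SCHED_MC_PRIO=y
	CONFIG_AMD_HFI=y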
++
++
+diff --git a/drivers/platform/x86/amd/hfi/Makefile b/drivers/platform/x86/amd/hfi/Makefile
+new file mode 100644
+index 000000000000..672c6ac106e9
+--- /dev/null
++++ b/drivers/platform/x86/amd/hfi/Makefile
+@@ -0,0 +1,7 @@
++# SPDX-License-Identifier: GPL-2.0
++#
++# AMD Hardware Feedback Interface Driver
++#
++
++obj-$(CONFIG_AMD_HFI) += amd_hfi.o
++amd_hfi-objs := hfi.o
+diff --git a/drivers/platform/x86/amd/hfi/hfi.c b/drivers/platform/x86/amd/hfi/hfi.c
+new file mode 100644
+index 000000000000..6df80f6ac73c
+--- /dev/null
++++ b/drivers/platform/x86/amd/hfi/hfi.c
+@@ -0,0 +1,552 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++/*
++ * AMD Hardware Feedback Interface Driver
++ *
++ * Copyright (C) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
++ *
++ * Authors: Perry Yuan
++ * Mario Limonciello
++ */
++
++#define pr_fmt(fmt) "amd-hfi: " fmt
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++
++#include
++
++#include
++#include
++
++#define AMD_HFI_DRIVER "amd_hfi"
++#define AMD_HFI_MAILBOX_COUNT 1
++#define AMD_HETERO_RANKING_TABLE_VER 2
++
++#define AMD_HETERO_CPUID_27 0x80000027
++
++static struct platform_device *device;
++
++/**
++ * struct amd_shmem_info - Shared memory table for AMD HFI
++ *
++ * @signature: The PCC signature. The signature of a subspace is computed by
++ * a bitwise OR of the value 0x50434300 with the subspace ID.
++ * @flags: Notify on completion
++ * @length: Length of payload being transmitted including command field
++ * @command: Command being sent over the subspace
++ * @version_number: Version number of the table
++ * @n_logical_processors: Number of logical processors
++ * @n_capabilities: Number of ranking dimensions (performance, efficiency, etc.)
++ * @table_update_context: Command being sent over the subspace
++ * @n_bitmaps: Number of 32-bit bitmaps to enumerate all the APIC IDs
++ * This is based on the maximum APIC ID enumerated in the system
++ * @reserved: 24 bit spare
++ * @table_data: Bit Map(s) of enabled logical processors
++ * Followed by the ranking data for each logical processor
++ */
++struct amd_shmem_info {
++ struct acpi_pcct_ext_pcc_shared_memory header;
++ u32 version_number :8,
++ n_logical_processors :8,
++ n_capabilities :8,
++ table_update_context :8;
++ u32 n_bitmaps :8,
++ reserved :24;
++ u32 table_data[];
++} __packed;
++
++struct amd_hfi_data {
++ const char *name;
++ struct device *dev;
++ struct mutex lock;
++
++ /* PCCT table related */
++ struct pcc_mbox_chan *pcc_chan;
++ void __iomem *pcc_comm_addr;
++ struct acpi_subtable_header *pcct_entry;
++ struct amd_shmem_info *shmem;
++
++ struct dentry *dbgfs_dir;
++};
++
++/**
++ * struct amd_hfi_classes - HFI class capabilities per CPU
++ * @perf: Performance capability
++ * @eff: Power efficiency capability
++ *
++ * Capabilities of a logical processor in the ranking table. These capabilities
++ * are unitless and specific to each HFI class.
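++ * Higher values rank a CPU ahead of CPUs with lower values for that class;
++ * the driver reports the class-0 performance value to ITMT as the core
++ * priority (see amd_set_hfi_ipcc_score()).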
++ */ ++struct amd_hfi_classes { ++ u32 perf; ++ u32 eff; ++}; ++ ++/** ++ * struct amd_hfi_cpuinfo - HFI workload class info per CPU ++ * @cpu: cpu index ++ * @apic_id: apic id of the current cpu ++ * @cpus: mask of cpus associated with amd_hfi_cpuinfo ++ * @class_index: workload class ID index ++ * @nr_class: max number of workload class supported ++ * @ipcc_scores: ipcc scores for each class ++ * @amd_hfi_classes: current cpu workload class ranking data ++ * ++ * Parameters of a logical processor linked with hardware feedback class ++ */ ++struct amd_hfi_cpuinfo { ++ int cpu; ++ u32 apic_id; ++ cpumask_var_t cpus; ++ s16 class_index; ++ u8 nr_class; ++ int *ipcc_scores; ++ struct amd_hfi_classes *amd_hfi_classes; ++}; ++ ++static DEFINE_PER_CPU(struct amd_hfi_cpuinfo, amd_hfi_cpuinfo) = {.class_index = -1}; ++ ++static DEFINE_MUTEX(hfi_cpuinfo_lock); ++ ++static void amd_hfi_sched_itmt_work(struct work_struct *work) ++{ ++ sched_set_itmt_support(); ++} ++static DECLARE_WORK(sched_amd_hfi_itmt_work, amd_hfi_sched_itmt_work); ++ ++static int find_cpu_index_by_apicid(unsigned int target_apicid) ++{ ++ int cpu_index; ++ ++ for_each_present_cpu(cpu_index) { ++ struct cpuinfo_x86 *info = &cpu_data(cpu_index); ++ ++ if (info->topo.apicid == target_apicid) { ++ pr_debug("match APIC id %d for CPU index: %d\n", ++ info->topo.apicid, cpu_index); ++ return cpu_index; ++ } ++ } ++ ++ return -ENODEV; ++} ++ ++static int amd_hfi_fill_metadata(struct amd_hfi_data *amd_hfi_data) ++{ ++ struct acpi_pcct_ext_pcc_slave *pcct_ext = ++ (struct acpi_pcct_ext_pcc_slave *)amd_hfi_data->pcct_entry; ++ void __iomem *pcc_comm_addr; ++ ++ pcc_comm_addr = acpi_os_ioremap(amd_hfi_data->pcc_chan->shmem_base_addr, ++ amd_hfi_data->pcc_chan->shmem_size); ++ if (!pcc_comm_addr) { ++ pr_err("failed to ioremap PCC common region mem\n"); ++ return -ENOMEM; ++ } ++ ++ memcpy_fromio(amd_hfi_data->shmem, pcc_comm_addr, pcct_ext->length); ++ iounmap(pcc_comm_addr); ++ ++ if (amd_hfi_data->shmem->header.signature != PCC_SIGNATURE) { ++ pr_err("invalid signature in shared memory\n"); ++ return -EINVAL; ++ } ++ if (amd_hfi_data->shmem->version_number != AMD_HETERO_RANKING_TABLE_VER) { ++ pr_err("invalid version %d\n", amd_hfi_data->shmem->version_number); ++ return -EINVAL; ++ } ++ ++ for (unsigned int i = 0; i < amd_hfi_data->shmem->n_bitmaps; i++) { ++ u32 bitmap = amd_hfi_data->shmem->table_data[i]; ++ ++ for (unsigned int j = 0; j < BITS_PER_TYPE(u32); j++) { ++ struct amd_hfi_cpuinfo *info; ++ int apic_id = i * BITS_PER_TYPE(u32) + j; ++ int cpu_index; ++ ++ if (!(bitmap & BIT(j))) ++ continue; ++ ++ cpu_index = find_cpu_index_by_apicid(apic_id); ++ if (cpu_index < 0) { ++ pr_warn("APIC ID %d not found\n", apic_id); ++ continue; ++ } ++ ++ info = per_cpu_ptr(&amd_hfi_cpuinfo, cpu_index); ++ info->apic_id = apic_id; ++ ++ /* Fill the ranking data for each logical processor */ ++ info = per_cpu_ptr(&amd_hfi_cpuinfo, cpu_index); ++ for (unsigned int k = 0; k < info->nr_class; k++) { ++ u32 *table = amd_hfi_data->shmem->table_data + ++ amd_hfi_data->shmem->n_bitmaps + ++ i * info->nr_class; ++ ++ info->amd_hfi_classes[k].eff = table[apic_id + 2 * k]; ++ info->amd_hfi_classes[k].perf = table[apic_id + 2 * k + 1]; ++ } ++ } ++ } ++ ++ return 0; ++} ++ ++static int amd_hfi_alloc_class_data(struct platform_device *pdev) ++{ ++ struct amd_hfi_cpuinfo *hfi_cpuinfo; ++ struct device *dev = &pdev->dev; ++ int idx; ++ int nr_class_id; ++ ++ nr_class_id = cpuid_eax(AMD_HETERO_CPUID_27); ++ if (nr_class_id < 0 || nr_class_id > 255) { ++ 
dev_err(dev, "failed to get number of supported classes: %d\n",
++ nr_class_id);
++ return -EINVAL;
++ }
++
++ for_each_present_cpu(idx) {
++ struct amd_hfi_classes *classes;
++ int *ipcc_scores;
++
++ classes = devm_kzalloc(dev,
++ nr_class_id * sizeof(struct amd_hfi_classes),
++ GFP_KERNEL);
++ if (!classes)
++ return -ENOMEM;
++ ipcc_scores = devm_kcalloc(dev, nr_class_id, sizeof(int), GFP_KERNEL);
++ if (!ipcc_scores)
++ return -ENOMEM;
++ hfi_cpuinfo = per_cpu_ptr(&amd_hfi_cpuinfo, idx);
++ hfi_cpuinfo->amd_hfi_classes = classes;
++ hfi_cpuinfo->ipcc_scores = ipcc_scores;
++ hfi_cpuinfo->nr_class = nr_class_id;
++ }
++
++ return 0;
++}
++
++static void amd_hfi_remove(struct platform_device *pdev)
++{
++ struct amd_hfi_data *dev = platform_get_drvdata(pdev);
++
++ debugfs_remove_recursive(dev->dbgfs_dir);
++
++ mutex_destroy(&dev->lock);
++}
++
++static int amd_set_hfi_ipcc_score(struct amd_hfi_cpuinfo *hfi_cpuinfo, int cpu)
++{
++ for (int i = 0; i < hfi_cpuinfo->nr_class; i++)
++ WRITE_ONCE(hfi_cpuinfo->ipcc_scores[i],
++ hfi_cpuinfo->amd_hfi_classes[i].perf);
++
++ sched_set_itmt_core_prio(hfi_cpuinfo->ipcc_scores[0], cpu);
++
++ return 0;
++}
++
++static int amd_hfi_set_state(unsigned int cpu, bool state)
++{
++ int ret;
++
++ ret = wrmsrl_on_cpu(cpu, AMD_WORKLOAD_CLASS_CONFIG, state);
++ if (ret)
++ return ret;
++
++ return wrmsrl_on_cpu(cpu, AMD_WORKLOAD_HRST, 0x1);
++}
++
++/**
++ * amd_hfi_online() - Enable workload classification on @cpu
++ * @cpu: CPU on which the workload classification will be enabled
++ *
++ * Return: 0 on success, negative error code on failure
++ */
++static int amd_hfi_online(unsigned int cpu)
++{
++ struct amd_hfi_cpuinfo *hfi_info = per_cpu_ptr(&amd_hfi_cpuinfo, cpu);
++ struct amd_hfi_classes *hfi_classes;
++ int ret;
++
++ if (WARN_ON_ONCE(!hfi_info))
++ return -EINVAL;
++
++ /*
++ * Check that @cpu has an associated, initialized ranking table to fill
++ */
++ hfi_classes = hfi_info->amd_hfi_classes;
++ if (!hfi_classes)
++ return -EINVAL;
++
++ guard(mutex)(&hfi_cpuinfo_lock);
++
++ if (!zalloc_cpumask_var(&hfi_info->cpus, GFP_KERNEL))
++ return -ENOMEM;
++
++ cpumask_set_cpu(cpu, hfi_info->cpus);
++
++ ret = amd_hfi_set_state(cpu, true);
++ if (ret)
++ pr_err("WCT enable failed for CPU %d\n", cpu);
++
++ return ret;
++}
++
++/**
++ * amd_hfi_offline() - Disable workload classification on @cpu
++ * @cpu: CPU on which the workload classification will be disabled
++ *
++ * Remove @cpu from those covered by its HFI instance.
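++ * Workload classification is disabled through the configuration MSR before
++ * the per-CPU mask is freed.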
++ * ++ * Return: 0 on success, negative error code on failure ++ */ ++static int amd_hfi_offline(unsigned int cpu) ++{ ++ struct amd_hfi_cpuinfo *hfi_info = &per_cpu(amd_hfi_cpuinfo, cpu); ++ int ret; ++ ++ if (WARN_ON_ONCE(!hfi_info)) ++ return -EINVAL; ++ ++ guard(mutex)(&hfi_cpuinfo_lock); ++ ++ ret = amd_hfi_set_state(cpu, false); ++ if (ret) ++ pr_err("WCT disable failed for CPU %d\n", cpu); ++ ++ free_cpumask_var(hfi_info->cpus); ++ ++ return ret; ++} ++ ++static int update_hfi_ipcc_scores(void) ++{ ++ int cpu; ++ int ret; ++ ++ for_each_present_cpu(cpu) { ++ struct amd_hfi_cpuinfo *hfi_cpuinfo = per_cpu_ptr(&amd_hfi_cpuinfo, cpu); ++ ++ ret = amd_set_hfi_ipcc_score(hfi_cpuinfo, cpu); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int amd_hfi_metadata_parser(struct platform_device *pdev, ++ struct amd_hfi_data *amd_hfi_data) ++{ ++ struct acpi_pcct_ext_pcc_slave *pcct_ext; ++ struct acpi_subtable_header *pcct_entry; ++ struct mbox_chan *pcc_mbox_channels; ++ struct acpi_table_header *pcct_tbl; ++ struct pcc_mbox_chan *pcc_chan; ++ acpi_status status; ++ int ret; ++ ++ pcc_mbox_channels = devm_kcalloc(&pdev->dev, AMD_HFI_MAILBOX_COUNT, ++ sizeof(*pcc_mbox_channels), GFP_KERNEL); ++ if (!pcc_mbox_channels) ++ return -ENOMEM; ++ ++ pcc_chan = devm_kcalloc(&pdev->dev, AMD_HFI_MAILBOX_COUNT, ++ sizeof(*pcc_chan), GFP_KERNEL); ++ if (!pcc_chan) ++ return -ENOMEM; ++ ++ status = acpi_get_table(ACPI_SIG_PCCT, 0, &pcct_tbl); ++ if (ACPI_FAILURE(status) || !pcct_tbl) ++ return -ENODEV; ++ ++ /* get pointer to the first PCC subspace entry */ ++ pcct_entry = (struct acpi_subtable_header *) ( ++ (unsigned long)pcct_tbl + sizeof(struct acpi_table_pcct)); ++ ++ pcc_chan->mchan = &pcc_mbox_channels[0]; ++ ++ amd_hfi_data->pcc_chan = pcc_chan; ++ amd_hfi_data->pcct_entry = pcct_entry; ++ pcct_ext = (struct acpi_pcct_ext_pcc_slave *)pcct_entry; ++ ++ if (pcct_ext->length <= 0) ++ return -EINVAL; ++ ++ amd_hfi_data->shmem = devm_kzalloc(amd_hfi_data->dev, pcct_ext->length, GFP_KERNEL); ++ if (!amd_hfi_data->shmem) ++ return -ENOMEM; ++ ++ pcc_chan->shmem_base_addr = pcct_ext->base_address; ++ pcc_chan->shmem_size = pcct_ext->length; ++ ++ /* parse the shared memory info from the pcct table */ ++ ret = amd_hfi_fill_metadata(amd_hfi_data); ++ ++ acpi_put_table(pcct_tbl); ++ ++ return ret; ++} ++ ++static int class_capabilities_show(struct seq_file *s, void *unused) ++{ ++ int cpu, idx; ++ ++ seq_puts(s, "CPU #\tWLC\tPerf\tEff\n"); ++ for_each_present_cpu(cpu) { ++ struct amd_hfi_cpuinfo *hfi_cpuinfo = per_cpu_ptr(&amd_hfi_cpuinfo, cpu); ++ ++ seq_printf(s, "%d\t", cpu); ++ for (idx = 0; idx < hfi_cpuinfo->nr_class; idx++) { ++ seq_printf(s, "%s%d\t%d\t%d\n", ++ idx == 0 ? 
"" : "\t", ++ idx, ++ hfi_cpuinfo->amd_hfi_classes[idx].perf, ++ hfi_cpuinfo->amd_hfi_classes[idx].eff); ++ } ++ } ++ ++ return 0; ++} ++DEFINE_SHOW_ATTRIBUTE(class_capabilities); ++ ++static int amd_hfi_pm_resume(struct device *dev) ++{ ++ int ret, cpu; ++ ++ for_each_present_cpu(cpu) { ++ ret = amd_hfi_set_state(cpu, true); ++ if (ret < 0) { ++ dev_err(dev, "failed to enable workload class config: %d\n", ret); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++static int amd_hfi_pm_suspend(struct device *dev) ++{ ++ int ret, cpu; ++ ++ for_each_present_cpu(cpu) { ++ ret = amd_hfi_set_state(cpu, false); ++ if (ret < 0) { ++ dev_err(dev, "failed to disable workload class config: %d\n", ret); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++static DEFINE_SIMPLE_DEV_PM_OPS(amd_hfi_pm_ops, amd_hfi_pm_suspend, amd_hfi_pm_resume); ++ ++static const struct acpi_device_id amd_hfi_platform_match[] = { ++ { "AMDI0104", 0}, ++ { } ++}; ++MODULE_DEVICE_TABLE(acpi, amd_hfi_platform_match); ++ ++static int amd_hfi_probe(struct platform_device *pdev) ++{ ++ struct amd_hfi_data *amd_hfi_data; ++ int ret; ++ ++ if (!acpi_match_device(amd_hfi_platform_match, &pdev->dev)) ++ return -ENODEV; ++ ++ amd_hfi_data = devm_kzalloc(&pdev->dev, sizeof(*amd_hfi_data), GFP_KERNEL); ++ if (!amd_hfi_data) ++ return -ENOMEM; ++ ++ amd_hfi_data->dev = &pdev->dev; ++ mutex_init(&amd_hfi_data->lock); ++ platform_set_drvdata(pdev, amd_hfi_data); ++ ++ ret = amd_hfi_alloc_class_data(pdev); ++ if (ret) ++ return ret; ++ ++ ret = amd_hfi_metadata_parser(pdev, amd_hfi_data); ++ if (ret) ++ return ret; ++ ++ ret = update_hfi_ipcc_scores(); ++ if (ret) ++ return ret; ++ ++ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/amd_hfi:online", ++ amd_hfi_online, amd_hfi_offline); ++ if (ret < 0) ++ return ret; ++ ++ schedule_work(&sched_amd_hfi_itmt_work); ++ ++ amd_hfi_data->dbgfs_dir = debugfs_create_dir("amd_hfi", arch_debugfs_dir); ++ debugfs_create_file("class_capabilities", 0644, amd_hfi_data->dbgfs_dir, pdev, ++ &class_capabilities_fops); ++ ++ return 0; ++} ++ ++static struct platform_driver amd_hfi_driver = { ++ .driver = { ++ .name = AMD_HFI_DRIVER, ++ .owner = THIS_MODULE, ++ .pm = &amd_hfi_pm_ops, ++ .acpi_match_table = ACPI_PTR(amd_hfi_platform_match), ++ }, ++ .probe = amd_hfi_probe, ++ .remove_new = amd_hfi_remove, ++}; ++ ++static int __init amd_hfi_init(void) ++{ ++ int ret; ++ ++ if (acpi_disabled || ++ !boot_cpu_has(X86_FEATURE_HETERO_CORE_TOPOLOGY) || ++ !boot_cpu_has(X86_FEATURE_WORKLOAD_CLASS)) ++ return -ENODEV; ++ ++ device = platform_device_register_simple(AMD_HFI_DRIVER, -1, NULL, 0); ++ if (IS_ERR(device)) { ++ pr_err("unable to register HFI platform device\n"); ++ return PTR_ERR(device); ++ } ++ ++ ret = platform_driver_register(&amd_hfi_driver); ++ if (ret) ++ pr_err("failed to register HFI driver\n"); ++ ++ return ret; ++} ++ ++static __exit void amd_hfi_exit(void) ++{ ++ platform_device_unregister(device); ++ platform_driver_unregister(&amd_hfi_driver); ++} ++module_init(amd_hfi_init); ++module_exit(amd_hfi_exit); ++ ++MODULE_LICENSE("GPL"); ++MODULE_DESCRIPTION("AMD Hardware Feedback Interface Driver"); +-- +2.47.0.rc0 + +From 7cf8ae09aff45db05ea4618ab67a94411add263c Mon Sep 17 00:00:00 2001 +From: Peter Jung +Date: Thu, 17 Oct 2024 16:03:48 +0200 +Subject: [PATCH 05/14] bbr3 Signed-off-by: Peter Jung --- @@ -3144,7 +5476,7 @@ index 0306d257fa64..28f581c0dab7 100644 icsk->icsk_ca_ops->init(sk); if (tcp_ca_needs_ecn(sk)) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index 
e37488d3453f..62eef7d067c2 100644 +index 889db23bfc05..b924a852f108 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -370,7 +370,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) @@ -3196,7 +5528,7 @@ index e37488d3453f..62eef7d067c2 100644 /* When we're adding to gso_segs == 1, gso_size will be zero, * in theory this shouldn't be necessary but as long as DSACK * code can come after this skb later on it's better to keep -@@ -3799,7 +3815,8 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) +@@ -3826,7 +3842,8 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) /* This routine deals with acks during a TLP episode and ends an episode by * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack */ @@ -3206,7 +5538,7 @@ index e37488d3453f..62eef7d067c2 100644 { struct tcp_sock *tp = tcp_sk(sk); -@@ -3816,6 +3833,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) +@@ -3843,6 +3860,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) /* ACK advances: there was a loss, so reduce cwnd. Reset * tlp_high_seq in tcp_init_cwnd_reduction() */ @@ -3214,7 +5546,7 @@ index e37488d3453f..62eef7d067c2 100644 tcp_init_cwnd_reduction(sk); tcp_set_ca_state(sk, TCP_CA_CWR); tcp_end_cwnd_reduction(sk); -@@ -3826,6 +3844,11 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) +@@ -3853,6 +3871,11 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) FLAG_NOT_DUP | FLAG_DATA_SACKED))) { /* Pure dupack: original and TLP probe arrived; no loss */ tp->tlp_high_seq = 0; @@ -3226,7 +5558,7 @@ index e37488d3453f..62eef7d067c2 100644 } } -@@ -3934,6 +3957,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) +@@ -3961,6 +3984,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una; rs.prior_in_flight = tcp_packets_in_flight(tp); @@ -3234,7 +5566,7 @@ index e37488d3453f..62eef7d067c2 100644 /* ts_recent update must be made after we are sure that the packet * is in window. 
-@@ -4008,7 +4032,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) +@@ -4035,7 +4059,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_rack_update_reo_wnd(sk, &rs); if (tp->tlp_high_seq) @@ -3243,7 +5575,7 @@ index e37488d3453f..62eef7d067c2 100644 if (tcp_ack_is_dubious(sk, flag)) { if (!(flag & (FLAG_SND_UNA_ADVANCED | -@@ -4032,6 +4056,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) +@@ -4059,6 +4083,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) delivered = tcp_newly_delivered(sk, delivered, flag); lost = tp->lost - lost; /* freshly marked lost */ rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); @@ -3251,7 +5583,7 @@ index e37488d3453f..62eef7d067c2 100644 tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); tcp_xmit_recovery(sk, rexmit); -@@ -4051,7 +4076,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) +@@ -4078,7 +4103,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_ack_probe(sk); if (tp->tlp_high_seq) @@ -3260,7 +5592,7 @@ index e37488d3453f..62eef7d067c2 100644 return 1; old_ack: -@@ -5718,13 +5743,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) +@@ -5745,13 +5770,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) /* More than one full frame received... */ if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && @@ -3496,10 +5828,10 @@ index 4d40615dc8fc..f27941201ef2 100644 -- 2.47.0.rc0 -From 4b6c037745c54b45bdea3c80010809161d95484f Mon Sep 17 00:00:00 2001 +From f623d7c10eb37781bcac491a7910c9b571c3e05a Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 10 Oct 2024 12:37:04 +0200 -Subject: [PATCH 03/11] cachy +Date: Thu, 17 Oct 2024 16:03:58 +0200 +Subject: [PATCH 06/14] cachy Signed-off-by: Peter Jung --- @@ -3602,7 +5934,7 @@ index be010fec7654..900113802ffc 100644 Safety option to keep boot IRQs enabled. This should never be necessary. 
diff --git a/Makefile b/Makefile -index 108d314ea95b..6d817eab1d73 100644 +index 50c615983e44..0ae8b28f1046 100644 --- a/Makefile +++ b/Makefile @@ -803,11 +803,19 @@ KBUILD_CFLAGS += -fno-delete-null-pointer-checks @@ -4578,10 +6910,10 @@ index df17e79c45c7..e454488c1a31 100644 + endmenu diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -index 1ab7cd8a6b6a..1d619a4bf1d6 100644 +index 4f19e9736a67..575fdcfb138c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -@@ -4444,7 +4444,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) +@@ -4445,7 +4445,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) return r; } @@ -9752,10 +12084,10 @@ index 000000000000..e105e6f5cc91 +MODULE_AUTHOR("Daniel Drake "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index 5d57ea27dbc4..0f7f8c737ae3 100644 +index dccb60c1d9cc..d9a8af789de8 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c -@@ -3745,6 +3745,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev) +@@ -3747,6 +3747,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev) dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET; } @@ -9862,7 +12194,7 @@ index 5d57ea27dbc4..0f7f8c737ae3 100644 /* * Some NVIDIA GPU devices do not work with bus reset, SBR needs to be * prevented for those affected devices. -@@ -5160,6 +5260,7 @@ static const struct pci_dev_acs_enabled { +@@ -5168,6 +5268,7 @@ static const struct pci_dev_acs_enabled { { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, /* Wangxun nics */ { PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs }, @@ -10524,20 +12856,20 @@ index 663ce300dd06..f83493838cf9 100644 -- 2.47.0.rc0 -From e033c3648c593fe0bd0d0bc911cccefdb4ec3e4a Mon Sep 17 00:00:00 2001 +From c094e7c9e3dfae9733aa8ab22ed230f07d975c5f Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 10 Oct 2024 12:37:16 +0200 -Subject: [PATCH 04/11] fixes +Date: Thu, 17 Oct 2024 16:04:15 +0200 +Subject: [PATCH 07/14] fixes Signed-off-by: Peter Jung --- arch/Kconfig | 4 +- - arch/x86/kernel/amd_nb.c | 4 ++ + arch/x86/kernel/apic/apic.c | 14 +++++- arch/x86/kernel/cpu/amd.c | 3 +- drivers/bluetooth/btusb.c | 4 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 ++ - drivers/gpu/drm/amd/display/dc/core/dc.c | 45 ++++++++++++----- - drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 9 +++- + drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 5 +- + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 15 ++++-- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 3 +- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h | 2 +- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v12_0.h | 2 +- @@ -10559,7 +12891,8 @@ Signed-off-by: Peter Jung .../gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c | 33 +++++++++---- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 3 +- drivers/net/wireless/realtek/rtw89/pci.c | 48 ++++++++++++++++--- - 28 files changed, 259 insertions(+), 124 deletions(-) + mm/shrinker.c | 4 +- + 29 files changed, 244 insertions(+), 119 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 975dd22a2dbd..de69b8f5b5be 100644 @@ -10583,33 +12916,36 @@ index 975dd22a2dbd..de69b8f5b5be 100644 depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS help This value can be used to select the number of bits to use to -diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c -index 059e5c16af05..e8c3d65aee60 100644 ---- a/arch/x86/kernel/amd_nb.c -+++ 
b/arch/x86/kernel/amd_nb.c -@@ -43,6 +43,8 @@ - #define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4 - #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc - #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4 0x12c4 -+#define PCI_DEVICE_ID_AMD_1AH_M60H_DF_F4 0x124c -+#define PCI_DEVICE_ID_AMD_1AH_M70H_DF_F4 0x12bc - #define PCI_DEVICE_ID_AMD_MI200_DF_F4 0x14d4 - #define PCI_DEVICE_ID_AMD_MI300_DF_F4 0x152c +diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c +index 373638691cd4..81089f99576c 100644 +--- a/arch/x86/kernel/apic/apic.c ++++ b/arch/x86/kernel/apic/apic.c +@@ -440,7 +440,19 @@ static int lapic_timer_shutdown(struct clock_event_device *evt) + v = apic_read(APIC_LVTT); + v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, v); +- apic_write(APIC_TMICT, 0); ++ ++ /* ++ * Setting APIC_LVT_MASKED should be enough to tell the ++ * hardware that this timer will never fire. But AMD ++ * erratum 411 and some Intel CPU behavior circa 2024 ++ * say otherwise. Time for belt and suspenders programming, ++ * mask the timer and zero the counter registers: ++ */ ++ if (v & APIC_LVT_TIMER_TSCDEADLINE) ++ wrmsrl(MSR_IA32_TSC_DEADLINE, 0); ++ else ++ apic_write(APIC_TMICT, 0); ++ + return 0; + } -@@ -122,6 +124,8 @@ static const struct pci_device_id amd_nb_link_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_DF_F4) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M70H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI300_DF_F4) }, - {} diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index 1e0fe5f8ab84..65b2f0c5ec2d 100644 +index 8ad5f1385f0e..353e5b8cad34 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c -@@ -1218,5 +1218,6 @@ void amd_check_microcode(void) +@@ -1202,7 +1202,8 @@ void amd_check_microcode(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) return; @@ -10617,8 +12953,10 @@ index 1e0fe5f8ab84..65b2f0c5ec2d 100644 + if (boot_cpu_has(X86_FEATURE_ZEN2)) + on_each_cpu(zenbleed_check_cpu, NULL, 1); } + + /** diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c -index 93dbeb8b348d..e0458bcde8d1 100644 +index a1e9b052bc84..447905a5cf1d 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -692,6 +692,10 @@ static const struct usb_device_id quirks_table[] = { @@ -10648,94 +12986,24 @@ index 9c3b7b027485..ad5c05ee92f3 100644 /* let modprobe override vga console setting */ return pci_register_driver(&amdgpu_kms_pci_driver); -diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c -index 9e05d77453ac..5e12a1624124 100644 ---- a/drivers/gpu/drm/amd/display/dc/core/dc.c -+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c -@@ -5120,11 +5120,26 @@ static bool update_planes_and_stream_v3(struct dc *dc, - return true; - } - -+static void clear_update_flags(struct dc_surface_update *srf_updates, -+ int surface_count, struct dc_stream_state *stream) -+{ -+ int i; -+ -+ if (stream) -+ stream->update_flags.raw = 0; -+ -+ for (i = 0; i < surface_count; i++) -+ if (srf_updates[i].surface) -+ srf_updates[i].surface->update_flags.raw = 0; -+} -+ - bool dc_update_planes_and_stream(struct dc *dc, - struct dc_surface_update *srf_updates, int surface_count, - struct 
dc_stream_state *stream, - struct dc_stream_update *stream_update) - { -+ bool ret = false; -+ - dc_exit_ips_for_hw_access(dc); - /* - * update planes and stream version 3 separates FULL and FAST updates -@@ -5141,10 +5156,16 @@ bool dc_update_planes_and_stream(struct dc *dc, - * features as they are now transparent to the new sequence. - */ - if (dc->ctx->dce_version >= DCN_VERSION_4_01) -- return update_planes_and_stream_v3(dc, srf_updates, -+ ret = update_planes_and_stream_v3(dc, srf_updates, - surface_count, stream, stream_update); -- return update_planes_and_stream_v2(dc, srf_updates, -+ else -+ ret = update_planes_and_stream_v2(dc, srf_updates, - surface_count, stream, stream_update); -+ -+ if (ret) -+ clear_update_flags(srf_updates, surface_count, stream); -+ -+ return ret; - } - - void dc_commit_updates_for_stream(struct dc *dc, -@@ -5154,6 +5175,8 @@ void dc_commit_updates_for_stream(struct dc *dc, - struct dc_stream_update *stream_update, - struct dc_state *state) - { -+ bool ret = false; -+ - dc_exit_ips_for_hw_access(dc); - /* TODO: Since change commit sequence can have a huge impact, - * we decided to only enable it for DCN3x. However, as soon as -@@ -5161,17 +5184,17 @@ void dc_commit_updates_for_stream(struct dc *dc, - * the new sequence for all ASICs. - */ - if (dc->ctx->dce_version >= DCN_VERSION_4_01) { -- update_planes_and_stream_v3(dc, srf_updates, surface_count, -+ ret = update_planes_and_stream_v3(dc, srf_updates, surface_count, - stream, stream_update); -- return; -- } -- if (dc->ctx->dce_version >= DCN_VERSION_3_2) { -- update_planes_and_stream_v2(dc, srf_updates, surface_count, -+ } else if (dc->ctx->dce_version >= DCN_VERSION_3_2) { -+ ret = update_planes_and_stream_v2(dc, srf_updates, surface_count, - stream, stream_update); -- return; -- } -- update_planes_and_stream_v1(dc, srf_updates, surface_count, stream, -- stream_update, state); -+ } else -+ ret = update_planes_and_stream_v1(dc, srf_updates, surface_count, stream, -+ stream_update, state); -+ -+ if (ret) -+ clear_update_flags(srf_updates, surface_count, stream); - } - - uint8_t dc_get_current_stream_count(struct dc *dc) +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +index 1bb602c4f9b3..00df025a8636 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +@@ -4711,7 +4711,10 @@ static int gfx_v10_0_sw_init(void *handle) + case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): + adev->gfx.me.num_me = 1; +- adev->gfx.me.num_pipe_per_me = 2; ++ if (amdgpu_sriov_vf(adev)) ++ adev->gfx.me.num_pipe_per_me = 2; ++ else ++ adev->gfx.me.num_pipe_per_me = 1; + adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.mec.num_mec = 2; + adev->gfx.mec.num_pipe_per_mec = 4; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c -index 1a53bf05f8fc..495dc3729daf 100644 +index 1a53bf05f8fc..f1f69911e8ce 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -140,7 +140,8 @@ int smu_set_soft_freq_range(struct smu_context *smu, @@ -10768,6 +13036,33 @@ index 1a53bf05f8fc..495dc3729daf 100644 smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D; smu->workload_setting[2] = PP_SMC_POWER_PROFILE_POWERSAVING; +@@ -2226,7 +2231,7 @@ static int smu_bump_power_profile_mode(struct smu_context *smu, + static int smu_adjust_power_state_dynamic(struct smu_context *smu, + enum 
amd_dpm_forced_level level, + bool skip_display_settings, +- bool force_update) ++ bool init) + { + int ret = 0; + int index = 0; +@@ -2255,7 +2260,7 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu, + } + } + +- if (force_update || smu_dpm_ctx->dpm_level != level) { ++ if (smu_dpm_ctx->dpm_level != level) { + ret = smu_asic_set_performance_level(smu, level); + if (ret) { + dev_err(smu->adev->dev, "Failed to set performance level!"); +@@ -2272,7 +2277,7 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu, + index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; + workload[0] = smu->workload_setting[index]; + +- if (force_update || smu->power_profile_mode != workload[0]) ++ if (init || smu->power_profile_mode != workload[0]) + smu_bump_power_profile_mode(smu, workload, 0); + } + diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index b44a185d07e8..5eb4e5c75981 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -11754,13 +14049,29 @@ index 02afeb3acce4..5aef7fa37878 100644 resource_len = pci_resource_len(pdev, bar_id); rtwpci->mmap = pci_iomap(pdev, bar_id, resource_len); +diff --git a/mm/shrinker.c b/mm/shrinker.c +index dc5d2a6fcfc4..e4b795ee6d2e 100644 +--- a/mm/shrinker.c ++++ b/mm/shrinker.c +@@ -87,8 +87,10 @@ int alloc_shrinker_info(struct mem_cgroup *memcg) + if (!info) + goto err; + info->map_nr_max = shrinker_nr_max; +- if (shrinker_unit_alloc(info, NULL, nid)) ++ if (shrinker_unit_alloc(info, NULL, nid)) { ++ kvfree(info); + goto err; ++ } + rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info); + } + mutex_unlock(&shrinker_mutex); -- 2.47.0.rc0 -From 49a2293cad3542ec1383bff6eb8d8bbc83b6c46d Mon Sep 17 00:00:00 2001 +From 7dfbd6b54cfa794699206bd5f882c85b326523a4 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 10 Oct 2024 12:37:26 +0200 -Subject: [PATCH 05/11] intel-pstate +Date: Thu, 17 Oct 2024 16:04:25 +0200 +Subject: [PATCH 08/14] intel-pstate Signed-off-by: Peter Jung --- @@ -12241,10 +14552,10 @@ index 348a330678bd..c11be253bfa3 100644 -- 2.47.0.rc0 -From b696b1ddc76a50fca46d2f4e0f2993114042e613 Mon Sep 17 00:00:00 2001 +From 2a946474720372a10c7584d5fa279feb2edb591a Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 10 Oct 2024 12:37:57 +0200 -Subject: [PATCH 06/11] ksm +Date: Thu, 17 Oct 2024 16:04:35 +0200 +Subject: [PATCH 09/14] ksm Signed-off-by: Peter Jung --- @@ -12674,10 +14985,10 @@ index 01071182763e..7394bad8178e 100644 -- 2.47.0.rc0 -From 98440e16d1442aa58b0817752fab19604706bd84 Mon Sep 17 00:00:00 2001 +From d97e18a40b5cdf18c6d15befa2e3aef060d2971a Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 10 Oct 2024 12:38:18 +0200 -Subject: [PATCH 07/11] ntsync +Date: Thu, 17 Oct 2024 16:04:50 +0200 +Subject: [PATCH 10/14] ntsync Signed-off-by: Peter Jung --- @@ -13116,10 +15427,10 @@ index 000000000000..767844637a7d + ``objs`` and in ``alert``. If this is attempted, the function fails + with ``EINVAL``. 
diff --git a/MAINTAINERS b/MAINTAINERS -index cc40a9d9b8cd..2cd7168dc401 100644 +index c27b4e8f2129..84ad898cd8f8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -16319,6 +16319,15 @@ T: git https://github.com/Paragon-Software-Group/linux-ntfs3.git +@@ -16336,6 +16336,15 @@ T: git https://github.com/Paragon-Software-Group/linux-ntfs3.git F: Documentation/filesystems/ntfs3.rst F: fs/ntfs3/ @@ -15763,10 +18074,10 @@ index 000000000000..5fa2c9a0768c -- 2.47.0.rc0 -From a0b3cb05c95954b4055a3327b1a00553583fbdde Mon Sep 17 00:00:00 2001 +From bc16b715a926921dcce09026210a30e444f09365 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 10 Oct 2024 12:38:31 +0200 -Subject: [PATCH 08/11] perf-per-core +Date: Thu, 17 Oct 2024 16:05:01 +0200 +Subject: [PATCH 11/14] perf-per-core Signed-off-by: Peter Jung --- @@ -16621,7 +18932,7 @@ index b985ca79cf97..8206038a01ac 100644 } module_exit(intel_rapl_exit); diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h -index a75a07f4931f..5a59713ec62b 100644 +index 279edfd36fed..350a5f9fbd12 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -98,6 +98,7 @@ struct cpuinfo_topology { @@ -16671,10 +18982,10 @@ index 9a6069e7133c..23722aa21e2f 100644 -- 2.47.0.rc0 -From 074c7e9eebb929b35d8f128ae1250ae0d8832eec Mon Sep 17 00:00:00 2001 +From c988ff6876f550e82bddfecf5048e3403e2d4b8e Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 10 Oct 2024 12:39:01 +0200 -Subject: [PATCH 09/11] t2 +Date: Thu, 17 Oct 2024 16:05:21 +0200 +Subject: [PATCH 12/14] t2 Signed-off-by: Peter Jung --- @@ -16832,10 +19143,10 @@ index 4451ef501936..c726a846f752 100644 ---- diff --git a/MAINTAINERS b/MAINTAINERS -index 2cd7168dc401..16df466c205d 100644 +index 84ad898cd8f8..94083f400734 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -6895,6 +6895,12 @@ S: Supported +@@ -6912,6 +6912,12 @@ S: Supported T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: drivers/gpu/drm/sun4i/sun8i* @@ -18696,7 +21007,7 @@ index 6e4ebc349e45..4e79fafeeafa 100644 } diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c -index c4a6908bbe54..30df701af5da 100644 +index 847462650549..6c4cb3883955 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -72,6 +72,7 @@ MODULE_LICENSE("GPL"); @@ -18830,7 +21141,7 @@ index c4a6908bbe54..30df701af5da 100644 if (cls->is_indirect) app->mt_flags |= INPUT_MT_POINTER; -@@ -1757,6 +1779,15 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) +@@ -1758,6 +1780,15 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) } } @@ -18846,7 +21157,7 @@ index c4a6908bbe54..30df701af5da 100644 td = devm_kzalloc(&hdev->dev, sizeof(struct mt_device), GFP_KERNEL); if (!td) { dev_err(&hdev->dev, "cannot allocate multitouch data\n"); -@@ -1804,10 +1835,6 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) +@@ -1805,10 +1836,6 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) timer_setup(&td->release_timer, mt_expired_timeout, 0); @@ -18857,7 +21168,7 @@ index c4a6908bbe54..30df701af5da 100644 if (mtclass->quirks & MT_QUIRK_FIX_CONST_CONTACT_ID) mt_fix_const_fields(hdev, HID_DG_CONTACTID); -@@ -2273,6 +2300,11 @@ static const struct hid_device_id mt_devices[] = { +@@ -2277,6 +2304,11 @@ static const struct hid_device_id mt_devices[] = { MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_CSR2) }, @@ -27086,10 +29397,10 @@ index 4427572b2477..b60c99d61882 100755 -- 
2.47.0.rc0 -From 3346172fe2036b00ffdf8b707c1aba850d6825ca Mon Sep 17 00:00:00 2001 +From 1399f3445a16951721064f590b8c3fce99aa2689 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 10 Oct 2024 12:39:25 +0200 -Subject: [PATCH 10/11] thp shrinker +Date: Thu, 17 Oct 2024 16:05:34 +0200 +Subject: [PATCH 13/14] thp-shrinker Signed-off-by: Peter Jung --- @@ -27900,10 +30211,10 @@ index 9007c420d52c..2eaed8209925 100644 -- 2.47.0.rc0 -From e9cb25e7e2c3831c56deacdf47f9b84c25b8eac8 Mon Sep 17 00:00:00 2001 +From 450642cca4b044b4f2f485f6038045c1ddd32086 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 10 Oct 2024 12:39:34 +0200 -Subject: [PATCH 11/11] zstd +Date: Thu, 17 Oct 2024 16:05:48 +0200 +Subject: [PATCH 14/14] zstd Signed-off-by: Peter Jung ---