From 5bf06daace2ce9502c0ae99268185f7bd8e71b02 Mon Sep 17 00:00:00 2001 From: ferreo Date: Mon, 20 Jan 2025 16:36:47 +0100 Subject: [PATCH] Update patches/0001-cachyos-base-all.patch --- patches/0001-cachyos-base-all.patch | 9194 +++++++++++---------------- 1 file changed, 3657 insertions(+), 5537 deletions(-) diff --git a/patches/0001-cachyos-base-all.patch b/patches/0001-cachyos-base-all.patch index 1ae782f..83b4d00 100644 --- a/patches/0001-cachyos-base-all.patch +++ b/patches/0001-cachyos-base-all.patch @@ -1,834 +1,565 @@ -From 26a3b5401e1e07e2462dca715baf251161031432 Mon Sep 17 00:00:00 2001 +From 1ec94c7b86986796d5d14135302e81dd3ddbe223 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 9 Jan 2025 16:26:47 +0100 -Subject: [PATCH 01/13] amd-cache-optimizer +Date: Mon, 20 Jan 2025 13:21:23 +0100 +Subject: [PATCH 01/12] amd-pstate Signed-off-by: Peter Jung --- - .../sysfs-bus-platform-drivers-amd_x3d_vcache | 12 ++ - MAINTAINERS | 8 + - drivers/platform/x86/amd/Kconfig | 12 ++ - drivers/platform/x86/amd/Makefile | 2 + - drivers/platform/x86/amd/x3d_vcache.c | 176 ++++++++++++++++++ - 5 files changed, 210 insertions(+) - create mode 100644 Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache - create mode 100644 drivers/platform/x86/amd/x3d_vcache.c + drivers/cpufreq/amd-pstate-trace.h | 52 +++- + drivers/cpufreq/amd-pstate-ut.c | 12 +- + drivers/cpufreq/amd-pstate.c | 474 ++++++++++++++--------------- + drivers/cpufreq/amd-pstate.h | 3 - + 4 files changed, 276 insertions(+), 265 deletions(-) -diff --git a/Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache b/Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache -new file mode 100644 -index 000000000000..ac3431736f5c ---- /dev/null -+++ b/Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache -@@ -0,0 +1,12 @@ -+What: /sys/bus/platform/drivers/amd_x3d_vcache/AMDI0101:00/amd_x3d_mode -+Date: November 2024 -+KernelVersion: 6.13 -+Contact: Basavaraj Natikar -+Description: (RW) AMD 3D V-Cache optimizer allows users to switch CPU core -+ rankings dynamically. -+ -+ This file switches between these two modes: -+ - "frequency" cores within the faster CCD are prioritized before -+ those in the slower CCD. -+ - "cache" cores within the larger L3 CCD are prioritized before -+ those in the smaller L3 CCD. 
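For context (not part of the patch): the ABI entry above documents a plain read/write sysfs attribute, so userspace can flip the core-ranking mode with a one-line write. A minimal C sketch, assuming the AMDI0101:00 device instance from the ABI path; error handling is trimmed for brevity:

/* illustration only: switch the 3D V-Cache optimizer to "cache" mode */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define X3D_MODE \
	"/sys/bus/platform/drivers/amd_x3d_vcache/AMDI0101:00/amd_x3d_mode"

int main(void)
{
	const char *mode = "cache";	/* or "frequency" (the module default) */
	int fd = open(X3D_MODE, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, mode, strlen(mode)) < 0)
		perror("write");
	close(fd);
	return 0;
}

Reading the same attribute back returns the currently active mode string.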
-diff --git a/MAINTAINERS b/MAINTAINERS -index 6bb4ec0c162a..a578178468f1 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -965,6 +965,14 @@ Q: https://patchwork.kernel.org/project/linux-rdma/list/ - F: drivers/infiniband/hw/efa/ - F: include/uapi/rdma/efa-abi.h +diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h +index 35f38ae67fb1..8d692415d905 100644 +--- a/drivers/cpufreq/amd-pstate-trace.h ++++ b/drivers/cpufreq/amd-pstate-trace.h +@@ -32,7 +32,6 @@ TRACE_EVENT(amd_pstate_perf, + u64 aperf, + u64 tsc, + unsigned int cpu_id, +- bool changed, + bool fast_switch + ), -+AMD 3D V-CACHE PERFORMANCE OPTIMIZER DRIVER -+M: Basavaraj Natikar -+R: Mario Limonciello -+L: platform-driver-x86@vger.kernel.org -+S: Supported -+F: Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache -+F: drivers/platform/x86/amd/x3d_vcache.c -+ - AMD ADDRESS TRANSLATION LIBRARY (ATL) - M: Yazen Ghannam - L: linux-edac@vger.kernel.org -diff --git a/drivers/platform/x86/amd/Kconfig b/drivers/platform/x86/amd/Kconfig -index f88682d36447..d77600eacb05 100644 ---- a/drivers/platform/x86/amd/Kconfig -+++ b/drivers/platform/x86/amd/Kconfig -@@ -19,6 +19,18 @@ config AMD_HSMP - If you choose to compile this driver as a module the module will be - called amd_hsmp. +@@ -44,7 +43,6 @@ TRACE_EVENT(amd_pstate_perf, + aperf, + tsc, + cpu_id, +- changed, + fast_switch + ), -+config AMD_3D_VCACHE -+ tristate "AMD 3D V-Cache Performance Optimizer Driver" -+ depends on X86_64 && ACPI -+ help -+ The driver provides a sysfs interface, enabling the setting of a bias -+ that alters CPU core reordering. This bias prefers cores with higher -+ frequencies or larger L3 caches on processors supporting AMD 3D V-Cache -+ technology. -+ -+ If you choose to compile this driver as a module the module will be -+ called amd_3d_vcache. -+ - config AMD_WBRF - bool "AMD Wifi RF Band mitigations (WBRF)" - depends on ACPI -diff --git a/drivers/platform/x86/amd/Makefile b/drivers/platform/x86/amd/Makefile -index dcec0a46f8af..86d73f3bd176 100644 ---- a/drivers/platform/x86/amd/Makefile -+++ b/drivers/platform/x86/amd/Makefile -@@ -4,6 +4,8 @@ - # AMD x86 Platform-Specific Drivers - # +@@ -57,7 +55,6 @@ TRACE_EVENT(amd_pstate_perf, + __field(unsigned long long, aperf) + __field(unsigned long long, tsc) + __field(unsigned int, cpu_id) +- __field(bool, changed) + __field(bool, fast_switch) + ), -+obj-$(CONFIG_AMD_3D_VCACHE) += amd_3d_vcache.o -+amd_3d_vcache-objs := x3d_vcache.o - obj-$(CONFIG_AMD_PMC) += pmc/ - amd_hsmp-y := hsmp.o - obj-$(CONFIG_AMD_HSMP) += amd_hsmp.o -diff --git a/drivers/platform/x86/amd/x3d_vcache.c b/drivers/platform/x86/amd/x3d_vcache.c -new file mode 100644 -index 000000000000..0f6d3c54d879 ---- /dev/null -+++ b/drivers/platform/x86/amd/x3d_vcache.c -@@ -0,0 +1,176 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * AMD 3D V-Cache Performance Optimizer Driver -+ * -+ * Copyright (c) 2024, Advanced Micro Devices, Inc. -+ * All Rights Reserved. 
-+ * -+ * Authors: Basavaraj Natikar -+ * Perry Yuan -+ * Mario Limonciello -+ */ -+ -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static char *x3d_mode = "frequency"; -+module_param(x3d_mode, charp, 0); -+MODULE_PARM_DESC(x3d_mode, "Initial 3D-VCache mode; 'frequency' (default) or 'cache'"); -+ -+#define DSM_REVISION_ID 0 -+#define DSM_SET_X3D_MODE 1 -+ -+static guid_t x3d_guid = GUID_INIT(0xdff8e55f, 0xbcfd, 0x46fb, 0xba, 0x0a, -+ 0xef, 0xd0, 0x45, 0x0f, 0x34, 0xee); -+ -+enum amd_x3d_mode_type { -+ MODE_INDEX_FREQ, -+ MODE_INDEX_CACHE, -+}; -+ -+static const char * const amd_x3d_mode_strings[] = { -+ [MODE_INDEX_FREQ] = "frequency", -+ [MODE_INDEX_CACHE] = "cache", -+}; -+ -+struct amd_x3d_dev { -+ struct device *dev; -+ acpi_handle ahandle; -+ /* To protect x3d mode setting */ -+ struct mutex lock; -+ enum amd_x3d_mode_type curr_mode; -+}; -+ -+static int amd_x3d_get_mode(struct amd_x3d_dev *data) -+{ -+ guard(mutex)(&data->lock); -+ -+ return data->curr_mode; -+} -+ -+static int amd_x3d_mode_switch(struct amd_x3d_dev *data, int new_state) -+{ -+ union acpi_object *out, argv; -+ -+ guard(mutex)(&data->lock); -+ argv.type = ACPI_TYPE_INTEGER; -+ argv.integer.value = new_state; -+ -+ out = acpi_evaluate_dsm(data->ahandle, &x3d_guid, DSM_REVISION_ID, -+ DSM_SET_X3D_MODE, &argv); -+ if (!out) { -+ dev_err(data->dev, "failed to evaluate _DSM\n"); -+ return -EINVAL; -+ } -+ -+ data->curr_mode = new_state; -+ -+ kfree(out); -+ -+ return 0; -+} -+ -+static ssize_t amd_x3d_mode_store(struct device *dev, struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ struct amd_x3d_dev *data = dev_get_drvdata(dev); -+ int ret; -+ -+ ret = sysfs_match_string(amd_x3d_mode_strings, buf); -+ if (ret < 0) -+ return ret; -+ -+ ret = amd_x3d_mode_switch(data, ret); -+ if (ret < 0) -+ return ret; -+ -+ return count; -+} -+ -+static ssize_t amd_x3d_mode_show(struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ struct amd_x3d_dev *data = dev_get_drvdata(dev); -+ int mode = amd_x3d_get_mode(data); -+ -+ return sysfs_emit(buf, "%s\n", amd_x3d_mode_strings[mode]); -+} -+static DEVICE_ATTR_RW(amd_x3d_mode); -+ -+static struct attribute *amd_x3d_attrs[] = { -+ &dev_attr_amd_x3d_mode.attr, -+ NULL -+}; -+ATTRIBUTE_GROUPS(amd_x3d); -+ -+static int amd_x3d_resume_handler(struct device *dev) -+{ -+ struct amd_x3d_dev *data = dev_get_drvdata(dev); -+ int ret = amd_x3d_get_mode(data); -+ -+ return amd_x3d_mode_switch(data, ret); -+} -+ -+static DEFINE_SIMPLE_DEV_PM_OPS(amd_x3d_pm, NULL, amd_x3d_resume_handler); -+ -+static const struct acpi_device_id amd_x3d_acpi_ids[] = { -+ {"AMDI0101"}, -+ { }, -+}; -+MODULE_DEVICE_TABLE(acpi, amd_x3d_acpi_ids); -+ -+static int amd_x3d_probe(struct platform_device *pdev) -+{ -+ struct amd_x3d_dev *data; -+ acpi_handle handle; -+ int ret; -+ -+ handle = ACPI_HANDLE(&pdev->dev); -+ if (!handle) -+ return -ENODEV; -+ -+ if (!acpi_check_dsm(handle, &x3d_guid, DSM_REVISION_ID, BIT(DSM_SET_X3D_MODE))) -+ return -ENODEV; -+ -+ data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); -+ if (!data) -+ return -ENOMEM; -+ -+ data->dev = &pdev->dev; -+ -+ ret = devm_mutex_init(data->dev, &data->lock); -+ if (ret) -+ return ret; -+ -+ data->ahandle = handle; -+ platform_set_drvdata(pdev, data); -+ -+ ret = match_string(amd_x3d_mode_strings, ARRAY_SIZE(amd_x3d_mode_strings), x3d_mode); -+ if (ret < 0) -+ return dev_err_probe(&pdev->dev, -EINVAL, 
"invalid mode %s\n", x3d_mode); -+ -+ return amd_x3d_mode_switch(data, ret); -+} -+ -+static struct platform_driver amd_3d_vcache_driver = { -+ .driver = { -+ .name = "amd_x3d_vcache", -+ .dev_groups = amd_x3d_groups, -+ .acpi_match_table = amd_x3d_acpi_ids, -+ .pm = pm_sleep_ptr(&amd_x3d_pm), -+ }, -+ .probe = amd_x3d_probe, -+}; -+module_platform_driver(amd_3d_vcache_driver); -+ -+MODULE_DESCRIPTION("AMD 3D V-Cache Performance Optimizer Driver"); -+MODULE_LICENSE("GPL"); --- -2.48.0.rc1 - -From 3cf853a692e28a4760849d5b392bbeaf4245ce0b Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Thu, 9 Jan 2025 16:34:55 +0100 -Subject: [PATCH 02/13] amd-pstate - -Signed-off-by: Peter Jung ---- - Documentation/admin-guide/pm/amd-pstate.rst | 4 +- - arch/x86/include/asm/cpufeatures.h | 3 +- - arch/x86/include/asm/intel-family.h | 6 + - arch/x86/include/asm/processor.h | 18 ++ - arch/x86/include/asm/topology.h | 9 + - arch/x86/kernel/acpi/cppc.c | 23 ++ - arch/x86/kernel/cpu/debugfs.c | 1 + - arch/x86/kernel/cpu/scattered.c | 3 +- - arch/x86/kernel/cpu/topology_amd.c | 3 + - arch/x86/kernel/cpu/topology_common.c | 34 +++ - arch/x86/kernel/smpboot.c | 14 +- - arch/x86/mm/init.c | 23 +- - drivers/cpufreq/amd-pstate-ut.c | 6 +- - drivers/cpufreq/amd-pstate.c | 237 +++++++++----------- - tools/arch/x86/include/asm/cpufeatures.h | 2 +- - 15 files changed, 240 insertions(+), 146 deletions(-) - -diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst -index 210a808b74ec..412423c54f25 100644 ---- a/Documentation/admin-guide/pm/amd-pstate.rst -+++ b/Documentation/admin-guide/pm/amd-pstate.rst -@@ -251,9 +251,7 @@ performance supported in `AMD CPPC Performance Capability `_). - In some ASICs, the highest CPPC performance is not the one in the ``_CPC`` - table, so we need to expose it to sysfs. If boost is not active, but - still supported, this maximum frequency will be larger than the one in --``cpuinfo``. On systems that support preferred core, the driver will have --different values for some cores than others and this will reflect the values --advertised by the platform at bootup. -+``cpuinfo``. - This attribute is read-only. 
+@@ -70,11 +67,10 @@ TRACE_EVENT(amd_pstate_perf, + __entry->aperf = aperf; + __entry->tsc = tsc; + __entry->cpu_id = cpu_id; +- __entry->changed = changed; + __entry->fast_switch = fast_switch; + ), - ``amd_pstate_lowest_nonlinear_freq`` -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 913fd3a7bac6..a7c93191b7c6 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -473,7 +473,8 @@ - #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ - #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ - #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ --#define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ -+#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ -+#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ +- TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u changed=%s fast_switch=%s", ++ TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u fast_switch=%s", + (unsigned long)__entry->min_perf, + (unsigned long)__entry->target_perf, + (unsigned long)__entry->capacity, +@@ -83,11 +79,55 @@ TRACE_EVENT(amd_pstate_perf, + (unsigned long long)__entry->aperf, + (unsigned long long)__entry->tsc, + (unsigned int)__entry->cpu_id, +- (__entry->changed) ? "true" : "false", + (__entry->fast_switch) ? "true" : "false" + ) + ); - /* - * BUG word(s) -diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h -index 1a42f829667a..736764472048 100644 ---- a/arch/x86/include/asm/intel-family.h -+++ b/arch/x86/include/asm/intel-family.h -@@ -183,4 +183,10 @@ - /* Family 19 */ - #define INTEL_PANTHERCOVE_X IFM(19, 0x01) /* Diamond Rapids */ - -+/* CPU core types */ -+enum intel_cpu_type { -+ INTEL_CPU_TYPE_ATOM = 0x20, -+ INTEL_CPU_TYPE_CORE = 0x40, -+}; ++TRACE_EVENT(amd_pstate_epp_perf, + - #endif /* _ASM_X86_INTEL_FAMILY_H */ -diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h -index 2d776635aa53..20e6009381ed 100644 ---- a/arch/x86/include/asm/processor.h -+++ b/arch/x86/include/asm/processor.h -@@ -105,6 +105,24 @@ struct cpuinfo_topology { - // Cache level topology IDs - u32 llc_id; - u32 l2c_id; ++ TP_PROTO(unsigned int cpu_id, ++ unsigned int highest_perf, ++ unsigned int epp, ++ unsigned int min_perf, ++ unsigned int max_perf, ++ bool boost ++ ), + -+ // Hardware defined CPU-type -+ union { -+ u32 cpu_type; -+ struct { -+ // CPUID.1A.EAX[23-0] -+ u32 intel_native_model_id :24; -+ // CPUID.1A.EAX[31-24] -+ u32 intel_type :8; -+ }; -+ struct { -+ // CPUID 0x80000026.EBX -+ u32 amd_num_processors :16, -+ amd_power_eff_ranking :8, -+ amd_native_model_id :4, -+ amd_type :4; -+ }; -+ }; - }; - - struct cpuinfo_x86 { -diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h -index 92f3664dd933..fd41103ad342 100644 ---- a/arch/x86/include/asm/topology.h -+++ b/arch/x86/include/asm/topology.h -@@ -114,6 +114,12 @@ enum x86_topology_domains { - TOPO_MAX_DOMAIN, - }; - -+enum x86_topology_cpu_type { -+ TOPO_CPU_TYPE_PERFORMANCE, -+ TOPO_CPU_TYPE_EFFICIENCY, -+ TOPO_CPU_TYPE_UNKNOWN, -+}; ++ TP_ARGS(cpu_id, ++ highest_perf, ++ epp, ++ min_perf, ++ max_perf, ++ boost), + - struct x86_topology_system { - unsigned int dom_shifts[TOPO_MAX_DOMAIN]; - unsigned int 
dom_size[TOPO_MAX_DOMAIN]; -@@ -149,6 +155,9 @@ extern unsigned int __max_threads_per_core; - extern unsigned int __num_threads_per_package; - extern unsigned int __num_cores_per_package; - -+const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c); -+enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c); ++ TP_STRUCT__entry( ++ __field(unsigned int, cpu_id) ++ __field(unsigned int, highest_perf) ++ __field(unsigned int, epp) ++ __field(unsigned int, min_perf) ++ __field(unsigned int, max_perf) ++ __field(bool, boost) ++ ), + - static inline unsigned int topology_max_packages(void) - { - return __max_logical_packages; -diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c -index aab9d0570841..d745dd586303 100644 ---- a/arch/x86/kernel/acpi/cppc.c -+++ b/arch/x86/kernel/acpi/cppc.c -@@ -239,8 +239,10 @@ EXPORT_SYMBOL_GPL(amd_detect_prefcore); - */ - int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) - { -+ enum x86_topology_cpu_type core_type = get_topology_cpu_type(&cpu_data(cpu)); - bool prefcore; - int ret; -+ u32 tmp; - - ret = amd_detect_prefcore(&prefcore); - if (ret) -@@ -266,6 +268,27 @@ int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) - break; - } - } ++ TP_fast_assign( ++ __entry->cpu_id = cpu_id; ++ __entry->highest_perf = highest_perf; ++ __entry->epp = epp; ++ __entry->min_perf = min_perf; ++ __entry->max_perf = max_perf; ++ __entry->boost = boost; ++ ), + -+ /* detect if running on heterogeneous design */ -+ if (cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES)) { -+ switch (core_type) { -+ case TOPO_CPU_TYPE_UNKNOWN: -+ pr_warn("Undefined core type found for cpu %d\n", cpu); -+ break; -+ case TOPO_CPU_TYPE_PERFORMANCE: -+ /* use the max scale for performance cores */ -+ *numerator = CPPC_HIGHEST_PERF_PERFORMANCE; -+ return 0; -+ case TOPO_CPU_TYPE_EFFICIENCY: -+ /* use the highest perf value for efficiency cores */ -+ ret = amd_get_highest_perf(cpu, &tmp); -+ if (ret) -+ return ret; -+ *numerator = tmp; -+ return 0; -+ } -+ } ++ TP_printk("cpu%u: [%u<->%u]/%u, epp=%u, boost=%u", ++ (unsigned int)__entry->cpu_id, ++ (unsigned int)__entry->min_perf, ++ (unsigned int)__entry->max_perf, ++ (unsigned int)__entry->highest_perf, ++ (unsigned int)__entry->epp, ++ (bool)__entry->boost ++ ) ++); + - *numerator = CPPC_HIGHEST_PERF_PREFCORE; + #endif /* _AMD_PSTATE_TRACE_H */ - return 0; -diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c -index 3baf3e435834..10719aba6276 100644 ---- a/arch/x86/kernel/cpu/debugfs.c -+++ b/arch/x86/kernel/cpu/debugfs.c -@@ -22,6 +22,7 @@ static int cpu_debug_show(struct seq_file *m, void *p) - seq_printf(m, "die_id: %u\n", c->topo.die_id); - seq_printf(m, "cu_id: %u\n", c->topo.cu_id); - seq_printf(m, "core_id: %u\n", c->topo.core_id); -+ seq_printf(m, "cpu_type: %s\n", get_topology_cpu_type_name(c)); - seq_printf(m, "logical_pkg_id: %u\n", c->topo.logical_pkg_id); - seq_printf(m, "logical_die_id: %u\n", c->topo.logical_die_id); - seq_printf(m, "llc_id: %u\n", c->topo.llc_id); -diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c -index c84c30188fdf..307a91741534 100644 ---- a/arch/x86/kernel/cpu/scattered.c -+++ b/arch/x86/kernel/cpu/scattered.c -@@ -45,13 +45,14 @@ static const struct cpuid_bit cpuid_bits[] = { - { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, - { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, - { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, -- { X86_FEATURE_FAST_CPPC, CPUID_EDX, 15, 
0x80000007, 0 }, -+ { X86_FEATURE_AMD_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 }, - { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, - { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, - { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, - { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, - { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, - { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, -+ { X86_FEATURE_AMD_HETEROGENEOUS_CORES, CPUID_EAX, 30, 0x80000026, 0 }, - { 0, 0, 0, 0, 0 } - }; - -diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c -index 7d476fa697ca..03b3c9c3a45e 100644 ---- a/arch/x86/kernel/cpu/topology_amd.c -+++ b/arch/x86/kernel/cpu/topology_amd.c -@@ -182,6 +182,9 @@ static void parse_topology_amd(struct topo_scan *tscan) - if (cpu_feature_enabled(X86_FEATURE_TOPOEXT)) - has_topoext = cpu_parse_topology_ext(tscan); - -+ if (cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES)) -+ tscan->c->topo.cpu_type = cpuid_ebx(0x80000026); -+ - if (!has_topoext && !parse_8000_0008(tscan)) - return; - -diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c -index 9a6069e7133c..8277c64f88db 100644 ---- a/arch/x86/kernel/cpu/topology_common.c -+++ b/arch/x86/kernel/cpu/topology_common.c -@@ -3,6 +3,7 @@ - - #include - -+#include - #include - #include - #include -@@ -27,6 +28,36 @@ void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, - } - } - -+enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c) -+{ -+ if (c->x86_vendor == X86_VENDOR_INTEL) { -+ switch (c->topo.intel_type) { -+ case INTEL_CPU_TYPE_ATOM: return TOPO_CPU_TYPE_EFFICIENCY; -+ case INTEL_CPU_TYPE_CORE: return TOPO_CPU_TYPE_PERFORMANCE; -+ } -+ } -+ if (c->x86_vendor == X86_VENDOR_AMD) { -+ switch (c->topo.amd_type) { -+ case 0: return TOPO_CPU_TYPE_PERFORMANCE; -+ case 1: return TOPO_CPU_TYPE_EFFICIENCY; -+ } -+ } -+ -+ return TOPO_CPU_TYPE_UNKNOWN; -+} -+ -+const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c) -+{ -+ switch (get_topology_cpu_type(c)) { -+ case TOPO_CPU_TYPE_PERFORMANCE: -+ return "performance"; -+ case TOPO_CPU_TYPE_EFFICIENCY: -+ return "efficiency"; -+ default: -+ return "unknown"; -+ } -+} -+ - static unsigned int __maybe_unused parse_num_cores_legacy(struct cpuinfo_x86 *c) - { - struct { -@@ -87,6 +118,7 @@ static void parse_topology(struct topo_scan *tscan, bool early) - .cu_id = 0xff, - .llc_id = BAD_APICID, - .l2c_id = BAD_APICID, -+ .cpu_type = TOPO_CPU_TYPE_UNKNOWN, - }; - struct cpuinfo_x86 *c = tscan->c; - struct { -@@ -132,6 +164,8 @@ static void parse_topology(struct topo_scan *tscan, bool early) - case X86_VENDOR_INTEL: - if (!IS_ENABLED(CONFIG_CPU_SUP_INTEL) || !cpu_parse_topology_ext(tscan)) - parse_legacy(tscan); -+ if (c->cpuid_level >= 0x1a) -+ c->topo.cpu_type = cpuid_eax(0x1a); - break; - case X86_VENDOR_HYGON: - if (IS_ENABLED(CONFIG_CPU_SUP_HYGON)) -diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c -index 766f092dab80..419e7ae09639 100644 ---- a/arch/x86/kernel/smpboot.c -+++ b/arch/x86/kernel/smpboot.c -@@ -62,6 +62,8 @@ - #include - #include - -+#include -+ - #include - #include - #include -@@ -498,7 +500,17 @@ static int x86_cluster_flags(void) - static int x86_die_flags(void) - { - if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) -- return x86_sched_itmt_flags(); -+ return x86_sched_itmt_flags(); -+ -+ switch (boot_cpu_data.x86_vendor) { -+ case X86_VENDOR_AMD: -+ case X86_VENDOR_HYGON: -+ bool prefcore = 
false; -+ -+ amd_detect_prefcore(&prefcore); -+ if (prefcore || cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES)) -+ return x86_sched_itmt_flags(); -+ }; - - return 0; - } -diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c -index eb503f53c319..101725c149c4 100644 ---- a/arch/x86/mm/init.c -+++ b/arch/x86/mm/init.c -@@ -263,28 +263,33 @@ static void __init probe_page_size_mask(void) - } - - /* -- * INVLPG may not properly flush Global entries -- * on these CPUs when PCIDs are enabled. -+ * INVLPG may not properly flush Global entries on -+ * these CPUs. New microcode fixes the issue. - */ - static const struct x86_cpu_id invlpg_miss_ids[] = { -- X86_MATCH_VFM(INTEL_ALDERLAKE, 0), -- X86_MATCH_VFM(INTEL_ALDERLAKE_L, 0), -- X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, 0), -- X86_MATCH_VFM(INTEL_RAPTORLAKE, 0), -- X86_MATCH_VFM(INTEL_RAPTORLAKE_P, 0), -- X86_MATCH_VFM(INTEL_RAPTORLAKE_S, 0), -+ X86_MATCH_VFM(INTEL_ALDERLAKE, 0x2e), -+ X86_MATCH_VFM(INTEL_ALDERLAKE_L, 0x42c), -+ X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, 0x11), -+ X86_MATCH_VFM(INTEL_RAPTORLAKE, 0x118), -+ X86_MATCH_VFM(INTEL_RAPTORLAKE_P, 0x4117), -+ X86_MATCH_VFM(INTEL_RAPTORLAKE_S, 0x2e), - {} - }; - - static void setup_pcid(void) - { -+ const struct x86_cpu_id *invlpg_miss_match; -+ - if (!IS_ENABLED(CONFIG_X86_64)) - return; - - if (!boot_cpu_has(X86_FEATURE_PCID)) - return; - -- if (x86_match_cpu(invlpg_miss_ids)) { -+ invlpg_miss_match = x86_match_cpu(invlpg_miss_ids); -+ -+ if (invlpg_miss_match && -+ boot_cpu_data.microcode < invlpg_miss_match->driver_data) { - pr_info("Incomplete global flushes, disabling PCID"); - setup_clear_cpu_cap(X86_FEATURE_PCID); - return; + /* This part must be outside protection */ diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c -index f66701514d90..a261d7300951 100644 +index a261d7300951..3a0a380c3590 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c -@@ -227,10 +227,10 @@ static void amd_pstate_ut_check_freq(u32 index) - goto skip_test; - } +@@ -207,7 +207,6 @@ static void amd_pstate_ut_check_freq(u32 index) + int cpu = 0; + struct cpufreq_policy *policy = NULL; + struct amd_cpudata *cpudata = NULL; +- u32 nominal_freq_khz; -- if (cpudata->min_freq != policy->min) { -+ if (cpudata->lowest_nonlinear_freq != policy->min) { + for_each_possible_cpu(cpu) { + policy = cpufreq_cpu_get(cpu); +@@ -215,14 +214,13 @@ static void amd_pstate_ut_check_freq(u32 index) + break; + cpudata = policy->driver_data; + +- nominal_freq_khz = cpudata->nominal_freq*1000; +- if (!((cpudata->max_freq >= nominal_freq_khz) && +- (nominal_freq_khz > cpudata->lowest_nonlinear_freq) && ++ if (!((cpudata->max_freq >= cpudata->nominal_freq) && ++ (cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) && + (cpudata->lowest_nonlinear_freq > cpudata->min_freq) && + (cpudata->min_freq > 0))) { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; -- pr_err("%s cpu%d cpudata_min_freq=%d policy_min=%d, they should be equal!\n", -- __func__, cpu, cpudata->min_freq, policy->min); -+ pr_err("%s cpu%d cpudata_lowest_nonlinear_freq=%d policy_min=%d, they should be equal!\n", -+ __func__, cpu, cpudata->lowest_nonlinear_freq, policy->min); + pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n", +- __func__, cpu, cpudata->max_freq, nominal_freq_khz, ++ __func__, cpu, cpudata->max_freq, cpudata->nominal_freq, + cpudata->lowest_nonlinear_freq, cpudata->min_freq); goto skip_test; } +@@ -236,13 +234,13 @@ static void 
amd_pstate_ut_check_freq(u32 index) + if (cpudata->boost_supported) { + if ((policy->max == cpudata->max_freq) || +- (policy->max == nominal_freq_khz)) ++ (policy->max == cpudata->nominal_freq)) + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; + else { + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n", + __func__, cpu, policy->max, cpudata->max_freq, +- nominal_freq_khz); ++ cpudata->nominal_freq); + goto skip_test; + } + } else { diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c -index 91d3c3b1c2d3..f6d04eb40af9 100644 +index 66e5dfc711c0..2330903a8b45 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c -@@ -233,7 +233,7 @@ static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata) - return index; - } +@@ -22,6 +22,7 @@ --static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, -+static void msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, - u32 des_perf, u32 max_perf, bool fast_switch) + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + ++#include + #include + #include + #include +@@ -88,6 +89,11 @@ static bool cppc_enabled; + static bool amd_pstate_prefcore = true; + static struct quirk_entry *quirks; + ++#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0) ++#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8) ++#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16) ++#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24) ++ + /* + * AMD Energy Preference Performance (EPP) + * The EPP is used in the CCLK DPM controller to drive +@@ -180,120 +186,145 @@ static inline int get_mode_idx_from_str(const char *str, size_t size) + static DEFINE_MUTEX(amd_pstate_limits_lock); + static DEFINE_MUTEX(amd_pstate_driver_lock); + +-static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) ++static s16 msr_get_epp(struct amd_cpudata *cpudata) { - if (fast_switch) -@@ -243,7 +243,7 @@ static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, - READ_ONCE(cpudata->cppc_req_cached)); +- u64 epp; ++ u64 value; + int ret; + +- if (cpu_feature_enabled(X86_FEATURE_CPPC)) { +- if (!cppc_req_cached) { +- epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, +- &cppc_req_cached); +- if (epp) +- return epp; +- } +- epp = (cppc_req_cached >> 24) & 0xFF; +- } else { +- ret = cppc_get_epp_perf(cpudata->cpu, &epp); +- if (ret < 0) { +- pr_debug("Could not retrieve energy perf value (%d)\n", ret); +- return -EIO; +- } ++ ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); ++ if (ret < 0) { ++ pr_debug("Could not retrieve energy perf value (%d)\n", ret); ++ return ret; + } + +- return (s16)(epp & 0xff); ++ return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, value); } --DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf); -+DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf); - - static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, - u32 min_perf, u32 des_perf, -@@ -306,11 +306,17 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, - return ret; - } - --static inline int pstate_enable(bool enable) -+static inline int msr_cppc_enable(bool enable) +-static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata) ++DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp); ++ ++static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata) { - int ret, cpu; - unsigned long logical_proc_id_mask = 0; +- s16 epp; +- int index = -EINVAL; ++ return 
static_call(amd_pstate_get_epp)(cpudata); ++} -+ /* -+ * MSR_AMD_CPPC_ENABLE is write-once, once set it cannot be cleared. -+ */ -+ if (!enable) +- epp = amd_pstate_get_epp(cpudata, 0); +- if (epp < 0) +- return epp; ++static s16 shmem_get_epp(struct amd_cpudata *cpudata) ++{ ++ u64 epp; ++ int ret; + +- switch (epp) { +- case AMD_CPPC_EPP_PERFORMANCE: +- index = EPP_INDEX_PERFORMANCE; +- break; +- case AMD_CPPC_EPP_BALANCE_PERFORMANCE: +- index = EPP_INDEX_BALANCE_PERFORMANCE; +- break; +- case AMD_CPPC_EPP_BALANCE_POWERSAVE: +- index = EPP_INDEX_BALANCE_POWERSAVE; +- break; +- case AMD_CPPC_EPP_POWERSAVE: +- index = EPP_INDEX_POWERSAVE; +- break; +- default: +- break; ++ ret = cppc_get_epp_perf(cpudata->cpu, &epp); ++ if (ret < 0) { ++ pr_debug("Could not retrieve energy perf value (%d)\n", ret); ++ return ret; + } + +- return index; ++ return (s16)(epp & 0xff); + } + +-static void msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, +- u32 des_perf, u32 max_perf, bool fast_switch) ++static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, ++ u32 des_perf, u32 max_perf, u32 epp, bool fast_switch) + { +- if (fast_switch) +- wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached)); +- else +- wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, +- READ_ONCE(cpudata->cppc_req_cached)); ++ u64 value, prev; ++ ++ value = prev = READ_ONCE(cpudata->cppc_req_cached); ++ ++ value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | ++ AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK); ++ value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); ++ value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); ++ value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); ++ value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); ++ ++ if (value == prev) + return 0; + - if (enable == cppc_enabled) - return 0; - -@@ -332,7 +338,7 @@ static inline int pstate_enable(bool enable) - return 0; ++ if (fast_switch) { ++ wrmsrl(MSR_AMD_CPPC_REQ, value); ++ return 0; ++ } else { ++ int ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); ++ ++ if (ret) ++ return ret; ++ } ++ ++ WRITE_ONCE(cpudata->cppc_req_cached, value); ++ WRITE_ONCE(cpudata->epp_cached, epp); ++ ++ return 0; } --static int cppc_enable(bool enable) -+static int shmem_cppc_enable(bool enable) + DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf); + +-static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, ++static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata, + u32 min_perf, u32 des_perf, +- u32 max_perf, bool fast_switch) ++ u32 max_perf, u32 epp, ++ bool fast_switch) { - int cpu, ret = 0; - struct cppc_perf_ctrls perf_ctrls; -@@ -359,24 +365,28 @@ static int cppc_enable(bool enable) +- static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, +- max_perf, fast_switch); ++ return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, ++ max_perf, epp, fast_switch); + } + +-static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) ++static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) + { ++ u64 value, prev; + int ret; +- struct cppc_perf_ctrls perf_ctrls; + +- if (cpu_feature_enabled(X86_FEATURE_CPPC)) { +- u64 value = READ_ONCE(cpudata->cppc_req_cached); +- +- value &= ~GENMASK_ULL(31, 24); +- value |= (u64)epp << 24; +- WRITE_ONCE(cpudata->cppc_req_cached, value); ++ value = prev = READ_ONCE(cpudata->cppc_req_cached); ++ value &= ~AMD_CPPC_EPP_PERF_MASK; ++ value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + +- ret = wrmsrl_on_cpu(cpudata->cpu, 
MSR_AMD_CPPC_REQ, value); +- if (!ret) +- cpudata->epp_cached = epp; +- } else { +- amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, +- cpudata->max_limit_perf, false); ++ if (value == prev) ++ return 0; + +- perf_ctrls.energy_perf = epp; +- ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); +- if (ret) { +- pr_debug("failed to set energy perf value (%d)\n", ret); +- return ret; +- } +- cpudata->epp_cached = epp; ++ ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); ++ if (ret) { ++ pr_err("failed to set energy perf value (%d)\n", ret); ++ return ret; + } + ++ /* update both so that msr_update_perf() can effectively check */ ++ WRITE_ONCE(cpudata->epp_cached, epp); ++ WRITE_ONCE(cpudata->cppc_req_cached, value); ++ return ret; } --DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable); -+DEFINE_STATIC_CALL(amd_pstate_cppc_enable, msr_cppc_enable); - --static inline int amd_pstate_enable(bool enable) -+static inline int amd_pstate_cppc_enable(bool enable) - { -- return static_call(amd_pstate_enable)(enable); -+ return static_call(amd_pstate_cppc_enable)(enable); - } - --static int pstate_init_perf(struct amd_cpudata *cpudata) -+static int msr_init_perf(struct amd_cpudata *cpudata) - { -- u64 cap1; -+ u64 cap1, numerator; - - int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, - &cap1); - if (ret) - return ret; - -- WRITE_ONCE(cpudata->highest_perf, AMD_CPPC_HIGHEST_PERF(cap1)); -- WRITE_ONCE(cpudata->max_limit_perf, AMD_CPPC_HIGHEST_PERF(cap1)); -+ ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator); -+ if (ret) -+ return ret; +-static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, +- int pref_index) ++DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp); + -+ WRITE_ONCE(cpudata->highest_perf, numerator); -+ WRITE_ONCE(cpudata->max_limit_perf, numerator); - WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); - WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); - WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); -@@ -385,16 +395,21 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) - return 0; - } - --static int cppc_init_perf(struct amd_cpudata *cpudata) -+static int shmem_init_perf(struct amd_cpudata *cpudata) - { - struct cppc_perf_caps cppc_perf; -+ u64 numerator; - - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) - return ret; - -- WRITE_ONCE(cpudata->highest_perf, cppc_perf.highest_perf); -- WRITE_ONCE(cpudata->max_limit_perf, cppc_perf.highest_perf); -+ ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator); -+ if (ret) -+ return ret; ++static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) ++{ ++ return static_call(amd_pstate_set_epp)(cpudata, epp); ++} + -+ WRITE_ONCE(cpudata->highest_perf, numerator); -+ WRITE_ONCE(cpudata->max_limit_perf, numerator); - WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf); - WRITE_ONCE(cpudata->lowest_nonlinear_perf, - cppc_perf.lowest_nonlinear_perf); -@@ -420,14 +435,14 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) - return ret; ++static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp) + { +- int epp = -EINVAL; + int ret; ++ struct cppc_perf_ctrls perf_ctrls; ++ ++ if (epp == cpudata->epp_cached) ++ return 0; ++ ++ perf_ctrls.energy_perf = epp; ++ ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); ++ if (ret) { ++ pr_debug("failed to set energy perf value (%d)\n", ret); ++ return ret; ++ } ++ WRITE_ONCE(cpudata->epp_cached, epp); ++ ++ return ret; ++} ++ 
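The msr_set_epp()/msr_update_perf() rework above replaces open-coded shift-and-mask arithmetic with GENMASK()/FIELD_PREP() over MSR_AMD_CPPC_REQ. A self-contained sketch of the resulting register layout, with the masks copied from the hunk and the perf values made up; GENMASK()/FIELD_PREP() are open-coded here because linux/bitfield.h is kernel-internal:

/* illustration only: how the patch packs MSR_AMD_CPPC_REQ (perf values are made up) */
#include <stdint.h>
#include <stdio.h>

#define GENMASK64(h, l)		((~0ULL << (l)) & (~0ULL >> (63 - (h))))
#define FIELD_PREP64(mask, val)	(((uint64_t)(val) << __builtin_ctzll(mask)) & (mask))

#define AMD_CPPC_MAX_PERF_MASK	GENMASK64(7, 0)
#define AMD_CPPC_MIN_PERF_MASK	GENMASK64(15, 8)
#define AMD_CPPC_DES_PERF_MASK	GENMASK64(23, 16)
#define AMD_CPPC_EPP_PERF_MASK	GENMASK64(31, 24)

int main(void)
{
	uint64_t req = 0;

	req |= FIELD_PREP64(AMD_CPPC_MAX_PERF_MASK, 0xff); /* highest perf */
	req |= FIELD_PREP64(AMD_CPPC_MIN_PERF_MASK, 0x14); /* lowest nonlinear */
	req |= FIELD_PREP64(AMD_CPPC_DES_PERF_MASK, 0);	   /* 0 in EPP (active) mode */
	req |= FIELD_PREP64(AMD_CPPC_EPP_PERF_MASK, 0x80); /* balance_performance */

	printf("MSR_AMD_CPPC_REQ = %#010llx\n", (unsigned long long)req);
	/* -> 0x800014ff: epp=0x80 des=0x00 min=0x14 max=0xff */
	return 0;
}

With EPP in charge (active mode) the desired-perf byte stays zero and the hardware picks an operating point between min and max, biased by the epp byte — which is why the new msr_update_perf() can write all four fields, and the epp cache, in one MSR update.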
++static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy, ++ int pref_index) ++{ ++ struct amd_cpudata *cpudata = policy->driver_data; ++ int epp; + + if (!pref_index) + epp = cpudata->epp_default; +- +- if (epp == -EINVAL) ++ else + epp = epp_values[pref_index]; + + if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { +@@ -301,9 +332,15 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, + return -EBUSY; + } + +- ret = amd_pstate_set_epp(cpudata, epp); ++ if (trace_amd_pstate_epp_perf_enabled()) { ++ trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, ++ epp, ++ FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), ++ FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), ++ policy->boost_enabled); ++ } + +- return ret; ++ return amd_pstate_set_epp(cpudata, epp); } --DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf); -+DEFINE_STATIC_CALL(amd_pstate_init_perf, msr_init_perf); - - static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) - { + static inline int msr_cppc_enable(bool enable) +@@ -442,17 +479,23 @@ static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) return static_call(amd_pstate_init_perf)(cpudata); } --static void cppc_update_perf(struct amd_cpudata *cpudata, -+static void shmem_update_perf(struct amd_cpudata *cpudata, - u32 min_perf, u32 des_perf, - u32 max_perf, bool fast_switch) +-static void shmem_update_perf(struct amd_cpudata *cpudata, +- u32 min_perf, u32 des_perf, +- u32 max_perf, bool fast_switch) ++static int shmem_update_perf(struct amd_cpudata *cpudata, u32 min_perf, ++ u32 des_perf, u32 max_perf, u32 epp, bool fast_switch) { -@@ -527,25 +542,41 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, + struct cppc_perf_ctrls perf_ctrls; + ++ if (cppc_state == AMD_PSTATE_ACTIVE) { ++ int ret = shmem_set_epp(cpudata, epp); ++ ++ if (ret) ++ return ret; ++ } ++ + perf_ctrls.max_perf = max_perf; + perf_ctrls.min_perf = min_perf; + perf_ctrls.desired_perf = des_perf; + +- cppc_set_perf(cpudata->cpu, &perf_ctrls); ++ return cppc_set_perf(cpudata->cpu, &perf_ctrls); + } + + static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) +@@ -493,14 +536,8 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, + { + unsigned long max_freq; + struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu); +- u64 prev = READ_ONCE(cpudata->cppc_req_cached); + u32 nominal_perf = READ_ONCE(cpudata->nominal_perf); +- u64 value = prev; + +- min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, +- cpudata->max_limit_perf); +- max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, +- cpudata->max_limit_perf); + des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); + + max_freq = READ_ONCE(cpudata->max_limit_freq); +@@ -511,34 +548,18 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, + des_perf = 0; + } + +- value &= ~AMD_CPPC_MIN_PERF(~0L); +- value |= AMD_CPPC_MIN_PERF(min_perf); +- +- value &= ~AMD_CPPC_DES_PERF(~0L); +- value |= AMD_CPPC_DES_PERF(des_perf); +- + /* limit the max perf when core performance boost feature is disabled */ + if (!cpudata->boost_supported) + max_perf = min_t(unsigned long, nominal_perf, max_perf); + +- value &= ~AMD_CPPC_MAX_PERF(~0L); +- value |= AMD_CPPC_MAX_PERF(max_perf); +- + if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) { + trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq, + 
cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc, +- cpudata->cpu, (value != prev), fast_switch); ++ cpudata->cpu, fast_switch); + } + +- if (value == prev) +- goto cpufreq_policy_put; ++ amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch); + +- WRITE_ONCE(cpudata->cppc_req_cached, value); +- +- amd_pstate_update_perf(cpudata, min_perf, des_perf, +- max_perf, fast_switch); +- +-cpufreq_policy_put: cpufreq_cpu_put(policy); } --static int amd_pstate_verify(struct cpufreq_policy_data *policy) -+static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) - { -- cpufreq_verify_within_cpu_limits(policy); -+ /* -+ * Initialize lower frequency limit (i.e.policy->min) with -+ * lowest_nonlinear_frequency which is the most energy efficient -+ * frequency. Override the initial value set by cpufreq core and -+ * amd-pstate qos_requests. -+ */ -+ if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) { -+ struct cpufreq_policy *policy = cpufreq_cpu_get(policy_data->cpu); -+ struct amd_cpudata *cpudata; -+ -+ if (!policy) -+ return -EINVAL; -+ -+ cpudata = policy->driver_data; -+ policy_data->min = cpudata->lowest_nonlinear_freq; -+ cpufreq_cpu_put(policy); -+ } -+ -+ cpufreq_verify_within_cpu_limits(policy_data); -+ pr_debug("policy_max =%d, policy_min=%d\n", policy_data->max, policy_data->min); - - return 0; - } +@@ -570,7 +591,7 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) { -- u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf; -+ u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf, max_freq; +- u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf, max_freq; ++ u32 max_limit_perf, min_limit_perf, max_perf, max_freq; struct amd_cpudata *cpudata = policy->driver_data; -- if (cpudata->boost_supported && !policy->boost_enabled) -- max_perf = READ_ONCE(cpudata->nominal_perf); -- else -- max_perf = READ_ONCE(cpudata->highest_perf); -- -- max_limit_perf = div_u64(policy->max * max_perf, policy->cpuinfo.max_freq); -- min_limit_perf = div_u64(policy->min * max_perf, policy->cpuinfo.max_freq); -+ max_perf = READ_ONCE(cpudata->highest_perf); -+ max_freq = READ_ONCE(cpudata->max_freq); -+ max_limit_perf = div_u64(policy->max * max_perf, max_freq); -+ min_limit_perf = div_u64(policy->min * max_perf, max_freq); + max_perf = READ_ONCE(cpudata->highest_perf); +@@ -578,12 +599,8 @@ static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) + max_limit_perf = div_u64(policy->max * max_perf, max_freq); + min_limit_perf = div_u64(policy->min * max_perf, max_freq); - lowest_perf = READ_ONCE(cpudata->lowest_perf); - if (min_limit_perf < lowest_perf) -@@ -771,7 +802,7 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) +- lowest_perf = READ_ONCE(cpudata->lowest_perf); +- if (min_limit_perf < lowest_perf) +- min_limit_perf = lowest_perf; +- +- if (max_limit_perf < min_limit_perf) +- max_limit_perf = min_limit_perf; ++ if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) ++ min_limit_perf = min(cpudata->nominal_perf, max_limit_perf); + + WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); + WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); +@@ -704,8 +721,8 @@ static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) + + if (on) + policy->cpuinfo.max_freq = max_freq; +- else if (policy->cpuinfo.max_freq > nominal_freq * 1000) +- policy->cpuinfo.max_freq = nominal_freq * 1000; ++ else if (policy->cpuinfo.max_freq 
> nominal_freq) ++ policy->cpuinfo.max_freq = nominal_freq; + + policy->max = policy->cpuinfo.max_freq; + +@@ -727,12 +744,11 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) + pr_err("Boost mode is not supported by this processor or SBIOS\n"); + return -EOPNOTSUPP; + } +- mutex_lock(&amd_pstate_driver_lock); ++ guard(mutex)(&amd_pstate_driver_lock); ++ + ret = amd_pstate_cpu_boost_update(policy, state); +- WRITE_ONCE(cpudata->boost_state, !ret ? state : false); + policy->boost_enabled = !ret ? state : false; + refresh_frequency_limits(policy); +- mutex_unlock(&amd_pstate_driver_lock); + + return ret; + } +@@ -752,9 +768,6 @@ static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata) + goto exit_err; + } + +- /* at least one CPU supports CPB, even if others fail later on to set up */ +- current_pstate_driver->boost_enabled = true; +- + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val); + if (ret) { + pr_err_once("failed to read initial CPU boost state!\n"); +@@ -802,7 +815,7 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) * sched_set_itmt_support(true) has been called and it is valid to * update them at any time after it has been called. */ @@ -837,388 +568,479 @@ index 91d3c3b1c2d3..f6d04eb40af9 100644 schedule_work(&sched_prefcore_work); } -@@ -825,7 +856,7 @@ static u32 amd_pstate_get_transition_delay_us(unsigned int cpu) +@@ -823,7 +836,8 @@ static void amd_pstate_update_limits(unsigned int cpu) + if (!amd_pstate_prefcore) + return; - transition_delay_ns = cppc_get_transition_latency(cpu); - if (transition_delay_ns == CPUFREQ_ETERNAL) { -- if (cpu_feature_enabled(X86_FEATURE_FAST_CPPC)) -+ if (cpu_feature_enabled(X86_FEATURE_AMD_FAST_CPPC)) - return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY; - else - return AMD_PSTATE_TRANSITION_DELAY; -@@ -864,7 +895,6 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) - { - int ret; - u32 min_freq, max_freq; -- u64 numerator; - u32 nominal_perf, nominal_freq; - u32 lowest_nonlinear_perf, lowest_nonlinear_freq; - u32 boost_ratio, lowest_nonlinear_ratio; -@@ -886,10 +916,7 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) +- mutex_lock(&amd_pstate_driver_lock); ++ guard(mutex)(&amd_pstate_driver_lock); ++ + ret = amd_get_highest_perf(cpu, &cur_high); + if (ret) + goto free_cpufreq_put; +@@ -843,7 +857,6 @@ static void amd_pstate_update_limits(unsigned int cpu) + if (!highest_perf_changed) + cpufreq_update_policy(cpu); + +- mutex_unlock(&amd_pstate_driver_lock); + } + + /* +@@ -905,29 +918,29 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) + return ret; + + if (quirks && quirks->lowest_freq) +- min_freq = quirks->lowest_freq * 1000; ++ min_freq = quirks->lowest_freq; + else +- min_freq = cppc_perf.lowest_freq * 1000; ++ min_freq = cppc_perf.lowest_freq; + + if (quirks && quirks->nominal_freq) +- nominal_freq = quirks->nominal_freq ; ++ nominal_freq = quirks->nominal_freq; + else + nominal_freq = cppc_perf.nominal_freq; nominal_perf = READ_ONCE(cpudata->nominal_perf); -- ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator); -- if (ret) -- return ret; -- boost_ratio = div_u64(numerator << SCHED_CAPACITY_SHIFT, nominal_perf); -+ boost_ratio = div_u64(cpudata->highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf); - max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000; + boost_ratio = div_u64(cpudata->highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf); +- max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000; 
++ max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT); lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); -@@ -979,7 +1006,7 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) - policy->fast_switch_possible = true; + lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT, + nominal_perf); +- lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT) * 1000; ++ lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT); - ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0], -- FREQ_QOS_MIN, policy->cpuinfo.min_freq); -+ FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE); - if (ret < 0) { - dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret); - goto free_cpudata1; -@@ -1023,7 +1050,7 @@ static int amd_pstate_cpu_resume(struct cpufreq_policy *policy) +- WRITE_ONCE(cpudata->min_freq, min_freq); +- WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq); +- WRITE_ONCE(cpudata->nominal_freq, nominal_freq); +- WRITE_ONCE(cpudata->max_freq, max_freq); ++ WRITE_ONCE(cpudata->min_freq, min_freq * 1000); ++ WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq * 1000); ++ WRITE_ONCE(cpudata->nominal_freq, nominal_freq * 1000); ++ WRITE_ONCE(cpudata->max_freq, max_freq * 1000); + + /** + * Below values need to be initialized correctly, otherwise driver will fail to load +@@ -937,13 +950,13 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) + */ + if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) { + pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n", +- min_freq, max_freq, nominal_freq * 1000); ++ min_freq, max_freq, nominal_freq); + return -EINVAL; + } + +- if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq * 1000) { ++ if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq) { + pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n", +- lowest_nonlinear_freq, min_freq, nominal_freq * 1000); ++ lowest_nonlinear_freq, min_freq, nominal_freq); + return -EINVAL; + } + +@@ -1160,7 +1173,6 @@ static ssize_t show_energy_performance_available_preferences( + static ssize_t store_energy_performance_preference( + struct cpufreq_policy *policy, const char *buf, size_t count) { - int ret; +- struct amd_cpudata *cpudata = policy->driver_data; + char str_preference[21]; + ssize_t ret; -- ret = amd_pstate_enable(true); -+ ret = amd_pstate_cppc_enable(true); - if (ret) - pr_err("failed to enable amd-pstate during resume, return %d\n", ret); +@@ -1172,11 +1184,11 @@ static ssize_t store_energy_performance_preference( + if (ret < 0) + return -EINVAL; -@@ -1034,7 +1061,7 @@ static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy) - { - int ret; +- mutex_lock(&amd_pstate_limits_lock); +- ret = amd_pstate_set_energy_pref_index(cpudata, ret); +- mutex_unlock(&amd_pstate_limits_lock); ++ guard(mutex)(&amd_pstate_limits_lock); ++ ++ ret = amd_pstate_set_energy_pref_index(policy, ret); -- ret = amd_pstate_enable(false); -+ ret = amd_pstate_cppc_enable(false); - if (ret) - pr_err("failed to disable amd-pstate during suspend, return %d\n", ret); - -@@ -1167,25 +1194,41 @@ static ssize_t show_energy_performance_preference( - - static void amd_pstate_driver_cleanup(void) - { -- amd_pstate_enable(false); -+ amd_pstate_cppc_enable(false); - cppc_state = AMD_PSTATE_DISABLE; - current_pstate_driver = NULL; +- 
return ret ?: count; ++ return ret ? ret : count; } -+static int amd_pstate_set_driver(int mode_idx) -+{ -+ if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) { -+ cppc_state = mode_idx; -+ if (cppc_state == AMD_PSTATE_DISABLE) -+ pr_info("driver is explicitly disabled\n"); -+ -+ if (cppc_state == AMD_PSTATE_ACTIVE) -+ current_pstate_driver = &amd_pstate_epp_driver; -+ -+ if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED) -+ current_pstate_driver = &amd_pstate_driver; -+ -+ return 0; + static ssize_t show_energy_performance_preference( +@@ -1185,9 +1197,22 @@ static ssize_t show_energy_performance_preference( + struct amd_cpudata *cpudata = policy->driver_data; + int preference; + +- preference = amd_pstate_get_energy_pref_index(cpudata); +- if (preference < 0) +- return preference; ++ switch (cpudata->epp_cached) { ++ case AMD_CPPC_EPP_PERFORMANCE: ++ preference = EPP_INDEX_PERFORMANCE; ++ break; ++ case AMD_CPPC_EPP_BALANCE_PERFORMANCE: ++ preference = EPP_INDEX_BALANCE_PERFORMANCE; ++ break; ++ case AMD_CPPC_EPP_BALANCE_POWERSAVE: ++ preference = EPP_INDEX_BALANCE_POWERSAVE; ++ break; ++ case AMD_CPPC_EPP_POWERSAVE: ++ preference = EPP_INDEX_POWERSAVE; ++ break; ++ default: ++ return -EINVAL; + } + + return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]); + } +@@ -1236,6 +1261,9 @@ static int amd_pstate_register_driver(int mode) + return ret; + } + ++ /* at least one CPU supports CPB */ ++ current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB); + -+ return -EINVAL; -+} -+ - static int amd_pstate_register_driver(int mode) + ret = cpufreq_register_driver(current_pstate_driver); + if (ret) { + amd_pstate_driver_cleanup(); +@@ -1340,13 +1368,10 @@ EXPORT_SYMBOL_GPL(amd_pstate_update_status); + static ssize_t status_show(struct device *dev, + struct device_attribute *attr, char *buf) { +- ssize_t ret; + +- mutex_lock(&amd_pstate_driver_lock); +- ret = amd_pstate_show_status(buf); +- mutex_unlock(&amd_pstate_driver_lock); ++ guard(mutex)(&amd_pstate_driver_lock); + +- return ret; ++ return amd_pstate_show_status(buf); + } + + static ssize_t status_store(struct device *a, struct device_attribute *b, +@@ -1355,9 +1380,8 @@ static ssize_t status_store(struct device *a, struct device_attribute *b, + char *p = memchr(buf, '\n', count); int ret; -- if (mode == AMD_PSTATE_PASSIVE || mode == AMD_PSTATE_GUIDED) -- current_pstate_driver = &amd_pstate_driver; -- else if (mode == AMD_PSTATE_ACTIVE) -- current_pstate_driver = &amd_pstate_epp_driver; +- mutex_lock(&amd_pstate_driver_lock); ++ guard(mutex)(&amd_pstate_driver_lock); + ret = amd_pstate_update_status(buf, p ? p - buf : count); +- mutex_unlock(&amd_pstate_driver_lock); + + return ret < 0 ? ret : count; + } +@@ -1451,7 +1475,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) + return -ENOMEM; + + cpudata->cpu = policy->cpu; +- cpudata->epp_policy = 0; + + ret = amd_pstate_init_perf(cpudata); + if (ret) +@@ -1477,8 +1500,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) + + policy->driver_data = cpudata; + +- cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata, 0); +- + policy->min = policy->cpuinfo.min_freq; + policy->max = policy->cpuinfo.max_freq; + +@@ -1489,10 +1510,13 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) + * the default cpufreq governor is neither powersave nor performance. 
+ */ + if (amd_pstate_acpi_pm_profile_server() || +- amd_pstate_acpi_pm_profile_undefined()) ++ amd_pstate_acpi_pm_profile_undefined()) { + policy->policy = CPUFREQ_POLICY_PERFORMANCE; - else -- return -EINVAL; -+ ret = amd_pstate_set_driver(mode); ++ cpudata->epp_default = amd_pstate_get_epp(cpudata); ++ } else { + policy->policy = CPUFREQ_POLICY_POWERSAVE; ++ cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE; ++ } + + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); +@@ -1505,6 +1529,9 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) + return ret; + WRITE_ONCE(cpudata->cppc_cap1_cached, value); + } ++ ret = amd_pstate_set_epp(cpudata, cpudata->epp_default); + if (ret) + return ret; - cppc_state = mode; + current_pstate_driver->adjust_perf = NULL; -- ret = amd_pstate_enable(true); -+ ret = amd_pstate_cppc_enable(true); - if (ret) { - pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n", - ret); -@@ -1463,6 +1506,8 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) - WRITE_ONCE(cpudata->cppc_cap1_cached, value); - } - -+ current_pstate_driver->adjust_perf = NULL; -+ - return 0; - - free_cpudata1: -@@ -1485,26 +1530,13 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) +@@ -1530,51 +1557,24 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; -- u32 max_perf, min_perf, min_limit_perf, max_limit_perf; -+ u32 max_perf, min_perf; - u64 value; - s16 epp; +- u32 max_perf, min_perf; +- u64 value; +- s16 epp; ++ u32 epp; -- if (cpudata->boost_supported && !policy->boost_enabled) -- max_perf = READ_ONCE(cpudata->nominal_perf); -- else -- max_perf = READ_ONCE(cpudata->highest_perf); -+ max_perf = READ_ONCE(cpudata->highest_perf); - min_perf = READ_ONCE(cpudata->lowest_perf); -- max_limit_perf = div_u64(policy->max * max_perf, policy->cpuinfo.max_freq); -- min_limit_perf = div_u64(policy->min * max_perf, policy->cpuinfo.max_freq); -- -- if (min_limit_perf < min_perf) -- min_limit_perf = min_perf; -- -- if (max_limit_perf < min_limit_perf) -- max_limit_perf = min_limit_perf; -- -- WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); -- WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); -+ amd_pstate_update_min_max_limit(policy); +- max_perf = READ_ONCE(cpudata->highest_perf); +- min_perf = READ_ONCE(cpudata->lowest_perf); + amd_pstate_update_min_max_limit(policy); - max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); -@@ -1541,12 +1573,6 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) +- max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, +- cpudata->max_limit_perf); +- min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, +- cpudata->max_limit_perf); +- value = READ_ONCE(cpudata->cppc_req_cached); +- if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) - epp = 0; - -- /* Set initial EPP value */ -- if (cpu_feature_enabled(X86_FEATURE_CPPC)) { -- value &= ~GENMASK_ULL(31, 24); -- value |= (u64)epp << 24; -- } +- min_perf = min(cpudata->nominal_perf, max_perf); - - WRITE_ONCE(cpudata->cppc_req_cached, value); - return amd_pstate_set_epp(cpudata, epp); - } -@@ -1583,7 +1609,7 @@ static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) - u64 value, max_perf; - int ret; +- /* Initial min/max values for CPPC 
Performance Controls Register */ +- value &= ~AMD_CPPC_MIN_PERF(~0L); +- value |= AMD_CPPC_MIN_PERF(min_perf); +- +- value &= ~AMD_CPPC_MAX_PERF(~0L); +- value |= AMD_CPPC_MAX_PERF(max_perf); +- +- /* CPPC EPP feature require to set zero to the desire perf bit */ +- value &= ~AMD_CPPC_DES_PERF(~0L); +- value |= AMD_CPPC_DES_PERF(0); +- +- cpudata->epp_policy = cpudata->policy; ++ epp = 0; ++ else ++ epp = READ_ONCE(cpudata->epp_cached); -- ret = amd_pstate_enable(true); -+ ret = amd_pstate_cppc_enable(true); - if (ret) - pr_err("failed to enable amd pstate during resume, return %d\n", ret); - -@@ -1594,8 +1620,9 @@ static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) - wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - } else { - perf_ctrls.max_perf = max_perf; -- perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached); - cppc_set_perf(cpudata->cpu, &perf_ctrls); -+ perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached); -+ cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); +- /* Get BIOS pre-defined epp value */ +- epp = amd_pstate_get_epp(cpudata, value); +- if (epp < 0) { +- /** +- * This return value can only be negative for shared_memory +- * systems where EPP register read/write not supported. +- */ +- return epp; ++ if (trace_amd_pstate_epp_perf_enabled()) { ++ trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp, ++ cpudata->min_limit_perf, ++ cpudata->max_limit_perf, ++ policy->boost_enabled); } + +- if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) +- epp = 0; +- +- WRITE_ONCE(cpudata->cppc_req_cached, value); +- return amd_pstate_set_epp(cpudata, epp); ++ return amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, ++ cpudata->max_limit_perf, epp, false); } -@@ -1635,9 +1662,11 @@ static void amd_pstate_epp_offline(struct cpufreq_policy *policy) - wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - } else { - perf_ctrls.desired_perf = 0; -+ perf_ctrls.min_perf = min_perf; - perf_ctrls.max_perf = min_perf; -- perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE); - cppc_set_perf(cpudata->cpu, &perf_ctrls); -+ perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE); -+ cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); - } - mutex_unlock(&amd_pstate_limits_lock); - } -@@ -1657,13 +1686,6 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) + static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) +@@ -1603,87 +1603,63 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) return 0; } --static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy) --{ -- cpufreq_verify_within_cpu_limits(policy); -- pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min); -- return 0; --} -- - static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) +-static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) ++static int amd_pstate_epp_reenable(struct cpufreq_policy *policy) + { +- struct cppc_perf_ctrls perf_ctrls; +- u64 value, max_perf; ++ struct amd_cpudata *cpudata = policy->driver_data; ++ u64 max_perf; + int ret; + + ret = amd_pstate_cppc_enable(true); + if (ret) + pr_err("failed to enable amd pstate during resume, return %d\n", ret); + +- value = READ_ONCE(cpudata->cppc_req_cached); + max_perf = READ_ONCE(cpudata->highest_perf); + +- if (cpu_feature_enabled(X86_FEATURE_CPPC)) { +- wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); +- } else { +- perf_ctrls.max_perf = max_perf; +- 
cppc_set_perf(cpudata->cpu, &perf_ctrls); +- perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached); +- cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); ++ if (trace_amd_pstate_epp_perf_enabled()) { ++ trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, ++ cpudata->epp_cached, ++ FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), ++ max_perf, policy->boost_enabled); + } ++ ++ return amd_pstate_update_perf(cpudata, 0, 0, max_perf, cpudata->epp_cached, false); + } + + static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; -@@ -1677,7 +1699,7 @@ static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) - cpudata->suspended = true; ++ int ret; - /* disable CPPC in lowlevel firmware */ -- ret = amd_pstate_enable(false); -+ ret = amd_pstate_cppc_enable(false); - if (ret) - pr_err("failed to suspend, return %d\n", ret); + pr_debug("AMD CPU Core %d going online\n", cpudata->cpu); -@@ -1719,7 +1741,7 @@ static struct cpufreq_driver amd_pstate_driver = { - - static struct cpufreq_driver amd_pstate_epp_driver = { - .flags = CPUFREQ_CONST_LOOPS, -- .verify = amd_pstate_epp_verify_policy, -+ .verify = amd_pstate_verify, - .setpolicy = amd_pstate_epp_set_policy, - .init = amd_pstate_epp_cpu_init, - .exit = amd_pstate_epp_cpu_exit, -@@ -1733,26 +1755,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = { - .attr = amd_pstate_epp_attr, - }; - --static int __init amd_pstate_set_driver(int mode_idx) --{ -- if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) { -- cppc_state = mode_idx; -- if (cppc_state == AMD_PSTATE_DISABLE) -- pr_info("driver is explicitly disabled\n"); -- -- if (cppc_state == AMD_PSTATE_ACTIVE) -- current_pstate_driver = &amd_pstate_epp_driver; -- -- if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED) -- current_pstate_driver = &amd_pstate_driver; -- -- return 0; +- if (cppc_state == AMD_PSTATE_ACTIVE) { +- amd_pstate_epp_reenable(cpudata); +- cpudata->suspended = false; - } ++ ret = amd_pstate_epp_reenable(policy); ++ if (ret) ++ return ret; ++ cpudata->suspended = false; + + return 0; + } + +-static void amd_pstate_epp_offline(struct cpufreq_policy *policy) +-{ +- struct amd_cpudata *cpudata = policy->driver_data; +- struct cppc_perf_ctrls perf_ctrls; +- int min_perf; +- u64 value; - -- return -EINVAL; +- min_perf = READ_ONCE(cpudata->lowest_perf); +- value = READ_ONCE(cpudata->cppc_req_cached); +- +- mutex_lock(&amd_pstate_limits_lock); +- if (cpu_feature_enabled(X86_FEATURE_CPPC)) { +- cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN; +- +- /* Set max perf same as min perf */ +- value &= ~AMD_CPPC_MAX_PERF(~0L); +- value |= AMD_CPPC_MAX_PERF(min_perf); +- value &= ~AMD_CPPC_MIN_PERF(~0L); +- value |= AMD_CPPC_MIN_PERF(min_perf); +- wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); +- } else { +- perf_ctrls.desired_perf = 0; +- perf_ctrls.min_perf = min_perf; +- perf_ctrls.max_perf = min_perf; +- cppc_set_perf(cpudata->cpu, &perf_ctrls); +- perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE); +- cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); +- } +- mutex_unlock(&amd_pstate_limits_lock); -} - --/** -+/* - * CPPC function is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F. - * show the debug message that helps to check if the CPU has CPPC support for loading issue. 
- */ -@@ -1842,10 +1845,10 @@ static int __init amd_pstate_init(void) - if (cppc_state == AMD_PSTATE_UNDEFINED) { - /* Disable on the following configs by default: - * 1. Undefined platforms -- * 2. Server platforms -+ * 2. Server platforms with CPUs older than Family 0x1A. - */ - if (amd_pstate_acpi_pm_profile_undefined() || -- amd_pstate_acpi_pm_profile_server()) { -+ (amd_pstate_acpi_pm_profile_server() && boot_cpu_data.x86 < 0x1A)) { - pr_info("driver load is disabled, boot with specific mode to enable this\n"); - return -ENODEV; - } -@@ -1853,31 +1856,19 @@ static int __init amd_pstate_init(void) - cppc_state = CONFIG_X86_AMD_PSTATE_DEFAULT_MODE; - } + static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) + { + struct amd_cpudata *cpudata = policy->driver_data; +- +- pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu); ++ int min_perf; -- switch (cppc_state) { -- case AMD_PSTATE_DISABLE: -+ if (cppc_state == AMD_PSTATE_DISABLE) { - pr_info("driver load is disabled, boot with specific mode to enable this\n"); - return -ENODEV; -- case AMD_PSTATE_PASSIVE: -- case AMD_PSTATE_ACTIVE: -- case AMD_PSTATE_GUIDED: -- ret = amd_pstate_set_driver(cppc_state); -- if (ret) -- return ret; -- break; -- default: -- return -EINVAL; - } + if (cpudata->suspended) + return 0; - /* capability check */ - if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - pr_debug("AMD CPPC MSR based functionality is supported\n"); -- if (cppc_state != AMD_PSTATE_ACTIVE) -- current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; - } else { - pr_debug("AMD CPPC shared memory based functionality is supported\n"); -- static_call_update(amd_pstate_enable, cppc_enable); -- static_call_update(amd_pstate_init_perf, cppc_init_perf); -- static_call_update(amd_pstate_update_perf, cppc_update_perf); -+ static_call_update(amd_pstate_cppc_enable, shmem_cppc_enable); -+ static_call_update(amd_pstate_init_perf, shmem_init_perf); -+ static_call_update(amd_pstate_update_perf, shmem_update_perf); +- if (cppc_state == AMD_PSTATE_ACTIVE) +- amd_pstate_epp_offline(policy); ++ min_perf = READ_ONCE(cpudata->lowest_perf); + +- return 0; ++ guard(mutex)(&amd_pstate_limits_lock); ++ ++ if (trace_amd_pstate_epp_perf_enabled()) { ++ trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, ++ AMD_CPPC_EPP_BALANCE_POWERSAVE, ++ min_perf, min_perf, policy->boost_enabled); ++ } ++ ++ return amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, ++ AMD_CPPC_EPP_BALANCE_POWERSAVE, false); + } + + static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) +@@ -1711,12 +1687,10 @@ static int amd_pstate_epp_resume(struct cpufreq_policy *policy) + struct amd_cpudata *cpudata = policy->driver_data; + + if (cpudata->suspended) { +- mutex_lock(&amd_pstate_limits_lock); ++ guard(mutex)(&amd_pstate_limits_lock); + + /* enable amd pstate from suspend state*/ +- amd_pstate_epp_reenable(cpudata); +- +- mutex_unlock(&amd_pstate_limits_lock); ++ amd_pstate_epp_reenable(policy); + + cpudata->suspended = false; + } +@@ -1869,6 +1843,8 @@ static int __init amd_pstate_init(void) + static_call_update(amd_pstate_cppc_enable, shmem_cppc_enable); + static_call_update(amd_pstate_init_perf, shmem_init_perf); + static_call_update(amd_pstate_update_perf, shmem_update_perf); ++ static_call_update(amd_pstate_get_epp, shmem_get_epp); ++ static_call_update(amd_pstate_set_epp, shmem_set_epp); } if (amd_pstate_prefcore) { -@@ -1886,17 +1877,10 @@ static int __init amd_pstate_init(void) - return ret; - } +diff --git a/drivers/cpufreq/amd-pstate.h 
b/drivers/cpufreq/amd-pstate.h +index cd573bc6b6db..9747e3be6cee 100644 +--- a/drivers/cpufreq/amd-pstate.h ++++ b/drivers/cpufreq/amd-pstate.h +@@ -57,7 +57,6 @@ struct amd_aperf_mperf { + * @hw_prefcore: check whether HW supports preferred core featue. + * Only when hw_prefcore and early prefcore param are true, + * AMD P-State driver supports preferred core featue. +- * @epp_policy: Last saved policy used to set energy-performance preference + * @epp_cached: Cached CPPC energy-performance preference value + * @policy: Cpufreq policy value + * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value +@@ -94,13 +93,11 @@ struct amd_cpudata { + bool hw_prefcore; -- /* enable amd pstate feature */ -- ret = amd_pstate_enable(true); -- if (ret) { -- pr_err("failed to enable driver mode(%d)\n", cppc_state); -- return ret; -- } -- -- ret = cpufreq_register_driver(current_pstate_driver); -+ ret = amd_pstate_register_driver(cppc_state); - if (ret) { - pr_err("failed to register with return %d\n", ret); -- goto disable_driver; -+ return ret; - } - - dev_root = bus_get_dev_root(&cpu_subsys); -@@ -1913,8 +1897,7 @@ static int __init amd_pstate_init(void) - - global_attr_free: - cpufreq_unregister_driver(current_pstate_driver); --disable_driver: -- amd_pstate_enable(false); -+ amd_pstate_cppc_enable(false); - return ret; - } - device_initcall(amd_pstate_init); -diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h -index dd4682857c12..23698d0f4bb4 100644 ---- a/tools/arch/x86/include/asm/cpufeatures.h -+++ b/tools/arch/x86/include/asm/cpufeatures.h -@@ -472,7 +472,7 @@ - #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ - #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ - #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ --#define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ -+#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ + /* EPP feature related attributes*/ +- s16 epp_policy; + s16 epp_cached; + u32 policy; + u64 cppc_cap1_cached; + bool suspended; + s16 epp_default; +- bool boost_state; + }; /* - * BUG word(s) -- 2.48.0.rc1 -From ce805cd6b10a3c02064ce29d5368294478d5488b Mon Sep 17 00:00:00 2001 +From b74b9b0459100443f73ce718d0191bf58d6cb4b4 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 9 Jan 2025 16:36:26 +0100 -Subject: [PATCH 03/13] amd-tlb-broadcast +Date: Mon, 20 Jan 2025 13:21:35 +0100 +Subject: [PATCH 02/12] amd-tlb-broadcast Signed-off-by: Peter Jung --- - arch/x86/Kconfig | 2 +- - arch/x86/hyperv/mmu.c | 1 - - arch/x86/include/asm/cpufeatures.h | 1 + - arch/x86/include/asm/invlpgb.h | 93 ++++++ - arch/x86/include/asm/mmu.h | 6 + - arch/x86/include/asm/mmu_context.h | 12 + - arch/x86/include/asm/paravirt.h | 5 - - arch/x86/include/asm/paravirt_types.h | 2 - - arch/x86/include/asm/tlbbatch.h | 1 + - arch/x86/include/asm/tlbflush.h | 31 +- - arch/x86/kernel/cpu/amd.c | 16 ++ - arch/x86/kernel/kvm.c | 1 - - arch/x86/kernel/paravirt.c | 6 - - arch/x86/kernel/setup.c | 4 + - arch/x86/mm/pgtable.c | 16 +- - arch/x86/mm/tlb.c | 393 +++++++++++++++++++++++++- - arch/x86/xen/mmu_pv.c | 1 - - mm/memory.c | 1 - - mm/mmap.c | 2 - - mm/swap_state.c | 1 - - mm/vma.c | 2 - - 21 files changed, 541 insertions(+), 56 deletions(-) + arch/x86/Kconfig | 2 +- + arch/x86/Kconfig.cpu | 5 + + arch/x86/hyperv/mmu.c | 1 - + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/invlpgb.h | 103 +++++ + 
arch/x86/include/asm/mmu.h | 6 + + arch/x86/include/asm/mmu_context.h | 14 + + arch/x86/include/asm/msr-index.h | 2 + + arch/x86/include/asm/paravirt.h | 5 - + arch/x86/include/asm/paravirt_types.h | 2 - + arch/x86/include/asm/tlbbatch.h | 1 + + arch/x86/include/asm/tlbflush.h | 92 ++++- + arch/x86/kernel/cpu/amd.c | 12 + + arch/x86/kernel/kvm.c | 1 - + arch/x86/kernel/paravirt.c | 6 - + arch/x86/mm/pgtable.c | 16 +- + arch/x86/mm/tlb.c | 496 +++++++++++++++++++++++-- + arch/x86/xen/mmu_pv.c | 1 - + mm/memory.c | 1 - + mm/mmap.c | 2 - + mm/swap_state.c | 1 - + mm/vma.c | 2 - + tools/arch/x86/include/asm/msr-index.h | 2 + + 23 files changed, 695 insertions(+), 79 deletions(-) create mode 100644 arch/x86/include/asm/invlpgb.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 171be04eca1f..76f9e6d11872 100644 +index ef6cfea9df73..1f824dcab4dc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig -@@ -270,7 +270,7 @@ config X86 +@@ -273,7 +273,7 @@ config X86 select HAVE_PCI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -1227,6 +1049,29 @@ index 171be04eca1f..76f9e6d11872 100644 select MMU_GATHER_MERGE_VMAS select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API +diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu +index 2a7279d80460..bacdc502903f 100644 +--- a/arch/x86/Kconfig.cpu ++++ b/arch/x86/Kconfig.cpu +@@ -395,6 +395,10 @@ config X86_VMX_FEATURE_NAMES + def_bool y + depends on IA32_FEAT_CTL + ++config X86_BROADCAST_TLB_FLUSH ++ def_bool y ++ depends on CPU_SUP_AMD ++ + menuconfig PROCESSOR_SELECT + bool "Supported processor vendors" if EXPERT + help +@@ -431,6 +435,7 @@ config CPU_SUP_CYRIX_32 + config CPU_SUP_AMD + default y + bool "Support AMD processors" if PROCESSOR_SELECT ++ select X86_BROADCAST_TLB_FLUSH + help + This enables detection, tunings and quirks for AMD processors + diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 1cc113200ff5..cbe6c71e17c1 100644 --- a/arch/x86/hyperv/mmu.c @@ -1238,27 +1083,28 @@ index 1cc113200ff5..cbe6c71e17c1 100644 - pv_ops.mmu.tlb_remove_table = tlb_remove_table; } diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index a7c93191b7c6..19892faf43d5 100644 +index 645aa360628d..989e4c9cad2e 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h -@@ -335,6 +335,7 @@ +@@ -338,6 +338,7 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ -+#define X86_FEATURE_INVLPGB (13*32+ 3) /* "invlpgb" INVLPGB instruction */ ++#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instruction supported. 
 */
 #define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */
 #define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */
 #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
diff --git a/arch/x86/include/asm/invlpgb.h b/arch/x86/include/asm/invlpgb.h
new file mode 100644
-index 000000000000..2669ebfffe81
+index 000000000000..418402535319
--- /dev/null
+++ b/arch/x86/include/asm/invlpgb.h
-@@ -0,0 +1,93 @@
+@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_INVLPGB
+#define _ASM_X86_INVLPGB
+
++#include 
+#include 
+
+/*
+ * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
+ *
+ * The INVLPGB instruction is weakly ordered, and a batch of invalidations can
+ * be done in a parallel fashion.
+ *
+ * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from
+ * this CPU have completed.
+ */
-+static inline void __invlpgb(unsigned long asid, unsigned long pcid, unsigned long addr,
-+ int extra_count, bool pmd_stride, unsigned long flags)
++static inline void __invlpgb(unsigned long asid, unsigned long pcid,
++ unsigned long addr, u16 extra_count,
++ bool pmd_stride, unsigned long flags)
+{
-+ u64 rax = addr | flags;
-+ u32 ecx = (pmd_stride << 31) | extra_count;
+ u32 edx = (pcid << 16) | asid;
++ u32 ecx = (pmd_stride << 31) | extra_count;
++ u64 rax = addr | flags;
+
-+ asm volatile("invlpgb" : : "a" (rax), "c" (ecx), "d" (edx));
++ /* INVLPGB; supported in binutils >= 2.36. */
++ asm volatile(".byte 0x0f, 0x01, 0xfe" : : "a" (rax), "c" (ecx), "d" (edx));
++}
++
++/* Wait for INVLPGB originated by this CPU to complete. */
++static inline void tlbsync(void)
++{
++ cant_migrate();
++ /* TLBSYNC: supported in binutils >= 2.36. */
++ asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
+}
+
+/*
+ * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination
+ * of the three. For example:
+ * - INVLPGB_VA | INVLPGB_INCLUDE_GLOBAL: invalidate all TLB entries at the address
-+ * - INVLPGB_PCID: invalidate all TLB entries matching the PCID
++ * - INVLPGB_PCID: invalidate all TLB entries matching the PCID
+ *
+ * The first can be used to invalidate (kernel) mappings at a particular
+ * address across all processes.
+ *
+ * The latter invalidates all TLB entries matching a PCID.
+ */
+#define INVLPGB_VA BIT(0)
+#define INVLPGB_PCID BIT(1)
+#define INVLPGB_ASID BIT(2)
+#define INVLPGB_INCLUDE_GLOBAL BIT(3)
+#define INVLPGB_FINAL_ONLY BIT(4)
+#define INVLPGB_INCLUDE_NESTED BIT(5)
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invlpgb_flush_user(unsigned long pcid,
+ unsigned long addr)
+{
+ __invlpgb(0, pcid, addr, 0, 0, INVLPGB_PCID | INVLPGB_VA);
++ tlbsync();
+}
+
-+static inline void invlpgb_flush_user_nr(unsigned long pcid, unsigned long addr,
-+ int nr, bool pmd_stride)
++static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
++ unsigned long addr,
++ u16 nr,
++ bool pmd_stride,
++ bool freed_tables)
+{
-+ __invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA | INVLPGB_FINAL_ONLY);
-+}
++ unsigned long flags = INVLPGB_PCID | INVLPGB_VA;
+
-+/* Flush all mappings for a given ASID, not including globals. */
-+static inline void invlpgb_flush_single_asid(unsigned long asid)
-+{
-+ __invlpgb(asid, 0, 0, 0, 0, INVLPGB_ASID);
++ if (!freed_tables)
++ flags |= INVLPGB_FINAL_ONLY;
++
++ __invlpgb(0, pcid, addr, nr - 1, pmd_stride, flags);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
-+static inline void invlpgb_flush_single_pcid(unsigned long pcid)
++static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+ __invlpgb(0, pcid, 0, 0, 0, INVLPGB_PCID);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invlpgb_flush_all(void)
+{
+ __invlpgb(0, 0, 0, 0, 0, INVLPGB_INCLUDE_GLOBAL);
++ tlbsync();
+}
+
+/* Flush addr, including globals, for all PCIDs. 
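+ *
+ * A minimal usage sketch (illustrative, not part of the patch): one
+ * call covers up to invlpgb_count_max pages, passing nr - 1 as the
+ * instruction's extra-count field; flushing 32 contiguous 4 KB
+ * kernel pages and waiting for completion would be:
+ *
+ *	invlpgb_flush_addr_nosync(addr, 32);
+ *	tlbsync();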
*/ -+static inline void invlpgb_flush_addr(unsigned long addr, int nr) ++static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr) +{ + __invlpgb(0, 0, addr, nr - 1, 0, INVLPGB_INCLUDE_GLOBAL); +} @@ -1339,69 +1199,86 @@ index 000000000000..2669ebfffe81 +static inline void invlpgb_flush_all_nonglobals(void) +{ + __invlpgb(0, 0, 0, 0, 0, 0); -+} -+ -+/* Wait for INVLPGB originated by this CPU to complete. */ -+static inline void tlbsync(void) -+{ -+ asm volatile("tlbsync"); ++ tlbsync(); +} + +#endif /* _ASM_X86_INVLPGB */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h -index ce4677b8b735..83d0986295d3 100644 +index ce4677b8b735..51f25d38de86 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h -@@ -46,6 +46,12 @@ typedef struct { - unsigned long flags; +@@ -67,6 +67,12 @@ typedef struct { + u16 pkey_allocation_map; + s16 execute_only_pkey; #endif - -+#ifdef CONFIG_CPU_SUP_AMD -+ struct list_head broadcast_asid_list; -+ u16 broadcast_asid; ++ ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++ u16 global_asid; + bool asid_transition; +#endif + - #ifdef CONFIG_ADDRESS_MASKING - /* Active LAM mode: X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */ - unsigned long lam_cr3_mask; + } mm_context_t; + + #define INIT_MM_CONTEXT(mm) \ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h -index 2886cb668d7f..2c347b51d9b9 100644 +index 2886cb668d7f..65f50464b5c3 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -139,6 +139,8 @@ static inline void mm_reset_untag_mask(struct mm_struct *mm) #define enter_lazy_tlb enter_lazy_tlb extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); -+extern void destroy_context_free_broadcast_asid(struct mm_struct *mm); ++extern void destroy_context_free_global_asid(struct mm_struct *mm); + /* * Init a new mm. Used on mm copies, like at fork() * and on mm's that are brand-new, like at execve(). 
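 *
 * (With the hunk below, a new mm starts out without a global ASID;
 * consider_global_asid() in arch/x86/mm/tlb.c assigns one later,
 * once enough CPUs are actively running the process.)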
-@@ -160,6 +162,13 @@ static inline int init_new_context(struct task_struct *tsk, +@@ -160,6 +162,14 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.execute_only_pkey = -1; } #endif + -+#ifdef CONFIG_CPU_SUP_AMD -+ INIT_LIST_HEAD(&mm->context.broadcast_asid_list); -+ mm->context.broadcast_asid = 0; -+ mm->context.asid_transition = false; ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { ++ mm->context.global_asid = 0; ++ mm->context.asid_transition = false; ++ } +#endif + mm_reset_untag_mask(mm); init_new_context_ldt(mm); return 0; -@@ -169,6 +178,9 @@ static inline int init_new_context(struct task_struct *tsk, +@@ -169,6 +179,10 @@ static inline int init_new_context(struct task_struct *tsk, static inline void destroy_context(struct mm_struct *mm) { destroy_context_ldt(mm); -+#ifdef CONFIG_CPU_SUP_AMD -+ destroy_context_free_broadcast_asid(mm); ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ destroy_context_free_global_asid(mm); +#endif } extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index 3ae84c3b8e6d..dc1c1057f26e 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -25,6 +25,7 @@ + #define _EFER_SVME 12 /* Enable virtualization */ + #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ + #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ ++#define _EFER_TCE 15 /* Enable Translation Cache Extensions */ + #define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ + + #define EFER_SCE (1<<_EFER_SCE) +@@ -34,6 +35,7 @@ + #define EFER_SVME (1<<_EFER_SVME) + #define EFER_LMSLE (1<<_EFER_LMSLE) + #define EFER_FFXSR (1<<_EFER_FFXSR) ++#define EFER_TCE (1<<_EFER_TCE) + #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) + + /* diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index d4eb9e1d61b8..794ba3647c6c 100644 --- a/arch/x86/include/asm/paravirt.h @@ -1444,7 +1321,7 @@ index 1ad56eb3e8a8..f9a17edf63ad 100644 #endif /* _ARCH_X86_TLBBATCH_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index 69e79fff41b8..a2f9b7370717 100644 +index 69e79fff41b8..5490ca71e27f 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -10,6 +10,7 @@ @@ -1455,39 +1332,100 @@ index 69e79fff41b8..a2f9b7370717 100644 #include #include #include -@@ -64,6 +65,23 @@ static inline void cr4_clear_bits(unsigned long mask) - */ - #define TLB_NR_DYN_ASIDS 6 +@@ -183,6 +184,13 @@ static inline void cr4_init_shadow(void) + extern unsigned long mmu_cr4_features; + extern u32 *trampoline_cr4_features; -+#ifdef CONFIG_CPU_SUP_AMD -+#define is_dyn_asid(asid) (asid) < TLB_NR_DYN_ASIDS -+#define is_broadcast_asid(asid) (asid) >= TLB_NR_DYN_ASIDS -+#define in_asid_transition(info) (info->mm && info->mm->context.asid_transition) -+#define mm_broadcast_asid(mm) (mm->context.broadcast_asid) ++/* How many pages can we invalidate with one INVLPGB. 
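+ *
+ * (Set by cpu_detect_tlb_amd() from CPUID 0x80000008 EDX[15:0] + 1,
+ * per the arch/x86/kernel/cpu/amd.c hunk in this patch; e.g. a raw
+ * field value of 7 allows up to 8 pages per INVLPGB.)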
*/ ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++extern u16 invlpgb_count_max; +#else -+#define is_dyn_asid(asid) true -+#define is_broadcast_asid(asid) false -+#define in_asid_transition(info) false -+#define mm_broadcast_asid(mm) 0 ++#define invlpgb_count_max 1 ++#endif + -+inline bool needs_broadcast_asid_reload(struct mm_struct *next, u16 prev_asid) + extern void initialize_tlbstate_and_flush(void); + + /* +@@ -230,6 +238,78 @@ void flush_tlb_one_kernel(unsigned long addr); + void flush_tlb_multi(const struct cpumask *cpumask, + const struct flush_tlb_info *info); + ++#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH ++static inline bool is_dyn_asid(u16 asid) ++{ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return true; ++ ++ return asid < TLB_NR_DYN_ASIDS; ++} ++ ++static inline bool is_global_asid(u16 asid) ++{ ++ return !is_dyn_asid(asid); ++} ++ ++static inline bool in_asid_transition(const struct flush_tlb_info *info) ++{ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return false; ++ ++ return info->mm && READ_ONCE(info->mm->context.asid_transition); ++} ++ ++static inline u16 mm_global_asid(struct mm_struct *mm) ++{ ++ u16 asid; ++ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return 0; ++ ++ asid = READ_ONCE(mm->context.global_asid); ++ ++ /* mm->context.global_asid is either 0, or a global ASID */ ++ VM_WARN_ON_ONCE(is_dyn_asid(asid)); ++ ++ return asid; ++} ++#else ++static inline bool is_dyn_asid(u16 asid) ++{ ++ return true; ++} ++ ++static inline bool is_global_asid(u16 asid) +{ + return false; +} ++ ++static inline bool in_asid_transition(const struct flush_tlb_info *info) ++{ ++ return false; ++} ++ ++static inline u16 mm_global_asid(struct mm_struct *mm) ++{ ++ return 0; ++} ++ ++static inline bool needs_global_asid_reload(struct mm_struct *next, u16 prev_asid) ++{ ++ return false; ++} ++ ++static inline void broadcast_tlb_flush(struct flush_tlb_info *info) ++{ ++ VM_WARN_ON_ONCE(1); ++} ++ ++static inline void consider_global_asid(struct mm_struct *mm) ++{ ++} +#endif + - struct tlb_context { - u64 ctx_id; - u64 tlb_gen; -@@ -182,6 +200,7 @@ static inline void cr4_init_shadow(void) - - extern unsigned long mmu_cr4_features; - extern u32 *trampoline_cr4_features; -+extern u16 invlpgb_count_max; - - extern void initialize_tlbstate_and_flush(void); - -@@ -277,21 +296,15 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) + #ifdef CONFIG_PARAVIRT + #include + #endif +@@ -277,21 +357,15 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) return atomic64_inc_return(&mm->context.tlb_gen); } @@ -1513,28 +1451,38 @@ index 69e79fff41b8..a2f9b7370717 100644 static inline bool pte_flags_need_flush(unsigned long oldflags, unsigned long newflags, diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index 79d2e17f6582..4dc42705aaca 100644 +index 79d2e17f6582..21076252a491 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c -@@ -1135,6 +1135,22 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) +@@ -29,6 +29,8 @@ + + #include "cpu.h" + ++u16 invlpgb_count_max __ro_after_init; ++ + static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) + { + u32 gprs[8] = { 0 }; +@@ -1069,6 +1071,10 @@ static void init_amd(struct cpuinfo_x86 *c) + + /* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. 
*/ + clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE); ++ ++ /* Enable Translation Cache Extension */ ++ if (cpu_feature_enabled(X86_FEATURE_TCE)) ++ msr_set_bit(MSR_EFER, _EFER_TCE); + } + + #ifdef CONFIG_X86_32 +@@ -1135,6 +1141,12 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) tlb_lli_2m[ENTRIES] = eax & mask; tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + -+ if (c->extended_cpuid_level < 0x80000008) -+ return; -+ -+ cpuid(0x80000008, &eax, &ebx, &ecx, &edx); -+ + /* Max number of pages INVLPGB can invalidate in one shot */ -+ invlpgb_count_max = (edx & 0xffff) + 1; -+ -+ /* If supported, enable translation cache extensions (TCE) */ -+ cpuid(0x80000001, &eax, &ebx, &ecx, &edx); -+ if (ecx & BIT(17)) { -+ u64 msr = native_read_msr(MSR_EFER);; -+ msr |= BIT(15); -+ wrmsrl(MSR_EFER, msr); ++ if (boot_cpu_has(X86_FEATURE_INVLPGB)) { ++ cpuid(0x80000008, &eax, &ebx, &ecx, &edx); ++ invlpgb_count_max = (edx & 0xffff) + 1; + } } @@ -1575,21 +1523,6 @@ index fec381533555..c019771e0123 100644 .mmu.exit_mmap = paravirt_nop, .mmu.notify_page_enc_status_changed = paravirt_nop, -diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c -index f1fea506e20f..6c4d08f8f7b1 100644 ---- a/arch/x86/kernel/setup.c -+++ b/arch/x86/kernel/setup.c -@@ -138,6 +138,10 @@ __visible unsigned long mmu_cr4_features __ro_after_init; - __visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE; - #endif - -+#ifdef CONFIG_CPU_SUP_AMD -+u16 invlpgb_count_max __ro_after_init; -+#endif -+ - #ifdef CONFIG_IMA - static phys_addr_t ima_kexec_buffer_phys; - static size_t ima_kexec_buffer_size; diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 5745a354a241..3dc4af1f7868 100644 --- a/arch/x86/mm/pgtable.c @@ -1645,7 +1578,7 @@ index 5745a354a241..3dc4af1f7868 100644 #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* CONFIG_PGTABLE_LEVELS > 3 */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index b0678d59ebdb..0ea9d1c077f6 100644 +index a2becb85bea7..6449ac701c88 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -74,13 +74,15 @@ @@ -1667,120 +1600,136 @@ index b0678d59ebdb..0ea9d1c077f6 100644 * for KPTI each mm has two address spaces and thus needs two * PCID values, but we can still do with a single ASID denomination * for each mm. Corresponds to kPCID + 2048. -@@ -225,6 +227,18 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, +@@ -225,6 +227,20 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, return; } + /* -+ * TLB consistency for this ASID is maintained with INVLPGB; -+ * TLB flushes happen even while the process isn't running. ++ * TLB consistency for global ASIDs is maintained with broadcast TLB ++ * flushing. The TLB is never outdated, and does not need flushing. 
+ */
++ if (IS_ENABLED(CONFIG_X86_BROADCAST_TLB_FLUSH) && static_cpu_has(X86_FEATURE_INVLPGB)) {
++ u16 global_asid = mm_global_asid(next);
++
++ if (global_asid) {
++ *new_asid = global_asid;
++ *need_flush = false;
++ return;
++ }
+ }
+
 if (this_cpu_read(cpu_tlbstate.invalidate_other))
 clear_asid_other();
@@ -251,6 +267,290 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
 *need_flush = true;
 }

+#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
+/*
+ * Logic for broadcast TLB invalidation.
+ */
+static DEFINE_RAW_SPINLOCK(global_asid_lock);
+static u16 last_global_asid = MAX_ASID_AVAILABLE;
+static DECLARE_BITMAP(global_asid_used, MAX_ASID_AVAILABLE) = { 0 };
+static DECLARE_BITMAP(global_asid_freed, MAX_ASID_AVAILABLE) = { 0 };
+static int global_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1;
+
+static void reset_global_asid_space(void)
+{
+ lockdep_assert_held(&global_asid_lock);
+
+ /*
+ * A global TLB flush guarantees that any stale entries from
+ * previously freed global ASIDs get flushed from the TLB
+ * everywhere, making these global ASIDs safe to reuse.
+ */
+ invlpgb_flush_all_nonglobals();
+
+ /*
+ * Clear all the previously freed global ASIDs from the
+ * global_asid_used bitmap, now that the global TLB flush
+ * has made them actually available for re-use.
+ */
+ bitmap_andnot(global_asid_used, global_asid_used,
+ global_asid_freed, MAX_ASID_AVAILABLE);
+ bitmap_clear(global_asid_freed, 0, MAX_ASID_AVAILABLE);
+
+ /*
+ * ASIDs 0-TLB_NR_DYN_ASIDS are used for CPU-local ASID
+ * assignments, for tasks doing IPI based TLB shootdowns.
+ * Restart the search from the start of the global ASID space. 
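+ *
+ * (Example: with TLB_NR_DYN_ASIDS == 6, per-CPU dynamic ASIDs are
+ * 0-5, and get_global_asid() below hands out global ASIDs from 6
+ * up to MAX_ASID_AVAILABLE - 1 before wrapping around.)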
++ */ ++ last_global_asid = TLB_NR_DYN_ASIDS; +} + -+static u16 get_broadcast_asid(void) ++static u16 get_global_asid(void) +{ -+ lockdep_assert_held(&broadcast_asid_lock); ++ lockdep_assert_held(&global_asid_lock); + + do { -+ u16 start = last_broadcast_asid; -+ u16 asid = find_next_zero_bit(broadcast_asid_used, MAX_ASID_AVAILABLE, start); ++ u16 start = last_global_asid; ++ u16 asid = find_next_zero_bit(global_asid_used, MAX_ASID_AVAILABLE, start); + + if (asid >= MAX_ASID_AVAILABLE) { -+ reset_broadcast_asid_space(); ++ reset_global_asid_space(); + continue; + } + -+ /* Try claiming this broadcast ASID. */ -+ if (!test_and_set_bit(asid, broadcast_asid_used)) { -+ last_broadcast_asid = asid; -+ return asid; -+ } ++ /* Claim this global ASID. */ ++ __set_bit(asid, global_asid_used); ++ last_global_asid = asid; ++ global_asid_available--; ++ return asid; + } while (1); +} + +/* -+ * Returns true if the mm is transitioning from a CPU-local ASID to a broadcast ++ * Returns true if the mm is transitioning from a CPU-local ASID to a global + * (INVLPGB) ASID, or the other way around. + */ -+static bool needs_broadcast_asid_reload(struct mm_struct *next, u16 prev_asid) ++static bool needs_global_asid_reload(struct mm_struct *next, u16 prev_asid) +{ -+ u16 broadcast_asid = mm_broadcast_asid(next); ++ u16 global_asid = mm_global_asid(next); + -+ if (broadcast_asid && prev_asid != broadcast_asid) ++ if (global_asid && prev_asid != global_asid) + return true; + -+ if (!broadcast_asid && is_broadcast_asid(prev_asid)) ++ if (!global_asid && is_global_asid(prev_asid)) + return true; + + return false; +} + -+void destroy_context_free_broadcast_asid(struct mm_struct *mm) ++void destroy_context_free_global_asid(struct mm_struct *mm) +{ -+ if (!mm->context.broadcast_asid) ++ if (!mm->context.global_asid) + return; + -+ guard(raw_spinlock_irqsave)(&broadcast_asid_lock); -+ mm->context.broadcast_asid = 0; -+ list_del(&mm->context.broadcast_asid_list); -+ broadcast_asid_available++; ++ guard(raw_spinlock_irqsave)(&global_asid_lock); ++ ++ /* The global ASID can be re-used only after flush at wrap-around. */ ++ __set_bit(mm->context.global_asid, global_asid_freed); ++ ++ mm->context.global_asid = 0; ++ global_asid_available++; +} + ++/* ++ * Check whether a process is currently active on more than "threshold" CPUs. ++ * This is a cheap estimation on whether or not it may make sense to assign ++ * a global ASID to this process, and use broadcast TLB invalidation. ++ */ +static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold) +{ + int count = 0; + int cpu; + ++ /* This quick check should eliminate most single threaded programs. */ + if (cpumask_weight(mm_cpumask(mm)) <= threshold) + return false; + ++ /* Slower check to make sure. */ + for_each_cpu(cpu, mm_cpumask(mm)) { + /* Skip the CPUs that aren't really running this process. */ + if (per_cpu(cpu_tlbstate.loaded_mm, cpu) != mm) @@ -1796,40 +1745,56 @@ index b0678d59ebdb..0ea9d1c077f6 100644 +} + +/* -+ * Assign a broadcast ASID to the current process, protecting against ++ * Assign a global ASID to the current process, protecting against + * races between multiple threads in the process. + */ -+static void use_broadcast_asid(struct mm_struct *mm) ++static void use_global_asid(struct mm_struct *mm) +{ -+ guard(raw_spinlock_irqsave)(&broadcast_asid_lock); ++ guard(raw_spinlock_irqsave)(&global_asid_lock); + + /* This process is already using broadcast TLB invalidation. 
*/ -+ if (mm->context.broadcast_asid) ++ if (mm->context.global_asid) + return; + -+ mm->context.broadcast_asid = get_broadcast_asid(); -+ mm->context.asid_transition = true; -+ list_add(&mm->context.broadcast_asid_list, &broadcast_asid_list); -+ broadcast_asid_available--; ++ /* The last global ASID was consumed while waiting for the lock. */ ++ if (!global_asid_available) ++ return; ++ ++ /* ++ * The transition from IPI TLB flushing, with a dynamic ASID, ++ * and broadcast TLB flushing, using a global ASID, uses memory ++ * ordering for synchronization. ++ * ++ * While the process has threads still using a dynamic ASID, ++ * TLB invalidation IPIs continue to get sent. ++ * ++ * This code sets asid_transition first, before assigning the ++ * global ASID. ++ * ++ * The TLB flush code will only verify the ASID transition ++ * after it has seen the new global ASID for the process. ++ */ ++ WRITE_ONCE(mm->context.asid_transition, true); ++ WRITE_ONCE(mm->context.global_asid, get_global_asid()); +} + +/* -+ * Figure out whether to assign a broadcast (global) ASID to a process. -+ * We vary the threshold by how empty or full broadcast ASID space is. ++ * Figure out whether to assign a global ASID to a process. ++ * We vary the threshold by how empty or full global ASID space is. + * 1/4 full: >= 4 active threads + * 1/2 full: >= 8 active threads + * 3/4 full: >= 16 active threads + * 7/8 full: >= 32 active threads + * etc + * -+ * This way we should never exhaust the broadcast ASID space, even on very ++ * This way we should never exhaust the global ASID space, even on very + * large systems, and the processes with the largest number of active + * threads should be able to use broadcast TLB invalidation. + */ +#define HALFFULL_THRESHOLD 8 -+static bool meets_broadcast_asid_threshold(struct mm_struct *mm) ++static bool meets_global_asid_threshold(struct mm_struct *mm) +{ -+ int avail = broadcast_asid_available; ++ int avail = global_asid_available; + int threshold = HALFFULL_THRESHOLD; + + if (!avail) @@ -1849,7 +1814,7 @@ index b0678d59ebdb..0ea9d1c077f6 100644 + return mm_active_cpus_exceeds(mm, threshold); +} + -+static void count_tlb_flush(struct mm_struct *mm) ++static void consider_global_asid(struct mm_struct *mm) +{ + if (!static_cpu_has(X86_FEATURE_INVLPGB)) + return; @@ -1858,43 +1823,54 @@ index b0678d59ebdb..0ea9d1c077f6 100644 + if ((current->pid & 0x1f) != (jiffies & 0x1f)) + return; + -+ if (meets_broadcast_asid_threshold(mm)) -+ use_broadcast_asid(mm); ++ if (meets_global_asid_threshold(mm)) ++ use_global_asid(mm); +} + +static void finish_asid_transition(struct flush_tlb_info *info) +{ + struct mm_struct *mm = info->mm; -+ int bc_asid = mm_broadcast_asid(mm); ++ int bc_asid = mm_global_asid(mm); + int cpu; + -+ if (!mm->context.asid_transition) ++ if (!READ_ONCE(mm->context.asid_transition)) + return; + + for_each_cpu(cpu, mm_cpumask(mm)) { ++ /* ++ * The remote CPU is context switching. Wait for that to ++ * finish, to catch the unlikely case of it switching to ++ * the target mm with an out of date ASID. ++ */ ++ while (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) == LOADED_MM_SWITCHING) ++ cpu_relax(); ++ + if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) != mm) + continue; + + /* -+ * If at least one CPU is not using the broadcast ASID yet, ++ * If at least one CPU is not using the global ASID yet, + * send a TLB flush IPI. The IPI should cause stragglers + * to transition soon. 
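+ *
+ * (A straggler is a CPU still running this mm with one of the
+ * per-CPU dynamic ASIDs; the IPI makes it notice the new global
+ * ASID via needs_global_asid_reload() and switch over in
+ * switch_mm_irqs_off().)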
++ * ++ * This can race with the CPU switching to another task; ++ * that results in a (harmless) extra IPI. + */ -+ if (per_cpu(cpu_tlbstate.loaded_mm_asid, cpu) != bc_asid) { ++ if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm_asid, cpu)) != bc_asid) { + flush_tlb_multi(mm_cpumask(info->mm), info); + return; + } + } + -+ /* All the CPUs running this process are using the broadcast ASID. */ -+ mm->context.asid_transition = 0; ++ /* All the CPUs running this process are using the global ASID. */ ++ WRITE_ONCE(mm->context.asid_transition, false); +} + +static void broadcast_tlb_flush(struct flush_tlb_info *info) +{ + bool pmd = info->stride_shift == PMD_SHIFT; + unsigned long maxnr = invlpgb_count_max; -+ unsigned long asid = info->mm->context.broadcast_asid; ++ unsigned long asid = info->mm->context.global_asid; + unsigned long addr = info->start; + unsigned long nr; + @@ -1902,12 +1878,17 @@ index b0678d59ebdb..0ea9d1c077f6 100644 + if (info->stride_shift > PMD_SHIFT) + maxnr = 1; + -+ if (info->end == TLB_FLUSH_ALL || info->freed_tables) { -+ invlpgb_flush_single_pcid(kern_pcid(asid)); ++ /* ++ * TLB flushes with INVLPGB are kicked off asynchronously. ++ * The inc_mm_tlb_gen() guarantees page table updates are done ++ * before these TLB flushes happen. ++ */ ++ if (info->end == TLB_FLUSH_ALL) { ++ invlpgb_flush_single_pcid_nosync(kern_pcid(asid)); + /* Do any CPUs supporting INVLPGB need PTI? */ + if (static_cpu_has(X86_FEATURE_PTI)) -+ invlpgb_flush_single_pcid(user_pcid(asid)); -+ } else do { ++ invlpgb_flush_single_pcid_nosync(user_pcid(asid)); ++ } else for (; addr < info->end; addr += nr << info->stride_shift) { + /* + * Calculate how many pages can be flushed at once; if the + * remainder of the range is less than one page, flush one. @@ -1915,43 +1896,42 @@ index b0678d59ebdb..0ea9d1c077f6 100644 + nr = min(maxnr, (info->end - addr) >> info->stride_shift); + nr = max(nr, 1); + -+ invlpgb_flush_user_nr(kern_pcid(asid), addr, nr, pmd); ++ invlpgb_flush_user_nr_nosync(kern_pcid(asid), addr, nr, pmd, info->freed_tables); + /* Do any CPUs supporting INVLPGB need PTI? */ + if (static_cpu_has(X86_FEATURE_PTI)) -+ invlpgb_flush_user_nr(user_pcid(asid), addr, nr, pmd); -+ addr += nr << info->stride_shift; -+ } while (addr < info->end); ++ invlpgb_flush_user_nr_nosync(user_pcid(asid), addr, nr, pmd, info->freed_tables); ++ } + + finish_asid_transition(info); + + /* Wait for the INVLPGBs kicked off above to finish. */ + tlbsync(); +} -+#endif /* CONFIG_CPU_SUP_AMD */ ++#endif /* CONFIG_X86_BROADCAST_TLB_FLUSH */ + /* * Given an ASID, flush the corresponding user ASID. We can delay this * until the next time we switch to it. 
-@@ -556,8 +809,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, +@@ -556,8 +856,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, */ if (prev == next) { /* Not actually switching mm's */ - VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != - next->context.ctx_id); -+ if (is_dyn_asid(prev_asid)) -+ VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != -+ next->context.ctx_id); ++ VM_WARN_ON(is_dyn_asid(prev_asid) && ++ this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != ++ next->context.ctx_id); /* * If this races with another thread that enables lam, 'new_lam' -@@ -573,6 +827,23 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, +@@ -573,6 +874,23 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, !cpumask_test_cpu(cpu, mm_cpumask(next)))) cpumask_set_cpu(cpu, mm_cpumask(next)); + /* + * Check if the current mm is transitioning to a new ASID. + */ -+ if (needs_broadcast_asid_reload(next, prev_asid)) { ++ if (needs_global_asid_reload(next, prev_asid)) { + next_tlb_gen = atomic64_read(&next->context.tlb_gen); + + choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); @@ -1962,24 +1942,44 @@ index b0678d59ebdb..0ea9d1c077f6 100644 + * Broadcast TLB invalidation keeps this PCID up to date + * all the time. + */ -+ if (is_broadcast_asid(prev_asid)) ++ if (is_global_asid(prev_asid)) + return; + /* * If the CPU is not in lazy TLB mode, we are just switching * from one thread in a process to another thread in the same -@@ -629,8 +900,10 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, - barrier(); +@@ -606,6 +924,13 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, + */ + cond_mitigation(tsk); + ++ /* ++ * Let nmi_uaccess_okay() and finish_asid_transition() ++ * know that we're changing CR3. ++ */ ++ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); ++ barrier(); ++ + /* + * Stop remote flushes for the previous mm. + * Skip kernel threads; we never send init_mm TLB flushing IPIs, +@@ -623,14 +948,12 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, + next_tlb_gen = atomic64_read(&next->context.tlb_gen); + + choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); +- +- /* Let nmi_uaccess_okay() know that we're changing CR3. 
*/ +- this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); +- barrier(); } +reload_tlb: new_lam = mm_lam_cr3_mask(next); if (need_flush) { -+ VM_BUG_ON(is_broadcast_asid(new_asid)); ++ VM_WARN_ON_ONCE(is_global_asid(new_asid)); this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); load_new_mm_cr3(next->pgd, new_asid, new_lam, true); -@@ -749,7 +1022,7 @@ static void flush_tlb_func(void *info) +@@ -749,7 +1072,7 @@ static void flush_tlb_func(void *info) const struct flush_tlb_info *f = info; struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); @@ -1988,24 +1988,24 @@ index b0678d59ebdb..0ea9d1c077f6 100644 bool local = smp_processor_id() == f->initiating_cpu; unsigned long nr_invalidate = 0; u64 mm_tlb_gen; -@@ -769,6 +1042,16 @@ static void flush_tlb_func(void *info) +@@ -769,6 +1092,16 @@ static void flush_tlb_func(void *info) if (unlikely(loaded_mm == &init_mm)) return; -+ /* Reload the ASID if transitioning into or out of a broadcast ASID */ -+ if (needs_broadcast_asid_reload(loaded_mm, loaded_mm_asid)) { ++ /* Reload the ASID if transitioning into or out of a global ASID */ ++ if (needs_global_asid_reload(loaded_mm, loaded_mm_asid)) { + switch_mm_irqs_off(NULL, loaded_mm, NULL); + loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); + } + + /* Broadcast ASIDs are always kept up to date with INVLPGB. */ -+ if (is_broadcast_asid(loaded_mm_asid)) ++ if (is_global_asid(loaded_mm_asid)) + return; + VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) != loaded_mm->context.ctx_id); -@@ -786,6 +1069,8 @@ static void flush_tlb_func(void *info) +@@ -786,6 +1119,8 @@ static void flush_tlb_func(void *info) return; } @@ -2014,32 +2014,7 @@ index b0678d59ebdb..0ea9d1c077f6 100644 if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID && f->new_tlb_gen <= local_tlb_gen)) { /* -@@ -825,7 +1110,7 @@ static void flush_tlb_func(void *info) - * - * The only question is whether to do a full or partial flush. - * -- * We do a partial flush if requested and two extra conditions -+ * We do a partial flush if requested and three extra conditions - * are met: - * - * 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that -@@ -852,10 +1137,14 @@ static void flush_tlb_func(void *info) - * date. By doing a full flush instead, we can increase - * local_tlb_gen all the way to mm_tlb_gen and we can probably - * avoid another flush in the very near future. -+ * -+ * 3. No page tables were freed. If page tables were freed, a full -+ * flush ensures intermediate translations in the TLB get flushed. - */ - if (f->end != TLB_FLUSH_ALL && - f->new_tlb_gen == local_tlb_gen + 1 && -- f->new_tlb_gen == mm_tlb_gen) { -+ f->new_tlb_gen == mm_tlb_gen && -+ !f->freed_tables) { - /* Partial flush */ - unsigned long addr = f->start; - -@@ -926,7 +1215,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, +@@ -926,7 +1261,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, * up on the new contents of what used to be page tables, while * doing a speculative memory access. 
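 *
 * (The in_asid_transition() check added below forces the synchronous
 * IPI path, including lazy-TLB CPUs, while an mm is still
 * transitioning to its global ASID.)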
*/ @@ -2048,102 +2023,155 @@ index b0678d59ebdb..0ea9d1c077f6 100644 on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); else on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func, -@@ -998,14 +1287,18 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, +@@ -981,6 +1316,15 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm, + info->new_tlb_gen = new_tlb_gen; + info->initiating_cpu = smp_processor_id(); + ++ /* ++ * If the number of flushes is so large that a full flush ++ * would be faster, do a full flush. ++ */ ++ if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) { ++ info->start = 0; ++ info->end = TLB_FLUSH_ALL; ++ } ++ + return info; + } + +@@ -998,17 +1342,8 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, bool freed_tables) { struct flush_tlb_info *info; -+ unsigned long threshold = tlb_single_page_flush_ceiling; ++ int cpu = get_cpu(); u64 new_tlb_gen; - int cpu; - -+ if (static_cpu_has(X86_FEATURE_INVLPGB)) -+ threshold *= invlpgb_count_max; -+ - cpu = get_cpu(); - - /* Should we flush just the requested range? */ - if ((end == TLB_FLUSH_ALL) || +- int cpu; +- +- cpu = get_cpu(); +- +- /* Should we flush just the requested range? */ +- if ((end == TLB_FLUSH_ALL) || - ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) { -+ ((end - start) >> stride_shift) > threshold) { - start = 0; - end = TLB_FLUSH_ALL; - } -@@ -1021,8 +1314,11 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, +- start = 0; +- end = TLB_FLUSH_ALL; +- } + + /* This is also a barrier that synchronizes with switch_mm(). */ + new_tlb_gen = inc_mm_tlb_gen(mm); +@@ -1021,8 +1356,11 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, * a local TLB flush is needed. Optimize this use-case by calling * flush_tlb_func_local() directly in this case. 
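 *
 * (Dispatch summary for the code below: an mm that owns a global
 * ASID is flushed with a single broadcast INVLPGB; otherwise a
 * multi-CPU mm gets flush IPIs and is considered for a global ASID,
 * while a single-CPU mm takes the local path.)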
*/ - if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { -+ if (IS_ENABLED(CONFIG_CPU_SUP_AMD) && mm_broadcast_asid(mm)) { ++ if (mm_global_asid(mm)) { + broadcast_tlb_flush(info); + } else if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { flush_tlb_multi(mm_cpumask(mm), info); -+ count_tlb_flush(mm); ++ consider_global_asid(mm); } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { lockdep_assert_irqs_enabled(); local_irq_disable(); -@@ -1045,9 +1341,41 @@ static void do_flush_tlb_all(void *info) +@@ -1036,6 +1374,19 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + } + + ++static bool broadcast_flush_tlb_all(void) ++{ ++ if (!IS_ENABLED(CONFIG_X86_BROADCAST_TLB_FLUSH)) ++ return false; ++ ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return false; ++ ++ guard(preempt)(); ++ invlpgb_flush_all(); ++ return true; ++} ++ + static void do_flush_tlb_all(void *info) + { + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); +@@ -1044,10 +1395,36 @@ static void do_flush_tlb_all(void *info) + void flush_tlb_all(void) { - count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { -+ guard(preempt)(); -+ invlpgb_flush_all(); -+ tlbsync(); ++ if (broadcast_flush_tlb_all()) + return; -+ } + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); on_each_cpu(do_flush_tlb_all, NULL, 1); } -+static void broadcast_kernel_range_flush(unsigned long start, unsigned long end) ++static bool broadcast_kernel_range_flush(struct flush_tlb_info *info) +{ + unsigned long addr; -+ unsigned long maxnr = invlpgb_count_max; -+ unsigned long threshold = tlb_single_page_flush_ceiling * maxnr; ++ unsigned long nr; + -+ /* -+ * TLBSYNC only waits for flushes originating on the same CPU. -+ * Disabling migration allows us to wait on all flushes. 
-+ */ -+ guard(preempt)(); ++ if (!IS_ENABLED(CONFIG_X86_BROADCAST_TLB_FLUSH)) ++ return false; + -+ if (end == TLB_FLUSH_ALL || -+ (end - start) > threshold << PAGE_SHIFT) { ++ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) ++ return false; ++ ++ if (info->end == TLB_FLUSH_ALL) { + invlpgb_flush_all(); -+ } else { -+ unsigned long nr; -+ for (addr = start; addr < end; addr += nr << PAGE_SHIFT) { -+ nr = min((end - addr) >> PAGE_SHIFT, maxnr); -+ invlpgb_flush_addr(addr, nr); -+ } ++ return true; + } + ++ for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) { ++ nr = min((info->end - addr) >> PAGE_SHIFT, invlpgb_count_max); ++ invlpgb_flush_addr_nosync(addr, nr); ++ } + tlbsync(); ++ return true; +} + static void do_kernel_range_flush(void *info) { struct flush_tlb_info *f = info; -@@ -1060,6 +1388,11 @@ static void do_kernel_range_flush(void *info) +@@ -1060,22 +1437,21 @@ static void do_kernel_range_flush(void *info) void flush_tlb_kernel_range(unsigned long start, unsigned long end) { -+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { -+ broadcast_kernel_range_flush(start, end); -+ return; -+ } +- /* Balance as user space task's flush, a bit conservative */ +- if (end == TLB_FLUSH_ALL || +- (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) { +- on_each_cpu(do_flush_tlb_all, NULL, 1); +- } else { +- struct flush_tlb_info *info; ++ struct flush_tlb_info *info; + +- preempt_disable(); +- info = get_flush_tlb_info(NULL, start, end, 0, false, +- TLB_GENERATION_INVALID); ++ guard(preempt)(); + ++ info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false, ++ TLB_GENERATION_INVALID); + - /* Balance as user space task's flush, a bit conservative */ - if (end == TLB_FLUSH_ALL || - (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) { -@@ -1244,7 +1577,6 @@ EXPORT_SYMBOL_GPL(__flush_tlb_all); - void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) - { - struct flush_tlb_info *info; -- ++ if (broadcast_kernel_range_flush(info)) ++ ; /* Fall through. */ ++ else if (info->end == TLB_FLUSH_ALL) ++ on_each_cpu(do_flush_tlb_all, NULL, 1); ++ else + on_each_cpu(do_kernel_range_flush, info, 1); + +- put_flush_tlb_info(); +- preempt_enable(); +- } ++ put_flush_tlb_info(); + } + + /* +@@ -1247,7 +1623,7 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) + int cpu = get_cpu(); - info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, -@@ -1263,12 +1595,49 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) +- info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, ++ info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, PAGE_SHIFT, false, + TLB_GENERATION_INVALID); + /* + * flush_tlb_multi() is not optimized for the common case in which only +@@ -1263,12 +1639,62 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) local_irq_enable(); } @@ -2168,8 +2196,8 @@ index b0678d59ebdb..0ea9d1c077f6 100644 + struct mm_struct *mm, + unsigned long uaddr) +{ -+ if (static_cpu_has(X86_FEATURE_INVLPGB) && mm_broadcast_asid(mm)) { -+ u16 asid = mm_broadcast_asid(mm); ++ if (static_cpu_has(X86_FEATURE_INVLPGB) && mm_global_asid(mm)) { ++ u16 asid = mm_global_asid(mm); + /* + * Queue up an asynchronous invalidation. 
The corresponding + * TLBSYNC is done in arch_tlbbatch_flush(), and must be done @@ -2179,11 +2207,24 @@ index b0678d59ebdb..0ea9d1c077f6 100644 + batch->used_invlpgb = true; + migrate_disable(); + } -+ invlpgb_flush_user_nr(kern_pcid(asid), uaddr, 1, 0); ++ invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false, false); + /* Do any CPUs supporting INVLPGB need PTI? */ + if (static_cpu_has(X86_FEATURE_PTI)) -+ invlpgb_flush_user_nr(user_pcid(asid), uaddr, 1, 0); ++ invlpgb_flush_user_nr_nosync(user_pcid(asid), uaddr, 1, false, false); ++ ++ /* ++ * Some CPUs might still be using a local ASID for this ++ * process, and require IPIs, while others are using the ++ * global ASID. ++ * ++ * In this corner case we need to do both the broadcast ++ * TLB invalidation, and send IPIs. The IPIs will help ++ * stragglers transition to the broadcast ASID. ++ */ ++ if (READ_ONCE(mm->context.asid_transition)) ++ goto also_send_ipi; + } else { ++also_send_ipi: + inc_mm_tlb_gen(mm); + cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); + } @@ -2206,10 +2247,10 @@ index 55a4996d0c04..041e17282af0 100644 .pgd_alloc = xen_pgd_alloc, .pgd_free = xen_pgd_free, diff --git a/mm/memory.c b/mm/memory.c -index d322ddfe6791..d9ecd1ad789f 100644 +index 398c031be9ba..3d98aaf9b939 100644 --- a/mm/memory.c +++ b/mm/memory.c -@@ -1921,7 +1921,6 @@ void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, +@@ -1935,7 +1935,6 @@ void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, struct mmu_notifier_range range; struct mmu_gather tlb; @@ -2218,10 +2259,10 @@ index d322ddfe6791..d9ecd1ad789f 100644 address, end); hugetlb_zap_begin(vma, &range.start, &range.end); diff --git a/mm/mmap.c b/mm/mmap.c -index 6183805f6f9e..c72cbe087a27 100644 +index aec208f90337..d628b7900d2d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c -@@ -1931,7 +1931,6 @@ void exit_mmap(struct mm_struct *mm) +@@ -1664,7 +1664,6 @@ void exit_mmap(struct mm_struct *mm) goto destroy; } @@ -2229,7 +2270,7 @@ index 6183805f6f9e..c72cbe087a27 100644 flush_cache_mm(mm); tlb_gather_mmu_fullmm(&tlb, mm); /* update_hiwater_rss(mm) here? 
but nobody should be looking */ -@@ -2374,7 +2373,6 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) +@@ -2107,7 +2106,6 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) vma, new_start, length, false, true)) return -ENOMEM; @@ -2238,7 +2279,7 @@ index 6183805f6f9e..c72cbe087a27 100644 next = vma_next(&vmi); if (new_end > old_start) { diff --git a/mm/swap_state.c b/mm/swap_state.c -index 4669f29cf555..ffbdfc8a46ef 100644 +index e0c0321b8ff7..ca42b2be64d9 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -317,7 +317,6 @@ void free_pages_and_swap_cache(struct encoded_page **pages, int nr) @@ -2250,10 +2291,10 @@ index 4669f29cf555..ffbdfc8a46ef 100644 for (int i = 0; i < nr; i++) { struct folio *folio = page_folio(encoded_page_ptr(pages[i])); diff --git a/mm/vma.c b/mm/vma.c -index 7621384d64cf..c7461e21ef70 100644 +index bb2119e5a0d0..a593d5edfd88 100644 --- a/mm/vma.c +++ b/mm/vma.c -@@ -347,7 +347,6 @@ void unmap_region(struct ma_state *mas, struct vm_area_struct *vma, +@@ -398,7 +398,6 @@ void unmap_region(struct ma_state *mas, struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; struct mmu_gather tlb; @@ -2261,7 +2302,7 @@ index 7621384d64cf..c7461e21ef70 100644 tlb_gather_mmu(&tlb, mm); update_hiwater_rss(mm); unmap_vmas(&tlb, mas, vma, vma->vm_start, vma->vm_end, vma->vm_end, -@@ -1089,7 +1088,6 @@ static inline void vms_clear_ptes(struct vma_munmap_struct *vms, +@@ -1130,7 +1129,6 @@ static inline void vms_clear_ptes(struct vma_munmap_struct *vms, * were isolated before we downgraded mmap_lock. */ mas_set(mas_detach, 1); @@ -2269,759 +2310,33 @@ index 7621384d64cf..c7461e21ef70 100644 tlb_gather_mmu(&tlb, vms->vma->vm_mm); update_hiwater_rss(vms->vma->vm_mm); unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end, --- -2.48.0.rc1 - -From dff70be32d2c190e19590381ce5ec62ab9100a15 Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Thu, 9 Jan 2025 16:36:39 +0100 -Subject: [PATCH 04/13] autofdo - -Signed-off-by: Peter Jung ---- - Documentation/dev-tools/autofdo.rst | 168 ++++++++++++++++++++++++++ - Documentation/dev-tools/index.rst | 2 + - Documentation/dev-tools/propeller.rst | 162 +++++++++++++++++++++++++ - MAINTAINERS | 14 +++ - Makefile | 2 + - arch/Kconfig | 39 ++++++ - arch/sparc/kernel/vmlinux.lds.S | 5 + - arch/x86/Kconfig | 2 + - arch/x86/kernel/vmlinux.lds.S | 4 + - include/asm-generic/vmlinux.lds.h | 49 ++++++-- - scripts/Makefile.autofdo | 24 ++++ - scripts/Makefile.lib | 20 +++ - scripts/Makefile.propeller | 28 +++++ - tools/objtool/check.c | 2 + - tools/objtool/elf.c | 15 ++- - 15 files changed, 520 insertions(+), 16 deletions(-) - create mode 100644 Documentation/dev-tools/autofdo.rst - create mode 100644 Documentation/dev-tools/propeller.rst - create mode 100644 scripts/Makefile.autofdo - create mode 100644 scripts/Makefile.propeller - -diff --git a/Documentation/dev-tools/autofdo.rst b/Documentation/dev-tools/autofdo.rst -new file mode 100644 -index 000000000000..1f0a451e9ccd ---- /dev/null -+++ b/Documentation/dev-tools/autofdo.rst -@@ -0,0 +1,168 @@ -+.. SPDX-License-Identifier: GPL-2.0 -+ -+=================================== -+Using AutoFDO with the Linux kernel -+=================================== -+ -+This enables AutoFDO build support for the kernel when using -+the Clang compiler. AutoFDO (Auto-Feedback-Directed Optimization) -+is a type of profile-guided optimization (PGO) used to enhance the -+performance of binary executables. 
It gathers information about the
+frequency of execution of various code paths within a binary using
+hardware sampling. This data is then used to guide the compiler's
+optimization decisions, resulting in a more efficient binary. AutoFDO
+is a powerful optimization technique, and data indicates that it can
+significantly improve kernel performance. It's especially beneficial
+for workloads affected by front-end stalls.
+
+For AutoFDO builds, unlike non-FDO builds, the user must supply a
+profile. Acquiring an AutoFDO profile can be done in several ways.
+AutoFDO profiles are created by converting hardware samples collected with
+the "perf" tool. It is crucial that the workload used to create these
+perf files is representative; they must exhibit runtime
+characteristics similar to the workloads that are intended to be
+optimized. Failure to do so will result in the compiler optimizing
+for the wrong objective.
+
+The AutoFDO profile often encapsulates the program's behavior. If the
+performance-critical code is architecture-independent, the profile
+can be applied across platforms to achieve performance gains. For
+instance, using a profile generated on an Intel machine to build
+a kernel for an AMD machine can also yield performance improvements.
+
+There are two methods for acquiring a representative profile:
+(1) Sample real workloads using a production environment.
+(2) Generate the profile using a representative load test.
+When enabling the AutoFDO build configuration without providing an
+AutoFDO profile, the compiler only modifies the DWARF information in
+the kernel without impacting runtime performance. It's advisable to
+use a kernel binary built with the same AutoFDO configuration to
+collect the perf profile. While it's possible to use a kernel built
+with different options, it may result in inferior performance.
+
+One can collect profiles using an AutoFDO build of the previous kernel.
+AutoFDO employs relative line numbers to match the profiles, offering
+some tolerance for source changes. This mode is commonly used in a
+production environment for profile collection.
+
+In a profile collection based on a load test, the AutoFDO collection
+process consists of the following steps:
+
+#. Initial build: The kernel is built with AutoFDO options
+ without a profile.
+
+#. Profiling: The above kernel is then run with a representative
+ workload to gather execution frequency data. This data is
+ collected using hardware sampling, via perf. AutoFDO is most
+ effective on platforms supporting advanced PMU features like
+ LBR on Intel machines.
+
+#. AutoFDO profile generation: The perf output file is converted to
+ an AutoFDO profile via offline tools.
+
+AutoFDO support requires Clang/LLVM 17 or later.
+
+Preparation
+===========
+
+Configure the kernel with::
+
+ CONFIG_AUTOFDO_CLANG=y
+
+Customization
+=============
+
+The default CONFIG_AUTOFDO_CLANG setting covers kernel space objects for
+AutoFDO builds. One can, however, enable or disable the AutoFDO build for
+individual files and directories by adding a line similar to the following
+to the respective kernel Makefile:
+
+- For enabling a single file (e.g. 
foo.o) ::
+
+ AUTOFDO_PROFILE_foo.o := y
+
+- For enabling all files in one directory ::
+
+ AUTOFDO_PROFILE := y
+
+- For disabling one file ::
+
+ AUTOFDO_PROFILE_foo.o := n
+
+- For disabling all files in one directory ::
+
+ AUTOFDO_PROFILE := n
+
+Workflow
+========
+
+Here is an example workflow for an AutoFDO kernel:
+
+1) Build the kernel on the host machine with LLVM enabled,
+ for example, ::
+
+ $ make menuconfig LLVM=1
+
+ Turn on AutoFDO build config::
+
+ CONFIG_AUTOFDO_CLANG=y
+
+ With a configuration that has LLVM enabled, use the following command::
+
+ $ scripts/config -e AUTOFDO_CLANG
+
+ After getting the config, build with ::
+
+ $ make LLVM=1
+
+2) Install the kernel on the test machine.
+
+3) Run the load tests. The '-c' option in perf specifies the sample
+ event period. We suggest using a suitable prime number, like 500009,
+ for this purpose.
+
+ - For Intel platforms::
+
+ $ perf record -e BR_INST_RETIRED.NEAR_TAKEN:k -a -N -b -c <count> -o <perf_file> -- <loadtest>
+
+ - For AMD platforms:
+
+ The supported systems are: Zen3 with BRS, or Zen4 with amd_lbr_v2. To check:
+
+ For Zen3::
+
+ $ cat /proc/cpuinfo | grep " brs"
+
+ For Zen4::
+
+ $ cat /proc/cpuinfo | grep amd_lbr_v2
+
+ The following command generates the perf data file::
+
+ $ perf record --pfm-events RETIRED_TAKEN_BRANCH_INSTRUCTIONS:k -a -N -b -c <count> -o <perf_file> -- <loadtest>
+
+4) (Optional) Download the raw perf file to the host machine.
+
+5) To generate an AutoFDO profile, two offline tools are available:
+ create_llvm_prof and llvm_profgen. The create_llvm_prof tool is part
+ of the AutoFDO project and can be found on GitHub
+ (https://github.com/google/autofdo), version v0.30.1 or later.
+ The llvm_profgen tool is included in the LLVM compiler itself. It's
+ important to note that the version of llvm_profgen doesn't need to match
+ the version of Clang. It needs to be the LLVM 19 release of Clang
+ or later, or just from the LLVM trunk. ::
+
+ $ llvm-profgen --kernel --binary=<vmlinux> --perfdata=<perf_file> -o <profile_file>
+
+ or ::
+
+ $ create_llvm_prof --binary=<vmlinux> --profile=<perf_file> --format=extbinary --out=<profile_file>
+
+ Note that multiple AutoFDO profile files can be merged into one via::
+
+ $ llvm-profdata merge -o <profile_file> <profile_file_1> <profile_file_2> ...
+
+6) Rebuild the kernel using the AutoFDO profile file with the same config as step 1,
+ (Note CONFIG_AUTOFDO_CLANG needs to be enabled)::
+
+ $ make LLVM=1 CLANG_AUTOFDO_PROFILE=<profile_file>
diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
index 53d4d124f9c5..3c0ac08b2709 100644
--- a/Documentation/dev-tools/index.rst
+++ b/Documentation/dev-tools/index.rst
@@ -34,6 +34,8 @@ Documentation/dev-tools/testing-overview.rst
 ktap
 checkuapi
 gpio-sloppy-logic-analyzer
+ autofdo
+ propeller

 .. 
only:: subproject and html
diff --git a/Documentation/dev-tools/propeller.rst b/Documentation/dev-tools/propeller.rst
new file mode 100644
index 000000000000..92195958e3db
--- /dev/null
+++ b/Documentation/dev-tools/propeller.rst
@@ -0,0 +1,162 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+Using Propeller with the Linux kernel
+=====================================
+
+This enables Propeller build support for the kernel when using the Clang
+compiler. Propeller is a profile-guided optimization (PGO) method used
+to optimize binary executables. Like AutoFDO, it utilizes hardware
+sampling to gather information about the frequency of execution of
+different code paths within a binary. Unlike AutoFDO, this information
+is then used right before the linking phase to optimize (among other
+things) block layout within and across functions.
+
+A few important notes about adopting Propeller optimization:
+
+#. Although it can be used as a standalone optimization step, it is
+ strongly recommended to apply Propeller on top of AutoFDO,
+ AutoFDO+ThinLTO or Instrument FDO. The rest of this document
+ assumes this paradigm.
+
+#. Propeller uses another round of profiling on top of
+ AutoFDO/AutoFDO+ThinLTO/iFDO. The whole build process involves
+ "build-afdo - train-afdo - build-propeller - train-propeller -
+ build-optimized".
+
+#. Propeller requires the LLVM 19 release or later for Clang/Clang++
+ and the linker (ld.lld).
+
+#. In addition to the LLVM toolchain, Propeller requires a profiling
+ conversion tool: https://github.com/google/autofdo with a release
+ after v0.30.1: https://github.com/google/autofdo/releases/tag/v0.30.1.
+
+The Propeller optimization process involves the following steps:
+
+#. Initial building: Build the AutoFDO or AutoFDO+ThinLTO binary as
+ you would normally do, but with a set of compile-time / link-time
+ flags, so that a special metadata section is created within the
+ kernel binary. The special section is only intended to be used by the
+ profiling tool; it is not part of the runtime image, nor does it
+ change the kernel's runtime text sections.
+
+#. Profiling: The above kernel is then run with a representative
+ workload to gather execution frequency data. This data is collected
+ using hardware sampling, via perf. Propeller is most effective on
+ platforms supporting advanced PMU features like LBR on Intel
+ machines. This step is the same as profiling the kernel for AutoFDO
+ (the exact perf parameters can be different).
+
+#. Propeller profile generation: The perf output file is converted to a
+ pair of Propeller profiles via an offline tool.
+
+#. Optimized build: Build the AutoFDO or AutoFDO+ThinLTO optimized
+ binary as you would normally do, but with a compile-time /
+ link-time flag to pick up the Propeller compile time and link time
+ profiles. This build step uses 3 profiles: the AutoFDO profile,
+ the Propeller compile-time profile and the Propeller link-time
+ profile.
+
+#. Deployment: The optimized kernel binary is deployed and used
+ in production environments, providing improved performance
+ and reduced latency.
+
+Preparation
+===========
+
+Configure the kernel with::
+
+ CONFIG_AUTOFDO_CLANG=y
+ CONFIG_PROPELLER_CLANG=y
+
+Customization
+=============
+
+The default CONFIG_PROPELLER_CLANG setting covers kernel space objects
+for Propeller builds. 
One can, however, enable or disable Propeller build
+for individual files and directories by adding a line similar to the
+following to the respective kernel Makefile:
+
+- For enabling a single file (e.g. foo.o)::
+
+ PROPELLER_PROFILE_foo.o := y
+
+- For enabling all files in one directory::
+
+ PROPELLER_PROFILE := y
+
+- For disabling one file::
+
+ PROPELLER_PROFILE_foo.o := n
+
+- For disabling all files in one directory::
+
+ PROPELLER_PROFILE := n
+
+
+Workflow
+========
+
+Here is an example workflow for building an AutoFDO+Propeller kernel:
+
+1) Assuming an AutoFDO profile is already collected following
+ instructions in the AutoFDO document, build the kernel on the host
+ machine, with AutoFDO and Propeller build configs ::
+
+ CONFIG_AUTOFDO_CLANG=y
+ CONFIG_PROPELLER_CLANG=y
+
+ and ::
+
+ $ make LLVM=1 CLANG_AUTOFDO_PROFILE=<profile_file>
+
+2) Install the kernel on the test machine.
+
+3) Run the load tests. The '-c' option in perf specifies the sample
+ event period. We suggest using a suitable prime number, like 500009,
+ for this purpose.
+
+ - For Intel platforms::
+
+ $ perf record -e BR_INST_RETIRED.NEAR_TAKEN:k -a -N -b -c <count> -o <perf_file> -- <loadtest>
+
+ - For AMD platforms::
+
+ $ perf record --pfm-events RETIRED_TAKEN_BRANCH_INSTRUCTIONS:k -a -N -b -c <count> -o <perf_file> -- <loadtest>
+
+ Note you can repeat the above steps to collect multiple <perf_file>s.
+
+4) (Optional) Download the raw perf file(s) to the host machine.
+
+5) Use the create_llvm_prof tool (https://github.com/google/autofdo) to
+ generate the Propeller profiles. ::
+
+ $ create_llvm_prof --binary=<vmlinux> --profile=<perf_file>
+ --format=propeller --propeller_output_module_name
+ --out=<propeller_prefix>_cc_profile.txt
+ --propeller_symorder=<propeller_prefix>_ld_profile.txt
+
+ "<propeller_prefix>" can be something like "/home/user/dir/any_string".
+
+ This command generates a pair of Propeller profiles:
+ "<propeller_prefix>_cc_profile.txt" and
+ "<propeller_prefix>_ld_profile.txt".
+
+ If more than one <perf_file> was collected in the previous step,
+ you can create a temp list file "<perf_file_list>" with each line
+ containing one perf file name and run::
+
+ $ create_llvm_prof --binary=<vmlinux> --profile=@<perf_file_list>
+ --format=propeller --propeller_output_module_name
+ --out=<propeller_prefix>_cc_profile.txt
+ --propeller_symorder=<propeller_prefix>_ld_profile.txt
+
+6) Rebuild the kernel using the AutoFDO and Propeller
+ profiles. 
::
+
+ CONFIG_AUTOFDO_CLANG=y
+ CONFIG_PROPELLER_CLANG=y
+
+ and ::
+
+ $ make LLVM=1 CLANG_AUTOFDO_PROFILE=<profile_file> CLANG_PROPELLER_PROFILE_PREFIX=<propeller_prefix>
diff --git a/MAINTAINERS b/MAINTAINERS
index a578178468f1..a2d251917629 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3674,6 +3674,13 @@ F: kernel/audit*
 F: lib/*audit.c
 K: \baudit_[a-z_0-9]\+\b

+AUTOFDO BUILD
+M: Rong Xu <xur@google.com>
+M: Han Shen <shenhan@google.com>
+S: Supported
+F: Documentation/dev-tools/autofdo.rst
+F: scripts/Makefile.autofdo
+
 AUXILIARY BUS DRIVER
 M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 R: Dave Ertman <david.m.ertman@intel.com>
@@ -18505,6 +18512,13 @@ S: Maintained
 F: include/linux/psi*
 F: kernel/sched/psi.c

+PROPELLER BUILD
+M: Rong Xu <xur@google.com>
+M: Han Shen <shenhan@google.com>
+S: Supported
+F: Documentation/dev-tools/propeller.rst
+F: scripts/Makefile.propeller
+
 PRINTK
 M: Petr Mladek <pmladek@suse.com>
 R: Steven Rostedt <rostedt@goodmis.org>
diff --git a/Makefile b/Makefile
index 80151f53d8ee..61c3a727f369 100644
--- a/Makefile
+++ b/Makefile
@@ -1019,6 +1019,8 @@ include-$(CONFIG_KMSAN) += scripts/Makefile.kmsan
 include-$(CONFIG_UBSAN) += scripts/Makefile.ubsan
 include-$(CONFIG_KCOV) += scripts/Makefile.kcov
 include-$(CONFIG_RANDSTRUCT) += scripts/Makefile.randstruct
+include-$(CONFIG_AUTOFDO_CLANG) += scripts/Makefile.autofdo
+include-$(CONFIG_PROPELLER_CLANG) += scripts/Makefile.propeller
 include-$(CONFIG_GCC_PLUGINS) += scripts/Makefile.gcc-plugins

 include $(addprefix $(srctree)/, $(include-y))
diff --git a/arch/Kconfig b/arch/Kconfig
index bd9f095d69fa..00551f340dbe 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -811,6 +811,45 @@ config LTO_CLANG_THIN
 If unsure, say Y.
 endchoice

+config ARCH_SUPPORTS_AUTOFDO_CLANG
+ bool
+
+config AUTOFDO_CLANG
+ bool "Enable Clang's AutoFDO build (EXPERIMENTAL)"
+ depends on ARCH_SUPPORTS_AUTOFDO_CLANG
+ depends on CC_IS_CLANG && CLANG_VERSION >= 170000
+ help
+ This option enables Clang's AutoFDO build. When
+ an AutoFDO profile is specified in the variable
+ CLANG_AUTOFDO_PROFILE during the build process,
+ Clang uses the profile to optimize the kernel.
+
+ If no profile is specified, AutoFDO options are
+ still passed to Clang to facilitate the collection
+ of perf data for creating an AutoFDO profile in
+ subsequent builds.
+
+ If unsure, say N.
+
+config ARCH_SUPPORTS_PROPELLER_CLANG
+ bool
+
+config PROPELLER_CLANG
+ bool "Enable Clang's Propeller build"
+ depends on ARCH_SUPPORTS_PROPELLER_CLANG
+ depends on CC_IS_CLANG && CLANG_VERSION >= 190000
+ help
+ This option enables Clang's Propeller build. When the Propeller
+ profiles are specified in the variable CLANG_PROPELLER_PROFILE_PREFIX
+ during the build process, Clang uses the profiles to optimize
+ the kernel.
+
+ If no profile is specified, Propeller options are still passed
+ to Clang to facilitate the collection of perf data for creating
+ the Propeller profiles in subsequent builds.
+
+ If unsure, say N. 
-+ - config ARCH_SUPPORTS_CFI_CLANG - bool - help -diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S -index d317a843f7ea..f1b86eb30340 100644 ---- a/arch/sparc/kernel/vmlinux.lds.S -+++ b/arch/sparc/kernel/vmlinux.lds.S -@@ -48,6 +48,11 @@ SECTIONS - { - _text = .; - HEAD_TEXT -+ ALIGN_FUNCTION(); -+#ifdef CONFIG_SPARC64 -+ /* Match text section symbols in head_64.S first */ -+ *head_64.o(.text) -+#endif - TEXT_TEXT - SCHED_TEXT - LOCK_TEXT -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 76f9e6d11872..512b148b011a 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -126,6 +126,8 @@ config X86 - select ARCH_SUPPORTS_LTO_CLANG - select ARCH_SUPPORTS_LTO_CLANG_THIN - select ARCH_SUPPORTS_RT -+ select ARCH_SUPPORTS_AUTOFDO_CLANG -+ select ARCH_SUPPORTS_PROPELLER_CLANG if X86_64 - select ARCH_USE_BUILTIN_BSWAP - select ARCH_USE_CMPXCHG_LOCKREF if X86_CMPXCHG64 - select ARCH_USE_MEMTEST -diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S -index feb8102a9ca7..bc497a67d363 100644 ---- a/arch/x86/kernel/vmlinux.lds.S -+++ b/arch/x86/kernel/vmlinux.lds.S -@@ -443,6 +443,10 @@ SECTIONS - - STABS_DEBUG - DWARF_DEBUG -+#ifdef CONFIG_PROPELLER_CLANG -+ .llvm_bb_addr_map : { *(.llvm_bb_addr_map) } -+#endif -+ - ELF_DETAILS - - DISCARDS -diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h -index fa284b64b2de..54504013c749 100644 ---- a/include/asm-generic/vmlinux.lds.h -+++ b/include/asm-generic/vmlinux.lds.h -@@ -95,18 +95,25 @@ - * With LTO_CLANG, the linker also splits sections by default, so we need - * these macros to combine the sections during the final link. - * -+ * With AUTOFDO_CLANG and PROPELLER_CLANG, by default, the linker splits -+ * text sections and regroups functions into subsections. -+ * - * RODATA_MAIN is not used because existing code already defines .rodata.x - * sections to be brought in with rodata. - */ --#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) -+#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \ -+defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) - #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* -+#else -+#define TEXT_MAIN .text -+#endif -+#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) - #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* - #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* - #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* - #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..L* .bss..compoundliteral* - #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* - #else --#define TEXT_MAIN .text - #define DATA_MAIN .data - #define SDATA_MAIN .sdata - #define RODATA_MAIN .rodata -@@ -549,24 +556,44 @@ - __cpuidle_text_end = .; \ - __noinstr_text_end = .; - -+#define TEXT_SPLIT \ -+ __split_text_start = .; \ -+ *(.text.split .text.split.[0-9a-zA-Z_]*) \ -+ __split_text_end = .; -+ -+#define TEXT_UNLIKELY \ -+ __unlikely_text_start = .; \ -+ *(.text.unlikely .text.unlikely.*) \ -+ __unlikely_text_end = .; -+ -+#define TEXT_HOT \ -+ __hot_text_start = .; \ -+ *(.text.hot .text.hot.*) \ -+ __hot_text_end = .; -+ /* - * .text section. Map to function alignment to avoid address changes - * during second ld run in second ld pass when generating System.map - * -- * TEXT_MAIN here will match .text.fixup and .text.unlikely if dead -- * code elimination is enabled, so these sections should be converted -- * to use ".." 
first. -+ * TEXT_MAIN here will match symbols with a fixed pattern (for example, -+ * .text.hot or .text.unlikely) if dead code elimination or -+ * function-section is enabled. Match these symbols first before -+ * TEXT_MAIN to ensure they are grouped together. -+ * -+ * Also placing .text.hot section at the beginning of a page, this -+ * would help the TLB performance. - */ - #define TEXT_TEXT \ - ALIGN_FUNCTION(); \ -- *(.text.hot .text.hot.*) \ -- *(TEXT_MAIN .text.fixup) \ -- *(.text.unlikely .text.unlikely.*) \ -+ *(.text.asan.* .text.tsan.*) \ - *(.text.unknown .text.unknown.*) \ -+ TEXT_SPLIT \ -+ TEXT_UNLIKELY \ -+ . = ALIGN(PAGE_SIZE); \ -+ TEXT_HOT \ -+ *(TEXT_MAIN .text.fixup) \ - NOINSTR_TEXT \ -- *(.ref.text) \ -- *(.text.asan.* .text.tsan.*) -- -+ *(.ref.text) - - /* sched.text is aling to function alignment to secure we have same - * address even at second ld pass when generating System.map */ -diff --git a/scripts/Makefile.autofdo b/scripts/Makefile.autofdo -new file mode 100644 -index 000000000000..1caf2457e585 ---- /dev/null -+++ b/scripts/Makefile.autofdo -@@ -0,0 +1,24 @@ -+# SPDX-License-Identifier: GPL-2.0 -+ -+# Enable available and selected Clang AutoFDO features. -+ -+CFLAGS_AUTOFDO_CLANG := -fdebug-info-for-profiling -mllvm -enable-fs-discriminator=true -mllvm -improved-fs-discriminator=true -+ -+ifndef CONFIG_DEBUG_INFO -+ CFLAGS_AUTOFDO_CLANG += -gmlt -+endif -+ -+ifdef CLANG_AUTOFDO_PROFILE -+ CFLAGS_AUTOFDO_CLANG += -fprofile-sample-use=$(CLANG_AUTOFDO_PROFILE) -ffunction-sections -+ CFLAGS_AUTOFDO_CLANG += -fsplit-machine-functions -+endif -+ -+ifdef CONFIG_LTO_CLANG_THIN -+ ifdef CLANG_AUTOFDO_PROFILE -+ KBUILD_LDFLAGS += --lto-sample-profile=$(CLANG_AUTOFDO_PROFILE) -+ endif -+ KBUILD_LDFLAGS += --mllvm=-enable-fs-discriminator=true --mllvm=-improved-fs-discriminator=true -plugin-opt=thinlto -+ KBUILD_LDFLAGS += -plugin-opt=-split-machine-functions -+endif -+ -+export CFLAGS_AUTOFDO_CLANG -diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib -index 01a9f567d5af..e7859ad90224 100644 ---- a/scripts/Makefile.lib -+++ b/scripts/Makefile.lib -@@ -191,6 +191,26 @@ _c_flags += $(if $(patsubst n%,, \ - -D__KCSAN_INSTRUMENT_BARRIERS__) - endif - -+# -+# Enable AutoFDO build flags except some files or directories we don't want to -+# enable (depends on variables AUTOFDO_PROFILE_obj.o and AUTOFDO_PROFILE). -+# -+ifeq ($(CONFIG_AUTOFDO_CLANG),y) -+_c_flags += $(if $(patsubst n%,, \ -+ $(AUTOFDO_PROFILE_$(target-stem).o)$(AUTOFDO_PROFILE)$(is-kernel-object)), \ -+ $(CFLAGS_AUTOFDO_CLANG)) -+endif -+ -+# -+# Enable Propeller build flags except some files or directories we don't want to -+# enable (depends on variables AUTOFDO_PROPELLER_obj.o and PROPELLER_PROFILE). -+# -+ifdef CONFIG_PROPELLER_CLANG -+_c_flags += $(if $(patsubst n%,, \ -+ $(AUTOFDO_PROFILE_$(target-stem).o)$(AUTOFDO_PROFILE)$(PROPELLER_PROFILE))$(is-kernel-object), \ -+ $(CFLAGS_PROPELLER_CLANG)) -+endif -+ - # $(src) for including checkin headers from generated source files - # $(obj) for including generated headers from checkin source files - ifeq ($(KBUILD_EXTMOD),) -diff --git a/scripts/Makefile.propeller b/scripts/Makefile.propeller -new file mode 100644 -index 000000000000..344190717e47 ---- /dev/null -+++ b/scripts/Makefile.propeller -@@ -0,0 +1,28 @@ -+# SPDX-License-Identifier: GPL-2.0 -+ -+# Enable available and selected Clang Propeller features. 
-+ifdef CLANG_PROPELLER_PROFILE_PREFIX -+ CFLAGS_PROPELLER_CLANG := -fbasic-block-sections=list=$(CLANG_PROPELLER_PROFILE_PREFIX)_cc_profile.txt -ffunction-sections -+ KBUILD_LDFLAGS += --symbol-ordering-file=$(CLANG_PROPELLER_PROFILE_PREFIX)_ld_profile.txt --no-warn-symbol-ordering -+else -+ CFLAGS_PROPELLER_CLANG := -fbasic-block-sections=labels -+endif -+ -+# Propeller requires debug information to embed module names in the profiles. -+# If CONFIG_DEBUG_INFO is not enabled, set -gmlt option. Skip this for AutoFDO, -+# as the option should already be set. -+ifndef CONFIG_DEBUG_INFO -+ ifndef CONFIG_AUTOFDO_CLANG -+ CFLAGS_PROPELLER_CLANG += -gmlt -+ endif -+endif -+ -+ifdef CONFIG_LTO_CLANG_THIN -+ ifdef CLANG_PROPELLER_PROFILE_PREFIX -+ KBUILD_LDFLAGS += --lto-basic-block-sections=$(CLANG_PROPELLER_PROFILE_PREFIX)_cc_profile.txt -+ else -+ KBUILD_LDFLAGS += --lto-basic-block-sections=labels -+ endif -+endif -+ -+export CFLAGS_PROPELLER_CLANG -diff --git a/tools/objtool/check.c b/tools/objtool/check.c -index f0d8796b984a..634a67dba86c 100644 ---- a/tools/objtool/check.c -+++ b/tools/objtool/check.c -@@ -4560,6 +4560,8 @@ static int validate_ibt(struct objtool_file *file) - !strcmp(sec->name, "__jump_table") || - !strcmp(sec->name, "__mcount_loc") || - !strcmp(sec->name, ".kcfi_traps") || -+ !strcmp(sec->name, ".llvm.call-graph-profile") || -+ !strcmp(sec->name, ".llvm_bb_addr_map") || - strstr(sec->name, "__patchable_function_entries")) - continue; - -diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c -index 3d27983dc908..6f64d611faea 100644 ---- a/tools/objtool/elf.c -+++ b/tools/objtool/elf.c -@@ -224,12 +224,17 @@ int find_symbol_hole_containing(const struct section *sec, unsigned long offset) - if (n) - return 0; /* not a hole */ - -- /* didn't find a symbol for which @offset is after it */ -- if (!hole.sym) -- return 0; /* not a hole */ -+ /* -+ * @offset >= sym->offset + sym->len, find symbol after it. -+ * When hole.sym is empty, use the first node to compute the hole. -+ * If there is no symbol in the section, the first node will be NULL, -+ * in which case, -1 is returned to skip the whole section. 
-+ */ -+ if (hole.sym) -+ n = rb_next(&hole.sym->node); -+ else -+ n = rb_first_cached(&sec->symbol_tree); - -- /* @offset >= sym->offset + sym->len, find symbol after it */ -- n = rb_next(&hole.sym->node); - if (!n) - return -1; /* until end of address space */ - -- 2.48.0.rc1 -From 665449fc260d8c493b4f983fecfe0028da4e1ddd Mon Sep 17 00:00:00 2001 +From 1fc2e15c0c690b276928953ff73277b4d66e67f3 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 9 Jan 2025 16:36:50 +0100 -Subject: [PATCH 05/13] bbr3 +Date: Mon, 20 Jan 2025 13:21:45 +0100 +Subject: [PATCH 03/12] bbr3 Signed-off-by: Peter Jung --- @@ -3044,10 +2359,10 @@ Signed-off-by: Peter Jung 16 files changed, 1940 insertions(+), 553 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h -index 6a5e08b937b3..27aab715490e 100644 +index f88daaa76d83..b0f79a5888a2 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h -@@ -369,7 +369,9 @@ struct tcp_sock { +@@ -368,7 +368,9 @@ struct tcp_sock { u8 compressed_ack; u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ @@ -3059,7 +2374,7 @@ index 6a5e08b937b3..27aab715490e 100644 fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h -index c0deaafebfdc..d53f042d936e 100644 +index c7f42844c79a..170250145598 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -137,8 +137,8 @@ struct inet_connection_sock { @@ -3074,7 +2389,7 @@ index c0deaafebfdc..d53f042d936e 100644 #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ diff --git a/include/net/tcp.h b/include/net/tcp.h -index d1948d357dad..7d99f0bec5f2 100644 +index e9b37b76e894..419fda8c64e5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -375,6 +375,8 @@ static inline void tcp_dec_quickack_mode(struct sock *sk) @@ -3375,10 +2690,10 @@ index 554804774628..2279e6e7bc9c 100644 .undo_cwnd = bpf_tcp_ca_undo_cwnd, .sndbuf_expand = bpf_tcp_ca_sndbuf_expand, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index 4f77bd862e95..fd3a5551eda7 100644 +index 0d704bda6c41..d652078f6aec 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c -@@ -3384,6 +3384,7 @@ int tcp_disconnect(struct sock *sk, int flags) +@@ -3385,6 +3385,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->rx_opt.dsack = 0; tp->rx_opt.num_sacks = 0; tp->rcv_ooopack = 0; @@ -3386,7 +2701,7 @@ index 4f77bd862e95..fd3a5551eda7 100644 /* Clean up fastopen related fields */ -@@ -4110,6 +4111,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) +@@ -4111,6 +4112,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_options |= TCPI_OPT_ECN; if (tp->ecn_flags & TCP_ECN_SEEN) info->tcpi_options |= TCPI_OPT_ECN_SEEN; @@ -6040,7 +5355,7 @@ index 760941e55153..a180fa648d5e 100644 MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)"); +MODULE_VERSION(__stringify(BBR_VERSION)); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c -index 0306d257fa64..28f581c0dab7 100644 +index df758adbb445..e98e5dbc050e 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -237,6 +237,7 @@ void tcp_init_congestion_control(struct sock *sk) @@ -6052,7 +5367,7 @@ index 0306d257fa64..28f581c0dab7 100644 icsk->icsk_ca_ops->init(sk); if (tcp_ca_needs_ecn(sk)) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index 2d43b29da15e..c3252eb30ce1 100644 +index 4811727b8a02..ba8b714fb693 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ 
-370,7 +370,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) @@ -6186,10 +5501,10 @@ index 2d43b29da15e..c3252eb30ce1 100644 tcp_in_quickack_mode(sk) || /* Protocol state mandates a one-time immediate ACK */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c -index bb1fe1ba867a..050a80769de6 100644 +index 7121d8573928..696afe8cfda8 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c -@@ -462,6 +462,8 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) +@@ -466,6 +466,8 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); bool ca_got_dst = false; @@ -6199,7 +5514,7 @@ index bb1fe1ba867a..050a80769de6 100644 const struct tcp_congestion_ops *ca; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c -index 8efc58716ce9..5798ce3db487 100644 +index 0e5b9a654254..f7da9d719b25 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -336,10 +336,9 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) @@ -6301,7 +5616,7 @@ index 8efc58716ce9..5798ce3db487 100644 goto repair; /* Skip network transmission */ } -@@ -2981,6 +3008,7 @@ void tcp_send_loss_probe(struct sock *sk) +@@ -2979,6 +3006,7 @@ void tcp_send_loss_probe(struct sock *sk) if (WARN_ON(!skb || !tcp_skb_pcount(skb))) goto rearm_timer; @@ -6390,10 +5705,10 @@ index a8f6d9d06f2e..8737f2134648 100644 rs->interval_us = max(snd_us, ack_us); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c -index 79064580c8c0..697270ce1ea6 100644 +index b412ed88ccd9..d70f8b742b21 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c -@@ -690,6 +690,7 @@ void tcp_write_timer_handler(struct sock *sk) +@@ -699,6 +699,7 @@ void tcp_write_timer_handler(struct sock *sk) return; } @@ -6404,40 +5719,46 @@ index 79064580c8c0..697270ce1ea6 100644 -- 2.48.0.rc1 -From 3ff09f2d9488979acccefbda05e384ed0619cef5 Mon Sep 17 00:00:00 2001 +From e01619bda1e69eea53c0f3ef61476fb02da06868 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 9 Jan 2025 16:37:07 +0100 -Subject: [PATCH 06/13] cachy +Date: Mon, 20 Jan 2025 13:21:55 +0100 +Subject: [PATCH 04/12] cachy Signed-off-by: Peter Jung --- .../admin-guide/kernel-parameters.txt | 12 + Documentation/admin-guide/sysctl/vm.rst | 72 + - Documentation/gpu/amdgpu/thermal.rst | 12 + Makefile | 8 + arch/x86/Kconfig.cpu | 367 +- arch/x86/Makefile | 89 +- arch/x86/include/asm/pci.h | 6 + arch/x86/include/asm/vermagic.h | 72 + arch/x86/pci/common.c | 7 +- + block/elevator.c | 8 + drivers/Makefile | 13 +- drivers/ata/ahci.c | 23 +- drivers/cpufreq/Kconfig.x86 | 2 - drivers/cpufreq/intel_pstate.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 44 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 + + drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c | 19 + + drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 1 + drivers/gpu/drm/amd/display/Kconfig | 6 + - .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- + .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 69 +- + .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 7 + .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 +- .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 6 +- .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 6 +- - .../gpu/drm/amd/include/kgd_pp_interface.h | 4 + - drivers/gpu/drm/amd/pm/amdgpu_pm.c | 130 + - 
drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 4 + - drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 18 +- - drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 2 + - .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 108 +- - .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 108 +- + .../drm/amd/display/dc/bios/bios_parser2.c | 13 +- + .../drm/amd/display/dc/core/dc_link_exports.c | 6 + + drivers/gpu/drm/amd/display/dc/dc.h | 3 + + .../dc/resource/dce120/dce120_resource.c | 17 + + drivers/gpu/drm/amd/pm/amdgpu_pm.c | 3 + + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 14 +- drivers/input/evdev.c | 19 +- drivers/md/dm-crypt.c | 5 + drivers/media/v4l2-core/Kconfig | 5 + @@ -6459,6 +5780,7 @@ Signed-off-by: Peter Jung include/linux/wait.h | 2 + init/Kconfig | 26 + kernel/Kconfig.hz | 24 + + kernel/Kconfig.preempt | 2 +- kernel/fork.c | 14 + kernel/locking/rwsem.c | 4 +- kernel/sched/fair.c | 13 + @@ -6476,7 +5798,7 @@ Signed-off-by: Peter Jung mm/vmpressure.c | 4 + mm/vmscan.c | 143 + net/ipv4/inet_connection_sock.c | 2 +- - 65 files changed, 6915 insertions(+), 66 deletions(-) + 72 files changed, 6714 insertions(+), 93 deletions(-) create mode 100644 drivers/media/v4l2-core/v4l2loopback.c create mode 100644 drivers/media/v4l2-core/v4l2loopback.h create mode 100644 drivers/media/v4l2-core/v4l2loopback_formats.h @@ -6486,10 +5808,10 @@ Signed-off-by: Peter Jung create mode 100644 drivers/scsi/vhba/vhba.c diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index d401577b5a6a..e6ec15a89924 100644 +index 3872bc6ec49d..5e8881ec6b40 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -2248,6 +2248,9 @@ +@@ -2256,6 +2256,9 @@ disable Do not enable intel_pstate as the default scaling driver for the supported processors @@ -6499,7 +5821,7 @@ index d401577b5a6a..e6ec15a89924 100644 active Use intel_pstate driver to bypass the scaling governors layer of cpufreq and provides it own -@@ -4473,6 +4476,15 @@ +@@ -4481,6 +4484,15 @@ nomsi [MSI] If the PCI_MSI kernel config parameter is enabled, this kernel boot option can be used to disable the use of MSI interrupts system-wide. @@ -6612,34 +5934,11 @@ index f48eaa98d22d..fc777c14cff6 100644 unprivileged_userfaultfd ======================== -diff --git a/Documentation/gpu/amdgpu/thermal.rst b/Documentation/gpu/amdgpu/thermal.rst -index 6d942b5c58f0..1768a106aab1 100644 ---- a/Documentation/gpu/amdgpu/thermal.rst -+++ b/Documentation/gpu/amdgpu/thermal.rst -@@ -100,6 +100,18 @@ fan_minimum_pwm - .. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c - :doc: fan_minimum_pwm - -+fan_zero_rpm_enable -+---------------------- -+ -+.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c -+ :doc: fan_zero_rpm_enable -+ -+fan_zero_rpm_stop_temperature -+----------------------------- -+ -+.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c -+ :doc: fan_zero_rpm_stop_temperature -+ - GFXOFF - ====== - diff --git a/Makefile b/Makefile -index 61c3a727f369..d7e04964ee7a 100644 +index b9464c88ac72..ea555e6a8bf1 100644 --- a/Makefile +++ b/Makefile -@@ -802,11 +802,19 @@ KBUILD_CFLAGS += -fno-delete-null-pointer-checks +@@ -860,11 +860,19 @@ KBUILD_CFLAGS += -fno-delete-null-pointer-checks ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE KBUILD_CFLAGS += -O2 KBUILD_RUSTFLAGS += -Copt-level=2 @@ -6660,7 +5959,7 @@ index 61c3a727f369..d7e04964ee7a 100644 # depends on `opt-level` and `debug-assertions`, respectively. 
KBUILD_RUSTFLAGS += -Cdebug-assertions=$(if $(CONFIG_RUST_DEBUG_ASSERTIONS),y,n) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu -index 2a7279d80460..786fcf84d128 100644 +index bacdc502903f..f2c97bdcef58 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -155,9 +155,8 @@ config MPENTIUM4 @@ -6790,7 +6089,7 @@ index 2a7279d80460..786fcf84d128 100644 + +config MZEN5 + bool "AMD Zen 5" -+ depends on (CC_IS_GCC && GCC_VERSION > 140000) || (CC_IS_CLANG && CLANG_VERSION >= 191000) ++ depends on (CC_IS_GCC && GCC_VERSION > 140000) || (CC_IS_CLANG && CLANG_VERSION >= 190000) + help + Select this for AMD Family 19h Zen 5 processors. + @@ -7347,6 +6646,28 @@ index ddb798603201..7c20387d8202 100644 return dev; } -#endif +diff --git a/block/elevator.c b/block/elevator.c +index 7c3ba80e5ff4..06e974eb6594 100644 +--- a/block/elevator.c ++++ b/block/elevator.c +@@ -566,9 +566,17 @@ static struct elevator_type *elevator_get_default(struct request_queue *q) + + if (q->nr_hw_queues != 1 && + !blk_mq_is_shared_tags(q->tag_set->flags)) ++#if defined(CONFIG_CACHY) ++ return elevator_find_get("mq-deadline"); ++#else + return NULL; ++#endif + ++#if defined(CONFIG_CACHY) && defined(CONFIG_IOSCHED_BFQ) ++ return elevator_find_get("bfq"); ++#else + return elevator_find_get("mq-deadline"); ++#endif + } + + /* diff --git a/drivers/Makefile b/drivers/Makefile index 45d1c3e630f7..4f5ab2429a7f 100644 --- a/drivers/Makefile @@ -7381,7 +6702,7 @@ index 45d1c3e630f7..4f5ab2429a7f 100644 obj-$(CONFIG_MTD) += mtd/ obj-$(CONFIG_SPI) += spi/ diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c -index 45f63b09828a..d8bcb8b7544f 100644 +index 8d27c567be1c..479477438de0 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1618,7 +1618,7 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance) @@ -7425,8 +6746,8 @@ index 45f63b09828a..d8bcb8b7544f 100644 } static int ahci_get_irq_vector(struct ata_host *host, int port) -@@ -1896,7 +1889,9 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) - hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar]; +@@ -1898,7 +1891,9 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) + return -ENOMEM; /* detect remapped nvme devices */ - ahci_remap_check(pdev, ahci_pci_bar, hpriv); @@ -7457,10 +6778,10 @@ index 97c2d4f15d76..5a3af44d785a 100644 This driver adds a CPUFreq driver which utilizes a fine grain processor performance frequency control range instead of legacy diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c -index 400337f3b572..d413b60c6001 100644 +index b8e2396a708a..d8e529cd454d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c -@@ -3817,6 +3817,8 @@ static int __init intel_pstate_setup(char *str) +@@ -3819,6 +3819,8 @@ static int __init intel_pstate_setup(char *str) if (!strcmp(str, "disable")) no_load = 1; @@ -7470,10 +6791,10 @@ index 400337f3b572..d413b60c6001 100644 default_driver = &intel_pstate; else if (!strcmp(str, "passive")) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h -index 9b1e0ede05a4..7617963901fa 100644 +index 4653a8d2823a..6590e83dfbf0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h -@@ -164,6 +164,7 @@ struct amdgpu_watchdog_timer { +@@ -160,6 +160,7 @@ struct amdgpu_watchdog_timer { */ extern int amdgpu_modeset; extern unsigned int amdgpu_vram_limit; @@ -7481,11 +6802,123 @@ index 9b1e0ede05a4..7617963901fa 100644 
extern int amdgpu_vis_vram_limit; extern int amdgpu_gart_size; extern int amdgpu_gtt_size; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +index 093141ad6ed0..e476e45b996a 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +@@ -36,13 +36,6 @@ + #include "atombios_encoders.h" + #include "bif/bif_4_1_d.h" + +-static void amdgpu_atombios_lookup_i2c_gpio_quirks(struct amdgpu_device *adev, +- ATOM_GPIO_I2C_ASSIGMENT *gpio, +- u8 index) +-{ +- +-} +- + static struct amdgpu_i2c_bus_rec amdgpu_atombios_get_bus_rec_for_i2c_gpio(ATOM_GPIO_I2C_ASSIGMENT *gpio) + { + struct amdgpu_i2c_bus_rec i2c; +@@ -108,9 +101,6 @@ struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device * + + gpio = &i2c_info->asGPIO_Info[0]; + for (i = 0; i < num_indices; i++) { +- +- amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i); +- + if (gpio->sucI2cId.ucAccess == id) { + i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio); + break; +@@ -142,8 +132,6 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev) + + gpio = &i2c_info->asGPIO_Info[0]; + for (i = 0; i < num_indices; i++) { +- amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i); +- + i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio); + + if (i2c.valid) { +@@ -156,6 +144,38 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev) + } + } + ++void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id) ++{ ++ struct atom_context *ctx = adev->mode_info.atom_context; ++ ATOM_GPIO_I2C_ASSIGMENT *gpio; ++ struct amdgpu_i2c_bus_rec i2c; ++ int index = GetIndexIntoMasterTable(DATA, GPIO_I2C_Info); ++ struct _ATOM_GPIO_I2C_INFO *i2c_info; ++ uint16_t data_offset, size; ++ int i, num_indices; ++ char stmp[32]; ++ ++ if (amdgpu_atom_parse_data_header(ctx, index, &size, NULL, NULL, &data_offset)) { ++ i2c_info = (struct _ATOM_GPIO_I2C_INFO *)(ctx->bios + data_offset); ++ ++ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / ++ sizeof(ATOM_GPIO_I2C_ASSIGMENT); ++ ++ gpio = &i2c_info->asGPIO_Info[0]; ++ for (i = 0; i < num_indices; i++) { ++ i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio); ++ ++ if (i2c.valid && i2c.i2c_id == i2c_id) { ++ sprintf(stmp, "OEM 0x%x", i2c.i2c_id); ++ adev->i2c_bus[i] = amdgpu_i2c_create(adev_to_drm(adev), &i2c, stmp); ++ break; ++ } ++ gpio = (ATOM_GPIO_I2C_ASSIGMENT *) ++ ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT)); ++ } ++ } ++} ++ + struct amdgpu_gpio_rec + amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev, + u8 id) +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +index 0e16432d9a72..867bc5c5ce67 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +@@ -136,6 +136,7 @@ amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev, + struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device *adev, + uint8_t id); + void amdgpu_atombios_i2c_init(struct amdgpu_device *adev); ++void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id); + + bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev); + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index cd4fac120834..1ab433d774cc 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -4461,8 +4461,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, + 
goto failed; + } + /* init i2c buses */ +- if (!amdgpu_device_has_dc_support(adev)) +- amdgpu_atombios_i2c_init(adev); ++ amdgpu_i2c_init(adev); + } + } + +@@ -4724,8 +4723,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) + amdgpu_reset_fini(adev); + + /* free i2c buses */ +- if (!amdgpu_device_has_dc_support(adev)) +- amdgpu_i2c_fini(adev); ++ amdgpu_i2c_fini(adev); + + if (amdgpu_emu_mode != 1) + amdgpu_atombios_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -index 81d9877c8735..852e6f315576 100644 +index 38686203bea6..811d020f3f4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -@@ -136,6 +136,7 @@ enum AMDGPU_DEBUG_MASK { +@@ -137,6 +137,7 @@ enum AMDGPU_DEBUG_MASK { }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -7493,7 +6926,7 @@ index 81d9877c8735..852e6f315576 100644 int amdgpu_vis_vram_limit; int amdgpu_gart_size = -1; /* auto */ int amdgpu_gtt_size = -1; /* auto */ -@@ -259,6 +260,15 @@ struct amdgpu_watchdog_timer amdgpu_watchdog_timer = { +@@ -255,6 +256,15 @@ struct amdgpu_watchdog_timer amdgpu_watchdog_timer = { .period = 0x0, /* default to 0x0 (timeout disable) */ }; @@ -7509,11 +6942,65 @@ index 81d9877c8735..852e6f315576 100644 /** * DOC: vramlimit (int) * Restrict the total amount of VRAM in MiB for testing. The default is 0 (Use full VRAM). +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +index f0765ccde668..8179d0814db9 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +@@ -225,6 +225,25 @@ void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c) + kfree(i2c); + } + ++void amdgpu_i2c_init(struct amdgpu_device *adev) ++{ ++ if (!adev->is_atom_fw) { ++ if (!amdgpu_device_has_dc_support(adev)) { ++ amdgpu_atombios_i2c_init(adev); ++ } else { ++ switch (adev->asic_type) { ++ case CHIP_POLARIS10: ++ case CHIP_POLARIS11: ++ case CHIP_POLARIS12: ++ amdgpu_atombios_oem_i2c_init(adev, 0x97); ++ break; ++ default: ++ break; ++ } ++ } ++ } ++} ++ + /* remove all the buses */ + void amdgpu_i2c_fini(struct amdgpu_device *adev) + { +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h +index 21e3d1dad0a1..1d3d3806e0dd 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h +@@ -28,6 +28,7 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev, + const struct amdgpu_i2c_bus_rec *rec, + const char *name); + void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c); ++void amdgpu_i2c_init(struct amdgpu_device *adev); + void amdgpu_i2c_fini(struct amdgpu_device *adev); + struct amdgpu_i2c_chan * + amdgpu_i2c_lookup(struct amdgpu_device *adev, +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +index 5e3faefc5510..6da4f946cac0 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +@@ -609,6 +609,7 @@ struct amdgpu_i2c_adapter { + struct i2c_adapter base; + + struct ddc_service *ddc_service; ++ bool oem; + }; + + #define TO_DM_AUX(x) container_of((x), struct amdgpu_dm_dp_aux, aux) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig -index df17e79c45c7..e454488c1a31 100644 +index 11e3f2f3b174..7b1bd69dc29e 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig -@@ -53,4 +53,10 @@ config DRM_AMD_SECURE_DISPLAY +@@ -54,4 
+54,10 @@ config DRM_AMD_SECURE_DISPLAY This option enables the calculation of crc of specific region via debugfs. Cooperate with specific DMCU FW. @@ -7525,10 +7012,73 @@ index df17e79c45c7..e454488c1a31 100644 + endmenu diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -index ad3a3aa72b51..a80f8c9fc324 100644 +index 5f216d626cbb..382af92c4ff1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -@@ -4501,7 +4501,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) +@@ -177,6 +177,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev); + static void amdgpu_dm_fini(struct amdgpu_device *adev); + static bool is_freesync_video_mode(const struct drm_display_mode *mode, struct amdgpu_dm_connector *aconnector); + static void reset_freesync_config_for_crtc(struct dm_crtc_state *new_crtc_state); ++static struct amdgpu_i2c_adapter * ++create_i2c(struct ddc_service *ddc_service, bool oem); + + static enum drm_mode_subconnector get_subconnector_type(struct dc_link *link) + { +@@ -2839,6 +2841,33 @@ static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev) + return 0; + } + ++static int dm_oem_i2c_hw_init(struct amdgpu_device *adev) ++{ ++ struct amdgpu_display_manager *dm = &adev->dm; ++ struct amdgpu_i2c_adapter *oem_i2c; ++ struct ddc_service *oem_ddc_service; ++ int r; ++ ++ oem_ddc_service = dc_get_oem_i2c_device(adev->dm.dc); ++ if (oem_ddc_service) { ++ oem_i2c = create_i2c(oem_ddc_service, true); ++ if (!oem_i2c) { ++ dev_info(adev->dev, "Failed to create oem i2c adapter data\n"); ++ return -ENOMEM; ++ } ++ ++ r = i2c_add_adapter(&oem_i2c->base); ++ if (r) { ++ dev_info(adev->dev, "Failed to register oem i2c\n"); ++ kfree(oem_i2c); ++ return r; ++ } ++ dm->oem_i2c = oem_i2c; ++ } ++ ++ return 0; ++} ++ + /** + * dm_hw_init() - Initialize DC device + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
+@@ -2870,6 +2899,10 @@ static int dm_hw_init(struct amdgpu_ip_block *ip_block) + return r; + amdgpu_dm_hpd_init(adev); + ++ r = dm_oem_i2c_hw_init(adev); ++ if (r) ++ dev_info(adev->dev, "Failed to add OEM i2c bus\n"); ++ + return 0; + } + +@@ -2885,6 +2918,8 @@ static int dm_hw_fini(struct amdgpu_ip_block *ip_block) + { + struct amdgpu_device *adev = ip_block->adev; + ++ kfree(adev->dm.oem_i2c); ++ + amdgpu_dm_hpd_fini(adev); + + amdgpu_dm_irq_fini(adev); +@@ -4516,7 +4551,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) return r; } @@ -7537,6 +7087,93 @@ index ad3a3aa72b51..a80f8c9fc324 100644 if (amdgpu_dm_create_color_properties(adev)) { dc_state_release(state->context); kfree(state); +@@ -8218,7 +8253,7 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap, + int i; + int result = -EIO; + +- if (!ddc_service->ddc_pin || !ddc_service->ddc_pin->hw_info.hw_supported) ++ if (!ddc_service->ddc_pin) + return result; + + cmd.payloads = kcalloc(num, sizeof(struct i2c_payload), GFP_KERNEL); +@@ -8237,11 +8272,18 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap, + cmd.payloads[i].data = msgs[i].buf; + } + +- if (dc_submit_i2c( +- ddc_service->ctx->dc, +- ddc_service->link->link_index, +- &cmd)) +- result = num; ++ if (i2c->oem) { ++ if (dc_submit_i2c_oem( ++ ddc_service->ctx->dc, ++ &cmd)) ++ result = num; ++ } else { ++ if (dc_submit_i2c( ++ ddc_service->ctx->dc, ++ ddc_service->link->link_index, ++ &cmd)) ++ result = num; ++ } + + kfree(cmd.payloads); + return result; +@@ -8258,9 +8300,7 @@ static const struct i2c_algorithm amdgpu_dm_i2c_algo = { + }; + + static struct amdgpu_i2c_adapter * +-create_i2c(struct ddc_service *ddc_service, +- int link_index, +- int *res) ++create_i2c(struct ddc_service *ddc_service, bool oem) + { + struct amdgpu_device *adev = ddc_service->ctx->driver_context; + struct amdgpu_i2c_adapter *i2c; +@@ -8271,9 +8311,14 @@ create_i2c(struct ddc_service *ddc_service, + i2c->base.owner = THIS_MODULE; + i2c->base.dev.parent = &adev->pdev->dev; + i2c->base.algo = &amdgpu_dm_i2c_algo; +- snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c hw bus %d", link_index); ++ if (oem) ++ snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c OEM bus"); ++ else ++ snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c hw bus %d", ++ ddc_service->link->link_index); + i2c_set_adapdata(&i2c->base, i2c); + i2c->ddc_service = ddc_service; ++ i2c->oem = oem; + + return i2c; + } +@@ -8298,7 +8343,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, + link->priv = aconnector; + + +- i2c = create_i2c(link->ddc, link->link_index, &res); ++ i2c = create_i2c(link->ddc, false); + if (!i2c) { + DRM_ERROR("Failed to create i2c adapter data\n"); + return -ENOMEM; +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +index 2227cd8e4a89..5710776bb0e2 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +@@ -606,6 +606,13 @@ struct amdgpu_display_manager { + * Bounding box data read from dmub during early initialization for DCN4+ + */ + struct dml2_soc_bb *bb_from_dmub; ++ ++ /** ++ * @oem_i2c: ++ * ++ * OEM i2c bus ++ */ ++ struct amdgpu_i2c_adapter *oem_i2c; + }; + + enum dsc_clock_force_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index ebabfe3a512f..4d3ebcaacca1 100644 --- 
a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -7551,10 +7188,10 @@ index ebabfe3a512f..4d3ebcaacca1 100644 * * AMD driver supports pre-defined mathematical functions for transferring diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c -index 9be87b532517..47e8cd9a756f 100644 +index 36a830a7440f..a8fc8bd52d51 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c -@@ -473,7 +473,7 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) +@@ -470,7 +470,7 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) } #endif @@ -7563,7 +7200,7 @@ index 9be87b532517..47e8cd9a756f 100644 /** * dm_crtc_additional_color_mgmt - enable additional color properties * @crtc: DRM CRTC -@@ -555,7 +555,7 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { +@@ -552,7 +552,7 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { #if defined(CONFIG_DEBUG_FS) .late_register = amdgpu_dm_crtc_late_register, #endif @@ -7572,7 +7209,7 @@ index 9be87b532517..47e8cd9a756f 100644 .atomic_set_property = amdgpu_dm_atomic_crtc_set_property, .atomic_get_property = amdgpu_dm_atomic_crtc_get_property, #endif -@@ -734,7 +734,7 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, +@@ -731,7 +731,7 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); @@ -7612,33 +7249,129 @@ index 495e3cd70426..704a48209657 100644 dm_atomic_plane_attach_color_mgmt_properties(dm, plane); #endif /* Create (reset) the plane state */ -diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h -index 19a48d98830a..2fca138419d4 100644 ---- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h -+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h -@@ -119,6 +119,8 @@ enum pp_clock_type { - OD_ACOUSTIC_TARGET, - OD_FAN_TARGET_TEMPERATURE, - OD_FAN_MINIMUM_PWM, -+ OD_FAN_ZERO_RPM_ENABLE, -+ OD_FAN_ZERO_RPM_STOP_TEMP, - }; +diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +index c9a6de110b74..470ec970217b 100644 +--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c ++++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +@@ -1778,6 +1778,7 @@ static enum bp_result get_firmware_info_v3_1( + struct dc_firmware_info *info) + { + struct atom_firmware_info_v3_1 *firmware_info; ++ struct atom_firmware_info_v3_2 *firmware_info32; + struct atom_display_controller_info_v4_1 *dce_info = NULL; - enum amd_pp_sensors { -@@ -199,6 +201,8 @@ enum PP_OD_DPM_TABLE_COMMAND { - PP_OD_EDIT_ACOUSTIC_TARGET, - PP_OD_EDIT_FAN_TARGET_TEMPERATURE, - PP_OD_EDIT_FAN_MINIMUM_PWM, -+ PP_OD_EDIT_FAN_ZERO_RPM_ENABLE, -+ PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP, - }; + if (!info) +@@ -1785,6 +1786,8 @@ static enum bp_result get_firmware_info_v3_1( - struct pp_states_info { + firmware_info = GET_IMAGE(struct atom_firmware_info_v3_1, + DATA_TABLES(firmwareinfo)); ++ firmware_info32 = GET_IMAGE(struct atom_firmware_info_v3_2, ++ DATA_TABLES(firmwareinfo)); + + dce_info = GET_IMAGE(struct atom_display_controller_info_v4_1, + DATA_TABLES(dce_info)); +@@ -1817,7 +1820,15 @@ static enum bp_result get_firmware_info_v3_1( + bp->cmd_tbl.get_smu_clock_info(bp, SMU9_SYSPLL0_ID) * 10; + } + +- info->oem_i2c_present = false; ++ /* These fields are marked as reserved in v3_1, but they appear to be 
populated ++ * properly. ++ */ ++ if (firmware_info32->board_i2c_feature_id == 0x2) { ++ info->oem_i2c_present = true; ++ info->oem_i2c_obj_id = firmware_info32->board_i2c_feature_gpio_id; ++ } else { ++ info->oem_i2c_present = false; ++ } + + return BP_RESULT_OK; + } +diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +index 457d60eeb486..13636eb4ec3f 100644 +--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c ++++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +@@ -142,6 +142,12 @@ bool dc_link_update_dsc_config(struct pipe_ctx *pipe_ctx) + return link->dc->link_srv->update_dsc_config(pipe_ctx); + } + ++struct ddc_service * ++dc_get_oem_i2c_device(struct dc *dc) ++{ ++ return dc->res_pool->oem_device; ++} ++ + bool dc_is_oem_i2c_device_present( + struct dc *dc, + size_t slave_address) +diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h +index 08c5a315b3a6..70d6005ecd64 100644 +--- a/drivers/gpu/drm/amd/display/dc/dc.h ++++ b/drivers/gpu/drm/amd/display/dc/dc.h +@@ -1939,6 +1939,9 @@ int dc_link_aux_transfer_raw(struct ddc_service *ddc, + struct aux_payload *payload, + enum aux_return_code_type *operation_result); + ++struct ddc_service * ++dc_get_oem_i2c_device(struct dc *dc); ++ + bool dc_is_oem_i2c_device_present( + struct dc *dc, + size_t slave_address +diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c +index c63c59623433..eb1e158d3436 100644 +--- a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c ++++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c +@@ -67,6 +67,7 @@ + #include "reg_helper.h" + + #include "dce100/dce100_resource.h" ++#include "link.h" + + #ifndef mmDP0_DP_DPHY_INTERNAL_CTRL + #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x210f +@@ -659,6 +660,12 @@ static void dce120_resource_destruct(struct dce110_resource_pool *pool) + + if (pool->base.dmcu != NULL) + dce_dmcu_destroy(&pool->base.dmcu); ++ ++ if (pool->base.oem_device != NULL) { ++ struct dc *dc = pool->base.oem_device->ctx->dc; ++ ++ dc->link_srv->destroy_ddc_service(&pool->base.oem_device); ++ } + } + + static void read_dce_straps( +@@ -1054,6 +1061,7 @@ static bool dce120_resource_construct( + struct dc *dc, + struct dce110_resource_pool *pool) + { ++ struct ddc_service_init_data ddc_init_data = {0}; + unsigned int i; + int j; + struct dc_context *ctx = dc->ctx; +@@ -1257,6 +1265,15 @@ static bool dce120_resource_construct( + + bw_calcs_data_update_from_pplib(dc); + ++ if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { ++ ddc_init_data.ctx = dc->ctx; ++ ddc_init_data.link = NULL; ++ ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id; ++ ddc_init_data.id.enum_id = 0; ++ ddc_init_data.id.type = OBJECT_TYPE_GENERIC; ++ pool->base.oem_device = dc->link_srv->create_ddc_service(&ddc_init_data); ++ } ++ + return true; + + irqs_create_fail: diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c -index 0fa6fbee1978..20d28c68593d 100644 +index e8ae7681bf0a..8a0d873983f3 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c -@@ -3276,6 +3276,9 @@ static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev, +@@ -3178,6 +3178,9 @@ static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev, struct device_attribute *attr, char *buf) { @@ -7648,167 +7381,11 @@ index 0fa6fbee1978..20d28c68593d 100644 
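Because the adapter registers under the fixed name "AMDGPU DM i2c OEM bus" (see the create_i2c() change above), userspace can locate the new bus through i2c-dev. A small sketch, assuming CONFIG_I2C_CHARDEV and with error handling trimmed:

#include <dirent.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

static int open_oem_i2c_bus(void)
{
        DIR *dir = opendir("/sys/class/i2c-dev");
        struct dirent *de;
        char path[288], name[64];

        if (!dir)
                return -1;
        while ((de = readdir(dir)) != NULL) {
                FILE *f;

                if (de->d_name[0] == '.')
                        continue;
                snprintf(path, sizeof(path),
                         "/sys/class/i2c-dev/%s/name", de->d_name);
                f = fopen(path, "r");
                if (!f)
                        continue;
                if (fgets(name, sizeof(name), f) &&
                    strncmp(name, "AMDGPU DM i2c OEM bus", 21) == 0) {
                        fclose(f);
                        closedir(dir);
                        snprintf(path, sizeof(path), "/dev/%s", de->d_name);
                        return open(path, O_RDWR);
                }
                fclose(f);
        }
        closedir(dir);
        return -1;
}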
return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_MIN); } -@@ -4217,6 +4220,117 @@ static umode_t fan_minimum_pwm_visible(struct amdgpu_device *adev) - return umode; - } - -+/** -+ * DOC: fan_zero_rpm_enable -+ * -+ * The amdgpu driver provides a sysfs API for checking and adjusting the -+ * zero RPM feature. -+ * -+ * Reading back the file shows you the current setting and the permitted -+ * ranges if changable. -+ * -+ * Writing an integer to the file, change the setting accordingly. -+ * -+ * When you have finished the editing, write "c" (commit) to the file to commit -+ * your changes. -+ * -+ * If you want to reset to the default value, write "r" (reset) to the file to -+ * reset them. -+ */ -+static ssize_t fan_zero_rpm_enable_show(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ char *buf) -+{ -+ struct od_kobj *container = container_of(kobj, struct od_kobj, kobj); -+ struct amdgpu_device *adev = (struct amdgpu_device *)container->priv; -+ -+ return (ssize_t)amdgpu_retrieve_od_settings(adev, OD_FAN_ZERO_RPM_ENABLE, buf); -+} -+ -+static ssize_t fan_zero_rpm_enable_store(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ const char *buf, -+ size_t count) -+{ -+ struct od_kobj *container = container_of(kobj, struct od_kobj, kobj); -+ struct amdgpu_device *adev = (struct amdgpu_device *)container->priv; -+ -+ return (ssize_t)amdgpu_distribute_custom_od_settings(adev, -+ PP_OD_EDIT_FAN_ZERO_RPM_ENABLE, -+ buf, -+ count); -+} -+ -+static umode_t fan_zero_rpm_enable_visible(struct amdgpu_device *adev) -+{ -+ umode_t umode = 0000; -+ -+ if (adev->pm.od_feature_mask & OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE) -+ umode |= S_IRUSR | S_IRGRP | S_IROTH; -+ -+ if (adev->pm.od_feature_mask & OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET) -+ umode |= S_IWUSR; -+ -+ return umode; -+} -+ -+/** -+ * DOC: fan_zero_rpm_stop_temperature -+ * -+ * The amdgpu driver provides a sysfs API for checking and adjusting the -+ * zero RPM stop temperature feature. -+ * -+ * Reading back the file shows you the current setting and the permitted -+ * ranges if changable. -+ * -+ * Writing an integer to the file, change the setting accordingly. -+ * -+ * When you have finished the editing, write "c" (commit) to the file to commit -+ * your changes. -+ * -+ * If you want to reset to the default value, write "r" (reset) to the file to -+ * reset them. -+ * -+ * This setting works only if the Zero RPM setting is enabled. It adjusts the -+ * temperature below which the fan can stop. 
-+ */ -+static ssize_t fan_zero_rpm_stop_temp_show(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ char *buf) -+{ -+ struct od_kobj *container = container_of(kobj, struct od_kobj, kobj); -+ struct amdgpu_device *adev = (struct amdgpu_device *)container->priv; -+ -+ return (ssize_t)amdgpu_retrieve_od_settings(adev, OD_FAN_ZERO_RPM_STOP_TEMP, buf); -+} -+ -+static ssize_t fan_zero_rpm_stop_temp_store(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ const char *buf, -+ size_t count) -+{ -+ struct od_kobj *container = container_of(kobj, struct od_kobj, kobj); -+ struct amdgpu_device *adev = (struct amdgpu_device *)container->priv; -+ -+ return (ssize_t)amdgpu_distribute_custom_od_settings(adev, -+ PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP, -+ buf, -+ count); -+} -+ -+static umode_t fan_zero_rpm_stop_temp_visible(struct amdgpu_device *adev) -+{ -+ umode_t umode = 0000; -+ -+ if (adev->pm.od_feature_mask & OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_RETRIEVE) -+ umode |= S_IRUSR | S_IRGRP | S_IROTH; -+ -+ if (adev->pm.od_feature_mask & OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_SET) -+ umode |= S_IWUSR; -+ -+ return umode; -+} -+ - static struct od_feature_set amdgpu_od_set = { - .containers = { - [0] = { -@@ -4262,6 +4376,22 @@ static struct od_feature_set amdgpu_od_set = { - .store = fan_minimum_pwm_store, - }, - }, -+ [5] = { -+ .name = "fan_zero_rpm_enable", -+ .ops = { -+ .is_visible = fan_zero_rpm_enable_visible, -+ .show = fan_zero_rpm_enable_show, -+ .store = fan_zero_rpm_enable_store, -+ }, -+ }, -+ [6] = { -+ .name = "fan_zero_rpm_stop_temperature", -+ .ops = { -+ .is_visible = fan_zero_rpm_stop_temp_visible, -+ .show = fan_zero_rpm_stop_temp_show, -+ .store = fan_zero_rpm_stop_temp_store, -+ }, -+ }, - }, - }, - }, -diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h -index f5bf41f21c41..363af8990aa2 100644 ---- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h -+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h -@@ -328,6 +328,10 @@ struct config_table_setting - #define OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_SET BIT(7) - #define OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE BIT(8) - #define OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET BIT(9) -+#define OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE BIT(10) -+#define OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET BIT(11) -+#define OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_RETRIEVE BIT(12) -+#define OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_SET BIT(13) - - struct amdgpu_pm { - struct mutex mutex; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c -index 0c0b9aa44dfa..c7d977890f47 100644 +index 21bd635bcdfc..6f4032038fc7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c -@@ -2792,7 +2792,10 @@ int smu_get_power_limit(void *handle, +@@ -2809,7 +2809,10 @@ int smu_get_power_limit(void *handle, *limit = smu->max_power_limit; break; case SMU_PPT_LIMIT_MIN: @@ -7820,7 +7397,7 @@ index 0c0b9aa44dfa..c7d977890f47 100644 break; default: return -EINVAL; -@@ -2816,7 +2819,14 @@ static int smu_set_power_limit(void *handle, uint32_t limit) +@@ -2833,7 +2836,14 @@ static int smu_set_power_limit(void *handle, uint32_t limit) if (smu->ppt_funcs->set_power_limit) return smu->ppt_funcs->set_power_limit(smu, limit_type, limit); @@ -7836,352 +7413,6 @@ index 0c0b9aa44dfa..c7d977890f47 100644 dev_err(smu->adev->dev, "New power limit (%d) is out of range [%d,%d]\n", limit, smu->min_power_limit, smu->max_power_limit); -@@ -2895,6 +2905,10 @@ static enum smu_clk_type 
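The DOC comments above describe a staged protocol: write an integer to stage a value, then "c" to commit or "r" to reset. Driven from userspace that looks roughly like this -- a minimal sketch; the sysfs path assumes the usual card0 gpu_od/fan_ctrl layout:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int od_write(const char *path, const char *val)
{
        int fd = open(path, O_WRONLY);
        ssize_t n;

        if (fd < 0)
                return -1;
        n = write(fd, val, strlen(val));
        close(fd);
        return n < 0 ? -1 : 0;
}

int main(void)
{
        const char *f =
                "/sys/class/drm/card0/device/gpu_od/fan_ctrl/fan_zero_rpm_enable";

        if (od_write(f, "0"))           /* stage: disable zero-RPM mode */
                return 1;
        return od_write(f, "c") ? 1 : 0;        /* commit the staged value */
}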
smu_convert_to_smuclk(enum pp_clock_type type) - clk_type = SMU_OD_FAN_TARGET_TEMPERATURE; break; - case OD_FAN_MINIMUM_PWM: - clk_type = SMU_OD_FAN_MINIMUM_PWM; break; -+ case OD_FAN_ZERO_RPM_ENABLE: -+ clk_type = SMU_OD_FAN_ZERO_RPM_ENABLE; break; -+ case OD_FAN_ZERO_RPM_STOP_TEMP: -+ clk_type = SMU_OD_FAN_ZERO_RPM_STOP_TEMP; break; - default: - clk_type = SMU_CLK_COUNT; break; - } -diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h -index e71a721c12b9..a299dc4a8071 100644 ---- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h -+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h -@@ -313,6 +313,8 @@ enum smu_clk_type { - SMU_OD_ACOUSTIC_TARGET, - SMU_OD_FAN_TARGET_TEMPERATURE, - SMU_OD_FAN_MINIMUM_PWM, -+ SMU_OD_FAN_ZERO_RPM_ENABLE, -+ SMU_OD_FAN_ZERO_RPM_STOP_TEMP, - SMU_CLK_COUNT, - }; - -diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c -index a93739688071..5fa58ef65371 100644 ---- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c -+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c -@@ -107,6 +107,8 @@ - #define PP_OD_FEATURE_FAN_ACOUSTIC_TARGET 8 - #define PP_OD_FEATURE_FAN_TARGET_TEMPERATURE 9 - #define PP_OD_FEATURE_FAN_MINIMUM_PWM 10 -+#define PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE 11 -+#define PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP 12 - - #define LINK_SPEED_MAX 3 - -@@ -1143,6 +1145,14 @@ static void smu_v13_0_0_get_od_setting_limits(struct smu_context *smu, - od_min_setting = overdrive_lowerlimits->FanMinimumPwm; - od_max_setting = overdrive_upperlimits->FanMinimumPwm; - break; -+ case PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE: -+ od_min_setting = overdrive_lowerlimits->FanZeroRpmEnable; -+ od_max_setting = overdrive_upperlimits->FanZeroRpmEnable; -+ break; -+ case PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP: -+ od_min_setting = overdrive_lowerlimits->FanZeroRpmStopTemp; -+ od_max_setting = overdrive_upperlimits->FanZeroRpmStopTemp; -+ break; - default: - od_min_setting = od_max_setting = INT_MAX; - break; -@@ -1463,6 +1473,42 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu, - min_value, max_value); - break; - -+ case SMU_OD_FAN_ZERO_RPM_ENABLE: -+ if (!smu_v13_0_0_is_od_feature_supported(smu, -+ PP_OD_FEATURE_ZERO_FAN_BIT)) -+ break; -+ -+ size += sysfs_emit_at(buf, size, "FAN_ZERO_RPM_ENABLE:\n"); -+ size += sysfs_emit_at(buf, size, "%d\n", -+ (int)od_table->OverDriveTable.FanZeroRpmEnable); -+ -+ size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); -+ smu_v13_0_0_get_od_setting_limits(smu, -+ PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE, -+ &min_value, -+ &max_value); -+ size += sysfs_emit_at(buf, size, "ZERO_RPM_ENABLE: %u %u\n", -+ min_value, max_value); -+ break; -+ -+ case SMU_OD_FAN_ZERO_RPM_STOP_TEMP: -+ if (!smu_v13_0_0_is_od_feature_supported(smu, -+ PP_OD_FEATURE_ZERO_FAN_BIT)) -+ break; -+ -+ size += sysfs_emit_at(buf, size, "FAN_ZERO_RPM_STOP_TEMPERATURE:\n"); -+ size += sysfs_emit_at(buf, size, "%d\n", -+ (int)od_table->OverDriveTable.FanZeroRpmStopTemp); -+ -+ size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); -+ smu_v13_0_0_get_od_setting_limits(smu, -+ PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP, -+ &min_value, -+ &max_value); -+ size += sysfs_emit_at(buf, size, "ZERO_RPM_STOP_TEMPERATURE: %u %u\n", -+ min_value, max_value); -+ break; -+ - case SMU_OD_RANGE: - if (!smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT) && - !smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT) && -@@ -1560,6 +1606,16 @@ static int 
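The print_clk_levels() additions above lean on sysfs_emit_at(), which formats into the PAGE_SIZE sysfs buffer at the given offset and returns the number of bytes written, so show() callbacks accumulate an offset across calls. The idiom, reduced to a toy kernel-side callback:

static ssize_t fan_od_show(struct kobject *kobj, struct kobj_attribute *attr,
                           char *buf)
{
        int size = 0;

        /* each call appends at offset `size` and returns the bytes added */
        size += sysfs_emit_at(buf, size, "FAN_ZERO_RPM_ENABLE:\n");
        size += sysfs_emit_at(buf, size, "%d\n", 1);
        size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE");
        size += sysfs_emit_at(buf, size, "ZERO_RPM_ENABLE: %u %u\n", 0, 1);
        return size;
}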
smu_v13_0_0_od_restore_table_single(struct smu_context *smu, long inp - od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; - od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); - break; -+ case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: -+ od_table->OverDriveTable.FanZeroRpmEnable = -+ boot_overdrive_table->OverDriveTable.FanZeroRpmEnable; -+ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); -+ break; -+ case PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP: -+ od_table->OverDriveTable.FanZeroRpmStopTemp = -+ boot_overdrive_table->OverDriveTable.FanZeroRpmStopTemp; -+ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); -+ break; - default: - dev_info(adev->dev, "Invalid table index: %ld\n", input); - return -EINVAL; -@@ -1853,6 +1909,48 @@ static int smu_v13_0_0_od_edit_dpm_table(struct smu_context *smu, - od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); - break; - -+ case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: -+ if (!smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_ZERO_FAN_BIT)) { -+ dev_warn(adev->dev, "Zero RPM setting not supported!\n"); -+ return -ENOTSUPP; -+ } -+ -+ smu_v13_0_0_get_od_setting_limits(smu, -+ PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE, -+ &minimum, -+ &maximum); -+ if (input[0] < minimum || -+ input[0] > maximum) { -+ dev_info(adev->dev, "zero RPM enable setting(%ld) must be within [%d, %d]!\n", -+ input[0], minimum, maximum); -+ return -EINVAL; -+ } -+ -+ od_table->OverDriveTable.FanZeroRpmEnable = input[0]; -+ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); -+ break; -+ -+ case PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP: -+ if (!smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_ZERO_FAN_BIT)) { -+ dev_warn(adev->dev, "Zero RPM setting not supported!\n"); -+ return -ENOTSUPP; -+ } -+ -+ smu_v13_0_0_get_od_setting_limits(smu, -+ PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP, -+ &minimum, -+ &maximum); -+ if (input[0] < minimum || -+ input[0] > maximum) { -+ dev_info(adev->dev, "zero RPM stop temperature setting(%ld) must be within [%d, %d]!\n", -+ input[0], minimum, maximum); -+ return -EINVAL; -+ } -+ -+ od_table->OverDriveTable.FanZeroRpmStopTemp = input[0]; -+ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); -+ break; -+ - case PP_OD_RESTORE_DEFAULT_TABLE: - if (size == 1) { - ret = smu_v13_0_0_od_restore_table_single(smu, input[0]); -@@ -2122,7 +2220,11 @@ static void smu_v13_0_0_set_supported_od_feature_mask(struct smu_context *smu) - OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_RETRIEVE | - OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_SET | - OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE | -- OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET; -+ OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET | -+ OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE | -+ OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET | -+ OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_RETRIEVE | -+ OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_SET; - } - - static int smu_v13_0_0_set_default_od_settings(struct smu_context *smu) -@@ -2188,6 +2290,10 @@ static int smu_v13_0_0_set_default_od_settings(struct smu_context *smu) - user_od_table_bak.OverDriveTable.FanTargetTemperature; - user_od_table->OverDriveTable.FanMinimumPwm = - user_od_table_bak.OverDriveTable.FanMinimumPwm; -+ user_od_table->OverDriveTable.FanZeroRpmEnable = -+ user_od_table_bak.OverDriveTable.FanZeroRpmEnable; -+ user_od_table->OverDriveTable.FanZeroRpmStopTemp = -+ user_od_table_bak.OverDriveTable.FanZeroRpmStopTemp; - } - - smu_v13_0_0_set_supported_od_feature_mask(smu); -diff --git 
a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c -index 1aedfafa507f..d19c63a8459a 100644 ---- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c -+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c -@@ -83,6 +83,8 @@ - #define PP_OD_FEATURE_FAN_ACOUSTIC_TARGET 8 - #define PP_OD_FEATURE_FAN_TARGET_TEMPERATURE 9 - #define PP_OD_FEATURE_FAN_MINIMUM_PWM 10 -+#define PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE 11 -+#define PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP 12 - - #define LINK_SPEED_MAX 3 - -@@ -1132,6 +1134,14 @@ static void smu_v13_0_7_get_od_setting_limits(struct smu_context *smu, - od_min_setting = overdrive_lowerlimits->FanMinimumPwm; - od_max_setting = overdrive_upperlimits->FanMinimumPwm; - break; -+ case PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE: -+ od_min_setting = overdrive_lowerlimits->FanZeroRpmEnable; -+ od_max_setting = overdrive_upperlimits->FanZeroRpmEnable; -+ break; -+ case PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP: -+ od_min_setting = overdrive_lowerlimits->FanZeroRpmStopTemp; -+ od_max_setting = overdrive_upperlimits->FanZeroRpmStopTemp; -+ break; - default: - od_min_setting = od_max_setting = INT_MAX; - break; -@@ -1452,6 +1462,42 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu, - min_value, max_value); - break; - -+ case SMU_OD_FAN_ZERO_RPM_ENABLE: -+ if (!smu_v13_0_7_is_od_feature_supported(smu, -+ PP_OD_FEATURE_ZERO_FAN_BIT)) -+ break; -+ -+ size += sysfs_emit_at(buf, size, "FAN_ZERO_RPM_ENABLE:\n"); -+ size += sysfs_emit_at(buf, size, "%d\n", -+ (int)od_table->OverDriveTable.FanZeroRpmEnable); -+ -+ size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); -+ smu_v13_0_7_get_od_setting_limits(smu, -+ PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE, -+ &min_value, -+ &max_value); -+ size += sysfs_emit_at(buf, size, "ZERO_RPM_ENABLE: %u %u\n", -+ min_value, max_value); -+ break; -+ -+ case SMU_OD_FAN_ZERO_RPM_STOP_TEMP: -+ if (!smu_v13_0_7_is_od_feature_supported(smu, -+ PP_OD_FEATURE_ZERO_FAN_BIT)) -+ break; -+ -+ size += sysfs_emit_at(buf, size, "FAN_ZERO_RPM_STOP_TEMPERATURE:\n"); -+ size += sysfs_emit_at(buf, size, "%d\n", -+ (int)od_table->OverDriveTable.FanZeroRpmStopTemp); -+ -+ size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); -+ smu_v13_0_7_get_od_setting_limits(smu, -+ PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP, -+ &min_value, -+ &max_value); -+ size += sysfs_emit_at(buf, size, "ZERO_RPM_STOP_TEMPERATURE: %u %u\n", -+ min_value, max_value); -+ break; -+ - case SMU_OD_RANGE: - if (!smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT) && - !smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT) && -@@ -1548,6 +1594,16 @@ static int smu_v13_0_7_od_restore_table_single(struct smu_context *smu, long inp - od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; - od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); - break; -+ case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: -+ od_table->OverDriveTable.FanZeroRpmEnable = -+ boot_overdrive_table->OverDriveTable.FanZeroRpmEnable; -+ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); -+ break; -+ case PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP: -+ od_table->OverDriveTable.FanZeroRpmStopTemp = -+ boot_overdrive_table->OverDriveTable.FanZeroRpmStopTemp; -+ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); -+ break; - default: - dev_info(adev->dev, "Invalid table index: %ld\n", input); - return -EINVAL; -@@ -1841,6 +1897,48 @@ static int smu_v13_0_7_od_edit_dpm_table(struct smu_context *smu, - 
od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); - break; - -+ case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: -+ if (!smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_ZERO_FAN_BIT)) { -+ dev_warn(adev->dev, "Zero RPM setting not supported!\n"); -+ return -ENOTSUPP; -+ } -+ -+ smu_v13_0_7_get_od_setting_limits(smu, -+ PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE, -+ &minimum, -+ &maximum); -+ if (input[0] < minimum || -+ input[0] > maximum) { -+ dev_info(adev->dev, "zero RPM enable setting(%ld) must be within [%d, %d]!\n", -+ input[0], minimum, maximum); -+ return -EINVAL; -+ } -+ -+ od_table->OverDriveTable.FanZeroRpmEnable = input[0]; -+ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); -+ break; -+ -+ case PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP: -+ if (!smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_ZERO_FAN_BIT)) { -+ dev_warn(adev->dev, "Zero RPM setting not supported!\n"); -+ return -ENOTSUPP; -+ } -+ -+ smu_v13_0_7_get_od_setting_limits(smu, -+ PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP, -+ &minimum, -+ &maximum); -+ if (input[0] < minimum || -+ input[0] > maximum) { -+ dev_info(adev->dev, "zero RPM stop temperature setting(%ld) must be within [%d, %d]!\n", -+ input[0], minimum, maximum); -+ return -EINVAL; -+ } -+ -+ od_table->OverDriveTable.FanZeroRpmStopTemp = input[0]; -+ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); -+ break; -+ - case PP_OD_RESTORE_DEFAULT_TABLE: - if (size == 1) { - ret = smu_v13_0_7_od_restore_table_single(smu, input[0]); -@@ -2108,7 +2206,11 @@ static void smu_v13_0_7_set_supported_od_feature_mask(struct smu_context *smu) - OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_RETRIEVE | - OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_SET | - OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE | -- OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET; -+ OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET | -+ OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE | -+ OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET | -+ OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_RETRIEVE | -+ OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_SET; - } - - static int smu_v13_0_7_set_default_od_settings(struct smu_context *smu) -@@ -2174,6 +2276,10 @@ static int smu_v13_0_7_set_default_od_settings(struct smu_context *smu) - user_od_table_bak.OverDriveTable.FanTargetTemperature; - user_od_table->OverDriveTable.FanMinimumPwm = - user_od_table_bak.OverDriveTable.FanMinimumPwm; -+ user_od_table->OverDriveTable.FanZeroRpmEnable = -+ user_od_table_bak.OverDriveTable.FanZeroRpmEnable; -+ user_od_table->OverDriveTable.FanZeroRpmStopTemp = -+ user_od_table_bak.OverDriveTable.FanZeroRpmStopTemp; - } - - smu_v13_0_7_set_supported_od_feature_mask(smu); diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index b5cbb57ee5f6..a0f7fa1518c6 100644 --- a/drivers/input/evdev.c @@ -12519,10 +11750,10 @@ index 000000000000..e105e6f5cc91 +MODULE_AUTHOR("Daniel Drake "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index 8103bc24a54e..5448adb10b21 100644 +index 76f4df75b08a..49c1a91c611d 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c -@@ -3747,6 +3747,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev) +@@ -3746,6 +3746,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev) dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET; } @@ -12629,7 +11860,7 @@ index 8103bc24a54e..5448adb10b21 100644 /* * Some NVIDIA GPU devices do not work with bus reset, SBR needs to be * prevented for those affected devices. 
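Each od_edit handler above repeats the same three-step shape: look up the table limits, reject out-of-range input (rather than clamping it), then stage the value in the shadow OD table and set the owning feature's bit in FeatureCtrlMask so the commit path uploads it. As a skeleton -- helper names and parameters here are placeholders, not real driver symbols:

static int od_edit_one(struct smu_context *smu, long val, int feature_id,
                       uint32_t *staged_slot, uint32_t *ctrl_mask,
                       unsigned int dirty_bit)
{
        int32_t min, max;

        get_limits(smu, feature_id, &min, &max);  /* pptable bounds */
        if (val < min || val > max)
                return -EINVAL;                   /* reject, don't clamp */
        *staged_slot = (uint32_t)val;             /* staged only */
        *ctrl_mask |= BIT(dirty_bit);             /* flushed on commit */
        return 0;
}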
-@@ -5171,6 +5271,7 @@ static const struct pci_dev_acs_enabled { +@@ -5170,6 +5270,7 @@ static const struct pci_dev_acs_enabled { { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, /* Wangxun nics */ { PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs }, @@ -13822,10 +13053,10 @@ index 000000000000..7531223355e5 +module_exit(vhba_exit); + diff --git a/include/linux/mm.h b/include/linux/mm.h -index 8617adc6becd..f58cc697122c 100644 +index b1c3db9cf355..e38685ece897 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h -@@ -205,6 +205,14 @@ static inline void __mm_zero_struct_page(struct page *page) +@@ -206,6 +206,14 @@ static inline void __mm_zero_struct_page(struct page *page) extern int sysctl_max_map_count; @@ -13841,10 +13072,10 @@ index 8617adc6becd..f58cc697122c 100644 extern unsigned long sysctl_admin_reserve_kbytes; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h -index 68a5f1ff3301..291873a34079 100644 +index bcf0865a38ae..e748afb0ce06 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h -@@ -1362,7 +1362,7 @@ struct readahead_control { +@@ -1365,7 +1365,7 @@ struct readahead_control { ._index = i, \ } @@ -13876,7 +13107,7 @@ index 7183e5aca282..56573371a2f8 100644 { return &init_user_ns; diff --git a/include/linux/wait.h b/include/linux/wait.h -index 2b322a9b88a2..6f977c21fd81 100644 +index 6d90ad974408..d04768b01364 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -163,6 +163,7 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head) @@ -13896,7 +13127,7 @@ index 2b322a9b88a2..6f977c21fd81 100644 void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout); diff --git a/init/Kconfig b/init/Kconfig -index 7256fa127530..857869dbc22c 100644 +index a20e6efd3f0f..9437171030e2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -154,6 +154,10 @@ config THREAD_INFO_IN_TASK @@ -13910,7 +13141,7 @@ index 7256fa127530..857869dbc22c 100644 config BROKEN bool -@@ -1309,6 +1313,22 @@ config USER_NS +@@ -1310,6 +1314,22 @@ config USER_NS If unsure, say N. @@ -13933,7 +13164,7 @@ index 7256fa127530..857869dbc22c 100644 config PID_NS bool "PID Namespaces" default y -@@ -1451,6 +1471,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE +@@ -1452,6 +1472,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE with the "-O2" compiler flag for best performance and most helpful compile-time warnings. 
@@ -13988,11 +13219,24 @@ index 38ef6d06888e..0f78364efd4f 100644 default 1000 if HZ_1000 config SCHED_HRTICK +diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt +index 54ea59ff8fbe..18f87e0dd137 100644 +--- a/kernel/Kconfig.preempt ++++ b/kernel/Kconfig.preempt +@@ -88,7 +88,7 @@ endchoice + + config PREEMPT_RT + bool "Fully Preemptible Kernel (Real-Time)" +- depends on EXPERT && ARCH_SUPPORTS_RT && !COMPILE_TEST ++ depends on ARCH_SUPPORTS_RT && !COMPILE_TEST + select PREEMPTION + help + This option turns the kernel into a real-time kernel by replacing diff --git a/kernel/fork.c b/kernel/fork.c -index e192bdbc9ade..d27b8f5582df 100644 +index 9b301180fd41..0cb5431b4d7e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -107,6 +107,10 @@ +@@ -106,6 +106,10 @@ #include #include @@ -14003,7 +13247,7 @@ index e192bdbc9ade..d27b8f5582df 100644 #include #include #include -@@ -2157,6 +2161,10 @@ __latent_entropy struct task_struct *copy_process( +@@ -2158,6 +2162,10 @@ __latent_entropy struct task_struct *copy_process( if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) return ERR_PTR(-EINVAL); @@ -14014,7 +13258,7 @@ index e192bdbc9ade..d27b8f5582df 100644 /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. -@@ -3310,6 +3318,12 @@ int ksys_unshare(unsigned long unshare_flags) +@@ -3311,6 +3319,12 @@ int ksys_unshare(unsigned long unshare_flags) if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; @@ -14028,7 +13272,7 @@ index e192bdbc9ade..d27b8f5582df 100644 if (err) goto bad_unshare_out; diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c -index 2bbb6eca5144..125cdf85741c 100644 +index 2ddb827e3bea..464049c4af3f 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -747,6 +747,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) @@ -14050,7 +13294,7 @@ index 2bbb6eca5144..125cdf85741c 100644 return state; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 1ca96c99872f..d06d306b7fba 100644 +index 26958431deb7..8c0f17a96d4f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -73,10 +73,19 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; @@ -14087,10 +13331,10 @@ index 1ca96c99872f..d06d306b7fba 100644 #ifdef CONFIG_NUMA_BALANCING /* Restrict the NUMA promotion throughput (MB/s) for each target node. 
*/ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index f2ef520513c4..d6e2ca8c8cd2 100644 +index c5d67a43fe52..da653eba7884 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -2825,7 +2825,7 @@ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags); +@@ -2820,7 +2820,7 @@ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags); extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags); @@ -14142,7 +13386,7 @@ index 51e38f5f4701..c5cc616484ba 100644 { wq_entry->flags = flags; diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index 79e6cb1d5c48..4a62ae02a2c0 100644 +index 5c9202cb8f59..de4ddf79fe97 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -80,6 +80,9 @@ @@ -14155,7 +13399,7 @@ index 79e6cb1d5c48..4a62ae02a2c0 100644 /* shared constants to be used in various sysctls */ const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 }; -@@ -1618,6 +1621,15 @@ static struct ctl_table kern_table[] = { +@@ -1617,6 +1620,15 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, @@ -14171,7 +13415,7 @@ index 79e6cb1d5c48..4a62ae02a2c0 100644 #ifdef CONFIG_PROC_SYSCTL { .procname = "tainted", -@@ -2198,6 +2210,40 @@ static struct ctl_table vm_table[] = { +@@ -2197,6 +2209,40 @@ static struct ctl_table vm_table[] = { .extra1 = SYSCTL_ZERO, }, #endif @@ -14231,7 +13475,7 @@ index aa0b2e47f2f2..d74d857b1696 100644 static DEFINE_MUTEX(userns_state_mutex); diff --git a/mm/Kconfig b/mm/Kconfig -index 33fa51d608dc..87cb63f7ca57 100644 +index 84000b016808..1d96c5cc35d3 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -499,6 +499,69 @@ config ARCH_WANT_OPTIMIZE_DAX_VMEMMAP @@ -14330,10 +13574,10 @@ index a2b16b08cbbf..48d611e58ad3 100644 static int __read_mostly sysctl_compact_memory; diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index 7e0f72cd9fd4..4410a8b06db3 100644 +index db64116a4f84..3e0266c973e1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c -@@ -65,7 +65,11 @@ unsigned long transparent_hugepage_flags __read_mostly = +@@ -64,7 +64,11 @@ unsigned long transparent_hugepage_flags __read_mostly = #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE (1<> (20 - PAGE_SHIFT); /* Use a smaller cluster for small-memory machines */ -@@ -1091,4 +1095,5 @@ void __init swap_setup(void) +@@ -1060,4 +1064,5 @@ void __init swap_setup(void) * Right now other parts of the system means that we * _really_ don't want to cluster much more */ @@ -14439,7 +13683,7 @@ index bd5183dfd879..3a410f53a07c 100644 /* diff --git a/mm/vmscan.c b/mm/vmscan.c -index 67a680e4b484..9b9304c91dcc 100644 +index b1ec5ece067e..e258174d240a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -148,6 +148,15 @@ struct scan_control { @@ -14475,7 +13719,7 @@ index 67a680e4b484..9b9304c91dcc 100644 * From 0 .. MAX_SWAPPINESS. Higher means more swappy. */ +#ifdef CONFIG_CACHY -+int vm_swappiness = 20; ++int vm_swappiness = 100; +#else int vm_swappiness = 60; +#endif @@ -14493,7 +13737,7 @@ index 67a680e4b484..9b9304c91dcc 100644 /* * The number of dirty pages determines if a node is marked * reclaim_congested. 
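For scale: vm_swappiness enters get_scan_count() as the weight of the anonymous list, while MAX_SWAPPINESS - swappiness (MAX_SWAPPINESS is 200) weights the file list, so the CONFIG_CACHY default of 100 above treats both lists equally where the stock 60 prefers file reclaim at roughly 140:60. A toy illustration of just that ratio (simplified; the real balance also folds in per-list cost feedback):

#include <stdio.h>

#define MAX_SWAPPINESS 200

int main(void)
{
        int swappiness = 100;   /* the CONFIG_CACHY default above */

        printf("anon:file scan weight ~ %d:%d\n",
               swappiness, MAX_SWAPPINESS - swappiness);
        return 0;
}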
kswapd will stall and start writing -@@ -2422,6 +2448,15 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, +@@ -2423,6 +2449,15 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, goto out; } @@ -14509,7 +13753,7 @@ index 67a680e4b484..9b9304c91dcc 100644 /* * If there is enough inactive page cache, we do not reclaim * anything from the anonymous working right now. -@@ -2566,6 +2601,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, +@@ -2567,6 +2602,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, BUG(); } @@ -14524,7 +13768,7 @@ index 67a680e4b484..9b9304c91dcc 100644 nr[lru] = scan; } } -@@ -2585,6 +2628,96 @@ static bool can_age_anon_pages(struct pglist_data *pgdat, +@@ -2586,6 +2629,96 @@ static bool can_age_anon_pages(struct pglist_data *pgdat, return can_demote(pgdat->node_id, sc); } @@ -14621,7 +13865,7 @@ index 67a680e4b484..9b9304c91dcc 100644 #ifdef CONFIG_LRU_GEN #ifdef CONFIG_LRU_GEN_ENABLED -@@ -4535,6 +4668,12 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw +@@ -4539,6 +4672,12 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw */ if (!swappiness) type = LRU_GEN_FILE; @@ -14634,7 +13878,7 @@ index 67a680e4b484..9b9304c91dcc 100644 else if (min_seq[LRU_GEN_ANON] < min_seq[LRU_GEN_FILE]) type = LRU_GEN_ANON; else if (swappiness == 1) -@@ -4814,6 +4953,8 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) +@@ -4829,6 +4968,8 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); @@ -14643,7 +13887,7 @@ index 67a680e4b484..9b9304c91dcc 100644 /* lru_gen_age_node() called mem_cgroup_calculate_protection() */ if (mem_cgroup_below_min(NULL, memcg)) return MEMCG_LRU_YOUNG; -@@ -5961,6 +6102,8 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) +@@ -5977,6 +6118,8 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) prepare_scan_control(pgdat, sc); @@ -14653,7 +13897,7 @@ index 67a680e4b484..9b9304c91dcc 100644 flush_reclaim_state(sc); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c -index fe7947f77406..99e138cfdd95 100644 +index 6872b5aff73e..1910fe1b2471 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -634,7 +634,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) @@ -14668,1649 +13912,805 @@ index fe7947f77406..99e138cfdd95 100644 -- 2.48.0.rc1 -From 4229b688e49e9265bfe5b86bb8b5babd82b73acf Mon Sep 17 00:00:00 2001 +From 7bc012030531a472b823293e167a86cd58da545c Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 9 Jan 2025 16:37:17 +0100 -Subject: [PATCH 07/13] crypto +Date: Mon, 20 Jan 2025 13:22:05 +0100 +Subject: [PATCH 05/12] crypto Signed-off-by: Peter Jung --- - arch/x86/crypto/Kconfig | 4 +- - arch/x86/crypto/aegis128-aesni-asm.S | 533 ++++++++-------------- - arch/x86/crypto/aegis128-aesni-glue.c | 145 +++--- - arch/x86/crypto/crc32c-intel_glue.c | 2 +- - arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 354 +++++--------- - 5 files changed, 387 insertions(+), 651 deletions(-) + arch/x86/crypto/aes-gcm-avx10-x86_64.S | 119 ++++----- + arch/x86/crypto/aes-xts-avx-x86_64.S | 329 +++++++++++++------------ + arch/x86/crypto/aesni-intel_glue.c | 10 +- + 3 files changed, 221 insertions(+), 237 deletions(-) -diff --git a/arch/x86/crypto/Kconfig 
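The _aesenclast_and_xor_4x macro above relies on the identity vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a), which holds because AESENCLAST's final step is an XOR with the round key; the earlier vpternlogd hunk is the same kind of trick, 0x78 being the ternary-logic truth table for a ^ (b & c). A standalone self-check of the AESENCLAST identity with AES-NI intrinsics (assumes an x86 toolchain; build with gcc -maes -O2):

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
        __m128i a   = _mm_set1_epi32(0x01234567);
        __m128i b   = _mm_set1_epi32((int)0x89abcdef);
        __m128i key = _mm_set1_epi32(0x0f1e2d3c);

        /* lhs = SubBytes(ShiftRows(a)) ^ key ^ b */
        __m128i lhs = _mm_xor_si128(_mm_aesenclast_si128(a, key), b);
        /* rhs = SubBytes(ShiftRows(a)) ^ (key ^ b) -- the same value */
        __m128i rhs = _mm_aesenclast_si128(a, _mm_xor_si128(key, b));

        printf("identity holds: %d\n",
               _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xffff);
        return 0;
}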
b/arch/x86/crypto/Kconfig -index 7b1bebed879d..3d2e38ba5240 100644 ---- a/arch/x86/crypto/Kconfig -+++ b/arch/x86/crypto/Kconfig -@@ -363,7 +363,7 @@ config CRYPTO_CHACHA20_X86_64 - - AVX-512VL (Advanced Vector Extensions-512VL) +diff --git a/arch/x86/crypto/aes-gcm-avx10-x86_64.S b/arch/x86/crypto/aes-gcm-avx10-x86_64.S +index 97e0ee515fc5..02ee11083d4f 100644 +--- a/arch/x86/crypto/aes-gcm-avx10-x86_64.S ++++ b/arch/x86/crypto/aes-gcm-avx10-x86_64.S +@@ -88,7 +88,7 @@ - config CRYPTO_AEGIS128_AESNI_SSE2 -- tristate "AEAD ciphers: AEGIS-128 (AES-NI/SSE2)" -+ tristate "AEAD ciphers: AEGIS-128 (AES-NI/SSE4.1)" - depends on X86 && 64BIT - select CRYPTO_AEAD - select CRYPTO_SIMD -@@ -372,7 +372,7 @@ config CRYPTO_AEGIS128_AESNI_SSE2 + // A shuffle mask that reflects the bytes of 16-byte blocks + .Lbswap_mask: +- .octa 0x000102030405060708090a0b0c0d0e0f ++ .octa 0x000102030405060708090a0b0c0d0e0f - Architecture: x86_64 using: - - AES-NI (AES New Instructions) -- - SSE2 (Streaming SIMD Extensions 2) -+ - SSE4.1 (Streaming SIMD Extensions 4.1) + // This is the GHASH reducing polynomial without its constant term, i.e. + // x^128 + x^7 + x^2 + x, represented using the backwards mapping +@@ -384,8 +384,8 @@ + vpshufd $0xd3, H_CUR_XMM, %xmm0 + vpsrad $31, %xmm0, %xmm0 + vpaddq H_CUR_XMM, H_CUR_XMM, H_CUR_XMM +- vpand .Lgfpoly_and_internal_carrybit(%rip), %xmm0, %xmm0 +- vpxor %xmm0, H_CUR_XMM, H_CUR_XMM ++ // H_CUR_XMM ^= xmm0 & gfpoly_and_internal_carrybit ++ vpternlogd $0x78, .Lgfpoly_and_internal_carrybit(%rip), %xmm0, H_CUR_XMM - config CRYPTO_NHPOLY1305_SSE2 - tristate "Hash functions: NHPoly1305 (SSE2)" -diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S -index 2de859173940..7294dc0ee7ba 100644 ---- a/arch/x86/crypto/aegis128-aesni-asm.S -+++ b/arch/x86/crypto/aegis128-aesni-asm.S -@@ -1,14 +1,13 @@ - /* SPDX-License-Identifier: GPL-2.0-only */ - /* -- * AES-NI + SSE2 implementation of AEGIS-128 -+ * AES-NI + SSE4.1 implementation of AEGIS-128 - * - * Copyright (c) 2017-2018 Ondrej Mosnacek - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. -+ * Copyright 2024 Google LLC - */ - - #include --#include --#include - - #define STATE0 %xmm0 - #define STATE1 %xmm1 -@@ -20,11 +19,6 @@ - #define T0 %xmm6 - #define T1 %xmm7 - --#define STATEP %rdi --#define LEN %esi --#define SRC %rdx --#define DST %rcx -- - .section .rodata.cst16.aegis128_const, "aM", @progbits, 32 - .align 16 - .Laegis128_const_0: -@@ -34,11 +28,11 @@ - .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 - .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd - --.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16 --.align 16 --.Laegis128_counter: -- .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 -- .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f -+.section .rodata.cst32.zeropad_mask, "aM", @progbits, 32 -+.align 32 -+.Lzeropad_mask: -+ .octa 0xffffffffffffffffffffffffffffffff -+ .octa 0 - - .text - -@@ -61,140 +55,102 @@ + // Load the gfpoly constant. + vbroadcasti32x4 .Lgfpoly(%rip), GFPOLY +@@ -562,6 +562,32 @@ + vpxord RNDKEY0, V3, V3 .endm - /* -- * __load_partial: internal ABI -- * input: -- * LEN - bytes -- * SRC - src -- * output: -- * MSG - message block -- * changed: -- * T0 -- * %r8 -- * %r9 -+ * Load 1 <= LEN (%ecx) <= 15 bytes from the pointer SRC into the xmm register -+ * MSG and zeroize any remaining bytes. Clobbers %rax, %rcx, and %r8. 
- */ --SYM_FUNC_START_LOCAL(__load_partial) -- xor %r9d, %r9d -- pxor MSG, MSG -- -- mov LEN, %r8d -- and $0x1, %r8 -- jz .Lld_partial_1 -- -- mov LEN, %r8d -- and $0x1E, %r8 -- add SRC, %r8 -- mov (%r8), %r9b -- --.Lld_partial_1: -- mov LEN, %r8d -- and $0x2, %r8 -- jz .Lld_partial_2 -- -- mov LEN, %r8d -- and $0x1C, %r8 -- add SRC, %r8 -- shl $0x10, %r9 -- mov (%r8), %r9w -- --.Lld_partial_2: -- mov LEN, %r8d -- and $0x4, %r8 -- jz .Lld_partial_4 -- -- mov LEN, %r8d -- and $0x18, %r8 -- add SRC, %r8 -- shl $32, %r9 -- mov (%r8), %r8d -- xor %r8, %r9 -- --.Lld_partial_4: -- movq %r9, MSG -- -- mov LEN, %r8d -- and $0x8, %r8 -- jz .Lld_partial_8 -- -- mov LEN, %r8d -- and $0x10, %r8 -- add SRC, %r8 -- pslldq $8, MSG -- movq (%r8), T0 -- pxor T0, MSG -- --.Lld_partial_8: -- RET --SYM_FUNC_END(__load_partial) -+.macro load_partial -+ sub $8, %ecx /* LEN - 8 */ -+ jle .Lle8\@ ++// Do the last AES round for four vectors of counter blocks V0-V3, XOR source ++// data with the resulting keystream, and write the result to DST and ++// GHASHDATA[0-3]. (Implementation differs slightly, but has the same effect.) ++.macro _aesenclast_and_xor_4x ++ // XOR the source data with the last round key, saving the result in ++ // GHASHDATA[0-3]. This reduces latency by taking advantage of the ++ // property vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a). ++ vpxord 0*VL(SRC), RNDKEYLAST, GHASHDATA0 ++ vpxord 1*VL(SRC), RNDKEYLAST, GHASHDATA1 ++ vpxord 2*VL(SRC), RNDKEYLAST, GHASHDATA2 ++ vpxord 3*VL(SRC), RNDKEYLAST, GHASHDATA3 + -+ /* Load 9 <= LEN <= 15 bytes: */ -+ movq (SRC), MSG /* Load first 8 bytes */ -+ mov (SRC, %rcx), %rax /* Load last 8 bytes */ -+ neg %ecx -+ shl $3, %ecx -+ shr %cl, %rax /* Discard overlapping bytes */ -+ pinsrq $1, %rax, MSG -+ jmp .Ldone\@ ++ // Do the last AES round. This handles the XOR with the source data ++ // too, as per the optimization described above. ++ vaesenclast GHASHDATA0, V0, GHASHDATA0 ++ vaesenclast GHASHDATA1, V1, GHASHDATA1 ++ vaesenclast GHASHDATA2, V2, GHASHDATA2 ++ vaesenclast GHASHDATA3, V3, GHASHDATA3 + -+.Lle8\@: -+ add $4, %ecx /* LEN - 4 */ -+ jl .Llt4\@ -+ -+ /* Load 4 <= LEN <= 8 bytes: */ -+ mov (SRC), %eax /* Load first 4 bytes */ -+ mov (SRC, %rcx), %r8d /* Load last 4 bytes */ -+ jmp .Lcombine\@ -+ -+.Llt4\@: -+ /* Load 1 <= LEN <= 3 bytes: */ -+ add $2, %ecx /* LEN - 2 */ -+ movzbl (SRC), %eax /* Load first byte */ -+ jl .Lmovq\@ -+ movzwl (SRC, %rcx), %r8d /* Load last 2 bytes */ -+.Lcombine\@: -+ shl $3, %ecx -+ shl %cl, %r8 -+ or %r8, %rax /* Combine the two parts */ -+.Lmovq\@: -+ movq %rax, MSG -+.Ldone\@: ++ // Store the en/decrypted data to DST. ++ vmovdqu8 GHASHDATA0, 0*VL(DST) ++ vmovdqu8 GHASHDATA1, 1*VL(DST) ++ vmovdqu8 GHASHDATA2, 2*VL(DST) ++ vmovdqu8 GHASHDATA3, 3*VL(DST) +.endm - - /* -- * __store_partial: internal ABI -- * input: -- * LEN - bytes -- * DST - dst -- * output: -- * T0 - message block -- * changed: -- * %r8 -- * %r9 -- * %r10 -+ * Store 1 <= LEN (%ecx) <= 15 bytes from the xmm register \msg to the pointer -+ * DST. Clobbers %rax, %rcx, and %r8. 
- */ --SYM_FUNC_START_LOCAL(__store_partial) -- mov LEN, %r8d -- mov DST, %r9 -- -- movq T0, %r10 -- -- cmp $8, %r8 -- jl .Lst_partial_8 -- -- mov %r10, (%r9) -- psrldq $8, T0 -- movq T0, %r10 -- -- sub $8, %r8 -- add $8, %r9 -- --.Lst_partial_8: -- cmp $4, %r8 -- jl .Lst_partial_4 -- -- mov %r10d, (%r9) -- shr $32, %r10 -- -- sub $4, %r8 -- add $4, %r9 -- --.Lst_partial_4: -- cmp $2, %r8 -- jl .Lst_partial_2 -- -- mov %r10w, (%r9) -- shr $0x10, %r10 -- -- sub $2, %r8 -- add $2, %r9 -- --.Lst_partial_2: -- cmp $1, %r8 -- jl .Lst_partial_1 -- -- mov %r10b, (%r9) -- --.Lst_partial_1: -- RET --SYM_FUNC_END(__store_partial) -+.macro store_partial msg -+ sub $8, %ecx /* LEN - 8 */ -+ jl .Llt8\@ + -+ /* Store 8 <= LEN <= 15 bytes: */ -+ pextrq $1, \msg, %rax -+ mov %ecx, %r8d -+ shl $3, %ecx -+ ror %cl, %rax -+ mov %rax, (DST, %r8) /* Store last LEN - 8 bytes */ -+ movq \msg, (DST) /* Store first 8 bytes */ -+ jmp .Ldone\@ -+ -+.Llt8\@: -+ add $4, %ecx /* LEN - 4 */ -+ jl .Llt4\@ -+ -+ /* Store 4 <= LEN <= 7 bytes: */ -+ pextrd $1, \msg, %eax -+ mov %ecx, %r8d -+ shl $3, %ecx -+ ror %cl, %eax -+ mov %eax, (DST, %r8) /* Store last LEN - 4 bytes */ -+ movd \msg, (DST) /* Store first 4 bytes */ -+ jmp .Ldone\@ -+ -+.Llt4\@: -+ /* Store 1 <= LEN <= 3 bytes: */ -+ pextrb $0, \msg, 0(DST) -+ cmp $-2, %ecx /* LEN - 4 == -2, i.e. LEN == 2? */ -+ jl .Ldone\@ -+ pextrb $1, \msg, 1(DST) -+ je .Ldone\@ -+ pextrb $2, \msg, 2(DST) -+.Ldone\@: -+.endm + // void aes_gcm_{enc,dec}_update_##suffix(const struct aes_gcm_key_avx10 *key, + // const u32 le_ctr[4], u8 ghash_acc[16], + // const u8 *src, u8 *dst, int datalen); +@@ -640,7 +666,7 @@ + // LE_CTR contains the next set of little-endian counter blocks. + .set LE_CTR, V12 - /* -- * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv); -+ * void aegis128_aesni_init(struct aegis_state *state, -+ * const struct aegis_block *key, -+ * const u8 iv[AEGIS128_NONCE_SIZE]); - */ --SYM_FUNC_START(crypto_aegis128_aesni_init) -- FRAME_BEGIN -+SYM_FUNC_START(aegis128_aesni_init) -+ .set STATEP, %rdi -+ .set KEYP, %rsi -+ .set IVP, %rdx - - /* load IV: */ -- movdqu (%rdx), T1 -+ movdqu (IVP), T1 - - /* load key: */ -- movdqa (%rsi), KEY -+ movdqa (KEYP), KEY - pxor KEY, T1 - movdqa T1, STATE0 - movdqa KEY, STATE3 -@@ -224,20 +180,22 @@ SYM_FUNC_START(crypto_aegis128_aesni_init) - movdqu STATE2, 0x20(STATEP) - movdqu STATE3, 0x30(STATEP) - movdqu STATE4, 0x40(STATEP) +- // RNDKEY0, RNDKEYLAST, and RNDKEY_M[9-5] contain cached AES round keys, ++ // RNDKEY0, RNDKEYLAST, and RNDKEY_M[9-1] contain cached AES round keys, + // copied to all 128-bit lanes. RNDKEY0 is the zero-th round key, + // RNDKEYLAST the last, and RNDKEY_M\i the one \i-th from the last. + .set RNDKEY0, V13 +@@ -650,15 +676,10 @@ + .set RNDKEY_M7, V17 + .set RNDKEY_M6, V18 + .set RNDKEY_M5, V19 - -- FRAME_END - RET --SYM_FUNC_END(crypto_aegis128_aesni_init) -+SYM_FUNC_END(aegis128_aesni_init) - - /* -- * void crypto_aegis128_aesni_ad(void *state, unsigned int length, -- * const void *data); -+ * void aegis128_aesni_ad(struct aegis_state *state, const u8 *data, -+ * unsigned int len); -+ * -+ * len must be a multiple of 16. 
- */ --SYM_FUNC_START(crypto_aegis128_aesni_ad) -- FRAME_BEGIN -+SYM_FUNC_START(aegis128_aesni_ad) -+ .set STATEP, %rdi -+ .set SRC, %rsi -+ .set LEN, %edx - -- cmp $0x10, LEN -- jb .Lad_out -+ test LEN, LEN -+ jz .Lad_out - - /* load the state: */ - movdqu 0x00(STATEP), STATE0 -@@ -246,89 +204,40 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) - movdqu 0x30(STATEP), STATE3 - movdqu 0x40(STATEP), STATE4 - -- mov SRC, %r8 -- and $0xF, %r8 -- jnz .Lad_u_loop -- --.align 8 --.Lad_a_loop: -- movdqa 0x00(SRC), MSG -- aegis128_update -- pxor MSG, STATE4 -- sub $0x10, LEN -- cmp $0x10, LEN -- jl .Lad_out_1 -- -- movdqa 0x10(SRC), MSG -- aegis128_update -- pxor MSG, STATE3 -- sub $0x10, LEN -- cmp $0x10, LEN -- jl .Lad_out_2 -- -- movdqa 0x20(SRC), MSG -- aegis128_update -- pxor MSG, STATE2 -- sub $0x10, LEN -- cmp $0x10, LEN -- jl .Lad_out_3 -- -- movdqa 0x30(SRC), MSG -- aegis128_update -- pxor MSG, STATE1 -- sub $0x10, LEN -- cmp $0x10, LEN -- jl .Lad_out_4 -- -- movdqa 0x40(SRC), MSG -- aegis128_update -- pxor MSG, STATE0 -- sub $0x10, LEN -- cmp $0x10, LEN -- jl .Lad_out_0 -- -- add $0x50, SRC -- jmp .Lad_a_loop -- - .align 8 --.Lad_u_loop: -+.Lad_loop: - movdqu 0x00(SRC), MSG - aegis128_update - pxor MSG, STATE4 - sub $0x10, LEN -- cmp $0x10, LEN -- jl .Lad_out_1 -+ jz .Lad_out_1 - - movdqu 0x10(SRC), MSG - aegis128_update - pxor MSG, STATE3 - sub $0x10, LEN -- cmp $0x10, LEN -- jl .Lad_out_2 -+ jz .Lad_out_2 - - movdqu 0x20(SRC), MSG - aegis128_update - pxor MSG, STATE2 - sub $0x10, LEN -- cmp $0x10, LEN -- jl .Lad_out_3 -+ jz .Lad_out_3 - - movdqu 0x30(SRC), MSG - aegis128_update - pxor MSG, STATE1 - sub $0x10, LEN -- cmp $0x10, LEN -- jl .Lad_out_4 -+ jz .Lad_out_4 - - movdqu 0x40(SRC), MSG - aegis128_update - pxor MSG, STATE0 - sub $0x10, LEN -- cmp $0x10, LEN -- jl .Lad_out_0 -+ jz .Lad_out_0 - - add $0x50, SRC -- jmp .Lad_u_loop -+ jmp .Lad_loop - - /* store the state: */ - .Lad_out_0: -@@ -337,7 +246,6 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) - movdqu STATE2, 0x20(STATEP) - movdqu STATE3, 0x30(STATEP) - movdqu STATE4, 0x40(STATEP) -- FRAME_END - RET - - .Lad_out_1: -@@ -346,7 +254,6 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) - movdqu STATE1, 0x20(STATEP) - movdqu STATE2, 0x30(STATEP) - movdqu STATE3, 0x40(STATEP) -- FRAME_END - RET - - .Lad_out_2: -@@ -355,7 +262,6 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) - movdqu STATE0, 0x20(STATEP) - movdqu STATE1, 0x30(STATEP) - movdqu STATE2, 0x40(STATEP) -- FRAME_END - RET - - .Lad_out_3: -@@ -364,7 +270,6 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) - movdqu STATE4, 0x20(STATEP) - movdqu STATE0, 0x30(STATEP) - movdqu STATE1, 0x40(STATEP) -- FRAME_END - RET - - .Lad_out_4: -@@ -373,41 +278,38 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) - movdqu STATE3, 0x20(STATEP) - movdqu STATE4, 0x30(STATEP) - movdqu STATE0, 0x40(STATEP) -- FRAME_END -- RET -- - .Lad_out: -- FRAME_END - RET --SYM_FUNC_END(crypto_aegis128_aesni_ad) -+SYM_FUNC_END(aegis128_aesni_ad) - --.macro encrypt_block a s0 s1 s2 s3 s4 i -- movdq\a (\i * 0x10)(SRC), MSG -+.macro encrypt_block s0 s1 s2 s3 s4 i -+ movdqu (\i * 0x10)(SRC), MSG - movdqa MSG, T0 - pxor \s1, T0 - pxor \s4, T0 - movdqa \s2, T1 - pand \s3, T1 - pxor T1, T0 -- movdq\a T0, (\i * 0x10)(DST) -+ movdqu T0, (\i * 0x10)(DST) - - aegis128_update - pxor MSG, \s4 - - sub $0x10, LEN -- cmp $0x10, LEN -- jl .Lenc_out_\i -+ jz .Lenc_out_\i - .endm - - /* -- * void crypto_aegis128_aesni_enc(void *state, unsigned int length, -- * const void *src, void *dst); -+ * void aegis128_aesni_enc(struct aegis_state *state, const u8 
*src, u8 *dst, -+ * unsigned int len); -+ * -+ * len must be nonzero and a multiple of 16. - */ --SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc) -- FRAME_BEGIN -- -- cmp $0x10, LEN -- jb .Lenc_out -+SYM_FUNC_START(aegis128_aesni_enc) -+ .set STATEP, %rdi -+ .set SRC, %rsi -+ .set DST, %rdx -+ .set LEN, %ecx - - /* load the state: */ - movdqu 0x00(STATEP), STATE0 -@@ -416,34 +318,17 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc) - movdqu 0x30(STATEP), STATE3 - movdqu 0x40(STATEP), STATE4 - -- mov SRC, %r8 -- or DST, %r8 -- and $0xF, %r8 -- jnz .Lenc_u_loop -- - .align 8 --.Lenc_a_loop: -- encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0 -- encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1 -- encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2 -- encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3 -- encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4 -+.Lenc_loop: -+ encrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0 -+ encrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1 -+ encrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2 -+ encrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3 -+ encrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4 - - add $0x50, SRC - add $0x50, DST -- jmp .Lenc_a_loop -- --.align 8 --.Lenc_u_loop: -- encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0 -- encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1 -- encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2 -- encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3 -- encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4 -- -- add $0x50, SRC -- add $0x50, DST -- jmp .Lenc_u_loop -+ jmp .Lenc_loop - - /* store the state: */ - .Lenc_out_0: -@@ -452,7 +337,6 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc) - movdqu STATE1, 0x20(STATEP) - movdqu STATE2, 0x30(STATEP) - movdqu STATE3, 0x40(STATEP) -- FRAME_END - RET - - .Lenc_out_1: -@@ -461,7 +345,6 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc) - movdqu STATE0, 0x20(STATEP) - movdqu STATE1, 0x30(STATEP) - movdqu STATE2, 0x40(STATEP) -- FRAME_END - RET - - .Lenc_out_2: -@@ -470,7 +353,6 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc) - movdqu STATE4, 0x20(STATEP) - movdqu STATE0, 0x30(STATEP) - movdqu STATE1, 0x40(STATEP) -- FRAME_END - RET - - .Lenc_out_3: -@@ -479,7 +361,6 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc) - movdqu STATE3, 0x20(STATEP) - movdqu STATE4, 0x30(STATEP) - movdqu STATE0, 0x40(STATEP) -- FRAME_END - RET - - .Lenc_out_4: -@@ -488,20 +369,19 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc) - movdqu STATE2, 0x20(STATEP) - movdqu STATE3, 0x30(STATEP) - movdqu STATE4, 0x40(STATEP) -- FRAME_END -- RET -- - .Lenc_out: -- FRAME_END - RET --SYM_FUNC_END(crypto_aegis128_aesni_enc) -+SYM_FUNC_END(aegis128_aesni_enc) - - /* -- * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length, -- * const void *src, void *dst); -+ * void aegis128_aesni_enc_tail(struct aegis_state *state, const u8 *src, -+ * u8 *dst, unsigned int len); - */ --SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail) -- FRAME_BEGIN -+SYM_FUNC_START(aegis128_aesni_enc_tail) -+ .set STATEP, %rdi -+ .set SRC, %rsi -+ .set DST, %rdx -+ .set LEN, %ecx /* {load,store}_partial rely on this being %ecx */ - - /* load the state: */ - movdqu 0x00(STATEP), STATE0 -@@ -511,7 +391,8 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail) - movdqu 0x40(STATEP), STATE4 - - /* encrypt message: */ -- call __load_partial -+ mov LEN, %r9d -+ load_partial - - movdqa MSG, T0 - pxor STATE1, T0 -@@ -520,7 +401,8 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail) - pand 
STATE3, T1 - pxor T1, T0 - -- call __store_partial -+ mov %r9d, LEN -+ store_partial T0 - - aegis128_update - pxor MSG, STATE4 -@@ -531,37 +413,36 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail) - movdqu STATE1, 0x20(STATEP) - movdqu STATE2, 0x30(STATEP) - movdqu STATE3, 0x40(STATEP) -- -- FRAME_END - RET --SYM_FUNC_END(crypto_aegis128_aesni_enc_tail) -+SYM_FUNC_END(aegis128_aesni_enc_tail) - --.macro decrypt_block a s0 s1 s2 s3 s4 i -- movdq\a (\i * 0x10)(SRC), MSG -+.macro decrypt_block s0 s1 s2 s3 s4 i -+ movdqu (\i * 0x10)(SRC), MSG - pxor \s1, MSG - pxor \s4, MSG - movdqa \s2, T1 - pand \s3, T1 - pxor T1, MSG -- movdq\a MSG, (\i * 0x10)(DST) -+ movdqu MSG, (\i * 0x10)(DST) - - aegis128_update - pxor MSG, \s4 - - sub $0x10, LEN -- cmp $0x10, LEN -- jl .Ldec_out_\i -+ jz .Ldec_out_\i - .endm - - /* -- * void crypto_aegis128_aesni_dec(void *state, unsigned int length, -- * const void *src, void *dst); -+ * void aegis128_aesni_dec(struct aegis_state *state, const u8 *src, u8 *dst, -+ * unsigned int len); -+ * -+ * len must be nonzero and a multiple of 16. - */ --SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec) -- FRAME_BEGIN -- -- cmp $0x10, LEN -- jb .Ldec_out -+SYM_FUNC_START(aegis128_aesni_dec) -+ .set STATEP, %rdi -+ .set SRC, %rsi -+ .set DST, %rdx -+ .set LEN, %ecx - - /* load the state: */ - movdqu 0x00(STATEP), STATE0 -@@ -570,34 +451,17 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec) - movdqu 0x30(STATEP), STATE3 - movdqu 0x40(STATEP), STATE4 - -- mov SRC, %r8 -- or DST, %r8 -- and $0xF, %r8 -- jnz .Ldec_u_loop -- - .align 8 --.Ldec_a_loop: -- decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0 -- decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1 -- decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2 -- decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3 -- decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4 -+.Ldec_loop: -+ decrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0 -+ decrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1 -+ decrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2 -+ decrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3 -+ decrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4 - - add $0x50, SRC - add $0x50, DST -- jmp .Ldec_a_loop -- --.align 8 --.Ldec_u_loop: -- decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0 -- decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1 -- decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2 -- decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3 -- decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4 -- -- add $0x50, SRC -- add $0x50, DST -- jmp .Ldec_u_loop -+ jmp .Ldec_loop - - /* store the state: */ - .Ldec_out_0: -@@ -606,7 +470,6 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec) - movdqu STATE1, 0x20(STATEP) - movdqu STATE2, 0x30(STATEP) - movdqu STATE3, 0x40(STATEP) -- FRAME_END - RET - - .Ldec_out_1: -@@ -615,7 +478,6 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec) - movdqu STATE0, 0x20(STATEP) - movdqu STATE1, 0x30(STATEP) - movdqu STATE2, 0x40(STATEP) -- FRAME_END - RET - - .Ldec_out_2: -@@ -624,7 +486,6 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec) - movdqu STATE4, 0x20(STATEP) - movdqu STATE0, 0x30(STATEP) - movdqu STATE1, 0x40(STATEP) -- FRAME_END - RET - - .Ldec_out_3: -@@ -633,7 +494,6 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec) - movdqu STATE3, 0x20(STATEP) - movdqu STATE4, 0x30(STATEP) - movdqu STATE0, 0x40(STATEP) -- FRAME_END - RET - - .Ldec_out_4: -@@ -642,20 +502,19 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec) - movdqu STATE2, 0x20(STATEP) - movdqu STATE3, 0x30(STATEP) - 
movdqu STATE4, 0x40(STATEP) -- FRAME_END -- RET -- - .Ldec_out: -- FRAME_END - RET --SYM_FUNC_END(crypto_aegis128_aesni_dec) -+SYM_FUNC_END(aegis128_aesni_dec) - - /* -- * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length, -- * const void *src, void *dst); -+ * void aegis128_aesni_dec_tail(struct aegis_state *state, const u8 *src, -+ * u8 *dst, unsigned int len); - */ --SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail) -- FRAME_BEGIN -+SYM_FUNC_START(aegis128_aesni_dec_tail) -+ .set STATEP, %rdi -+ .set SRC, %rsi -+ .set DST, %rdx -+ .set LEN, %ecx /* {load,store}_partial rely on this being %ecx */ - - /* load the state: */ - movdqu 0x00(STATEP), STATE0 -@@ -665,7 +524,8 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail) - movdqu 0x40(STATEP), STATE4 - - /* decrypt message: */ -- call __load_partial -+ mov LEN, %r9d -+ load_partial - - pxor STATE1, MSG - pxor STATE4, MSG -@@ -673,17 +533,13 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail) - pand STATE3, T1 - pxor T1, MSG - -- movdqa MSG, T0 -- call __store_partial -+ mov %r9d, LEN -+ store_partial MSG - - /* mask with byte count: */ -- movd LEN, T0 -- punpcklbw T0, T0 -- punpcklbw T0, T0 -- punpcklbw T0, T0 -- punpcklbw T0, T0 -- movdqa .Laegis128_counter(%rip), T1 -- pcmpgtb T1, T0 -+ lea .Lzeropad_mask+16(%rip), %rax -+ sub %r9, %rax -+ movdqu (%rax), T0 - pand T0, MSG - - aegis128_update -@@ -695,18 +551,19 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail) - movdqu STATE1, 0x20(STATEP) - movdqu STATE2, 0x30(STATEP) - movdqu STATE3, 0x40(STATEP) -- -- FRAME_END - RET --SYM_FUNC_END(crypto_aegis128_aesni_dec_tail) -+SYM_FUNC_END(aegis128_aesni_dec_tail) - - /* -- * void crypto_aegis128_aesni_final(void *state, void *tag_xor, -- * unsigned int assoclen, -- * unsigned int cryptlen); -+ * void aegis128_aesni_final(struct aegis_state *state, -+ * struct aegis_block *tag_xor, -+ * unsigned int assoclen, unsigned int cryptlen); - */ --SYM_FUNC_START(crypto_aegis128_aesni_final) -- FRAME_BEGIN -+SYM_FUNC_START(aegis128_aesni_final) -+ .set STATEP, %rdi -+ .set TAG_XOR, %rsi -+ .set ASSOCLEN, %edx -+ .set CRYPTLEN, %ecx - - /* load the state: */ - movdqu 0x00(STATEP), STATE0 -@@ -716,10 +573,8 @@ SYM_FUNC_START(crypto_aegis128_aesni_final) - movdqu 0x40(STATEP), STATE4 - - /* prepare length block: */ -- movd %edx, MSG -- movd %ecx, T0 -- pslldq $8, T0 -- pxor T0, MSG -+ movd ASSOCLEN, MSG -+ pinsrd $2, CRYPTLEN, MSG - psllq $3, MSG /* multiply by 8 (to get bit count) */ - - pxor STATE3, MSG -@@ -734,7 +589,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_final) - aegis128_update; pxor MSG, STATE3 - - /* xor tag: */ -- movdqu (%rsi), MSG -+ movdqu (TAG_XOR), MSG - - pxor STATE0, MSG - pxor STATE1, MSG -@@ -742,8 +597,6 @@ SYM_FUNC_START(crypto_aegis128_aesni_final) - pxor STATE3, MSG - pxor STATE4, MSG - -- movdqu MSG, (%rsi) -- -- FRAME_END -+ movdqu MSG, (TAG_XOR) - RET --SYM_FUNC_END(crypto_aegis128_aesni_final) -+SYM_FUNC_END(aegis128_aesni_final) -diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c -index 4623189000d8..c19d8e3d96a3 100644 ---- a/arch/x86/crypto/aegis128-aesni-glue.c -+++ b/arch/x86/crypto/aegis128-aesni-glue.c -@@ -1,7 +1,7 @@ - // SPDX-License-Identifier: GPL-2.0-or-later - /* - * The AEGIS-128 Authenticated-Encryption Algorithm -- * Glue for AES-NI + SSE2 implementation -+ * Glue for AES-NI + SSE4.1 implementation - * - * Copyright (c) 2017-2018 Ondrej Mosnacek - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. 
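The rewritten partial-block paths above (load_partial, store_partial, the encrypt_block/decrypt_block keystream, and the .Lzeropad_mask window load in dec_tail) all trade byte-at-a-time loops for overlapping accesses. A C model of the tricks -- for illustration only, with len assumed in the stated ranges:

#include <stdint.h>
#include <string.h>

/* load_partial, 9..15-byte case: two overlapping 8-byte loads; the
 * second is shifted right by 8*(16 - len) bits to drop the overlap,
 * which also zero-pads the upper half of the block. */
static void load_partial_9_15(const uint8_t *src, unsigned int len,
                              uint64_t out[2])
{
        uint64_t lo, hi;

        memcpy(&lo, src, 8);
        memcpy(&hi, src + len - 8, 8);
        out[0] = lo;
        out[1] = hi >> (8 * (16 - len));
}

/* store_partial, 8..15-byte case: last 8 bytes first, then the first
 * 8-byte store rewrites the overlapping range (the ror in the assembly
 * only compensates for pextrq handing back the high lane). */
static void store_partial_8_15(uint8_t *dst, const uint8_t msg[16],
                               unsigned int len)
{
        memcpy(dst + len - 8, msg + len - 8, 8);
        memcpy(dst, msg, 8);
}

/* per-block keystream from encrypt_block/decrypt_block:
 * out = msg ^ S1 ^ S4 ^ (S2 & S3), bytewise over the 16-byte lane */
static void keystream_xor(uint8_t out[16], const uint8_t msg[16],
                          const uint8_t s1[16], const uint8_t s2[16],
                          const uint8_t s3[16], const uint8_t s4[16])
{
        for (int i = 0; i < 16; i++)
                out[i] = msg[i] ^ s1[i] ^ s4[i] ^ (s2[i] & s3[i]);
}

/* dec_tail masking: one unaligned 16-byte load from the 32-byte
 * .Lzeropad_mask constant at offset 16 - len yields exactly len 0xff
 * bytes followed by zeroes, replacing the old pcmpgtb counter compare. */
static const uint8_t zeropad_mask[32] = {
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* remaining 16 bytes are implicitly zero */
};

static void zeroize_past_len(uint8_t block[16], unsigned int len)
{
        for (int i = 0; i < 16; i++)    /* len assumed 1..15 */
                block[i] &= zeropad_mask[16 - len + i];
}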
-@@ -23,27 +23,6 @@ - #define AEGIS128_MIN_AUTH_SIZE 8 - #define AEGIS128_MAX_AUTH_SIZE 16 - --asmlinkage void crypto_aegis128_aesni_init(void *state, void *key, void *iv); -- --asmlinkage void crypto_aegis128_aesni_ad( -- void *state, unsigned int length, const void *data); -- --asmlinkage void crypto_aegis128_aesni_enc( -- void *state, unsigned int length, const void *src, void *dst); -- --asmlinkage void crypto_aegis128_aesni_dec( -- void *state, unsigned int length, const void *src, void *dst); -- --asmlinkage void crypto_aegis128_aesni_enc_tail( -- void *state, unsigned int length, const void *src, void *dst); -- --asmlinkage void crypto_aegis128_aesni_dec_tail( -- void *state, unsigned int length, const void *src, void *dst); -- --asmlinkage void crypto_aegis128_aesni_final( -- void *state, void *tag_xor, unsigned int cryptlen, -- unsigned int assoclen); -- - struct aegis_block { - u8 bytes[AEGIS128_BLOCK_SIZE] __aligned(AEGIS128_BLOCK_ALIGN); - }; -@@ -56,15 +35,31 @@ struct aegis_ctx { - struct aegis_block key; - }; - --struct aegis_crypt_ops { -- int (*skcipher_walk_init)(struct skcipher_walk *walk, -- struct aead_request *req, bool atomic); -+asmlinkage void aegis128_aesni_init(struct aegis_state *state, -+ const struct aegis_block *key, -+ const u8 iv[AEGIS128_NONCE_SIZE]); - -- void (*crypt_blocks)(void *state, unsigned int length, const void *src, -- void *dst); -- void (*crypt_tail)(void *state, unsigned int length, const void *src, -- void *dst); --}; -+asmlinkage void aegis128_aesni_ad(struct aegis_state *state, const u8 *data, -+ unsigned int len); -+ -+asmlinkage void aegis128_aesni_enc(struct aegis_state *state, const u8 *src, -+ u8 *dst, unsigned int len); -+ -+asmlinkage void aegis128_aesni_dec(struct aegis_state *state, const u8 *src, -+ u8 *dst, unsigned int len); -+ -+asmlinkage void aegis128_aesni_enc_tail(struct aegis_state *state, -+ const u8 *src, u8 *dst, -+ unsigned int len); -+ -+asmlinkage void aegis128_aesni_dec_tail(struct aegis_state *state, -+ const u8 *src, u8 *dst, -+ unsigned int len); -+ -+asmlinkage void aegis128_aesni_final(struct aegis_state *state, -+ struct aegis_block *tag_xor, -+ unsigned int assoclen, -+ unsigned int cryptlen); - - static void crypto_aegis128_aesni_process_ad( - struct aegis_state *state, struct scatterlist *sg_src, -@@ -85,16 +80,15 @@ static void crypto_aegis128_aesni_process_ad( - if (pos > 0) { - unsigned int fill = AEGIS128_BLOCK_SIZE - pos; - memcpy(buf.bytes + pos, src, fill); -- crypto_aegis128_aesni_ad(state, -- AEGIS128_BLOCK_SIZE, -- buf.bytes); -+ aegis128_aesni_ad(state, buf.bytes, -+ AEGIS128_BLOCK_SIZE); - pos = 0; - left -= fill; - src += fill; - } - -- crypto_aegis128_aesni_ad(state, left, src); -- -+ aegis128_aesni_ad(state, src, -+ left & ~(AEGIS128_BLOCK_SIZE - 1)); - src += left & ~(AEGIS128_BLOCK_SIZE - 1); - left &= AEGIS128_BLOCK_SIZE - 1; - } -@@ -110,24 +104,37 @@ static void crypto_aegis128_aesni_process_ad( - - if (pos > 0) { - memset(buf.bytes + pos, 0, AEGIS128_BLOCK_SIZE - pos); -- crypto_aegis128_aesni_ad(state, AEGIS128_BLOCK_SIZE, buf.bytes); -+ aegis128_aesni_ad(state, buf.bytes, AEGIS128_BLOCK_SIZE); - } - } - --static void crypto_aegis128_aesni_process_crypt( -- struct aegis_state *state, struct skcipher_walk *walk, -- const struct aegis_crypt_ops *ops) -+static __always_inline void -+crypto_aegis128_aesni_process_crypt(struct aegis_state *state, -+ struct skcipher_walk *walk, bool enc) - { - while (walk->nbytes >= AEGIS128_BLOCK_SIZE) { -- ops->crypt_blocks(state, -- 
round_down(walk->nbytes, AEGIS128_BLOCK_SIZE), -- walk->src.virt.addr, walk->dst.virt.addr); -+ if (enc) -+ aegis128_aesni_enc(state, walk->src.virt.addr, -+ walk->dst.virt.addr, -+ round_down(walk->nbytes, -+ AEGIS128_BLOCK_SIZE)); -+ else -+ aegis128_aesni_dec(state, walk->src.virt.addr, -+ walk->dst.virt.addr, -+ round_down(walk->nbytes, -+ AEGIS128_BLOCK_SIZE)); - skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE); - } - - if (walk->nbytes) { -- ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr, -- walk->dst.virt.addr); -+ if (enc) -+ aegis128_aesni_enc_tail(state, walk->src.virt.addr, -+ walk->dst.virt.addr, -+ walk->nbytes); -+ else -+ aegis128_aesni_dec_tail(state, walk->src.virt.addr, -+ walk->dst.virt.addr, -+ walk->nbytes); - skcipher_walk_done(walk, 0); - } - } -@@ -162,42 +169,39 @@ static int crypto_aegis128_aesni_setauthsize(struct crypto_aead *tfm, - return 0; - } - --static void crypto_aegis128_aesni_crypt(struct aead_request *req, -- struct aegis_block *tag_xor, -- unsigned int cryptlen, -- const struct aegis_crypt_ops *ops) -+static __always_inline void -+crypto_aegis128_aesni_crypt(struct aead_request *req, -+ struct aegis_block *tag_xor, -+ unsigned int cryptlen, bool enc) - { - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(tfm); - struct skcipher_walk walk; - struct aegis_state state; - -- ops->skcipher_walk_init(&walk, req, true); -+ if (enc) -+ skcipher_walk_aead_encrypt(&walk, req, true); -+ else -+ skcipher_walk_aead_decrypt(&walk, req, true); - - kernel_fpu_begin(); - -- crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv); -+ aegis128_aesni_init(&state, &ctx->key, req->iv); - crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen); -- crypto_aegis128_aesni_process_crypt(&state, &walk, ops); -- crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen); -+ crypto_aegis128_aesni_process_crypt(&state, &walk, enc); -+ aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen); - - kernel_fpu_end(); - } - - static int crypto_aegis128_aesni_encrypt(struct aead_request *req) - { -- static const struct aegis_crypt_ops OPS = { -- .skcipher_walk_init = skcipher_walk_aead_encrypt, -- .crypt_blocks = crypto_aegis128_aesni_enc, -- .crypt_tail = crypto_aegis128_aesni_enc_tail, -- }; -- - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aegis_block tag = {}; - unsigned int authsize = crypto_aead_authsize(tfm); - unsigned int cryptlen = req->cryptlen; - -- crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS); -+ crypto_aegis128_aesni_crypt(req, &tag, cryptlen, true); - - scatterwalk_map_and_copy(tag.bytes, req->dst, - req->assoclen + cryptlen, authsize, 1); -@@ -208,12 +212,6 @@ static int crypto_aegis128_aesni_decrypt(struct aead_request *req) - { - static const struct aegis_block zeros = {}; - -- static const struct aegis_crypt_ops OPS = { -- .skcipher_walk_init = skcipher_walk_aead_decrypt, -- .crypt_blocks = crypto_aegis128_aesni_dec, -- .crypt_tail = crypto_aegis128_aesni_dec_tail, -- }; -- - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aegis_block tag; - unsigned int authsize = crypto_aead_authsize(tfm); -@@ -222,27 +220,16 @@ static int crypto_aegis128_aesni_decrypt(struct aead_request *req) - scatterwalk_map_and_copy(tag.bytes, req->src, - req->assoclen + cryptlen, authsize, 0); - -- crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS); -+ crypto_aegis128_aesni_crypt(req, &tag, cryptlen, false); - - return crypto_memneq(tag.bytes, 
zeros.bytes, authsize) ? -EBADMSG : 0; - } - --static int crypto_aegis128_aesni_init_tfm(struct crypto_aead *aead) --{ -- return 0; --} -- --static void crypto_aegis128_aesni_exit_tfm(struct crypto_aead *aead) --{ --} -- - static struct aead_alg crypto_aegis128_aesni_alg = { - .setkey = crypto_aegis128_aesni_setkey, - .setauthsize = crypto_aegis128_aesni_setauthsize, - .encrypt = crypto_aegis128_aesni_encrypt, - .decrypt = crypto_aegis128_aesni_decrypt, -- .init = crypto_aegis128_aesni_init_tfm, -- .exit = crypto_aegis128_aesni_exit_tfm, - - .ivsize = AEGIS128_NONCE_SIZE, - .maxauthsize = AEGIS128_MAX_AUTH_SIZE, -@@ -267,7 +254,7 @@ static struct simd_aead_alg *simd_alg; - - static int __init crypto_aegis128_aesni_module_init(void) - { -- if (!boot_cpu_has(X86_FEATURE_XMM2) || -+ if (!boot_cpu_has(X86_FEATURE_XMM4_1) || - !boot_cpu_has(X86_FEATURE_AES) || - !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) - return -ENODEV; -@@ -286,6 +273,6 @@ module_exit(crypto_aegis128_aesni_module_exit); - - MODULE_LICENSE("GPL"); - MODULE_AUTHOR("Ondrej Mosnacek "); --MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm -- AESNI+SSE2 implementation"); -+MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm -- AESNI+SSE4.1 implementation"); - MODULE_ALIAS_CRYPTO("aegis128"); - MODULE_ALIAS_CRYPTO("aegis128-aesni"); -diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c -index feccb5254c7e..52c5d47ef5a1 100644 ---- a/arch/x86/crypto/crc32c-intel_glue.c -+++ b/arch/x86/crypto/crc32c-intel_glue.c -@@ -41,7 +41,7 @@ - */ - #define CRC32C_PCL_BREAKEVEN 512 - --asmlinkage unsigned int crc_pcl(const u8 *buffer, int len, -+asmlinkage unsigned int crc_pcl(const u8 *buffer, unsigned int len, - unsigned int crc_init); - #endif /* CONFIG_X86_64 */ - -diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S -index bbcff1fb78cb..752812bc4991 100644 ---- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S -+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S -@@ -7,6 +7,7 @@ - * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-paper.pdf - * - * Copyright (C) 2012 Intel Corporation. -+ * Copyright 2024 Google LLC - * - * Authors: - * Wajdi Feghali -@@ -44,185 +45,129 @@ - */ - - #include --#include - - ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction - --.macro LABEL prefix n --.L\prefix\n\(): --.endm -- --.macro JMPTBL_ENTRY i --.quad .Lcrc_\i --.endm -- --.macro JNC_LESS_THAN j -- jnc .Lless_than_\j --.endm -- --# Define threshold where buffers are considered "small" and routed to more --# efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so --# SMALL_SIZE can be no larger than 255. -- -+# Define threshold below which buffers are considered "small" and routed to -+# regular CRC code that does not interleave the CRC instructions. - #define SMALL_SIZE 200 - --.if (SMALL_SIZE > 255) --.error "SMALL_ SIZE must be < 256" +- // RNDKEYLAST[0-3] temporarily store the last AES round key XOR'd with +- // the corresponding block of source data. This is useful because +- // vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a), and key ^ b can +- // be computed in parallel with the AES rounds. +- .set RNDKEYLAST0, V20 +- .set RNDKEYLAST1, V21 +- .set RNDKEYLAST2, V22 +- .set RNDKEYLAST3, V23 ++ .set RNDKEY_M4, V20 ++ .set RNDKEY_M3, V21 ++ .set RNDKEY_M2, V22 ++ .set RNDKEY_M1, V23 + + // GHASHTMP[0-2] are temporary variables used by _ghash_step_4x. 
These + // cannot coincide with anything used for AES encryption, since for +@@ -713,7 +734,7 @@ + // Pre-subtracting 4*VL from DATALEN saves an instruction from the main + // loop and also ensures that at least one write always occurs to + // DATALEN, zero-extending it and allowing DATALEN64 to be used later. +- sub $4*VL, DATALEN ++ add $-4*VL, DATALEN // shorter than 'sub 4*VL' when VL=32 + jl .Lcrypt_loop_4x_done\@ + + // Load powers of the hash key. +@@ -748,26 +769,15 @@ + add $16, %rax + cmp %rax, RNDKEYLAST_PTR + jne 1b +- vpxord 0*VL(SRC), RNDKEYLAST, RNDKEYLAST0 +- vpxord 1*VL(SRC), RNDKEYLAST, RNDKEYLAST1 +- vpxord 2*VL(SRC), RNDKEYLAST, RNDKEYLAST2 +- vpxord 3*VL(SRC), RNDKEYLAST, RNDKEYLAST3 +- vaesenclast RNDKEYLAST0, V0, GHASHDATA0 +- vaesenclast RNDKEYLAST1, V1, GHASHDATA1 +- vaesenclast RNDKEYLAST2, V2, GHASHDATA2 +- vaesenclast RNDKEYLAST3, V3, GHASHDATA3 +- vmovdqu8 GHASHDATA0, 0*VL(DST) +- vmovdqu8 GHASHDATA1, 1*VL(DST) +- vmovdqu8 GHASHDATA2, 2*VL(DST) +- vmovdqu8 GHASHDATA3, 3*VL(DST) +- add $4*VL, SRC +- add $4*VL, DST +- sub $4*VL, DATALEN ++ _aesenclast_and_xor_4x ++ sub $-4*VL, SRC // shorter than 'add 4*VL' when VL=32 ++ sub $-4*VL, DST ++ add $-4*VL, DATALEN + jl .Lghash_last_ciphertext_4x\@ + .endif + + // Cache as many additional AES round keys as possible. +-.irp i, 9,8,7,6,5 ++.irp i, 9,8,7,6,5,4,3,2,1 + vbroadcasti32x4 -\i*16(RNDKEYLAST_PTR), RNDKEY_M\i + .endr + +@@ -799,50 +809,17 @@ + _vaesenc_4x RNDKEY + 128: + +- // XOR the source data with the last round key, saving the result in +- // RNDKEYLAST[0-3]. This reduces latency by taking advantage of the +- // property vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a). +-.if \enc +- vpxord 0*VL(SRC), RNDKEYLAST, RNDKEYLAST0 +- vpxord 1*VL(SRC), RNDKEYLAST, RNDKEYLAST1 +- vpxord 2*VL(SRC), RNDKEYLAST, RNDKEYLAST2 +- vpxord 3*VL(SRC), RNDKEYLAST, RNDKEYLAST3 +-.else +- vpxord GHASHDATA0, RNDKEYLAST, RNDKEYLAST0 +- vpxord GHASHDATA1, RNDKEYLAST, RNDKEYLAST1 +- vpxord GHASHDATA2, RNDKEYLAST, RNDKEYLAST2 +- vpxord GHASHDATA3, RNDKEYLAST, RNDKEYLAST3 -.endif - --# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init); -+# unsigned int crc_pcl(const u8 *buffer, unsigned int len, unsigned int crc_init); + // Finish the AES encryption of the counter blocks in V0-V3, interleaved + // with the GHASH update of the ciphertext blocks in GHASHDATA[0-3]. +-.irp i, 9,8,7,6,5 ++.irp i, 9,8,7,6,5,4,3,2,1 ++ _ghash_step_4x (9 - \i) + _vaesenc_4x RNDKEY_M\i +- _ghash_step_4x (9 - \i) +-.endr +-.irp i, 4,3,2,1 +- vbroadcasti32x4 -\i*16(RNDKEYLAST_PTR), RNDKEY +- _vaesenc_4x RNDKEY +- _ghash_step_4x (9 - \i) + .endr + _ghash_step_4x 9 +- +- // Do the last AES round. This handles the XOR with the source data +- // too, as per the optimization described above. +- vaesenclast RNDKEYLAST0, V0, GHASHDATA0 +- vaesenclast RNDKEYLAST1, V1, GHASHDATA1 +- vaesenclast RNDKEYLAST2, V2, GHASHDATA2 +- vaesenclast RNDKEYLAST3, V3, GHASHDATA3 +- +- // Store the en/decrypted data to DST. +- vmovdqu8 GHASHDATA0, 0*VL(DST) +- vmovdqu8 GHASHDATA1, 1*VL(DST) +- vmovdqu8 GHASHDATA2, 2*VL(DST) +- vmovdqu8 GHASHDATA3, 3*VL(DST) +- +- add $4*VL, SRC +- add $4*VL, DST +- sub $4*VL, DATALEN ++ _aesenclast_and_xor_4x ++ sub $-4*VL, SRC // shorter than 'add 4*VL' when VL=32 ++ sub $-4*VL, DST ++ add $-4*VL, DATALEN + jge .Lcrypt_loop_4x\@ + .if \enc +@@ -856,7 +833,7 @@ + .Lcrypt_loop_4x_done\@: + + // Undo the extra subtraction by 4*VL and check whether data remains. 
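
One recurring micro-optimization in this hunk deserves a note: with VL=32, 4*VL is 128, which does not fit x86's sign-extended 8-bit immediate, while -128 does, so `sub $-128` assembles three bytes shorter than `add $128`; that is all the "shorter than 'add 4*VL'" comments refer to. The pre-subtraction of 4*VL also turns the loop test into a plain sign check. A C sketch of the same control flow (names illustrative, not from the kernel):

#include <stdint.h>

static void crypt_in_groups(const uint8_t **src, uint8_t **dst,
			    int *datalen, int stride)
{
	*datalen -= stride;		/* may go negative: no full group left */
	while (*datalen >= 0) {		/* the jl / jge pair in the assembly */
		/* ... en/decrypt one group of four vectors here ... */
		*src += stride;
		*dst += stride;
		*datalen -= stride;
	}
	*datalen += stride;		/* undo; 0 <= *datalen < stride remains */
}

The undo step at the end of this sketch is what the following `sub $-4*VL, DATALEN` performs before the tail handling.
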
+- add $4*VL, DATALEN ++ sub $-4*VL, DATALEN // shorter than 'add 4*VL' when VL=32 + jz .Ldone\@ + + // The data length isn't a multiple of 4*VL. Process the remaining data +@@ -940,7 +917,7 @@ + // GHASH. However, any such blocks are all-zeroes, and the values that + // they're multiplied with are also all-zeroes. Therefore they just add + // 0 * 0 = 0 to the final GHASH result, which makes no difference. +- vmovdqu8 (POWERS_PTR), H_POW1 ++ vmovdqu8 (POWERS_PTR), H_POW1 + .if \enc + vmovdqu8 V0, V1{%k1}{z} + .endif +diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S +index 48f97b79f7a9..8a3e23fbcf85 100644 +--- a/arch/x86/crypto/aes-xts-avx-x86_64.S ++++ b/arch/x86/crypto/aes-xts-avx-x86_64.S +@@ -80,22 +80,6 @@ + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 .text - SYM_FUNC_START(crc_pcl) --#define bufp rdi --#define bufp_dw %edi --#define bufp_w %di --#define bufp_b %dil --#define bufptmp %rcx --#define block_0 %rcx --#define block_1 %rdx --#define block_2 %r11 --#define len %rsi --#define len_dw %esi --#define len_w %si --#define len_b %sil --#define crc_init_arg %rdx --#define tmp %rbx --#define crc_init %r8 --#define crc_init_dw %r8d --#define crc1 %r9 --#define crc2 %r10 + +-// Function parameters +-.set KEY, %rdi // Initially points to crypto_aes_ctx, then is +- // advanced to point to 7th-from-last round key +-.set SRC, %rsi // Pointer to next source data +-.set DST, %rdx // Pointer to next destination data +-.set LEN, %ecx // Remaining length in bytes +-.set LEN8, %cl +-.set LEN64, %rcx +-.set TWEAK, %r8 // Pointer to next tweak - -- pushq %rbx -- pushq %rdi -- pushq %rsi +-// %rax holds the AES key length in bytes. +-.set KEYLEN, %eax +-.set KEYLEN64, %rax - -- ## Move crc_init for Linux to a different -- mov crc_init_arg, crc_init -+#define bufp %rdi -+#define bufp_d %edi -+#define len %esi -+#define crc_init %edx -+#define crc_init_q %rdx -+#define n_misaligned %ecx /* overlaps chunk_bytes! */ -+#define n_misaligned_q %rcx -+#define chunk_bytes %ecx /* overlaps n_misaligned! */ -+#define chunk_bytes_q %rcx -+#define crc1 %r8 -+#define crc2 %r9 +-// %r9-r11 are available as temporaries. +- + .macro _define_Vi i + .if VL == 16 + .set V\i, %xmm\i +@@ -112,41 +96,31 @@ + // Define register aliases V0-V15, or V0-V31 if all 32 SIMD registers + // are available, that map to the xmm, ymm, or zmm registers according + // to the selected Vector Length (VL). 
+- _define_Vi 0 +- _define_Vi 1 +- _define_Vi 2 +- _define_Vi 3 +- _define_Vi 4 +- _define_Vi 5 +- _define_Vi 6 +- _define_Vi 7 +- _define_Vi 8 +- _define_Vi 9 +- _define_Vi 10 +- _define_Vi 11 +- _define_Vi 12 +- _define_Vi 13 +- _define_Vi 14 +- _define_Vi 15 ++.irp i, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 ++ _define_Vi \i ++.endr + .if USE_AVX10 +- _define_Vi 16 +- _define_Vi 17 +- _define_Vi 18 +- _define_Vi 19 +- _define_Vi 20 +- _define_Vi 21 +- _define_Vi 22 +- _define_Vi 23 +- _define_Vi 24 +- _define_Vi 25 +- _define_Vi 26 +- _define_Vi 27 +- _define_Vi 28 +- _define_Vi 29 +- _define_Vi 30 +- _define_Vi 31 ++.irp i, 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 ++ _define_Vi \i ++.endr + .endif + ++ // Function parameters ++ .set KEY, %rdi // Initially points to crypto_aes_ctx, then is ++ // advanced to point to 7th-from-last round key ++ .set SRC, %rsi // Pointer to next source data ++ .set DST, %rdx // Pointer to next destination data ++ .set LEN, %ecx // Remaining length in bytes ++ .set LEN8, %cl ++ .set LEN64, %rcx ++ .set TWEAK, %r8 // Pointer to next tweak + -+ cmp $SMALL_SIZE, len -+ jb .Lsmall - - ################################################################ - ## 1) ALIGN: - ################################################################ -- -- mov %bufp, bufptmp # rdi = *buf -- neg %bufp -- and $7, %bufp # calculate the unalignment amount of -+ mov bufp_d, n_misaligned -+ neg n_misaligned -+ and $7, n_misaligned # calculate the misalignment amount of - # the address -- je .Lproc_block # Skip if aligned -- -- ## If len is less than 8 and we're unaligned, we need to jump -- ## to special code to avoid reading beyond the end of the buffer -- cmp $8, len -- jae .Ldo_align -- # less_than_8 expects length in upper 3 bits of len_dw -- # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30] -- shl $32-3+1, len_dw -- jmp .Lless_than_8_post_shl1 -+ je .Laligned # Skip if aligned - -+ # Process 1 <= n_misaligned <= 7 bytes individually in order to align -+ # the remaining data to an 8-byte boundary. 
- .Ldo_align: -- #### Calculate CRC of unaligned bytes of the buffer (if any) -- movq (bufptmp), tmp # load a quadward from the buffer -- add %bufp, bufptmp # align buffer pointer for quadword -- # processing -- sub %bufp, len # update buffer length -+ movq (bufp), %rax -+ add n_misaligned_q, bufp -+ sub n_misaligned, len - .Lalign_loop: -- crc32b %bl, crc_init_dw # compute crc32 of 1-byte -- shr $8, tmp # get next byte -- dec %bufp -+ crc32b %al, crc_init # compute crc32 of 1-byte -+ shr $8, %rax # get next byte -+ dec n_misaligned - jne .Lalign_loop -- --.Lproc_block: -+.Laligned: - - ################################################################ -- ## 2) PROCESS BLOCKS: -+ ## 2) PROCESS BLOCK: - ################################################################ - -- ## compute num of bytes to be processed -- movq len, tmp # save num bytes in tmp -- -- cmpq $128*24, len -+ cmp $128*24, len - jae .Lfull_block - --.Lcontinue_block: -- cmpq $SMALL_SIZE, len -- jb .Lsmall -- -- ## len < 128*24 -- movq $2731, %rax # 2731 = ceil(2^16 / 24) -- mul len_dw -- shrq $16, %rax -- -- ## eax contains floor(bytes / 24) = num 24-byte chunks to do -- -- ## process rax 24-byte chunks (128 >= rax >= 0) -- -- ## compute end address of each block -- ## block 0 (base addr + RAX * 8) -- ## block 1 (base addr + RAX * 16) -- ## block 2 (base addr + RAX * 24) -- lea (bufptmp, %rax, 8), block_0 -- lea (block_0, %rax, 8), block_1 -- lea (block_1, %rax, 8), block_2 -+.Lpartial_block: -+ # Compute floor(len / 24) to get num qwords to process from each lane. -+ imul $2731, len, %eax # 2731 = ceil(2^16 / 24) -+ shr $16, %eax -+ jmp .Lcrc_3lanes - -- xor crc1, crc1 -- xor crc2, crc2 -- -- ## branch into array -- leaq jump_table(%rip), %bufp -- mov (%bufp,%rax,8), %bufp -- JMP_NOSPEC bufp -- -- ################################################################ -- ## 2a) PROCESS FULL BLOCKS: -- ################################################################ - .Lfull_block: -- movl $128,%eax -- lea 128*8*2(block_0), block_1 -- lea 128*8*3(block_0), block_2 -- add $128*8*1, block_0 -- -- xor crc1,crc1 -- xor crc2,crc2 -- -- # Fall through into top of crc array (crc_128) -+ # Processing 128 qwords from each lane. -+ mov $128, %eax - - ################################################################ -- ## 3) CRC Array: -+ ## 3) CRC each of three lanes: - ################################################################ - -- i=128 --.rept 128-1 --.altmacro --LABEL crc_ %i --.noaltmacro -- ENDBR -- crc32q -i*8(block_0), crc_init -- crc32q -i*8(block_1), crc1 -- crc32q -i*8(block_2), crc2 -- i=(i-1) --.endr -- --.altmacro --LABEL crc_ %i --.noaltmacro -- ENDBR -- crc32q -i*8(block_0), crc_init -- crc32q -i*8(block_1), crc1 --# SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet -- -- mov block_2, block_0 -+.Lcrc_3lanes: -+ xor crc1,crc1 -+ xor crc2,crc2 -+ mov %eax, chunk_bytes -+ shl $3, chunk_bytes # num bytes to process from each lane -+ sub $5, %eax # 4 for 4x_loop, 1 for special last iter -+ jl .Lcrc_3lanes_4x_done ++ // %rax holds the AES key length in bytes. ++ .set KEYLEN, %eax ++ .set KEYLEN64, %rax + -+ # Unroll the loop by a factor of 4 to reduce the overhead of the loop -+ # bookkeeping instructions, which can compete with crc32q for the ALUs. 
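
The three-lane split pays off because crc32q has multi-cycle latency but single-cycle throughput on common x86 cores, so three independent dependency chains can run in parallel. In C, the idea (a sketch, not the kernel's code) is simply three interleaved accumulators:

#include <stdint.h>
#include <nmmintrin.h>

/* n = number of quadwords per lane; p points at 3*n contiguous quadwords. */
static void crc32c_three_lanes(const uint64_t *p, unsigned int n,
			       uint64_t *crc0, uint64_t *crc1, uint64_t *crc2)
{
	for (unsigned int i = 0; i < n; i++) {
		*crc0 = _mm_crc32_u64(*crc0, p[i]);		/* lane 0 */
		*crc1 = _mm_crc32_u64(*crc1, p[n + i]);		/* lane 1 */
		*crc2 = _mm_crc32_u64(*crc2, p[2 * n + i]);	/* lane 2 */
	}
	/* The three partial CRCs are then folded together with PCLMULQDQ
	 * multiplications by precomputed constants (the K_table used below). */
}

The 4x unrolling in the assembly additionally amortizes the pointer and counter updates that this C loop leaves to the compiler.
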
-+.Lcrc_3lanes_4x_loop: -+ crc32q (bufp), crc_init_q -+ crc32q (bufp,chunk_bytes_q), crc1 -+ crc32q (bufp,chunk_bytes_q,2), crc2 -+ crc32q 8(bufp), crc_init_q -+ crc32q 8(bufp,chunk_bytes_q), crc1 -+ crc32q 8(bufp,chunk_bytes_q,2), crc2 -+ crc32q 16(bufp), crc_init_q -+ crc32q 16(bufp,chunk_bytes_q), crc1 -+ crc32q 16(bufp,chunk_bytes_q,2), crc2 -+ crc32q 24(bufp), crc_init_q -+ crc32q 24(bufp,chunk_bytes_q), crc1 -+ crc32q 24(bufp,chunk_bytes_q,2), crc2 -+ add $32, bufp -+ sub $4, %eax -+ jge .Lcrc_3lanes_4x_loop ++ // %r9-r11 are available as temporaries. + -+.Lcrc_3lanes_4x_done: -+ add $4, %eax -+ jz .Lcrc_3lanes_last_qword + // V0-V3 hold the data blocks during the main loop, or temporary values + // otherwise. V4-V5 hold temporary values. + +@@ -214,6 +188,7 @@ + .endm + + // Move a vector between memory and a register. ++// The register operand must be in the first 16 vector registers. + .macro _vmovdqu src, dst + .if VL < 64 + vmovdqu \src, \dst +@@ -234,11 +209,12 @@ + .endm + + // XOR two vectors together. ++// Any register operands must be in the first 16 vector registers. + .macro _vpxor src1, src2, dst +-.if USE_AVX10 +- vpxord \src1, \src2, \dst +-.else ++.if VL < 64 + vpxor \src1, \src2, \dst ++.else ++ vpxord \src1, \src2, \dst + .endif + .endm + +@@ -259,8 +235,12 @@ + vpshufd $0x13, \src, \tmp + vpaddq \src, \src, \dst + vpsrad $31, \tmp, \tmp ++.if USE_AVX10 ++ vpternlogd $0x78, GF_POLY_XMM, \tmp, \dst ++.else + vpand GF_POLY_XMM, \tmp, \tmp + vpxor \tmp, \dst, \dst ++.endif + .endm + + // Given the XTS tweak(s) in the vector \src, compute the next vector of +@@ -369,9 +349,14 @@ + + // Do one step in computing the next set of tweaks using the VPCLMULQDQ method + // (the same method _next_tweakvec uses for VL > 16). This means multiplying +-// each tweak by x^(4*VL/16) independently. Since 4*VL/16 is a multiple of 8 +-// when VL > 16 (which it is here), the needed shift amounts are byte-aligned, +-// which allows the use of vpsrldq and vpslldq to do 128-bit wide shifts. ++// each tweak by x^(4*VL/16) independently. ++// ++// Since 4*VL/16 is a multiple of 8 when VL > 16 (which it is here), the needed ++// shift amounts are byte-aligned, which allows the use of vpsrldq and vpslldq ++// to do 128-bit wide shifts. The 128-bit left shift (vpslldq) saves ++// instructions directly. The 128-bit right shift (vpsrldq) performs better ++// than a 64-bit right shift on Intel CPUs in the context where it is used here, ++// because it runs on a different execution port from the AES instructions. + .macro _tweak_step_pclmul i + .if \i == 0 + vpsrldq $(128 - 4*VL/16) / 8, TWEAK0, NEXT_TWEAK0 +@@ -406,7 +391,7 @@ + // \i that include at least 0 through 19, then 1000 which signals the last step. + // + // This is used to interleave the computation of the next set of tweaks with the +-// AES en/decryptions, which increases performance in some cases. ++// AES en/decryptions, which increases performance in some cases. Clobbers V5. + .macro _tweak_step i + .if VL == 16 + _tweak_step_mulx \i +@@ -443,9 +428,10 @@ + // the last round needs different instructions. + // + // An alternative approach would be to roll up all the round loops. We +- // don't do that because it isn't compatible with caching the round keys +- // in registers which we do when possible (see below), and also because +- // it seems unwise to rely *too* heavily on the CPU's branch predictor. 
++ // don't do that because (a) it isn't compatible with caching the round ++ // keys in registers which we do when possible (see below), (b) we ++ // interleave the AES rounds with the XTS tweak computation, and (c) it ++ // seems unwise to rely *too* heavily on the CPU's branch predictor. + lea OFFS-16(KEY, KEYLEN64, 4), KEY + + // If all 32 SIMD registers are available, cache all the round keys. +@@ -472,90 +458,94 @@ + .endif + .endm + +-// Do a single round of AES encryption (if \enc==1) or decryption (if \enc==0) +-// on the block(s) in \data using the round key(s) in \key. The register length +-// determines the number of AES blocks en/decrypted. +-.macro _vaes enc, last, key, data ++// Do a single non-last round of AES encryption (if \enc==1) or decryption (if ++// \enc==0) on the block(s) in \data using the round key(s) in \key. The ++// register length determines the number of AES blocks en/decrypted. ++.macro _vaes enc, key, data + .if \enc +-.if \last +- vaesenclast \key, \data, \data +-.else + vaesenc \key, \data, \data +-.endif +-.else +-.if \last +- vaesdeclast \key, \data, \data + .else + vaesdec \key, \data, \data + .endif ++.endm + -+.Lcrc_3lanes_1x_loop: -+ crc32q (bufp), crc_init_q -+ crc32q (bufp,chunk_bytes_q), crc1 -+ crc32q (bufp,chunk_bytes_q,2), crc2 -+ add $8, bufp -+ dec %eax -+ jnz .Lcrc_3lanes_1x_loop ++// Same as _vaes, but does the last round. ++.macro _vaeslast enc, key, data ++.if \enc ++ vaesenclast \key, \data, \data ++.else ++ vaesdeclast \key, \data, \data + .endif + .endm + +-// Do a single round of AES en/decryption on the block(s) in \data, using the +-// same key for all block(s). The round key is loaded from the appropriate +-// register or memory location for round \i. May clobber V4. +-.macro _vaes_1x enc, last, i, xmm_suffix, data ++// Do a single non-last round of AES en/decryption on the block(s) in \data, ++// using the same key for all block(s). The round key is loaded from the ++// appropriate register or memory location for round \i. May clobber \tmp. ++.macro _vaes_1x enc, i, xmm_suffix, data, tmp + .if USE_AVX10 +- _vaes \enc, \last, KEY\i\xmm_suffix, \data ++ _vaes \enc, KEY\i\xmm_suffix, \data + .else + .ifnb \xmm_suffix +- _vaes \enc, \last, (\i-7)*16(KEY), \data ++ _vaes \enc, (\i-7)*16(KEY), \data + .else +- _vbroadcast128 (\i-7)*16(KEY), V4 +- _vaes \enc, \last, V4, \data ++ _vbroadcast128 (\i-7)*16(KEY), \tmp ++ _vaes \enc, \tmp, \data + .endif + .endif + .endm + +-// Do a single round of AES en/decryption on the blocks in registers V0-V3, +-// using the same key for all blocks. The round key is loaded from the ++// Do a single non-last round of AES en/decryption on the blocks in registers ++// V0-V3, using the same key for all blocks. The round key is loaded from the + // appropriate register or memory location for round \i. In addition, does two +-// steps of the computation of the next set of tweaks. May clobber V4. +-.macro _vaes_4x enc, last, i ++// steps of the computation of the next set of tweaks. May clobber V4 and V5. 
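
For orientation, the ladder these _vaes helpers unroll is the standard AES-NI sequence: an initial round-key XOR, a run of vaesenc rounds, and one vaesenclast. In intrinsics form for AES-256 (a self-contained sketch, not the kernel's code path):

#include <wmmintrin.h>

/* rk[0..14]: the 15 expanded round keys of AES-256. */
static __m128i aes256_encrypt_block(__m128i block, const __m128i rk[15])
{
	block = _mm_xor_si128(block, rk[0]);	/* whitening / round 0 */
	for (int i = 1; i < 14; i++)
		block = _mm_aesenc_si128(block, rk[i]);
	return _mm_aesenclast_si128(block, rk[14]);
}

AES-128 and AES-192 only shorten the middle run (10 and 12 rounds total), which is why the surrounding code branches on KEYLEN to skip the first two or four of these rounds.
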
++.macro _vaes_4x enc, i + .if USE_AVX10 + _tweak_step (2*(\i-5)) +- _vaes \enc, \last, KEY\i, V0 +- _vaes \enc, \last, KEY\i, V1 ++ _vaes \enc, KEY\i, V0 ++ _vaes \enc, KEY\i, V1 + _tweak_step (2*(\i-5) + 1) +- _vaes \enc, \last, KEY\i, V2 +- _vaes \enc, \last, KEY\i, V3 ++ _vaes \enc, KEY\i, V2 ++ _vaes \enc, KEY\i, V3 + .else + _vbroadcast128 (\i-7)*16(KEY), V4 + _tweak_step (2*(\i-5)) +- _vaes \enc, \last, V4, V0 +- _vaes \enc, \last, V4, V1 ++ _vaes \enc, V4, V0 ++ _vaes \enc, V4, V1 + _tweak_step (2*(\i-5) + 1) +- _vaes \enc, \last, V4, V2 +- _vaes \enc, \last, V4, V3 ++ _vaes \enc, V4, V2 ++ _vaes \enc, V4, V3 + .endif + .endm + + // Do tweaked AES en/decryption (i.e., XOR with \tweak, then AES en/decrypt, + // then XOR with \tweak again) of the block(s) in \data. To process a single + // block, use xmm registers and set \xmm_suffix=_XMM. To process a vector of +-// length VL, use V* registers and leave \xmm_suffix empty. May clobber V4. +-.macro _aes_crypt enc, xmm_suffix, tweak, data ++// length VL, use V* registers and leave \xmm_suffix empty. Clobbers \tmp. ++.macro _aes_crypt enc, xmm_suffix, tweak, data, tmp + _xor3 KEY0\xmm_suffix, \tweak, \data + cmp $24, KEYLEN + jl .Laes128\@ + je .Laes192\@ +- _vaes_1x \enc, 0, 1, \xmm_suffix, \data +- _vaes_1x \enc, 0, 2, \xmm_suffix, \data ++ _vaes_1x \enc, 1, \xmm_suffix, \data, tmp=\tmp ++ _vaes_1x \enc, 2, \xmm_suffix, \data, tmp=\tmp + .Laes192\@: +- _vaes_1x \enc, 0, 3, \xmm_suffix, \data +- _vaes_1x \enc, 0, 4, \xmm_suffix, \data ++ _vaes_1x \enc, 3, \xmm_suffix, \data, tmp=\tmp ++ _vaes_1x \enc, 4, \xmm_suffix, \data, tmp=\tmp + .Laes128\@: +- _vaes_1x \enc, 0, 5, \xmm_suffix, \data +- _vaes_1x \enc, 0, 6, \xmm_suffix, \data +- _vaes_1x \enc, 0, 7, \xmm_suffix, \data +- _vaes_1x \enc, 0, 8, \xmm_suffix, \data +- _vaes_1x \enc, 0, 9, \xmm_suffix, \data +- _vaes_1x \enc, 0, 10, \xmm_suffix, \data +- _vaes_1x \enc, 0, 11, \xmm_suffix, \data +- _vaes_1x \enc, 0, 12, \xmm_suffix, \data +- _vaes_1x \enc, 0, 13, \xmm_suffix, \data +- _vaes_1x \enc, 1, 14, \xmm_suffix, \data +- _vpxor \tweak, \data, \data ++.irp i, 5,6,7,8,9,10,11,12,13 ++ _vaes_1x \enc, \i, \xmm_suffix, \data, tmp=\tmp ++.endr ++.if USE_AVX10 ++ vpxord KEY14\xmm_suffix, \tweak, \tmp ++.else ++.ifnb \xmm_suffix ++ vpxor 7*16(KEY), \tweak, \tmp ++.else ++ _vbroadcast128 7*16(KEY), \tmp ++ vpxor \tweak, \tmp, \tmp ++.endif ++.endif ++ _vaeslast \enc, \tmp, \data + .endm + + .macro _aes_xts_crypt enc +@@ -581,7 +571,7 @@ + // Compute the first set of tweaks TWEAK[0-3]. + _compute_first_set_of_tweaks + +- sub $4*VL, LEN ++ add $-4*VL, LEN // shorter than 'sub 4*VL' when VL=32 + jl .Lhandle_remainder\@ + + .Lmain_loop\@: +@@ -589,10 +579,10 @@ + + // XOR each source block with its tweak and the zero-th round key. + .if USE_AVX10 +- vmovdqu8 0*VL(SRC), V0 +- vmovdqu8 1*VL(SRC), V1 +- vmovdqu8 2*VL(SRC), V2 +- vmovdqu8 3*VL(SRC), V3 ++ _vmovdqu 0*VL(SRC), V0 ++ _vmovdqu 1*VL(SRC), V1 ++ _vmovdqu 2*VL(SRC), V2 ++ _vmovdqu 3*VL(SRC), V3 + vpternlogd $0x96, TWEAK0, KEY0, V0 + vpternlogd $0x96, TWEAK1, KEY0, V1 + vpternlogd $0x96, TWEAK2, KEY0, V2 +@@ -612,28 +602,43 @@ + je .Laes192\@ + // Do all the AES rounds on the data blocks, interleaved with + // the computation of the next set of tweaks. 
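
Stepping back from the scheduling details: per 16-byte block, everything here computes the textbook XTS operation C = E_K(P xor T) xor T, after which the tweak T is multiplied by x in GF(2^128). A plain C reference for one block (aes_encrypt_block is an assumed AES primitive, not a kernel symbol):

#include <stdint.h>

void aes_encrypt_block(uint8_t out[16], const uint8_t in[16]); /* assumed */

static void xts_one_block(uint8_t out[16], const uint8_t in[16],
			  uint8_t tweak[16])
{
	uint8_t buf[16];
	uint8_t carry = 0;

	for (int i = 0; i < 16; i++)
		buf[i] = in[i] ^ tweak[i];	/* pre-whitening with T */
	aes_encrypt_block(buf, buf);
	for (int i = 0; i < 16; i++)
		out[i] = buf[i] ^ tweak[i];	/* post-whitening with T */

	/* T <- T * x mod x^128 + x^7 + x^2 + x + 1, what _next_tweak does */
	for (int i = 0; i < 16; i++) {
		uint8_t msb = tweak[i] >> 7;

		tweak[i] = (uint8_t)((tweak[i] << 1) | carry);
		carry = msb;
	}
	if (carry)
		tweak[0] ^= 0x87;	/* reduce by the GF polynomial */
}

The interleaving in the rounds that follow simply schedules four such blocks' AES rounds against the next tweak computation so neither unit sits idle.
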
+- _vaes_4x \enc, 0, 1 +- _vaes_4x \enc, 0, 2 ++ _vaes_4x \enc, 1 ++ _vaes_4x \enc, 2 + .Laes192\@: +- _vaes_4x \enc, 0, 3 +- _vaes_4x \enc, 0, 4 ++ _vaes_4x \enc, 3 ++ _vaes_4x \enc, 4 + .Laes128\@: +- _vaes_4x \enc, 0, 5 +- _vaes_4x \enc, 0, 6 +- _vaes_4x \enc, 0, 7 +- _vaes_4x \enc, 0, 8 +- _vaes_4x \enc, 0, 9 +- _vaes_4x \enc, 0, 10 +- _vaes_4x \enc, 0, 11 +- _vaes_4x \enc, 0, 12 +- _vaes_4x \enc, 0, 13 +- _vaes_4x \enc, 1, 14 +- +- // XOR in the tweaks again. +- _vpxor TWEAK0, V0, V0 +- _vpxor TWEAK1, V1, V1 +- _vpxor TWEAK2, V2, V2 +- _vpxor TWEAK3, V3, V3 ++.irp i, 5,6,7,8,9,10,11,12,13 ++ _vaes_4x \enc, \i ++.endr ++ // Do the last AES round, then XOR the results with the tweaks again. ++ // Reduce latency by doing the XOR before the vaesenclast, utilizing the ++ // property vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a) ++ // (and likewise for vaesdeclast). ++.if USE_AVX10 ++ _tweak_step 18 ++ _tweak_step 19 ++ vpxord TWEAK0, KEY14, V4 ++ vpxord TWEAK1, KEY14, V5 ++ _vaeslast \enc, V4, V0 ++ _vaeslast \enc, V5, V1 ++ vpxord TWEAK2, KEY14, V4 ++ vpxord TWEAK3, KEY14, V5 ++ _vaeslast \enc, V4, V2 ++ _vaeslast \enc, V5, V3 ++.else ++ _vbroadcast128 7*16(KEY), V4 ++ _tweak_step 18 // uses V5 ++ _tweak_step 19 // uses V5 ++ vpxor TWEAK0, V4, V5 ++ _vaeslast \enc, V5, V0 ++ vpxor TWEAK1, V4, V5 ++ _vaeslast \enc, V5, V1 ++ vpxor TWEAK2, V4, V5 ++ vpxor TWEAK3, V4, V4 ++ _vaeslast \enc, V5, V2 ++ _vaeslast \enc, V4, V3 ++.endif + + // Store the destination blocks. + _vmovdqu V0, 0*VL(DST) +@@ -644,9 +649,9 @@ + // Finish computing the next set of tweaks. + _tweak_step 1000 + +- add $4*VL, SRC +- add $4*VL, DST +- sub $4*VL, LEN ++ sub $-4*VL, SRC // shorter than 'add 4*VL' when VL=32 ++ sub $-4*VL, DST ++ add $-4*VL, LEN + jge .Lmain_loop\@ + + // Check for the uncommon case where the data length isn't a multiple of +@@ -670,7 +675,7 @@ + jl .Lvec_at_a_time_done\@ + .Lvec_at_a_time\@: + _vmovdqu (SRC), V0 +- _aes_crypt \enc, , TWEAK0, V0 ++ _aes_crypt \enc, , TWEAK0, V0, tmp=V1 + _vmovdqu V0, (DST) + _next_tweakvec TWEAK0, V0, V1, TWEAK0 + add $VL, SRC +@@ -687,7 +692,7 @@ + jl .Lblock_at_a_time_done\@ + .Lblock_at_a_time\@: + vmovdqu (SRC), %xmm0 +- _aes_crypt \enc, _XMM, TWEAK0_XMM, %xmm0 ++ _aes_crypt \enc, _XMM, TWEAK0_XMM, %xmm0, tmp=%xmm1 + vmovdqu %xmm0, (DST) + _next_tweak TWEAK0_XMM, %xmm0, TWEAK0_XMM + add $16, SRC +@@ -715,7 +720,7 @@ + // Do it now by advancing the tweak and decrypting the last full block. + _next_tweak TWEAK0_XMM, %xmm0, TWEAK1_XMM + vmovdqu (SRC), %xmm0 +- _aes_crypt \enc, _XMM, TWEAK1_XMM, %xmm0 ++ _aes_crypt \enc, _XMM, TWEAK1_XMM, %xmm0, tmp=%xmm1 + .endif + + .if USE_AVX10 +@@ -758,47 +763,49 @@ + vpblendvb %xmm3, %xmm0, %xmm1, %xmm0 + .endif + // En/decrypt again and store the last full block. +- _aes_crypt \enc, _XMM, TWEAK0_XMM, %xmm0 ++ _aes_crypt \enc, _XMM, TWEAK0_XMM, %xmm0, tmp=%xmm1 + vmovdqu %xmm0, (DST) + jmp .Ldone\@ + .endm + + // void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, + // u8 iv[AES_BLOCK_SIZE]); ++// ++// Encrypt |iv| using the AES key |tweak_key| to get the first tweak. Assumes ++// that the CPU supports AES-NI and AVX, but not necessarily VAES or AVX10. 
+ SYM_TYPED_FUNC_START(aes_xts_encrypt_iv) +- vmovdqu (%rsi), %xmm0 +- vpxor (%rdi), %xmm0, %xmm0 +- movl 480(%rdi), %eax // AES key length +- lea -16(%rdi, %rax, 4), %rdi +- cmp $24, %eax ++ .set TWEAK_KEY, %rdi ++ .set IV, %rsi ++ .set KEYLEN, %eax ++ .set KEYLEN64, %rax + -+.Lcrc_3lanes_last_qword: -+ crc32q (bufp), crc_init_q -+ crc32q (bufp,chunk_bytes_q), crc1 -+# SKIP crc32q (bufp,chunk_bytes_q,2), crc2 ; Don't do this one yet ++ vmovdqu (IV), %xmm0 ++ vpxor (TWEAK_KEY), %xmm0, %xmm0 ++ movl 480(TWEAK_KEY), KEYLEN ++ lea -16(TWEAK_KEY, KEYLEN64, 4), TWEAK_KEY ++ cmp $24, KEYLEN + jl .Lencrypt_iv_aes128 + je .Lencrypt_iv_aes192 +- vaesenc -6*16(%rdi), %xmm0, %xmm0 +- vaesenc -5*16(%rdi), %xmm0, %xmm0 ++ vaesenc -6*16(TWEAK_KEY), %xmm0, %xmm0 ++ vaesenc -5*16(TWEAK_KEY), %xmm0, %xmm0 + .Lencrypt_iv_aes192: +- vaesenc -4*16(%rdi), %xmm0, %xmm0 +- vaesenc -3*16(%rdi), %xmm0, %xmm0 ++ vaesenc -4*16(TWEAK_KEY), %xmm0, %xmm0 ++ vaesenc -3*16(TWEAK_KEY), %xmm0, %xmm0 + .Lencrypt_iv_aes128: +- vaesenc -2*16(%rdi), %xmm0, %xmm0 +- vaesenc -1*16(%rdi), %xmm0, %xmm0 +- vaesenc 0*16(%rdi), %xmm0, %xmm0 +- vaesenc 1*16(%rdi), %xmm0, %xmm0 +- vaesenc 2*16(%rdi), %xmm0, %xmm0 +- vaesenc 3*16(%rdi), %xmm0, %xmm0 +- vaesenc 4*16(%rdi), %xmm0, %xmm0 +- vaesenc 5*16(%rdi), %xmm0, %xmm0 +- vaesenc 6*16(%rdi), %xmm0, %xmm0 +- vaesenclast 7*16(%rdi), %xmm0, %xmm0 +- vmovdqu %xmm0, (%rsi) ++.irp i, -2,-1,0,1,2,3,4,5,6 ++ vaesenc \i*16(TWEAK_KEY), %xmm0, %xmm0 ++.endr ++ vaesenclast 7*16(TWEAK_KEY), %xmm0, %xmm0 ++ vmovdqu %xmm0, (IV) + RET + SYM_FUNC_END(aes_xts_encrypt_iv) - ################################################################ - ## 4) Combine three results: - ################################################################ + // Below are the actual AES-XTS encryption and decryption functions, + // instantiated from the above macro. They all have the following prototype: + // +-// void (*xts_asm_func)(const struct crypto_aes_ctx *key, +-// const u8 *src, u8 *dst, unsigned int len, +-// u8 tweak[AES_BLOCK_SIZE]); ++// void (*xts_crypt_func)(const struct crypto_aes_ctx *key, ++// const u8 *src, u8 *dst, int len, ++// u8 tweak[AES_BLOCK_SIZE]); + // + // |key| is the data key. |tweak| contains the next tweak; the encryption of + // the original IV with the tweak key was already done. This function supports +diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c +index fbf43482e1f5..11e95fc62636 100644 +--- a/arch/x86/crypto/aesni-intel_glue.c ++++ b/arch/x86/crypto/aesni-intel_glue.c +@@ -505,7 +505,7 @@ static int xts_setkey_aesni(struct crypto_skcipher *tfm, const u8 *key, + typedef void (*xts_encrypt_iv_func)(const struct crypto_aes_ctx *tweak_key, + u8 iv[AES_BLOCK_SIZE]); + typedef void (*xts_crypt_func)(const struct crypto_aes_ctx *key, +- const u8 *src, u8 *dst, unsigned int len, ++ const u8 *src, u8 *dst, int len, + u8 tweak[AES_BLOCK_SIZE]); -- lea (K_table-8)(%rip), %bufp # first entry is for idx 1 -- shlq $3, %rax # rax *= 8 -- pmovzxdq (%bufp,%rax), %xmm0 # 2 consts: K1:K2 -- leal (%eax,%eax,2), %eax # rax *= 3 (total *24) -- subq %rax, tmp # tmp -= rax*24 -+ lea (K_table-8)(%rip), %rax # first entry is for idx 1 -+ pmovzxdq (%rax,chunk_bytes_q), %xmm0 # 2 consts: K1:K2 -+ lea (chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3 -+ sub %eax, len # len -= chunk_bytes * 3 + /* This handles cases where the source and/or destination span pages. 
*/ +@@ -624,14 +624,14 @@ static void aesni_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, + } -- movq crc_init, %xmm1 # CRC for block 1 -+ movq crc_init_q, %xmm1 # CRC for block 1 - pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2 + static void aesni_xts_encrypt(const struct crypto_aes_ctx *key, +- const u8 *src, u8 *dst, unsigned int len, ++ const u8 *src, u8 *dst, int len, + u8 tweak[AES_BLOCK_SIZE]) + { + aesni_xts_enc(key, dst, src, len, tweak); + } - movq crc1, %xmm2 # CRC for block 2 -@@ -230,103 +175,54 @@ LABEL crc_ %i - - pxor %xmm2,%xmm1 - movq %xmm1, %rax -- xor -i*8(block_2), %rax -- mov crc2, crc_init -- crc32 %rax, crc_init -+ xor (bufp,chunk_bytes_q,2), %rax -+ mov crc2, crc_init_q -+ crc32 %rax, crc_init_q -+ lea 8(bufp,chunk_bytes_q,2), bufp - - ################################################################ -- ## 5) Check for end: -+ ## 5) If more blocks remain, goto (2): - ################################################################ - --LABEL crc_ 0 -- ENDBR -- mov tmp, len -- cmp $128*24, tmp -- jae .Lfull_block -- cmp $24, tmp -- jae .Lcontinue_block -- --.Lless_than_24: -- shl $32-4, len_dw # less_than_16 expects length -- # in upper 4 bits of len_dw -- jnc .Lless_than_16 -- crc32q (bufptmp), crc_init -- crc32q 8(bufptmp), crc_init -- jz .Ldo_return -- add $16, bufptmp -- # len is less than 8 if we got here -- # less_than_8 expects length in upper 3 bits of len_dw -- # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30] -- shl $2, len_dw -- jmp .Lless_than_8_post_shl1 -+ cmp $128*24, len -+ jae .Lfull_block -+ cmp $SMALL_SIZE, len -+ jae .Lpartial_block - - ####################################################################### -- ## 6) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full) -+ ## 6) Process any remainder without interleaving: - ####################################################################### - .Lsmall: -- shl $32-8, len_dw # Prepare len_dw for less_than_256 -- j=256 --.rept 5 # j = {256, 128, 64, 32, 16} --.altmacro --LABEL less_than_ %j # less_than_j: Length should be in -- # upper lg(j) bits of len_dw -- j=(j/2) -- shl $1, len_dw # Get next MSB -- JNC_LESS_THAN %j --.noaltmacro -- i=0 --.rept (j/8) -- crc32q i(bufptmp), crc_init # Compute crc32 of 8-byte data -- i=i+8 --.endr -- jz .Ldo_return # Return if remaining length is zero -- add $j, bufptmp # Advance buf --.endr -- --.Lless_than_8: # Length should be stored in -- # upper 3 bits of len_dw -- shl $1, len_dw --.Lless_than_8_post_shl1: -- jnc .Lless_than_4 -- crc32l (bufptmp), crc_init_dw # CRC of 4 bytes -- jz .Ldo_return # return if remaining data is zero -- add $4, bufptmp --.Lless_than_4: # Length should be stored in -- # upper 2 bits of len_dw -- shl $1, len_dw -- jnc .Lless_than_2 -- crc32w (bufptmp), crc_init_dw # CRC of 2 bytes -- jz .Ldo_return # return if remaining data is zero -- add $2, bufptmp --.Lless_than_2: # Length should be stored in the MSB -- # of len_dw -- shl $1, len_dw -- jnc .Lless_than_1 -- crc32b (bufptmp), crc_init_dw # CRC of 1 byte --.Lless_than_1: # Length should be zero --.Ldo_return: -- movq crc_init, %rax -- popq %rsi -- popq %rdi -- popq %rbx -+ test len, len -+ jz .Ldone -+ mov len, %eax -+ shr $3, %eax -+ jz .Ldo_dword -+.Ldo_qwords: -+ crc32q (bufp), crc_init_q -+ add $8, bufp -+ dec %eax -+ jnz .Ldo_qwords -+.Ldo_dword: -+ test $4, len -+ jz .Ldo_word -+ crc32l (bufp), crc_init -+ add $4, bufp -+.Ldo_word: -+ test $2, len -+ jz .Ldo_byte -+ crc32w (bufp), crc_init -+ add $2, bufp -+.Ldo_byte: -+ test $1, len -+ 
jz .Ldone -+ crc32b (bufp), crc_init -+.Ldone: -+ mov crc_init, %eax - RET - SYM_FUNC_END(crc_pcl) - - .section .rodata, "a", @progbits -- ################################################################ -- ## jump table Table is 129 entries x 2 bytes each -- ################################################################ --.align 4 --jump_table: -- i=0 --.rept 129 --.altmacro --JMPTBL_ENTRY %i --.noaltmacro -- i=i+1 --.endr -- -- - ################################################################ - ## PCLMULQDQ tables - ## Table is 128 entries x 2 words (8 bytes) each + static void aesni_xts_decrypt(const struct crypto_aes_ctx *key, +- const u8 *src, u8 *dst, unsigned int len, ++ const u8 *src, u8 *dst, int len, + u8 tweak[AES_BLOCK_SIZE]) + { + aesni_xts_dec(key, dst, src, len, tweak); +@@ -790,10 +790,10 @@ asmlinkage void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, + \ + asmlinkage void \ + aes_xts_encrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src, \ +- u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]); \ ++ u8 *dst, int len, u8 tweak[AES_BLOCK_SIZE]); \ + asmlinkage void \ + aes_xts_decrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src, \ +- u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]); \ ++ u8 *dst, int len, u8 tweak[AES_BLOCK_SIZE]); \ + \ + static int xts_encrypt_##suffix(struct skcipher_request *req) \ + { \ -- 2.48.0.rc1 -From 2682c67e0617fac2dc28bbe1bb2514b8206d9388 Mon Sep 17 00:00:00 2001 +From 2f514dfe8b006e7fa976b6265bef4b8efb81ec11 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 9 Jan 2025 16:37:27 +0100 -Subject: [PATCH 08/13] fixes +Date: Mon, 20 Jan 2025 13:22:15 +0100 +Subject: [PATCH 06/12] fixes Signed-off-by: Peter Jung --- - arch/Kconfig | 4 +- - arch/x86/include/asm/futex.h | 8 ++- - arch/x86/mm/tlb.c | 2 +- - drivers/bluetooth/btmtk.c | 4 +- - drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + - drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 50 +++++++++++++++-- - drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 ++- - drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 17 ++++++ - drivers/gpu/drm/drm_edid.c | 47 ++++++++++++++-- - drivers/hid/hid-ids.h | 1 + - fs/ntfs3/bitmap.c | 62 ++++++---------------- - fs/ntfs3/file.c | 32 ++++++----- - fs/ntfs3/frecord.c | 1 - - fs/ntfs3/fsntfs.c | 2 +- - fs/ntfs3/record.c | 16 ++++-- - fs/ntfs3/run.c | 6 +-- - kernel/futex/core.c | 22 -------- - kernel/futex/futex.h | 59 +++++++++++++++++++- - kernel/kprobes.c | 23 ++++---- - kernel/sched/ext.c | 7 +-- - kernel/workqueue.c | 22 ++++++-- - scripts/package/PKGBUILD | 5 ++ - sound/pci/hda/patch_realtek.c | 2 + - 23 files changed, 277 insertions(+), 122 deletions(-) + arch/Kconfig | 4 +- + .../link/protocols/link_edp_panel_control.c | 3 +- + drivers/gpu/drm/drm_edid.c | 47 +++++++++++++++++-- + drivers/hid/hid-asus.c | 26 ++++++++++ + drivers/hid/hid-ids.h | 1 + + include/linux/platform_data/x86/asus-wmi.h | 5 ++ + kernel/fork.c | 9 ++-- + kernel/kprobes.c | 23 +++++---- + kernel/sched/ext.c | 4 +- + scripts/package/PKGBUILD | 5 ++ + sound/pci/hda/patch_realtek.c | 4 +- + 11 files changed, 105 insertions(+), 26 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig -index 00551f340dbe..833b2344ce79 100644 +index 6682b2a53e34..fe54298ae05c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig -@@ -1128,7 +1128,7 @@ config ARCH_MMAP_RND_BITS +@@ -1137,7 +1137,7 @@ config ARCH_MMAP_RND_BITS int "Number of bits to use for ASLR of mmap base address" if EXPERT range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX default ARCH_MMAP_RND_BITS_DEFAULT if 
ARCH_MMAP_RND_BITS_DEFAULT @@ -16319,7 +14719,7 @@ index 00551f340dbe..833b2344ce79 100644 depends on HAVE_ARCH_MMAP_RND_BITS help This value can be used to select the number of bits to use to -@@ -1162,7 +1162,7 @@ config ARCH_MMAP_RND_COMPAT_BITS +@@ -1171,7 +1171,7 @@ config ARCH_MMAP_RND_COMPAT_BITS int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT @@ -16328,231 +14728,20 @@ index 00551f340dbe..833b2344ce79 100644 depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS help This value can be used to select the number of bits to use to -diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h -index 99d345b686fa..6e2458088800 100644 ---- a/arch/x86/include/asm/futex.h -+++ b/arch/x86/include/asm/futex.h -@@ -48,7 +48,9 @@ do { \ - static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, - u32 __user *uaddr) - { -- if (!user_access_begin(uaddr, sizeof(u32))) -+ if (can_do_masked_user_access()) -+ uaddr = masked_user_access_begin(uaddr); -+ else if (!user_access_begin(uaddr, sizeof(u32))) - return -EFAULT; - - switch (op) { -@@ -84,7 +86,9 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, - { - int ret = 0; - -- if (!user_access_begin(uaddr, sizeof(u32))) -+ if (can_do_masked_user_access()) -+ uaddr = masked_user_access_begin(uaddr); -+ else if (!user_access_begin(uaddr, sizeof(u32))) - return -EFAULT; - asm volatile("\n" - "1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" -diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index 0ea9d1c077f6..0080175153ef 100644 ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -823,7 +823,7 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, - * mm_cpumask. The TLB shootdown code can figure out from - * cpu_tlbstate_shared.is_lazy whether or not to send an IPI. - */ -- if (WARN_ON_ONCE(prev != &init_mm && -+ if (IS_ENABLED(CONFIG_DEBUG_VM) && WARN_ON_ONCE(prev != &init_mm && - !cpumask_test_cpu(cpu, mm_cpumask(next)))) - cpumask_set_cpu(cpu, mm_cpumask(next)); - -diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c -index 85e99641eaae..c1b6bcc6f7dd 100644 ---- a/drivers/bluetooth/btmtk.c -+++ b/drivers/bluetooth/btmtk.c -@@ -1329,7 +1329,6 @@ int btmtk_usb_setup(struct hci_dev *hdev) - fwname = FIRMWARE_MT7668; - break; - case 0x7922: -- case 0x7961: - case 0x7925: - /* Reset the device to ensure it's in the initial state before - * downloading the firmware to ensure. 
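
The btmtk hunk that follows restructures the switch so that 0x7922 and 0x7925 perform the reset and then share the firmware-naming code with 0x7961 through an explicit fallthrough, narrowing the reset to the first two chips. A self-contained sketch of the reshaped control flow (printf stands in for the driver calls, and the real reset is guarded by a firmware-loaded flag test):

#include <stdio.h>

#ifndef fallthrough
#define fallthrough __attribute__((__fallthrough__)) /* as in linux/compiler_attributes.h */
#endif

static void setup_chip(unsigned int dev_id)
{
	switch (dev_id) {
	case 0x7922:
	case 0x7925:
		printf("reset chip %#x before download\n", dev_id);
		fallthrough;
	case 0x7961:
		printf("select firmware filename for %#x\n", dev_id);
		break;
	default:
		break;
	}
}
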
-@@ -1337,7 +1336,8 @@ int btmtk_usb_setup(struct hci_dev *hdev) - - if (!test_bit(BTMTK_FIRMWARE_LOADED, &btmtk_data->flags)) - btmtk_usb_subsys_reset(hdev, dev_id); -- -+ fallthrough; -+ case 0x7961: - btmtk_fw_get_filename(fw_bin_name, sizeof(fw_bin_name), dev_id, - fw_version, fw_flavor); - -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h -index 7617963901fa..03933b2c5ebc 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h -@@ -855,6 +855,7 @@ struct amdgpu_device { - bool need_swiotlb; - bool accel_working; - struct notifier_block acpi_nb; -+ struct notifier_block pm_nb; - struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; - struct debugfs_blob_wrapper debugfs_vbios_blob; - struct debugfs_blob_wrapper debugfs_discovery_blob; -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c -index 45e28726e148..d77772e2dabf 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c -@@ -145,6 +145,8 @@ const char *amdgpu_asic_name[] = { - }; - - static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev); -+static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, -+ void *data); - - /** - * DOC: pcie_replay_count -@@ -4511,6 +4513,11 @@ int amdgpu_device_init(struct amdgpu_device *adev, - - amdgpu_device_check_iommu_direct_map(adev); - -+ adev->pm_nb.notifier_call = amdgpu_device_pm_notifier; -+ r = register_pm_notifier(&adev->pm_nb); -+ if (r) -+ goto failed; -+ - return 0; - - release_ras_con: -@@ -4575,6 +4582,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) - drain_workqueue(adev->mman.bdev.wq); - adev->shutdown = true; - -+ unregister_pm_notifier(&adev->pm_nb); -+ - /* make sure IB test finished before entering exclusive mode - * to avoid preemption on IB test - */ -@@ -4692,8 +4701,8 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) - { - int ret; - -- /* No need to evict vram on APUs for suspend to ram or s2idle */ -- if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU)) -+ /* No need to evict vram on APUs unless going to S4 */ -+ if (!adev->in_s4 && (adev->flags & AMD_IS_APU)) - return 0; - - ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); -@@ -4705,6 +4714,41 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) - /* - * Suspend & resume. - */ -+/** -+ * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events -+ * @nb: notifier block -+ * @mode: suspend mode -+ * @data: data -+ * -+ * This function is called when the system is about to suspend or hibernate. -+ * It is used to evict resources from the device before the system goes to -+ * sleep while there is still access to swap. -+ */ -+static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, -+ void *data) -+{ -+ struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb); -+ int r; -+ -+ switch (mode) { -+ case PM_HIBERNATION_PREPARE: -+ adev->in_s4 = true; -+ fallthrough; -+ case PM_SUSPEND_PREPARE: -+ r = amdgpu_device_evict_resources(adev); -+ /* -+ * This is considered non-fatal at this time because -+ * amdgpu_device_prepare() will also fatally evict resources. 
-+ * See https://gitlab.freedesktop.org/drm/amd/-/issues/3781 -+ */ -+ if (r) -+ drm_warn(adev_to_drm(adev), "Failed to evict resources, freeze active processes if problems occur: %d\n", r); -+ break; -+ } -+ -+ return NOTIFY_DONE; -+} -+ - /** - * amdgpu_device_prepare - prepare for device suspend - * -@@ -4744,7 +4788,7 @@ int amdgpu_device_prepare(struct drm_device *dev) - return 0; - - unprepare: -- adev->in_s0ix = adev->in_s3 = false; -+ adev->in_s0ix = adev->in_s3 = adev->in_s4 = false; - - return r; - } -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -index 852e6f315576..94a9a9266f8e 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -@@ -2639,7 +2639,6 @@ static int amdgpu_pmops_freeze(struct device *dev) - struct amdgpu_device *adev = drm_to_adev(drm_dev); - int r; - -- adev->in_s4 = true; - r = amdgpu_device_suspend(drm_dev, true); - adev->in_s4 = false; - if (r) -@@ -3078,6 +3077,11 @@ static int __init amdgpu_init(void) - /* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */ - amdgpu_amdkfd_init(); - -+ if (amdgpu_pp_feature_mask & PP_OVERDRIVE_MASK) { -+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); -+ pr_crit("Overdrive is enabled, please disable it before reporting any bugs.\n"); -+ } -+ - /* let modprobe override vga console setting */ - return pci_register_driver(&amdgpu_kms_pci_driver); - -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c -index 312dfa84f29f..a8abc3091801 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c -@@ -350,10 +350,27 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en) - { - uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode; - uint32_t flags = pdd->process->dbg_flags; -+ struct amdgpu_device *adev = pdd->dev->adev; -+ int r; - - if (!kfd_dbg_is_per_vmid_supported(pdd->dev)) - return 0; - -+ if (!pdd->proc_ctx_cpu_ptr) { -+ r = amdgpu_amdkfd_alloc_gtt_mem(adev, -+ AMDGPU_MES_PROC_CTX_SIZE, -+ &pdd->proc_ctx_bo, -+ &pdd->proc_ctx_gpu_addr, -+ &pdd->proc_ctx_cpu_ptr, -+ false); -+ if (r) { -+ dev_err(adev->dev, -+ "failed to allocate process context bo\n"); -+ return r; -+ } -+ memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); -+ } -+ - return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl, - pdd->watch_points, flags, sq_trap_en); - } +diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +index e0e3bb865359..ba98d56a0fe4 100644 +--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c ++++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +@@ -187,7 +187,8 @@ bool edp_set_backlight_level_nits(struct dc_link *link, + (uint8_t *)(target_luminance), + sizeof(struct target_luminance_value)) != DC_OK) + return false; +- } else if (link->backlight_control_type == BACKLIGHT_CONTROL_AMD_AUX) { ++// } else if (link->backlight_control_type == BACKLIGHT_CONTROL_AMD_AUX) { ++ } else { + struct dpcd_source_backlight_set dpcd_backlight_set; + *(uint32_t *)&dpcd_backlight_set.backlight_level_millinits = backlight_millinits; + *(uint16_t *)&dpcd_backlight_set.backlight_transition_time_ms = (uint16_t)transition_time_in_ms; diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 
855beafb76ff..ad78059ee954 100644 --- a/drivers/gpu/drm/drm_edid.c @@ -16646,8 +14835,52 @@ index 855beafb76ff..ad78059ee954 100644 if (!newmode) continue; +diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c +index 506c6f377e7d..46e3e42f9eb5 100644 +--- a/drivers/hid/hid-asus.c ++++ b/drivers/hid/hid-asus.c +@@ -432,6 +432,26 @@ static int asus_kbd_get_functions(struct hid_device *hdev, + return ret; + } + ++static int asus_kbd_disable_oobe(struct hid_device *hdev) ++{ ++ const u8 init[][6] = { ++ { FEATURE_KBD_REPORT_ID, 0x05, 0x20, 0x31, 0x00, 0x08 }, ++ { FEATURE_KBD_REPORT_ID, 0xBA, 0xC5, 0xC4 }, ++ { FEATURE_KBD_REPORT_ID, 0xD0, 0x8F, 0x01 }, ++ { FEATURE_KBD_REPORT_ID, 0xD0, 0x85, 0xFF } ++ }; ++ int ret; ++ ++ for (size_t i = 0; i < ARRAY_SIZE(init); i++) { ++ ret = asus_kbd_set_report(hdev, init[i], sizeof(init[i])); ++ if (ret < 0) ++ return ret; ++ } ++ ++ hid_info(hdev, "Disabled OOBE for keyboard\n"); ++ return 0; ++} ++ + static void asus_schedule_work(struct asus_kbd_leds *led) + { + unsigned long flags; +@@ -534,6 +554,12 @@ static int asus_kbd_register_leds(struct hid_device *hdev) + ret = asus_kbd_init(hdev, FEATURE_KBD_LED_REPORT_ID2); + if (ret < 0) + return ret; ++ ++ if (dmi_match(DMI_PRODUCT_FAMILY, "ProArt P16")) { ++ ret = asus_kbd_disable_oobe(hdev); ++ if (ret < 0) ++ return ret; ++ } + } else { + /* Initialize keyboard */ + ret = asus_kbd_init(hdev, FEATURE_KBD_REPORT_ID); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h -index 0f23be98c56e..1b92729bd378 100644 +index 1f47fda809b9..6c2df0d37b3b 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -210,6 +210,7 @@ @@ -16658,467 +14891,49 @@ index 0f23be98c56e..1b92729bd378 100644 #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY 0x1abe #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X 0x1b4c #define USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD 0x196b -diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c -index cf4fe21a5039..04107b950717 100644 ---- a/fs/ntfs3/bitmap.c -+++ b/fs/ntfs3/bitmap.c -@@ -710,20 +710,17 @@ int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits) - { - int err = 0; - struct super_block *sb = wnd->sb; -- size_t bits0 = bits; - u32 wbits = 8 * sb->s_blocksize; - size_t iw = bit >> (sb->s_blocksize_bits + 3); - u32 wbit = bit & (wbits - 1); - struct buffer_head *bh; -+ u32 op; +diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h +index 365e119bebaa..783e2a336861 100644 +--- a/include/linux/platform_data/x86/asus-wmi.h ++++ b/include/linux/platform_data/x86/asus-wmi.h +@@ -184,6 +184,11 @@ static const struct dmi_system_id asus_use_hid_led_dmi_ids[] = { + DMI_MATCH(DMI_PRODUCT_FAMILY, "ROG Flow"), + }, + }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_PRODUCT_FAMILY, "ProArt P16"), ++ }, ++ }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GA403U"), +diff --git a/kernel/fork.c b/kernel/fork.c +index 0cb5431b4d7e..e919c8c3a121 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -1518,12 +1518,13 @@ struct file *get_task_exe_file(struct task_struct *task) + struct file *exe_file = NULL; + struct mm_struct *mm; -- while (iw < wnd->nwnd && bits) { -- u32 tail, op; -- -+ for (; iw < wnd->nwnd && bits; iw++, bit += op, bits -= op, wbit = 0) { - if (iw + 1 == wnd->nwnd) - wbits = wnd->bits_last; - -- tail = wbits - wbit; -- op = min_t(u32, tail, bits); -+ op = min_t(u32, wbits - wbit, bits); - - bh = wnd_map(wnd, iw); - if (IS_ERR(bh)) { -@@ -736,20 +733,15 @@ int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits) 
- ntfs_bitmap_clear_le(bh->b_data, wbit, op); - - wnd->free_bits[iw] += op; -+ wnd->total_zeroes += op; - - set_buffer_uptodate(bh); - mark_buffer_dirty(bh); - unlock_buffer(bh); - put_bh(bh); - -- wnd->total_zeroes += op; -- bits -= op; -- wbit = 0; -- iw += 1; -+ wnd_add_free_ext(wnd, bit, op, false); - } -- -- wnd_add_free_ext(wnd, bit, bits0, false); -- - return err; - } - -@@ -760,20 +752,17 @@ int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) - { - int err = 0; - struct super_block *sb = wnd->sb; -- size_t bits0 = bits; - size_t iw = bit >> (sb->s_blocksize_bits + 3); - u32 wbits = 8 * sb->s_blocksize; - u32 wbit = bit & (wbits - 1); - struct buffer_head *bh; -+ u32 op; - -- while (iw < wnd->nwnd && bits) { -- u32 tail, op; -- -+ for (; iw < wnd->nwnd && bits; iw++, bit += op, bits -= op, wbit = 0) { - if (unlikely(iw + 1 == wnd->nwnd)) - wbits = wnd->bits_last; - -- tail = wbits - wbit; -- op = min_t(u32, tail, bits); -+ op = min_t(u32, wbits - wbit, bits); - - bh = wnd_map(wnd, iw); - if (IS_ERR(bh)) { -@@ -785,21 +774,16 @@ int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) - - ntfs_bitmap_set_le(bh->b_data, wbit, op); - wnd->free_bits[iw] -= op; -+ wnd->total_zeroes -= op; - - set_buffer_uptodate(bh); - mark_buffer_dirty(bh); - unlock_buffer(bh); - put_bh(bh); - -- wnd->total_zeroes -= op; -- bits -= op; -- wbit = 0; -- iw += 1; -+ if (!RB_EMPTY_ROOT(&wnd->start_tree)) -+ wnd_remove_free_ext(wnd, bit, op); - } -- -- if (!RB_EMPTY_ROOT(&wnd->start_tree)) -- wnd_remove_free_ext(wnd, bit, bits0); -- - return err; - } - -@@ -852,15 +836,13 @@ static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits) - size_t iw = bit >> (sb->s_blocksize_bits + 3); - u32 wbits = 8 * sb->s_blocksize; - u32 wbit = bit & (wbits - 1); -+ u32 op; - -- while (iw < wnd->nwnd && bits) { -- u32 tail, op; -- -+ for (; iw < wnd->nwnd && bits; iw++, bits -= op, wbit = 0) { - if (unlikely(iw + 1 == wnd->nwnd)) - wbits = wnd->bits_last; - -- tail = wbits - wbit; -- op = min_t(u32, tail, bits); -+ op = min_t(u32, wbits - wbit, bits); - - if (wbits != wnd->free_bits[iw]) { - bool ret; -@@ -875,10 +857,6 @@ static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits) - if (!ret) - return false; - } -- -- bits -= op; -- wbit = 0; -- iw += 1; - } - - return true; -@@ -928,6 +906,7 @@ bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) - size_t iw = bit >> (sb->s_blocksize_bits + 3); - u32 wbits = 8 * sb->s_blocksize; - u32 wbit = bit & (wbits - 1); -+ u32 op; - size_t end; - struct rb_node *n; - struct e_node *e; -@@ -945,14 +924,11 @@ bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) - return false; - - use_wnd: -- while (iw < wnd->nwnd && bits) { -- u32 tail, op; -- -+ for (; iw < wnd->nwnd && bits; iw++, bits -= op, wbit = 0) { - if (unlikely(iw + 1 == wnd->nwnd)) - wbits = wnd->bits_last; - -- tail = wbits - wbit; -- op = min_t(u32, tail, bits); -+ op = min_t(u32, wbits - wbit, bits); - - if (wnd->free_bits[iw]) { - bool ret; -@@ -966,10 +942,6 @@ bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) - if (!ret) - goto out; - } -- -- bits -= op; -- wbit = 0; -- iw += 1; - } - ret = true; - -diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c -index f704ceef9539..3f96a11804c9 100644 ---- a/fs/ntfs3/file.c -+++ b/fs/ntfs3/file.c -@@ -182,13 +182,15 @@ static int ntfs_extend_initialized_size(struct file *file, - loff_t pos = valid; - int err; - -+ if (valid >= new_valid) -+ return 0; -+ - if (is_resident(ni)) { - 
ni->i_valid = new_valid; - return 0; - } - - WARN_ON(is_compressed(ni)); -- WARN_ON(valid >= new_valid); - - for (;;) { - u32 zerofrom, len; -@@ -987,6 +989,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) - u64 frame_vbo; - pgoff_t index; - bool frame_uptodate; -+ struct folio *folio; - - if (frame_size < PAGE_SIZE) { - /* -@@ -1041,8 +1044,9 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) - if (err) { - for (ip = 0; ip < pages_per_frame; ip++) { - page = pages[ip]; -- unlock_page(page); -- put_page(page); -+ folio = page_folio(page); -+ folio_unlock(folio); -+ folio_put(folio); - } - goto out; - } -@@ -1052,9 +1056,10 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) - off = offset_in_page(valid); - for (; ip < pages_per_frame; ip++, off = 0) { - page = pages[ip]; -+ folio = page_folio(page); - zero_user_segment(page, off, PAGE_SIZE); - flush_dcache_page(page); -- SetPageUptodate(page); -+ folio_mark_uptodate(folio); - } - - ni_lock(ni); -@@ -1063,9 +1068,10 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) - - for (ip = 0; ip < pages_per_frame; ip++) { - page = pages[ip]; -- SetPageUptodate(page); -- unlock_page(page); -- put_page(page); -+ folio = page_folio(page); -+ folio_mark_uptodate(folio); -+ folio_unlock(folio); -+ folio_put(folio); - } - - if (err) -@@ -1107,8 +1113,9 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) - for (ip = 0; ip < pages_per_frame; - ip++) { - page = pages[ip]; -- unlock_page(page); -- put_page(page); -+ folio = page_folio(page); -+ folio_unlock(folio); -+ folio_put(folio); - } - goto out; - } -@@ -1149,9 +1156,10 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) - for (ip = 0; ip < pages_per_frame; ip++) { - page = pages[ip]; - ClearPageDirty(page); -- SetPageUptodate(page); -- unlock_page(page); -- put_page(page); -+ folio = page_folio(page); -+ folio_mark_uptodate(folio); -+ folio_unlock(folio); -+ folio_put(folio); - } - - if (err) -diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c -index c33e818b3164..8b39d0ce5f28 100644 ---- a/fs/ntfs3/frecord.c -+++ b/fs/ntfs3/frecord.c -@@ -1958,7 +1958,6 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, - if (end > alloc_size) - end = alloc_size; - -- - while (vbo < end) { - if (idx == -1) { - ok = run_lookup_entry(&run, vcn, &lcn, &clen, &idx); -diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c -index 0fa636038b4e..03471bc9371c 100644 ---- a/fs/ntfs3/fsntfs.c -+++ b/fs/ntfs3/fsntfs.c -@@ -2699,4 +2699,4 @@ int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len) - out: - __putname(uni); - return err; --} -\ No newline at end of file -+} -diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c -index f810f0419d25..61d53d39f3b9 100644 ---- a/fs/ntfs3/record.c -+++ b/fs/ntfs3/record.c -@@ -212,7 +212,7 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) - return NULL; - - if (off >= used || off < MFTRECORD_FIXUP_OFFSET_1 || -- !IS_ALIGNED(off, 4)) { -+ !IS_ALIGNED(off, 8)) { - return NULL; - } - -@@ -236,8 +236,11 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) - off += asize; - } - -- /* Can we use the first field (attr->type). */ -- /* NOTE: this code also checks attr->size availability. 
*/ -+ /* -+ * Can we use the first fields: -+ * attr->type, -+ * attr->size -+ */ - if (off + 8 > used) { - static_assert(ALIGN(sizeof(enum ATTR_TYPE), 8) == 8); - return NULL; -@@ -259,10 +262,17 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) - - asize = le32_to_cpu(attr->size); - -+ if (!IS_ALIGNED(asize, 8)) ++ if (task->flags & PF_KTHREAD) + return NULL; + - /* Check overflow and boundary. */ - if (off + asize < off || off + asize > used) - return NULL; - -+ /* Can we use the field attr->non_res. */ -+ if (off + 9 > used) -+ return NULL; -+ - /* Check size of attribute. */ - if (!attr->non_res) { - /* Check resident fields. */ -diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c -index 48566dff0dc9..6e86d66197ef 100644 ---- a/fs/ntfs3/run.c -+++ b/fs/ntfs3/run.c -@@ -1112,9 +1112,9 @@ int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, - err = wnd_set_used_safe(wnd, lcn, len, &done); - if (zone) { - /* Restore zone. Lock mft run. */ -- struct rw_semaphore *lock; -- lock = is_mounted(sbi) ? &sbi->mft.ni->file.run_lock : -- NULL; -+ struct rw_semaphore *lock = -+ is_mounted(sbi) ? &sbi->mft.ni->file.run_lock : -+ NULL; - if (lock) - down_read(lock); - ntfs_refresh_zone(sbi); -diff --git a/kernel/futex/core.c b/kernel/futex/core.c -index 136768ae2637..9107704a6574 100644 ---- a/kernel/futex/core.c -+++ b/kernel/futex/core.c -@@ -451,28 +451,6 @@ struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key * - return NULL; + task_lock(task); + mm = task->mm; +- if (mm) { +- if (!(task->flags & PF_KTHREAD)) +- exe_file = get_mm_exe_file(mm); +- } ++ if (mm) ++ exe_file = get_mm_exe_file(mm); + task_unlock(task); + return exe_file; } - --int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval) --{ -- int ret; -- -- pagefault_disable(); -- ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval); -- pagefault_enable(); -- -- return ret; --} -- --int futex_get_value_locked(u32 *dest, u32 __user *from) --{ -- int ret; -- -- pagefault_disable(); -- ret = __get_user(*dest, from); -- pagefault_enable(); -- -- return ret ? -EFAULT : 0; --} -- - /** - * wait_for_owner_exiting - Block until the owner has exited - * @ret: owner's current futex lock status -diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h -index 8b195d06f4e8..618ce1fe870e 100644 ---- a/kernel/futex/futex.h -+++ b/kernel/futex/futex.h -@@ -6,6 +6,7 @@ - #include - #include - #include -+#include - - #ifdef CONFIG_PREEMPT_RT - #include -@@ -225,10 +226,64 @@ extern bool __futex_wake_mark(struct futex_q *q); - extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q); - - extern int fault_in_user_writeable(u32 __user *uaddr); --extern int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval); --extern int futex_get_value_locked(u32 *dest, u32 __user *from); - extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key); - -+static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval) -+{ -+ int ret; -+ -+ pagefault_disable(); -+ ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval); -+ pagefault_enable(); -+ -+ return ret; -+} -+ -+/* -+ * This does a plain atomic user space read, and the user pointer has -+ * already been verified earlier by get_futex_key() to be both aligned -+ * and actually in user space, just like futex_atomic_cmpxchg_inatomic(). 
-+ * -+ * We still want to avoid any speculation, and while __get_user() is -+ * the traditional model for this, it's actually slower than doing -+ * this manually these days. -+ * -+ * We could just have a per-architecture special function for it, -+ * the same way we do futex_atomic_cmpxchg_inatomic(), but rather -+ * than force everybody to do that, write it out long-hand using -+ * the low-level user-access infrastructure. -+ * -+ * This looks a bit overkill, but generally just results in a couple -+ * of instructions. -+ */ -+static __always_inline int futex_read_inatomic(u32 *dest, u32 __user *from) -+{ -+ u32 val; -+ -+ if (can_do_masked_user_access()) -+ from = masked_user_access_begin(from); -+ else if (!user_read_access_begin(from, sizeof(*from))) -+ return -EFAULT; -+ unsafe_get_user(val, from, Efault); -+ user_access_end(); -+ *dest = val; -+ return 0; -+Efault: -+ user_access_end(); -+ return -EFAULT; -+} -+ -+static inline int futex_get_value_locked(u32 *dest, u32 __user *from) -+{ -+ int ret; -+ -+ pagefault_disable(); -+ ret = futex_read_inatomic(dest, from); -+ pagefault_enable(); -+ -+ return ret; -+} -+ - extern void __futex_unqueue(struct futex_q *q); - extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb); - extern int futex_unqueue(struct futex_q *q); diff --git a/kernel/kprobes.c b/kernel/kprobes.c -index da59c68df841..55d0835ea0cf 100644 +index b027a4030976..5cc750200f19 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c -@@ -1570,16 +1570,25 @@ static int check_kprobe_address_safe(struct kprobe *p, +@@ -1566,16 +1566,25 @@ static int check_kprobe_address_safe(struct kprobe *p, if (ret) return ret; jump_label_lock(); @@ -17145,7 +14960,7 @@ index da59c68df841..55d0835ea0cf 100644 } /* Ensure it is not in reserved area. */ if (in_gate_area_no_mm((unsigned long) p->addr) || -@@ -1588,21 +1597,13 @@ static int check_kprobe_address_safe(struct kprobe *p, +@@ -1584,21 +1593,13 @@ static int check_kprobe_address_safe(struct kprobe *p, static_call_text_reserved(p->addr, p->addr) || find_bug((unsigned long)p->addr) || is_cfi_preamble_symbol((unsigned long)p->addr)) { @@ -17168,7 +14983,7 @@ index da59c68df841..55d0835ea0cf 100644 /* * If the module freed '.init.text', we couldn't insert * kprobes in there. 
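/*
 * The check_kprobe_address_safe() rework above (and the
 * guard(mutex)(&itmt_update_mutex) conversions later in this series)
 * lean on the scope-based lock guards from <linux/cleanup.h>: the
 * lock is released automatically whenever the enclosing scope is
 * left, so error paths can return directly instead of jumping to a
 * common unlock label. A minimal standalone sketch of that idiom —
 * the lock and helper names here are hypothetical, not kprobes code:
 */
#include <linux/cleanup.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(example_lock);

static int example_checked_op(int arg)
{
	guard(mutex)(&example_lock);	/* dropped on every return below */

	if (arg < 0)
		return -EINVAL;		/* early return, no unlock label */
	if (arg == 0)
		return -ENOENT;

	return 0;			/* guard releases the mutex here too */
}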
-@@ -1610,13 +1611,11 @@ static int check_kprobe_address_safe(struct kprobe *p, +@@ -1606,13 +1607,11 @@ static int check_kprobe_address_safe(struct kprobe *p, if (within_module_init((unsigned long)p->addr, *probed_mod) && !module_is_coming(*probed_mod)) { module_put(*probed_mod); @@ -17183,27 +14998,10 @@ index da59c68df841..55d0835ea0cf 100644 return ret; diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c -index 40f915f893e2..8c47a77bd660 100644 +index 19813b387ef9..29f9cf31dd34 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c -@@ -2917,7 +2917,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p, - */ - if (p->scx.slice && !scx_rq_bypassing(rq)) { - dispatch_enqueue(&rq->scx.local_dsq, p, SCX_ENQ_HEAD); -- return; -+ goto switch_class; - } - - /* -@@ -2934,6 +2934,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p, - } - } - -+switch_class: - if (next && next->sched_class != &ext_sched_class) - switch_class(rq, next); - } -@@ -4760,9 +4761,9 @@ static void scx_dump_task(struct seq_buf *s, struct scx_dump_ctx *dctx, +@@ -5206,9 +5206,9 @@ static void scx_dump_task(struct seq_buf *s, struct scx_dump_ctx *dctx, scx_get_task_state(p), p->scx.flags & ~SCX_TASK_STATE_MASK, p->scx.dsq_flags, ops_state & SCX_OPSS_STATE_MASK, ops_state >> SCX_OPSS_QSEQ_SHIFT); @@ -17215,55 +15013,6 @@ index 40f915f893e2..8c47a77bd660 100644 dump_line(s, " cpus=%*pb", cpumask_pr_args(p->cpus_ptr)); if (SCX_HAS_OP(dump_task)) { -diff --git a/kernel/workqueue.c b/kernel/workqueue.c -index cee65cb43108..f7d8fc204579 100644 ---- a/kernel/workqueue.c -+++ b/kernel/workqueue.c -@@ -3837,16 +3837,28 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, - { - bool wait = false; - struct pool_workqueue *pwq; -+ struct worker_pool *current_pool = NULL; - - if (flush_color >= 0) { - WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush)); - atomic_set(&wq->nr_pwqs_to_flush, 1); - } - -+ /* -+ * For unbound workqueue, pwqs will map to only a few pools. -+ * Most of the time, pwqs within the same pool will be linked -+ * sequentially to wq->pwqs by cpu index. So in the majority -+ * of pwq iters, the pool is the same, only doing lock/unlock -+ * if the pool has changed. This can largely reduce expensive -+ * lock operations. 
-+ */ - for_each_pwq(pwq, wq) { -- struct worker_pool *pool = pwq->pool; -- -- raw_spin_lock_irq(&pool->lock); -+ if (current_pool != pwq->pool) { -+ if (likely(current_pool)) -+ raw_spin_unlock_irq(¤t_pool->lock); -+ current_pool = pwq->pool; -+ raw_spin_lock_irq(¤t_pool->lock); -+ } - - if (flush_color >= 0) { - WARN_ON_ONCE(pwq->flush_color != -1); -@@ -3863,9 +3875,11 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, - pwq->work_color = work_color; - } - -- raw_spin_unlock_irq(&pool->lock); - } - -+ if (current_pool) -+ raw_spin_unlock_irq(¤t_pool->lock); -+ - if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush)) - complete(&wq->first_flusher->done); - diff --git a/scripts/package/PKGBUILD b/scripts/package/PKGBUILD index dca706617adc..89d3aef160b7 100644 --- a/scripts/package/PKGBUILD @@ -17281,13 +15030,15 @@ index dca706617adc..89d3aef160b7 100644 mkdir -p "${builddir}" cp System.map "${builddir}/System.map" diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c -index 3ed82f98e2de..8cae9004fb6c 100644 +index ad66378d7321..4210bc8f12e1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c -@@ -10625,6 +10625,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { +@@ -10641,8 +10641,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1e1f, "ASUS Vivobook 15 X1504VAP", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS), +- SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), +- SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), + SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC), + SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), @@ -17296,10 +15047,400 @@ index 3ed82f98e2de..8cae9004fb6c 100644 -- 2.48.0.rc1 -From c50e67956d592d75217878c821e0d82cc2d34aef Mon Sep 17 00:00:00 2001 +From edca92ed206343ae09ee1af6ae0dfc26a68085b1 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 2 Jan 2025 12:36:13 +0100 -Subject: [PATCH 09/13] ntsync +Date: Mon, 20 Jan 2025 13:22:28 +0100 +Subject: [PATCH 07/12] itmt-core-ranking + +Signed-off-by: Peter Jung +--- + arch/x86/include/asm/topology.h | 4 +- + arch/x86/kernel/itmt.c | 81 ++++++++++++++------------------- + arch/x86/kernel/smpboot.c | 19 +------- + kernel/sched/fair.c | 42 +++++++++++++---- + kernel/sched/sched.h | 1 - + kernel/sched/topology.c | 15 +----- + 6 files changed, 70 insertions(+), 92 deletions(-) + +diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h +index fd41103ad342..63bab25a4896 100644 +--- a/arch/x86/include/asm/topology.h ++++ b/arch/x86/include/asm/topology.h +@@ -250,7 +250,7 @@ extern bool x86_topology_update; + #include + + DECLARE_PER_CPU_READ_MOSTLY(int, sched_core_priority); +-extern unsigned int __read_mostly sysctl_sched_itmt_enabled; ++extern bool __read_mostly sysctl_sched_itmt_enabled; + + /* Interface to set priority of a cpu */ + void sched_set_itmt_core_prio(int prio, int core_cpu); +@@ -263,7 +263,7 @@ void sched_clear_itmt_support(void); + + #else /* CONFIG_SCHED_MC_PRIO */ + +-#define sysctl_sched_itmt_enabled 0 ++#define sysctl_sched_itmt_enabled false + static inline void sched_set_itmt_core_prio(int prio, 
int core_cpu) + { + } +diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c +index 51b805c727fc..9cea1fc36c18 100644 +--- a/arch/x86/kernel/itmt.c ++++ b/arch/x86/kernel/itmt.c +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -34,49 +35,38 @@ static bool __read_mostly sched_itmt_capable; + * of higher turbo frequency for cpus supporting Intel Turbo Boost Max + * Technology 3.0. + * +- * It can be set via /proc/sys/kernel/sched_itmt_enabled ++ * It can be set via /sys/kernel/debug/x86/sched_itmt_enabled + */ +-unsigned int __read_mostly sysctl_sched_itmt_enabled; ++bool __read_mostly sysctl_sched_itmt_enabled; + +-static int sched_itmt_update_handler(const struct ctl_table *table, int write, +- void *buffer, size_t *lenp, loff_t *ppos) ++static ssize_t sched_itmt_enabled_write(struct file *filp, ++ const char __user *ubuf, ++ size_t cnt, loff_t *ppos) + { +- unsigned int old_sysctl; +- int ret; ++ ssize_t result; ++ bool orig; + +- mutex_lock(&itmt_update_mutex); ++ guard(mutex)(&itmt_update_mutex); + +- if (!sched_itmt_capable) { +- mutex_unlock(&itmt_update_mutex); +- return -EINVAL; +- } +- +- old_sysctl = sysctl_sched_itmt_enabled; +- ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); ++ orig = sysctl_sched_itmt_enabled; ++ result = debugfs_write_file_bool(filp, ubuf, cnt, ppos); + +- if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) { ++ if (sysctl_sched_itmt_enabled != orig) { + x86_topology_update = true; + rebuild_sched_domains(); + } + +- mutex_unlock(&itmt_update_mutex); +- +- return ret; ++ return result; + } + +-static struct ctl_table itmt_kern_table[] = { +- { +- .procname = "sched_itmt_enabled", +- .data = &sysctl_sched_itmt_enabled, +- .maxlen = sizeof(unsigned int), +- .mode = 0644, +- .proc_handler = sched_itmt_update_handler, +- .extra1 = SYSCTL_ZERO, +- .extra2 = SYSCTL_ONE, +- }, ++static const struct file_operations dfs_sched_itmt_fops = { ++ .read = debugfs_read_file_bool, ++ .write = sched_itmt_enabled_write, ++ .open = simple_open, ++ .llseek = default_llseek, + }; + +-static struct ctl_table_header *itmt_sysctl_header; ++static struct dentry *dfs_sched_itmt; + + /** + * sched_set_itmt_support() - Indicate platform supports ITMT +@@ -97,16 +87,18 @@ static struct ctl_table_header *itmt_sysctl_header; + */ + int sched_set_itmt_support(void) + { +- mutex_lock(&itmt_update_mutex); ++ guard(mutex)(&itmt_update_mutex); + +- if (sched_itmt_capable) { +- mutex_unlock(&itmt_update_mutex); ++ if (sched_itmt_capable) + return 0; +- } + +- itmt_sysctl_header = register_sysctl("kernel", itmt_kern_table); +- if (!itmt_sysctl_header) { +- mutex_unlock(&itmt_update_mutex); ++ dfs_sched_itmt = debugfs_create_file_unsafe("sched_itmt_enabled", ++ 0644, ++ arch_debugfs_dir, ++ &sysctl_sched_itmt_enabled, ++ &dfs_sched_itmt_fops); ++ if (IS_ERR_OR_NULL(dfs_sched_itmt)) { ++ dfs_sched_itmt = NULL; + return -ENOMEM; + } + +@@ -117,8 +109,6 @@ int sched_set_itmt_support(void) + x86_topology_update = true; + rebuild_sched_domains(); + +- mutex_unlock(&itmt_update_mutex); +- + return 0; + } + +@@ -134,18 +124,15 @@ int sched_set_itmt_support(void) + */ + void sched_clear_itmt_support(void) + { +- mutex_lock(&itmt_update_mutex); ++ guard(mutex)(&itmt_update_mutex); + +- if (!sched_itmt_capable) { +- mutex_unlock(&itmt_update_mutex); ++ if (!sched_itmt_capable) + return; +- } ++ + sched_itmt_capable = false; + +- if (itmt_sysctl_header) { +- unregister_sysctl_table(itmt_sysctl_header); +- itmt_sysctl_header = 
NULL; +- } ++ debugfs_remove(dfs_sched_itmt); ++ dfs_sched_itmt = NULL; + + if (sysctl_sched_itmt_enabled) { + /* disable sched_itmt if we are no longer ITMT capable */ +@@ -153,8 +140,6 @@ void sched_clear_itmt_support(void) + x86_topology_update = true; + rebuild_sched_domains(); + } +- +- mutex_unlock(&itmt_update_mutex); + } + + int arch_asym_cpu_priority(int cpu) +diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c +index b5a8f0891135..ef63b1c0b491 100644 +--- a/arch/x86/kernel/smpboot.c ++++ b/arch/x86/kernel/smpboot.c +@@ -482,12 +482,6 @@ static int x86_core_flags(void) + return cpu_core_flags() | x86_sched_itmt_flags(); + } + #endif +-#ifdef CONFIG_SCHED_SMT +-static int x86_smt_flags(void) +-{ +- return cpu_smt_flags(); +-} +-#endif + #ifdef CONFIG_SCHED_CLUSTER + static int x86_cluster_flags(void) + { +@@ -495,15 +489,6 @@ static int x86_cluster_flags(void) + } + #endif + +-static int x86_die_flags(void) +-{ +- if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU) || +- cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES)) +- return x86_sched_itmt_flags(); +- +- return 0; +-} +- + /* + * Set if a package/die has multiple NUMA nodes inside. + * AMD Magny-Cours, Intel Cluster-on-Die, and Intel +@@ -519,7 +504,7 @@ static void __init build_sched_topology(void) + + #ifdef CONFIG_SCHED_SMT + x86_topology[i++] = (struct sched_domain_topology_level){ +- cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) ++ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) + }; + #endif + #ifdef CONFIG_SCHED_CLUSTER +@@ -539,7 +524,7 @@ static void __init build_sched_topology(void) + */ + if (!x86_has_numa_in_package) { + x86_topology[i++] = (struct sched_domain_topology_level){ +- cpu_cpu_mask, x86_die_flags, SD_INIT_NAME(PKG) ++ cpu_cpu_mask, x86_sched_itmt_flags, SD_INIT_NAME(PKG) + }; + } + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 8c0f17a96d4f..c532ffb153b4 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -9836,6 +9836,8 @@ struct sg_lb_stats { + unsigned int group_weight; + enum group_type group_type; + unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */ ++ unsigned int asym_prefer_cpu; /* Group CPU with highest asym priority */ ++ int highest_asym_prio; /* Asym priority of asym_prefer_cpu */ + unsigned int group_smt_balance; /* Task on busy SMT be moved */ + unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */ + #ifdef CONFIG_NUMA_BALANCING +@@ -10165,7 +10167,7 @@ sched_group_asym(struct lb_env *env, struct sg_lb_stats *sgs, struct sched_group + (sgs->group_weight - sgs->idle_cpus != 1)) + return false; + +- return sched_asym(env->sd, env->dst_cpu, group->asym_prefer_cpu); ++ return sched_asym(env->sd, env->dst_cpu, sgs->asym_prefer_cpu); + } + + /* One group has more than one SMT CPU while the other group does not */ +@@ -10246,6 +10248,17 @@ sched_reduced_capacity(struct rq *rq, struct sched_domain *sd) + return check_cpu_capacity(rq, sd); + } + ++static inline void ++update_sg_pick_asym_prefer(struct sg_lb_stats *sgs, int cpu) ++{ ++ int asym_prio = arch_asym_cpu_priority(cpu); ++ ++ if (asym_prio > sgs->highest_asym_prio) { ++ sgs->asym_prefer_cpu = cpu; ++ sgs->highest_asym_prio = asym_prio; ++ } ++} ++ + /** + * update_sg_lb_stats - Update sched_group's statistics for load balancing. + * @env: The load balancing environment. 
+@@ -10262,11 +10275,13 @@ static inline void update_sg_lb_stats(struct lb_env *env, + bool *sg_overloaded, + bool *sg_overutilized) + { +- int i, nr_running, local_group; ++ int i, nr_running, local_group, sd_flags = env->sd->flags; ++ bool balancing_at_rd = !env->sd->parent; + + memset(sgs, 0, sizeof(*sgs)); + + local_group = group == sds->local; ++ sgs->highest_asym_prio = INT_MIN; + + for_each_cpu_and(i, sched_group_span(group), env->cpus) { + struct rq *rq = cpu_rq(i); +@@ -10280,16 +10295,12 @@ static inline void update_sg_lb_stats(struct lb_env *env, + nr_running = rq->nr_running; + sgs->sum_nr_running += nr_running; + +- if (nr_running > 1) +- *sg_overloaded = 1; ++ if (sd_flags & SD_ASYM_PACKING) ++ update_sg_pick_asym_prefer(sgs, i); + + if (cpu_overutilized(i)) + *sg_overutilized = 1; + +-#ifdef CONFIG_NUMA_BALANCING +- sgs->nr_numa_running += rq->nr_numa_running; +- sgs->nr_preferred_running += rq->nr_preferred_running; +-#endif + /* + * No need to call idle_cpu() if nr_running is not 0 + */ +@@ -10299,10 +10310,21 @@ static inline void update_sg_lb_stats(struct lb_env *env, + continue; + } + ++ /* Overload indicator is only updated at root domain */ ++ if (balancing_at_rd && nr_running > 1) ++ *sg_overloaded = 1; ++ ++#ifdef CONFIG_NUMA_BALANCING ++ /* Only fbq_classify_group() uses this to classify NUMA groups */ ++ if (sd_flags & SD_NUMA) { ++ sgs->nr_numa_running += rq->nr_numa_running; ++ sgs->nr_preferred_running += rq->nr_preferred_running; ++ } ++#endif + if (local_group) + continue; + +- if (env->sd->flags & SD_ASYM_CPUCAPACITY) { ++ if (sd_flags & SD_ASYM_CPUCAPACITY) { + /* Check for a misfit task on the cpu */ + if (sgs->group_misfit_task_load < rq->misfit_task_load) { + sgs->group_misfit_task_load = rq->misfit_task_load; +@@ -10397,7 +10419,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, + + case group_asym_packing: + /* Prefer to move from lowest priority CPU's work */ +- return sched_asym_prefer(sds->busiest->asym_prefer_cpu, sg->asym_prefer_cpu); ++ return sched_asym_prefer(busiest->asym_prefer_cpu, sgs->asym_prefer_cpu); + + case group_misfit_task: + /* +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index da653eba7884..dee2797009e3 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -2056,7 +2056,6 @@ struct sched_group { + unsigned int group_weight; + unsigned int cores; + struct sched_group_capacity *sgc; +- int asym_prefer_cpu; /* CPU of highest priority in group */ + int flags; + + /* +diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c +index 9748a4c8d668..59b8157cb114 100644 +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -1302,7 +1302,7 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) + WARN_ON(!sg); + + do { +- int cpu, cores = 0, max_cpu = -1; ++ int cpu, cores = 0; + + sg->group_weight = cpumask_weight(sched_group_span(sg)); + +@@ -1314,19 +1314,6 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) + #endif + } + sg->cores = cores; +- +- if (!(sd->flags & SD_ASYM_PACKING)) +- goto next; +- +- for_each_cpu(cpu, sched_group_span(sg)) { +- if (max_cpu < 0) +- max_cpu = cpu; +- else if (sched_asym_prefer(cpu, max_cpu)) +- max_cpu = cpu; +- } +- sg->asym_prefer_cpu = max_cpu; +- +-next: + sg = sg->next; + } while (sg != sd->groups); + +-- +2.48.0.rc1 + +From dad63380fd4bccaf1df47a5d2a14b3622a828bbf Mon Sep 17 00:00:00 2001 +From: Peter Jung +Date: Mon, 20 Jan 2025 13:22:39 +0100 +Subject: [PATCH 08/12] ntsync Signed-off-by: Peter Jung --- 
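Background for the ntsync patch that follows: the driver implements Windows NT
synchronization primitives (semaphores, mutexes, events) as character-device
objects. NT semaphores differ from POSIX ones in that a post can add an
arbitrary count but must never raise the count past a fixed maximum set at
creation; the driver's documentation specifies EOVERFLOW for that case. Below
is a small user-space model of just those semantics, as an illustrative
sketch only — not the driver's in-kernel code or its ioctl interface:

	#include <errno.h>
	#include <stdint.h>

	struct nt_sem_model {
		uint32_t count;	/* current signal count */
		uint32_t max;	/* ceiling fixed at creation time */
	};

	/* Post: add 'delta' signals, failing rather than clamping. */
	static int nt_sem_post_model(struct nt_sem_model *sem, uint32_t delta)
	{
		if (delta > sem->max - sem->count)
			return -EOVERFLOW;	/* would exceed the maximum */
		sem->count += delta;
		return 0;
	}

	/* Wait: consume one signal if available, else report contention. */
	static int nt_sem_try_wait_model(struct nt_sem_model *sem)
	{
		if (sem->count == 0)
			return -EAGAIN;
		sem->count--;
		return 0;
	}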
@@ -17725,10 +15866,10 @@ index 000000000000..25e7c4aef968 + ``objs`` and in ``alert``. If this is attempted, the function fails + with ``EINVAL``. diff --git a/MAINTAINERS b/MAINTAINERS -index a2d251917629..a30770b6f75a 100644 +index 0fa7c5728f1e..efecb59adfe6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -16501,6 +16501,15 @@ T: git https://github.com/Paragon-Software-Group/linux-ntfs3.git +@@ -16709,6 +16709,15 @@ T: git https://github.com/Paragon-Software-Group/linux-ntfs3.git F: Documentation/filesystems/ntfs3.rst F: fs/ntfs3/ @@ -17745,7 +15886,7 @@ index a2d251917629..a30770b6f75a 100644 M: Finn Thain L: linux-m68k@lists.linux-m68k.org diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig -index 3fe7e2a9bd29..6c8b999a5e08 100644 +index 09cbe3f0ab1e..fb772bfe27c3 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -517,7 +517,6 @@ config OPEN_DICE @@ -18936,7 +17077,7 @@ index dcfa38fdc93c..6d06793512b1 100644 #endif diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile -index 363d031a16f7..ff18c0361e38 100644 +index 2401e973c359..a8c9648e5adc 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -18,6 +18,7 @@ TARGETS += devices/error_logs @@ -20326,21 +18467,20 @@ index 000000000000..3aad311574c4 -- 2.48.0.rc1 -From a950dc524c02418e8190b8b658acfc636f16dc2b Mon Sep 17 00:00:00 2001 +From d0d15e3d79a2d5bb2c94b8ff3d2ab51f0b0100fe Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 2 Jan 2025 12:36:25 +0100 -Subject: [PATCH 10/13] perf-per-core +Date: Mon, 20 Jan 2025 13:22:50 +0100 +Subject: [PATCH 09/12] perf-per-core Signed-off-by: Peter Jung --- Documentation/arch/x86/topology.rst | 4 + - arch/x86/events/rapl.c | 507 ++++++++++++++------------ + arch/x86/events/rapl.c | 415 ++++++++++++++++---------- arch/x86/include/asm/processor.h | 1 + arch/x86/include/asm/topology.h | 1 + arch/x86/kernel/cpu/debugfs.c | 1 + arch/x86/kernel/cpu/topology_common.c | 1 + - include/linux/cpuhotplug.h | 1 - - 7 files changed, 288 insertions(+), 228 deletions(-) + 6 files changed, 273 insertions(+), 150 deletions(-) diff --git a/Documentation/arch/x86/topology.rst b/Documentation/arch/x86/topology.rst index 7352ab89a55a..c12837e61bda 100644 @@ -20358,7 +18498,7 @@ index 7352ab89a55a..c12837e61bda 100644 System topology examples diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c -index a481a939862e..d3bb3865c1b1 100644 +index a8defc813c36..d3bb3865c1b1 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -39,6 +39,10 @@ @@ -20427,7 +18567,7 @@ index a481a939862e..d3bb3865c1b1 100644 }; enum rapl_unit_quirk { -@@ -139,45 +150,43 @@ enum rapl_unit_quirk { +@@ -139,44 +150,43 @@ enum rapl_unit_quirk { }; struct rapl_model { @@ -20444,7 +18584,6 @@ index a481a939862e..d3bb3865c1b1 100644 /* 1/2^hw_unit Joule */ -static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; -static struct rapl_pmus *rapl_pmus; --static cpumask_t rapl_cpu_mask; -static unsigned int rapl_cntr_mask; +static int rapl_pkg_hw_unit[NR_RAPL_PKG_DOMAINS] __read_mostly; +static int rapl_core_hw_unit __read_mostly; @@ -20466,14 +18605,14 @@ index a481a939862e..d3bb3865c1b1 100644 -} - -static inline const struct cpumask *get_rapl_pmu_cpumask(int cpu) -+static inline unsigned int get_rapl_pmu_idx(int cpu, int scope) - { +-{ - return rapl_pmu_is_pkg_scope() ? 
topology_core_cpumask(cpu) : - topology_die_cpumask(cpu); -} - -static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) --{ ++static inline unsigned int get_rapl_pmu_idx(int cpu, int scope) + { - unsigned int rapl_pmu_idx = get_rapl_pmu_idx(cpu); - /* @@ -20497,7 +18636,7 @@ index a481a939862e..d3bb3865c1b1 100644 } static inline u64 rapl_read_counter(struct perf_event *event) -@@ -187,19 +196,20 @@ static inline u64 rapl_read_counter(struct perf_event *event) +@@ -186,19 +196,20 @@ static inline u64 rapl_read_counter(struct perf_event *event) return raw; } @@ -20524,7 +18663,7 @@ index a481a939862e..d3bb3865c1b1 100644 } static u64 rapl_event_update(struct perf_event *event) -@@ -226,7 +236,7 @@ static u64 rapl_event_update(struct perf_event *event) +@@ -225,7 +236,7 @@ static u64 rapl_event_update(struct perf_event *event) delta = (new_raw_count << shift) - (prev_raw_count << shift); delta >>= shift; @@ -20533,7 +18672,7 @@ index a481a939862e..d3bb3865c1b1 100644 local64_add(sdelta, &event->count); -@@ -241,34 +251,34 @@ static void rapl_start_hrtimer(struct rapl_pmu *pmu) +@@ -240,34 +251,34 @@ static void rapl_start_hrtimer(struct rapl_pmu *pmu) static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) { @@ -20577,7 +18716,7 @@ index a481a939862e..d3bb3865c1b1 100644 struct perf_event *event) { if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) -@@ -276,39 +286,39 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu, +@@ -275,39 +286,39 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu, event->hw.state = 0; @@ -20631,7 +18770,7 @@ index a481a939862e..d3bb3865c1b1 100644 list_del(&event->active_entry); -@@ -326,23 +336,23 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) +@@ -325,23 +336,23 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) hwc->state |= PERF_HES_UPTODATE; } @@ -20660,7 +18799,7 @@ index a481a939862e..d3bb3865c1b1 100644 return 0; } -@@ -355,12 +365,14 @@ static void rapl_pmu_event_del(struct perf_event *event, int flags) +@@ -354,12 +365,14 @@ static void rapl_pmu_event_del(struct perf_event *event, int flags) static int rapl_pmu_event_init(struct perf_event *event) { u64 cfg = event->attr.config & RAPL_EVENT_MASK; @@ -20680,12 +18819,10 @@ index a481a939862e..d3bb3865c1b1 100644 /* check only supported bits are set */ if (event->attr.config & ~RAPL_EVENT_MASK) -@@ -369,29 +381,49 @@ static int rapl_pmu_event_init(struct perf_event *event) +@@ -368,26 +381,49 @@ static int rapl_pmu_event_init(struct perf_event *event) if (event->cpu < 0) return -EINVAL; -- event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; -- - if (!cfg || cfg >= NR_RAPL_DOMAINS + 1) + rapl_pmus = container_of(event->pmu, struct rapl_pmus, pmu); + if (!rapl_pmus) @@ -20737,7 +18874,6 @@ index a481a939862e..d3bb3865c1b1 100644 + rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx]; + if (!rapl_pmu) return -EINVAL; -- event->cpu = pmu->cpu; - event->pmu_private = pmu; - event->hw.event_base = rapl_msrs[bit].msr; + @@ -20745,29 +18881,7 @@ index a481a939862e..d3bb3865c1b1 100644 event->hw.config = cfg; event->hw.idx = bit; -@@ -403,34 +435,19 @@ static void rapl_pmu_event_read(struct perf_event *event) - rapl_event_update(event); - } - --static ssize_t rapl_get_attr_cpumask(struct device *dev, -- struct device_attribute *attr, char *buf) --{ -- return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask); --} -- --static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL); -- --static struct attribute *rapl_pmu_attrs[] = { -- 
&dev_attr_cpumask.attr, -- NULL, --}; -- --static struct attribute_group rapl_pmu_attr_group = { -- .attrs = rapl_pmu_attrs, --}; -- - RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); - RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); +@@ -404,12 +440,14 @@ RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04"); RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05"); @@ -20782,7 +18896,7 @@ index a481a939862e..d3bb3865c1b1 100644 /* * we compute in 0.23 nJ increments regardless of MSR -@@ -440,6 +457,7 @@ RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890 +@@ -419,6 +457,7 @@ RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890 RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10"); RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10"); RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10"); @@ -20790,21 +18904,20 @@ index a481a939862e..d3bb3865c1b1 100644 /* * There are no default events, but we need to create -@@ -467,7 +485,12 @@ static struct attribute_group rapl_pmu_format_group = { +@@ -451,6 +490,12 @@ static const struct attribute_group *rapl_attr_groups[] = { + NULL, }; - static const struct attribute_group *rapl_attr_groups[] = { -- &rapl_pmu_attr_group, ++static const struct attribute_group *rapl_core_attr_groups[] = { + &rapl_pmu_format_group, + &rapl_pmu_events_group, + NULL, +}; + -+static const struct attribute_group *rapl_core_attr_groups[] = { - &rapl_pmu_format_group, - &rapl_pmu_events_group, - NULL, -@@ -533,6 +556,18 @@ static struct attribute_group rapl_events_psys_group = { + static struct attribute *rapl_events_cores[] = { + EVENT_PTR(rapl_cores), + EVENT_PTR(rapl_cores_unit), +@@ -511,6 +556,18 @@ static struct attribute_group rapl_events_psys_group = { .attrs = rapl_events_psys, }; @@ -20823,7 +18936,7 @@ index a481a939862e..d3bb3865c1b1 100644 static bool test_msr(int idx, void *data) { return test_bit(idx, (unsigned long *) data); -@@ -558,11 +593,11 @@ static struct perf_msr intel_rapl_spr_msrs[] = { +@@ -536,11 +593,11 @@ static struct perf_msr intel_rapl_spr_msrs[] = { }; /* @@ -20838,74 +18951,16 @@ index a481a939862e..d3bb3865c1b1 100644 [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, NULL, false, 0 }, [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK }, [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, NULL, false, 0 }, -@@ -570,77 +605,25 @@ static struct perf_msr amd_rapl_msrs[] = { +@@ -548,18 +605,25 @@ static struct perf_msr amd_rapl_msrs[] = { [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 }, }; --static int rapl_cpu_offline(unsigned int cpu) --{ -- struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); -- int target; -- -- /* Check if exiting cpu is used for collecting rapl events */ -- if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask)) -- return 0; -- -- pmu->cpu = -1; -- /* Find a new cpu to collect rapl events */ -- target = cpumask_any_but(get_rapl_pmu_cpumask(cpu), cpu); -- -- /* Migrate rapl events to the new target */ -- if (target < nr_cpu_ids) { -- cpumask_set_cpu(target, &rapl_cpu_mask); -- pmu->cpu = target; -- perf_pmu_migrate_context(pmu->pmu, cpu, target); -- } -- return 0; --} -- --static int rapl_cpu_online(unsigned int cpu) --{ -- s32 rapl_pmu_idx = get_rapl_pmu_idx(cpu); -- if (rapl_pmu_idx 
< 0) { -- pr_err("topology_logical_(package/die)_id() returned a negative value"); -- return -EINVAL; -- } -- struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); -- int target; -- -- if (!pmu) { -- pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); -- if (!pmu) -- return -ENOMEM; -- -- raw_spin_lock_init(&pmu->lock); -- INIT_LIST_HEAD(&pmu->active_list); -- pmu->pmu = &rapl_pmus->pmu; -- pmu->timer_interval = ms_to_ktime(rapl_timer_ms); -- rapl_hrtimer_init(pmu); -- -- rapl_pmus->pmus[rapl_pmu_idx] = pmu; -- } -- -- /* -- * Check if there is an online cpu in the package which collects rapl -- * events already. -- */ -- target = cpumask_any_and(&rapl_cpu_mask, get_rapl_pmu_cpumask(cpu)); -- if (target < nr_cpu_ids) -- return 0; -- -- cpumask_set_cpu(cpu, &rapl_cpu_mask); -- pmu->cpu = cpu; -- return 0; --} +-static int rapl_check_hw_unit(struct rapl_model *rm) +static struct perf_msr amd_rapl_core_msrs[] = { + [PERF_RAPL_CORE] = { MSR_AMD_CORE_ENERGY_STATUS, &rapl_events_core_group, + test_msr, false, RAPL_MSR_MASK }, +}; - --static int rapl_check_hw_unit(struct rapl_model *rm) ++ +static int rapl_check_hw_unit(void) { u64 msr_rapl_power_unit_bits; @@ -20919,15 +18974,15 @@ index a481a939862e..d3bb3865c1b1 100644 - rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; + for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) + rapl_pkg_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; -+ -+ rapl_core_hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; - switch (rm->unit_quirk) { ++ rapl_core_hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; ++ + switch (rapl_model->unit_quirk) { /* * DRAM domain on HSW server and KNL has fixed energy unit which can be * different than the unit from power unit MSR. See -@@ -648,17 +631,16 @@ static int rapl_check_hw_unit(struct rapl_model *rm) +@@ -567,17 +631,16 @@ static int rapl_check_hw_unit(struct rapl_model *rm) * of 2. 
Datasheet, September 2014, Reference Number: 330784-001 " */ case RAPL_UNIT_QUIRK_INTEL_HSW: @@ -20947,7 +19002,7 @@ index a481a939862e..d3bb3865c1b1 100644 /* * Calculate the timer rate: * Use reference of 200W for scaling the timeout to avoid counter -@@ -667,9 +649,9 @@ static int rapl_check_hw_unit(struct rapl_model *rm) +@@ -586,9 +649,9 @@ static int rapl_check_hw_unit(struct rapl_model *rm) * if hw unit is 32, then we use 2 ms 1/200/2 */ rapl_timer_ms = 2; @@ -20959,7 +19014,7 @@ index a481a939862e..d3bb3865c1b1 100644 } return 0; } -@@ -677,24 +659,32 @@ static int rapl_check_hw_unit(struct rapl_model *rm) +@@ -596,24 +659,32 @@ static int rapl_check_hw_unit(struct rapl_model *rm) static void __init rapl_advertise(void) { int i; @@ -20998,51 +19053,65 @@ index a481a939862e..d3bb3865c1b1 100644 kfree(rapl_pmus); } -@@ -707,17 +697,60 @@ static const struct attribute_group *rapl_attr_update[] = { +@@ -626,46 +697,60 @@ static const struct attribute_group *rapl_attr_update[] = { NULL, }; --static int __init init_rapl_pmus(void) +-static int __init init_rapl_pmu(void) +static const struct attribute_group *rapl_core_attr_update[] = { + &rapl_events_core_group, + NULL, +}; + +static int __init init_rapl_pmu(struct rapl_pmus *rapl_pmus) -+{ + { +- struct rapl_pmu *pmu; + struct rapl_pmu *rapl_pmu; -+ int idx; -+ -+ for (idx = 0; idx < rapl_pmus->nr_rapl_pmu; idx++) { + int idx; + + for (idx = 0; idx < rapl_pmus->nr_rapl_pmu; idx++) { +- pmu = kzalloc(sizeof(*pmu), GFP_KERNEL); +- if (!pmu) + rapl_pmu = kzalloc(sizeof(*rapl_pmu), GFP_KERNEL); + if (!rapl_pmu) -+ goto free; -+ + goto free; + +- raw_spin_lock_init(&pmu->lock); +- INIT_LIST_HEAD(&pmu->active_list); +- pmu->pmu = &rapl_pmus->pmu; +- pmu->timer_interval = ms_to_ktime(rapl_timer_ms); +- rapl_hrtimer_init(pmu); + raw_spin_lock_init(&rapl_pmu->lock); + INIT_LIST_HEAD(&rapl_pmu->active_list); + rapl_pmu->pmu = &rapl_pmus->pmu; + rapl_pmu->timer_interval = ms_to_ktime(rapl_timer_ms); + rapl_hrtimer_init(rapl_pmu); -+ + +- rapl_pmus->pmus[idx] = pmu; + rapl_pmus->rapl_pmu[idx] = rapl_pmu; -+ } -+ -+ return 0; -+free: -+ for (; idx > 0; idx--) + } + + return 0; + free: + for (; idx > 0; idx--) +- kfree(rapl_pmus->pmus[idx - 1]); + kfree(rapl_pmus->rapl_pmu[idx - 1]); -+ return -ENOMEM; -+} -+ + return -ENOMEM; + } + +-static int __init init_rapl_pmus(void) +static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int rapl_pmu_scope, + const struct attribute_group **rapl_attr_groups, + const struct attribute_group **rapl_attr_update) { int nr_rapl_pmu = topology_max_packages(); +- int rapl_pmu_scope = PERF_PMU_SCOPE_PKG; + struct rapl_pmus *rapl_pmus; -- if (!rapl_pmu_is_pkg_scope()) +- if (!rapl_pmu_is_pkg_scope()) { - nr_rapl_pmu *= topology_max_dies_per_package(); +- rapl_pmu_scope = PERF_PMU_SCOPE_DIE; +- } + /* + * rapl_pmu_scope must be either PKG, DIE or CORE + */ @@ -21063,15 +19132,11 @@ index a481a939862e..d3bb3865c1b1 100644 rapl_pmus->nr_rapl_pmu = nr_rapl_pmu; rapl_pmus->pmu.attr_groups = rapl_attr_groups; rapl_pmus->pmu.attr_update = rapl_attr_update; -@@ -728,77 +761,81 @@ static int __init init_rapl_pmus(void) - rapl_pmus->pmu.start = rapl_pmu_event_start; - rapl_pmus->pmu.stop = rapl_pmu_event_stop; - rapl_pmus->pmu.read = rapl_pmu_event_read; -+ rapl_pmus->pmu.scope = rapl_pmu_scope; +@@ -680,75 +765,77 @@ static int __init init_rapl_pmus(void) rapl_pmus->pmu.module = THIS_MODULE; rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; -- return 0; -+ + +- return init_rapl_pmu(); + return 
init_rapl_pmu(rapl_pmus); } @@ -21162,7 +19227,7 @@ index a481a939862e..d3bb3865c1b1 100644 }; static const struct x86_cpu_id rapl_model_match[] __initconst = { -@@ -854,57 +891,73 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); +@@ -804,45 +891,73 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); static int __init rapl_pmu_init(void) { const struct x86_cpu_id *id; @@ -21178,10 +19243,10 @@ index a481a939862e..d3bb3865c1b1 100644 return -ENODEV; - rm = (struct rapl_model *) id->driver_data; +- +- rapl_msrs = rm->rapl_msrs; + rapl_model = (struct rapl_model *) id->driver_data; -- rapl_msrs = rm->rapl_msrs; -- - rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX, - false, (void *) &rm->events); - @@ -21196,12 +19261,7 @@ index a481a939862e..d3bb3865c1b1 100644 if (ret) return ret; -- /* -- * Install callbacks. Core will call them for each online cpu. -- */ -- ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE, -- "perf/x86/rapl:online", -- rapl_cpu_online, rapl_cpu_offline); +- ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); + rapl_pmus_pkg->cntr_mask = perf_msr_probe(rapl_model->rapl_pkg_msrs, + PERF_RAPL_PKG_EVENTS_MAX, false, + (void *) &rapl_model->pkg_events); @@ -21210,9 +19270,6 @@ index a481a939862e..d3bb3865c1b1 100644 if (ret) goto out; -- ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); -- if (ret) -- goto out1; + if (rapl_model->core_events) { + ret = init_rapl_pmus(&rapl_pmus_core, PERF_PMU_SCOPE_CORE, + rapl_core_attr_groups, @@ -21232,13 +19289,11 @@ index a481a939862e..d3bb3865c1b1 100644 + cleanup_rapl_pmus(rapl_pmus_core); + } + } - ++ +core_init_failed: rapl_advertise(); return 0; --out1: -- cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); out: pr_warn("Initialization failed (%d), disabled\n", ret); - cleanup_rapl_pmus(); @@ -21249,7 +19304,6 @@ index a481a939862e..d3bb3865c1b1 100644 static void __exit intel_rapl_exit(void) { -- cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE); - perf_pmu_unregister(&rapl_pmus->pmu); - cleanup_rapl_pmus(); + if (rapl_pmus_core) { @@ -21273,7 +19327,7 @@ index 20e6009381ed..c0cd10182e90 100644 // AMD Node ID and Nodes per Package info u32 amd_node_id; diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h -index fd41103ad342..3973cb9bb2e6 100644 +index 63bab25a4896..ec134b719144 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -143,6 +143,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); @@ -21308,25 +19362,13 @@ index 8277c64f88db..b5a5e1411469 100644 } /* Package relative core ID */ -diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h -index 2361ed4d2b15..37a9afffb59e 100644 ---- a/include/linux/cpuhotplug.h -+++ b/include/linux/cpuhotplug.h -@@ -208,7 +208,6 @@ enum cpuhp_state { - CPUHP_AP_PERF_X86_UNCORE_ONLINE, - CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE, - CPUHP_AP_PERF_X86_AMD_POWER_ONLINE, -- CPUHP_AP_PERF_X86_RAPL_ONLINE, - CPUHP_AP_PERF_S390_CF_ONLINE, - CPUHP_AP_PERF_S390_SF_ONLINE, - CPUHP_AP_PERF_ARM_CCI_ONLINE, -- 2.48.0.rc1 -From 7f6250498af65dddd6a238984829d8b08e9016b8 Mon Sep 17 00:00:00 2001 +From 6a7ea67c66634276802b4b9b0964a0b00db97d9c Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 2 Jan 2025 12:36:37 +0100 -Subject: [PATCH 11/13] pksm +Date: Mon, 20 Jan 2025 13:23:02 +0100 +Subject: [PATCH 10/12] pksm Signed-off-by: Peter Jung --- @@ -21355,178 +19397,178 @@ Signed-off-by: Peter Jung 22 files changed, 206 insertions(+), 1 deletion(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl 
b/arch/alpha/kernel/syscalls/syscall.tbl -index 74720667fe09..e6a11f3c0a2e 100644 +index c59d53d6d3f3..121696f903e8 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl -@@ -502,3 +502,6 @@ - 570 common lsm_set_self_attr sys_lsm_set_self_attr - 571 common lsm_list_modules sys_lsm_list_modules - 572 common mseal sys_mseal -+573 common process_ksm_enable sys_process_ksm_enable -+574 common process_ksm_disable sys_process_ksm_disable -+575 common process_ksm_status sys_process_ksm_status +@@ -506,3 +506,6 @@ + 574 common getxattrat sys_getxattrat + 575 common listxattrat sys_listxattrat + 576 common removexattrat sys_removexattrat ++577 common process_ksm_enable sys_process_ksm_enable ++578 common process_ksm_disable sys_process_ksm_disable ++579 common process_ksm_status sys_process_ksm_status diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl -index 23c98203c40f..10a3099decbe 100644 +index 49eeb2ad8dbd..1ce4d983b5b2 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl -@@ -477,3 +477,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status +@@ -481,3 +481,6 @@ + 464 common getxattrat sys_getxattrat + 465 common listxattrat sys_listxattrat + 466 common removexattrat sys_removexattrat ++467 common process_ksm_enable sys_process_ksm_enable ++468 common process_ksm_disable sys_process_ksm_disable ++469 common process_ksm_status sys_process_ksm_status diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl -index 22a3cbd4c602..12d2c7594bf0 100644 +index f5ed71f1910d..17e865370d37 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl -@@ -462,3 +462,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status +@@ -466,3 +466,6 @@ + 464 common getxattrat sys_getxattrat + 465 common listxattrat sys_listxattrat + 466 common removexattrat sys_removexattrat ++467 common process_ksm_enable sys_process_ksm_enable ++468 common process_ksm_disable sys_process_ksm_disable ++469 common process_ksm_status sys_process_ksm_status diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl -index 2b81a6bd78b2..e2a93c856eed 100644 +index 680f568b77f2..64740e895587 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl -@@ -468,3 +468,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status +@@ -472,3 +472,6 @@ + 464 common getxattrat sys_getxattrat + 465 common listxattrat sys_listxattrat + 466 common removexattrat sys_removexattrat ++467 common process_ksm_enable sys_process_ksm_enable ++468 common process_ksm_disable sys_process_ksm_disable ++469 common process_ksm_status sys_process_ksm_status diff --git 
a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl -index 953f5b7dc723..b921fbf56fa6 100644 +index 0b9b7e25b69a..bfafb91a2eda 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl -@@ -401,3 +401,6 @@ - 460 n32 lsm_set_self_attr sys_lsm_set_self_attr - 461 n32 lsm_list_modules sys_lsm_list_modules - 462 n32 mseal sys_mseal -+463 n32 process_ksm_enable sys_process_ksm_enable -+464 n32 process_ksm_disable sys_process_ksm_disable -+465 n32 process_ksm_status sys_process_ksm_status +@@ -405,3 +405,6 @@ + 464 n32 getxattrat sys_getxattrat + 465 n32 listxattrat sys_listxattrat + 466 n32 removexattrat sys_removexattrat ++467 n32 process_ksm_enable sys_process_ksm_enable ++468 n32 process_ksm_disable sys_process_ksm_disable ++469 n32 process_ksm_status sys_process_ksm_status diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl -index 1464c6be6eb3..8d7f9ddd66f4 100644 +index c844cd5cda62..39d446aeac64 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl -@@ -377,3 +377,6 @@ - 460 n64 lsm_set_self_attr sys_lsm_set_self_attr - 461 n64 lsm_list_modules sys_lsm_list_modules - 462 n64 mseal sys_mseal -+463 n64 process_ksm_enable sys_process_ksm_enable -+464 n64 process_ksm_disable sys_process_ksm_disable -+465 n64 process_ksm_status sys_process_ksm_status +@@ -381,3 +381,6 @@ + 464 n64 getxattrat sys_getxattrat + 465 n64 listxattrat sys_listxattrat + 466 n64 removexattrat sys_removexattrat ++467 n64 process_ksm_enable sys_process_ksm_enable ++468 n64 process_ksm_disable sys_process_ksm_disable ++469 n64 process_ksm_status sys_process_ksm_status diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl -index 2439a2491cff..9d6142739954 100644 +index 349b8aad1159..61536c55715a 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl -@@ -450,3 +450,6 @@ - 460 o32 lsm_set_self_attr sys_lsm_set_self_attr - 461 o32 lsm_list_modules sys_lsm_list_modules - 462 o32 mseal sys_mseal -+463 o32 process_ksm_enable sys_process_ksm_enable -+464 o32 process_ksm_disable sys_process_ksm_disable -+465 o32 process_ksm_status sys_process_ksm_status +@@ -454,3 +454,6 @@ + 464 o32 getxattrat sys_getxattrat + 465 o32 listxattrat sys_listxattrat + 466 o32 removexattrat sys_removexattrat ++467 o32 process_ksm_enable sys_process_ksm_enable ++468 o32 process_ksm_disable sys_process_ksm_disable ++469 o32 process_ksm_status sys_process_ksm_status diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl -index 66dc406b12e4..9d46476fd908 100644 +index d9fc94c86965..85dca5afcf06 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl -@@ -461,3 +461,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status +@@ -465,3 +465,6 @@ + 464 common getxattrat sys_getxattrat + 465 common listxattrat sys_listxattrat + 466 common removexattrat sys_removexattrat ++467 common process_ksm_enable sys_process_ksm_enable ++468 common process_ksm_disable sys_process_ksm_disable ++469 common process_ksm_status sys_process_ksm_status diff --git 
a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl -index ebae8415dfbb..16f71bc2f6f0 100644 +index d8b4ab78bef0..57aa958c1b97 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl -@@ -553,3 +553,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status +@@ -557,3 +557,6 @@ + 464 common getxattrat sys_getxattrat + 465 common listxattrat sys_listxattrat + 466 common removexattrat sys_removexattrat ++467 common process_ksm_enable sys_process_ksm_enable ++468 common process_ksm_disable sys_process_ksm_disable ++469 common process_ksm_status sys_process_ksm_status diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl -index 01071182763e..7394bad8178e 100644 +index e9115b4d8b63..2afc778f2d17 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl -@@ -465,3 +465,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status sys_process_ksm_status +@@ -469,3 +469,6 @@ + 464 common getxattrat sys_getxattrat sys_getxattrat + 465 common listxattrat sys_listxattrat sys_listxattrat + 466 common removexattrat sys_removexattrat sys_removexattrat ++467 common process_ksm_enable sys_process_ksm_enable sys_process_ksm_enable ++468 common process_ksm_disable sys_process_ksm_disable sys_process_ksm_disable ++469 common process_ksm_status sys_process_ksm_status sys_process_ksm_status diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl -index c55fd7696d40..b9fc31221b87 100644 +index c8cad33bf250..dfe06a84d902 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl -@@ -466,3 +466,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status +@@ -470,3 +470,6 @@ + 464 common getxattrat sys_getxattrat + 465 common listxattrat sys_listxattrat + 466 common removexattrat sys_removexattrat ++467 common process_ksm_enable sys_process_ksm_enable ++468 common process_ksm_disable sys_process_ksm_disable ++469 common process_ksm_status sys_process_ksm_status diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl -index cfdfb3707c16..0d79fd772854 100644 +index 727f99d333b3..4c43b0d2d09f 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl -@@ -508,3 +508,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status +@@ -512,3 +512,6 @@ + 464 common getxattrat 
sys_getxattrat + 465 common listxattrat sys_listxattrat + 466 common removexattrat sys_removexattrat ++467 common process_ksm_enable sys_process_ksm_enable ++468 common process_ksm_disable sys_process_ksm_disable ++469 common process_ksm_status sys_process_ksm_status diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl -index 534c74b14fab..c546a30575f1 100644 +index 4d0fb2fba7e2..a63252b84261 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl -@@ -468,3 +468,6 @@ - 460 i386 lsm_set_self_attr sys_lsm_set_self_attr - 461 i386 lsm_list_modules sys_lsm_list_modules - 462 i386 mseal sys_mseal -+463 i386 process_ksm_enable sys_process_ksm_enable -+464 i386 process_ksm_disable sys_process_ksm_disable -+465 i386 process_ksm_status sys_process_ksm_status +@@ -472,3 +472,6 @@ + 464 i386 getxattrat sys_getxattrat + 465 i386 listxattrat sys_listxattrat + 466 i386 removexattrat sys_removexattrat ++467 i386 process_ksm_enable sys_process_ksm_enable ++468 i386 process_ksm_disable sys_process_ksm_disable ++469 i386 process_ksm_status sys_process_ksm_status diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl -index 7093ee21c0d1..0fcd10ba8dfe 100644 +index 5eb708bff1c7..b5fe77405938 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl -@@ -386,6 +386,9 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status +@@ -390,6 +390,9 @@ + 464 common getxattrat sys_getxattrat + 465 common listxattrat sys_listxattrat + 466 common removexattrat sys_removexattrat ++467 common process_ksm_enable sys_process_ksm_enable ++468 common process_ksm_disable sys_process_ksm_disable ++469 common process_ksm_status sys_process_ksm_status # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl -index 67083fc1b2f5..c1aecee4ad9b 100644 +index 37effc1b134e..5c944f0dcc20 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl -@@ -433,3 +433,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status +@@ -437,3 +437,6 @@ + 464 common getxattrat sys_getxattrat + 465 common listxattrat sys_listxattrat + 466 common removexattrat sys_removexattrat ++467 common process_ksm_enable sys_process_ksm_enable ++468 common process_ksm_disable sys_process_ksm_disable ++469 common process_ksm_status sys_process_ksm_status diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h -index 5758104921e6..cc9c4fac2412 100644 +index c6333204d451..00400d99eef3 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h -@@ -818,6 +818,9 @@ asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior); +@@ -831,6 +831,9 @@ asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior); asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec, size_t vlen, int behavior, unsigned int flags); 
asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags); @@ -21537,31 +19579,31 @@ index 5758104921e6..cc9c4fac2412 100644 unsigned long prot, unsigned long pgoff, unsigned long flags); diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h -index 5bf6148cac2b..613e559ad6e0 100644 +index 88dc393c2bca..34d73f16b478 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h -@@ -841,8 +841,15 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) - #define __NR_mseal 462 - __SYSCALL(__NR_mseal, sys_mseal) +@@ -850,8 +850,15 @@ __SYSCALL(__NR_listxattrat, sys_listxattrat) + #define __NR_removexattrat 466 + __SYSCALL(__NR_removexattrat, sys_removexattrat) -+#define __NR_process_ksm_enable 463 ++#define __NR_process_ksm_enable 467 +__SYSCALL(__NR_process_ksm_enable, sys_process_ksm_enable) -+#define __NR_process_ksm_disable 464 ++#define __NR_process_ksm_disable 468 +__SYSCALL(__NR_process_ksm_disable, sys_process_ksm_disable) -+#define __NR_process_ksm_status 465 ++#define __NR_process_ksm_status 469 +__SYSCALL(__NR_process_ksm_status, sys_process_ksm_status) + #undef __NR_syscalls --#define __NR_syscalls 463 -+#define __NR_syscalls 466 +-#define __NR_syscalls 467 ++#define __NR_syscalls 470 /* * 32 bit systems traditionally used different diff --git a/kernel/sys.c b/kernel/sys.c -index 4da31f28fda8..fcd3aeaddd05 100644 +index c4c701c6f0b4..8806d113f5db 100644 --- a/kernel/sys.c +++ b/kernel/sys.c -@@ -2791,6 +2791,144 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, +@@ -2816,6 +2816,144 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return error; } @@ -21721,45 +19763,45 @@ index c00a86931f8c..d82213d68522 100644 COND_SYSCALL(mbind); COND_SYSCALL(get_mempolicy); diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl -index 845e24eb372e..227d9cc12365 100644 +index ebbdb3c42e9f..b19b6bfe5cd4 100644 --- a/scripts/syscall.tbl +++ b/scripts/syscall.tbl -@@ -403,3 +403,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status +@@ -407,3 +407,6 @@ + 464 common getxattrat sys_getxattrat + 465 common listxattrat sys_listxattrat + 466 common removexattrat sys_removexattrat ++467 common process_ksm_enable sys_process_ksm_enable ++468 common process_ksm_disable sys_process_ksm_disable ++469 common process_ksm_status sys_process_ksm_status diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl -index ebae8415dfbb..16f71bc2f6f0 100644 +index d8b4ab78bef0..57aa958c1b97 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl -@@ -553,3 +553,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status +@@ -557,3 +557,6 @@ + 464 common getxattrat sys_getxattrat + 465 common listxattrat sys_listxattrat + 466 common removexattrat sys_removexattrat ++467 common process_ksm_enable sys_process_ksm_enable ++468 common process_ksm_disable sys_process_ksm_disable ++469 common 
process_ksm_status sys_process_ksm_status diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl -index 01071182763e..7394bad8178e 100644 +index e9115b4d8b63..2afc778f2d17 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl -@@ -465,3 +465,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status sys_process_ksm_status +@@ -469,3 +469,6 @@ + 464 common getxattrat sys_getxattrat sys_getxattrat + 465 common listxattrat sys_listxattrat sys_listxattrat + 466 common removexattrat sys_removexattrat sys_removexattrat ++467 common process_ksm_enable sys_process_ksm_enable sys_process_ksm_enable ++468 common process_ksm_disable sys_process_ksm_disable sys_process_ksm_disable ++469 common process_ksm_status sys_process_ksm_status sys_process_ksm_status -- 2.48.0.rc1 -From 1002df1884e8c5d50d68de40c8589a4c5972ffb3 Mon Sep 17 00:00:00 2001 +From 5e459e48f274c34d701726a61a96140381b1de2b Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 2 Jan 2025 12:36:48 +0100 -Subject: [PATCH 12/13] t2 +Date: Mon, 20 Jan 2025 13:23:11 +0100 +Subject: [PATCH 11/12] t2 Signed-off-by: Peter Jung --- @@ -21777,10 +19819,10 @@ Signed-off-by: Peter Jung drivers/gpu/drm/tiny/Makefile | 1 + drivers/gpu/drm/tiny/appletbdrm.c | 624 +++++++++ drivers/gpu/vga/vga_switcheroo.c | 7 +- - drivers/hid/Kconfig | 22 + + drivers/hid/Kconfig | 26 + drivers/hid/Makefile | 2 + drivers/hid/hid-appletb-bl.c | 207 +++ - drivers/hid/hid-appletb-kbd.c | 432 +++++++ + drivers/hid/hid-appletb-kbd.c | 506 ++++++++ drivers/hid/hid-multitouch.c | 60 +- drivers/hid/hid-quirks.c | 8 +- drivers/hwmon/applesmc.c | 1138 ++++++++++++----- @@ -21819,7 +19861,7 @@ Signed-off-by: Peter Jung lib/test_printf.c | 20 +- lib/vsprintf.c | 36 +- scripts/checkpatch.pl | 2 +- - 56 files changed, 8270 insertions(+), 336 deletions(-) + 56 files changed, 8348 insertions(+), 336 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd create mode 100644 drivers/gpu/drm/tiny/appletbdrm.c create mode 100644 drivers/hid/hid-appletb-bl.c @@ -21871,10 +19913,10 @@ index 000000000000..2a19584d091e + 3 None + == ================= diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst -index 14e093da3ccd..ccd7bd29a6d6 100644 +index ecccc0473da9..6de6b0e6abf3 100644 --- a/Documentation/core-api/printk-formats.rst +++ b/Documentation/core-api/printk-formats.rst -@@ -630,6 +630,38 @@ Examples:: +@@ -648,6 +648,38 @@ Examples:: %p4cc Y10 little-endian (0x20303159) %p4cc NV12 big-endian (0xb231564e) @@ -21914,10 +19956,10 @@ index 14e093da3ccd..ccd7bd29a6d6 100644 ---- diff --git a/MAINTAINERS b/MAINTAINERS -index a30770b6f75a..63ee2eb3ab41 100644 +index efecb59adfe6..16af42c68cca 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -7013,6 +7013,12 @@ S: Supported +@@ -7066,6 +7066,12 @@ S: Supported T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: drivers/gpu/drm/sun4i/sun8i* @@ -21931,10 +19973,10 @@ index a30770b6f75a..63ee2eb3ab41 100644 S: Orphan T: git https://gitlab.freedesktop.org/drm/misc/kernel.git diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -index 94a9a9266f8e..d6d76a7cebe6 100644 +index 811d020f3f4b..ade5a5b597e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -@@ -2260,6 +2260,9 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, +@@ -2256,6 +2256,9 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, int ret, retry = 0, i; bool supports_atomic = false; @@ -22017,10 +20059,10 @@ index b1be458ed4dd..28c0e76a1e88 100644 drm_fb_xrgb8888_to_argb8888(dst, dst_pitch, src, fb, clip, state); return 0; diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c -index b1c294236cc8..21e23ba5391e 100644 +index 49b5cc01ce40..1435f49f2ce6 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c -@@ -4641,6 +4641,7 @@ intel_ddi_init_hdmi_connector(struct intel_digital_port *dig_port) +@@ -4685,6 +4685,7 @@ intel_ddi_init_hdmi_connector(struct intel_digital_port *dig_port) static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dig_port) { @@ -22028,7 +20070,7 @@ index b1c294236cc8..21e23ba5391e 100644 struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); if (dig_port->base.port != PORT_A) -@@ -4649,6 +4650,9 @@ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dig_port) +@@ -4693,6 +4694,9 @@ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dig_port) if (dig_port->saved_port_bits & DDI_A_4_LANES) return false; @@ -22039,10 +20081,10 @@ index b1c294236cc8..21e23ba5391e 100644 * supported configuration */ diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c -index 49a1ac4f5491..c8c10a6104c4 100644 +index 00852ff5b247..4c56f1b622be 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c -@@ -199,10 +199,10 @@ static int intelfb_create(struct drm_fb_helper *helper, +@@ -197,10 +197,10 @@ static int intelfb_create(struct drm_fb_helper *helper, ifbdev->fb = NULL; if (fb && @@ -22057,7 +20099,7 @@ index 49a1ac4f5491..c8c10a6104c4 100644 fb->base.width, fb->base.height, sizes->fb_width, sizes->fb_height); diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c -index 29b56d53a340..7226ec8fdd9c 100644 +index 28f497ae785b..c2952b0f8b88 100644 --- a/drivers/gpu/drm/i915/display/intel_quirks.c +++ b/drivers/gpu/drm/i915/display/intel_quirks.c @@ -64,6 +64,18 @@ static void quirk_increase_ddi_disabled_time(struct intel_display *display) @@ -22088,7 +20130,7 @@ index 29b56d53a340..7226ec8fdd9c 100644 + { 0x3e9b, 0x106b, 0x0176, quirk_ddi_a_force_4_lanes }, }; - static struct intel_dpcd_quirk intel_dpcd_quirks[] = { + static const struct intel_dpcd_quirk intel_dpcd_quirks[] = { diff --git a/drivers/gpu/drm/i915/display/intel_quirks.h b/drivers/gpu/drm/i915/display/intel_quirks.h index cafdebda7535..a5296f82776e 100644 --- a/drivers/gpu/drm/i915/display/intel_quirks.h @@ -22243,7 +20285,7 @@ index 08992636ec05..35cd3405d045 100644 KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_xrgb2101010, convert_xrgb8888_gen_params), KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_argb2101010, convert_xrgb8888_gen_params), diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig -index f6889f649bc1..559a97bce12c 100644 +index 94cbdb1337c0..1201aee7bab3 100644 --- a/drivers/gpu/drm/tiny/Kconfig +++ b/drivers/gpu/drm/tiny/Kconfig @@ -1,5 +1,17 @@ @@ -22265,7 
+20307,7 @@ index f6889f649bc1..559a97bce12c 100644 tristate "ARC PGU" depends on DRM && OF diff --git a/drivers/gpu/drm/tiny/Makefile b/drivers/gpu/drm/tiny/Makefile -index 76dde89a044b..9a1b412e764a 100644 +index 4aaf56f8707d..d9add9c3eda3 100644 --- a/drivers/gpu/drm/tiny/Makefile +++ b/drivers/gpu/drm/tiny/Makefile @@ -1,5 +1,6 @@ @@ -22924,10 +20966,10 @@ index 18f2c92beff8..3de1bca45ed2 100644 } diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig -index f8a56d631242..6c8e9e004907 100644 +index 4d2a89d65b65..311f0ab5d42f 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig -@@ -148,6 +148,27 @@ config HID_APPLEIR +@@ -148,6 +148,31 @@ config HID_APPLEIR Say Y here if you want support for Apple infrared remote control. @@ -22944,6 +20986,10 @@ index f8a56d631242..6c8e9e004907 100644 +config HID_APPLETB_KBD + tristate "Apple Touch Bar Keyboard Mode" + depends on USB_HID ++ depends on BACKLIGHT_CLASS_DEVICE ++ depends on INPUT ++ select INPUT_SPARSEKMAP ++ select HID_APPLETB_BL + help + Say Y here if you want support for the keyboard mode (escape, + function, media and brightness keys) of Touch Bars on x86 MacBook @@ -22955,7 +21001,7 @@ index f8a56d631242..6c8e9e004907 100644 config HID_ASUS tristate "Asus" depends on USB_HID -@@ -729,6 +750,7 @@ config HID_MULTITOUCH +@@ -741,6 +766,7 @@ config HID_MULTITOUCH Say Y here if you have one of the following devices: - 3M PCT touch screens - ActionStar dual touch panels @@ -22964,7 +21010,7 @@ index f8a56d631242..6c8e9e004907 100644 - Cando dual touch panels - Chunghwa panels diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile -index 496dab54c73a..13d32f55e5d4 100644 +index 24de45f3677d..1989288e0438 100644 --- a/drivers/hid/Makefile +++ b/drivers/hid/Makefile @@ -29,6 +29,8 @@ obj-$(CONFIG_HID_ALPS) += hid-alps.o @@ -23191,10 +21237,10 @@ index 000000000000..819157686e59 +MODULE_LICENSE("GPL"); diff --git a/drivers/hid/hid-appletb-kbd.c b/drivers/hid/hid-appletb-kbd.c new file mode 100644 -index 000000000000..442c4d8848df +index 000000000000..fa28a691da6a --- /dev/null +++ b/drivers/hid/hid-appletb-kbd.c -@@ -0,0 +1,432 @@ +@@ -0,0 +1,506 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Apple Touch Bar Keyboard Mode Driver @@ -23213,6 +21259,8 @@ index 000000000000..442c4d8848df +#include +#include +#include ++#include ++#include +#include + +#include "hid-ids.h" @@ -23224,6 +21272,7 @@ index 000000000000..442c4d8848df +#define APPLETB_KBD_MODE_MAX APPLETB_KBD_MODE_OFF + +#define APPLETB_DEVID_KEYBOARD 1 ++#define APPLETB_DEVID_TRACKPAD 2 + +#define HID_USAGE_MODE 0x00ff0004 + @@ -23238,14 +21287,29 @@ index 000000000000..442c4d8848df +module_param_named(fntoggle, appletb_tb_fn_toggle, bool, 0644); +MODULE_PARM_DESC(fntoggle, "Switch between Fn and media controls on pressing Fn key"); + ++static bool appletb_tb_autodim = true; ++module_param_named(autodim, appletb_tb_autodim, bool, 0644); ++MODULE_PARM_DESC(autodim, "Automatically dim and turn off the Touch Bar after some time"); ++ ++static int appletb_tb_dim_timeout = 60; ++module_param_named(dim_timeout, appletb_tb_dim_timeout, int, 0644); ++MODULE_PARM_DESC(dim_timeout, "Dim timeout in sec"); ++ ++static int appletb_tb_idle_timeout = 15; ++module_param_named(idle_timeout, appletb_tb_idle_timeout, int, 0644); ++MODULE_PARM_DESC(idle_timeout, "Idle timeout in sec"); ++ +struct appletb_kbd { + struct hid_field *mode_field; -+ -+ u8 saved_mode; -+ u8 current_mode; + struct input_handler inp_handler; + struct input_handle kbd_handle; -+ ++ struct input_handle tpd_handle; ++ 
struct backlight_device *backlight_dev; ++ struct timer_list inactivity_timer; ++ bool has_dimmed; ++ bool has_turned_off; ++ u8 saved_mode; ++ u8 current_mode; +}; + +static const struct key_entry appletb_kbd_keymap[] = { @@ -23343,6 +21407,34 @@ index 000000000000..442c4d8848df + } +} + ++static void appletb_inactivity_timer(struct timer_list *t) ++{ ++ struct appletb_kbd *kbd = from_timer(kbd, t, inactivity_timer); ++ ++ if (kbd->backlight_dev && appletb_tb_autodim) { ++ if (!kbd->has_dimmed) { ++ backlight_device_set_brightness(kbd->backlight_dev, 1); ++ kbd->has_dimmed = true; ++ mod_timer(&kbd->inactivity_timer, jiffies + msecs_to_jiffies(appletb_tb_idle_timeout * 1000)); ++ } else if (!kbd->has_turned_off) { ++ backlight_device_set_brightness(kbd->backlight_dev, 0); ++ kbd->has_turned_off = true; ++ } ++ } ++} ++ ++static void reset_inactivity_timer(struct appletb_kbd *kbd) ++{ ++ if (kbd->backlight_dev && appletb_tb_autodim) { ++ if (kbd->has_dimmed || kbd->has_turned_off) { ++ backlight_device_set_brightness(kbd->backlight_dev, 2); ++ kbd->has_dimmed = false; ++ kbd->has_turned_off = false; ++ } ++ mod_timer(&kbd->inactivity_timer, jiffies + msecs_to_jiffies(appletb_tb_dim_timeout * 1000)); ++ } ++} ++ +static int appletb_kbd_hid_event(struct hid_device *hdev, struct hid_field *field, + struct hid_usage *usage, __s32 value) +{ @@ -23367,6 +21459,8 @@ index 000000000000..442c4d8848df + if (slot < 0) + return 0; + ++ reset_inactivity_timer(kbd); ++ + translation = sparse_keymap_entry_from_scancode(input, usage->code); + + if (translation && kbd->current_mode == APPLETB_KBD_MODE_SPCL) { @@ -23383,6 +21477,8 @@ index 000000000000..442c4d8848df +{ + struct appletb_kbd *kbd = handle->private; + ++ reset_inactivity_timer(kbd); ++ + if (type == EV_KEY && code == KEY_FN && appletb_tb_fn_toggle) { + if (value == 1) { + kbd->saved_mode = kbd->current_mode; @@ -23408,6 +21504,9 @@ index 000000000000..442c4d8848df + if (id->driver_info == APPLETB_DEVID_KEYBOARD) { + handle = &kbd->kbd_handle; + handle->name = "tbkbd"; ++ } else if (id->driver_info == APPLETB_DEVID_TRACKPAD) { ++ handle = &kbd->tpd_handle; ++ handle->name = "tbtpd"; + } else { + return -ENOENT; + } @@ -23464,9 +21563,8 @@ index 000000000000..442c4d8848df + + sparse_keymap_setup(input, appletb_kbd_keymap, NULL); + -+ for (idx = 0; appletb_kbd_keymap[idx].type != KE_END; idx++) { ++ for (idx = 0; appletb_kbd_keymap[idx].type != KE_END; idx++) + input_set_capability(input, EV_KEY, appletb_kbd_keymap[idx].code); -+ } + + return 0; +} @@ -23481,6 +21579,15 @@ index 000000000000..442c4d8848df + .keybit = { [BIT_WORD(KEY_FN)] = BIT_MASK(KEY_FN) }, + .driver_info = APPLETB_DEVID_KEYBOARD, + }, ++ { ++ .flags = INPUT_DEVICE_ID_MATCH_BUS | ++ INPUT_DEVICE_ID_MATCH_VENDOR | ++ INPUT_DEVICE_ID_MATCH_KEYBIT, ++ .bustype = BUS_USB, ++ .vendor = USB_VENDOR_ID_APPLE, ++ .keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) }, ++ .driver_info = APPLETB_DEVID_TRACKPAD, ++ }, + { } +}; + @@ -23537,6 +21644,15 @@ index 000000000000..442c4d8848df + goto stop_hw; + } + ++ kbd->backlight_dev = backlight_device_get_by_name("appletb_backlight"); ++ if (!kbd->backlight_dev) ++ dev_err_probe(dev, ret, "Failed to get backlight device\n"); ++ else { ++ backlight_device_set_brightness(kbd->backlight_dev, 2); ++ timer_setup(&kbd->inactivity_timer, appletb_inactivity_timer, 0); ++ mod_timer(&kbd->inactivity_timer, jiffies + msecs_to_jiffies(appletb_tb_dim_timeout * 1000)); ++ } ++ + kbd->inp_handler.event = appletb_kbd_inp_event; + kbd->inp_handler.connect 
= appletb_kbd_inp_connect; + kbd->inp_handler.disconnect = appletb_kbd_inp_disconnect; @@ -23575,6 +21691,7 @@ index 000000000000..442c4d8848df + appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF); + + input_unregister_handler(&kbd->inp_handler); ++ del_timer_sync(&kbd->inactivity_timer); + + hid_hw_close(hdev); + hid_hw_stop(hdev); @@ -23623,15 +21740,18 @@ index 000000000000..442c4d8848df +}; +module_hid_driver(appletb_kbd_hid_driver); + ++/* The backlight driver should be loaded before the keyboard driver is initialised*/ ++MODULE_SOFTDEP("pre: hid_appletb_bl"); ++ +MODULE_AUTHOR("Ronald Tschalär"); +MODULE_AUTHOR("Kerem Karabay "); +MODULE_DESCRIPTION("MacBookPro Touch Bar Keyboard Mode Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c -index e936019d21fe..0d5382e965de 100644 +index 785743036647..2c3845cbb451 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c -@@ -72,6 +72,7 @@ MODULE_LICENSE("GPL"); +@@ -73,6 +73,7 @@ MODULE_LICENSE("GPL"); #define MT_QUIRK_FORCE_MULTI_INPUT BIT(20) #define MT_QUIRK_DISABLE_WAKEUP BIT(21) #define MT_QUIRK_ORIENTATION_INVERT BIT(22) @@ -23639,7 +21759,7 @@ index e936019d21fe..0d5382e965de 100644 #define MT_INPUTMODE_TOUCHSCREEN 0x02 #define MT_INPUTMODE_TOUCHPAD 0x03 -@@ -145,6 +146,7 @@ struct mt_class { +@@ -153,6 +154,7 @@ struct mt_class { __s32 sn_height; /* Signal/noise ratio for height events */ __s32 sn_pressure; /* Signal/noise ratio for pressure events */ __u8 maxcontacts; @@ -23647,7 +21767,7 @@ index e936019d21fe..0d5382e965de 100644 bool is_indirect; /* true for touchpads */ bool export_all_inputs; /* do not ignore mouse, keyboards, etc... */ }; -@@ -212,6 +214,7 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app); +@@ -220,6 +222,7 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app); #define MT_CLS_GOOGLE 0x0111 #define MT_CLS_RAZER_BLADE_STEALTH 0x0112 #define MT_CLS_SMART_TECH 0x0113 @@ -23655,7 +21775,7 @@ index e936019d21fe..0d5382e965de 100644 #define MT_CLS_SIS 0x0457 #define MT_DEFAULT_MAXCONTACT 10 -@@ -397,6 +400,13 @@ static const struct mt_class mt_classes[] = { +@@ -405,6 +408,13 @@ static const struct mt_class mt_classes[] = { MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_SEPARATE_APP_REPORT, }, @@ -23669,7 +21789,7 @@ index e936019d21fe..0d5382e965de 100644 { .name = MT_CLS_SIS, .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP | MT_QUIRK_ALWAYS_VALID | -@@ -495,9 +505,6 @@ static void mt_feature_mapping(struct hid_device *hdev, +@@ -503,9 +513,6 @@ static void mt_feature_mapping(struct hid_device *hdev, if (!td->maxcontacts && field->logical_maximum <= MT_MAX_MAXCONTACT) td->maxcontacts = field->logical_maximum; @@ -23679,7 +21799,7 @@ index e936019d21fe..0d5382e965de 100644 break; case HID_DG_BUTTONTYPE: -@@ -571,13 +578,13 @@ static struct mt_application *mt_allocate_application(struct mt_device *td, +@@ -579,13 +586,13 @@ static struct mt_application *mt_allocate_application(struct mt_device *td, mt_application->application = application; INIT_LIST_HEAD(&mt_application->mt_usages); @@ -23695,7 +21815,7 @@ index e936019d21fe..0d5382e965de 100644 mt_application->mt_flags |= INPUT_MT_POINTER; td->inputmode_value = MT_INPUTMODE_TOUCHPAD; } -@@ -641,7 +648,9 @@ static struct mt_report_data *mt_allocate_report_data(struct mt_device *td, +@@ -649,7 +656,9 @@ static struct mt_report_data *mt_allocate_report_data(struct mt_device *td, if (field->logical == HID_DG_FINGER || td->hdev->group != 
HID_GROUP_MULTITOUCH_WIN_8) { for (n = 0; n < field->report_count; n++) { @@ -23706,7 +21826,7 @@ index e936019d21fe..0d5382e965de 100644 rdata->is_mt_collection = true; break; } -@@ -813,6 +822,15 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, +@@ -821,6 +830,15 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, MT_STORE_FIELD(confidence_state); return 1; @@ -23722,7 +21842,7 @@ index e936019d21fe..0d5382e965de 100644 case HID_DG_TIPSWITCH: if (field->application != HID_GD_SYSTEM_MULTIAXIS) input_set_capability(hi->input, -@@ -820,6 +838,7 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, +@@ -828,6 +846,7 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, MT_STORE_FIELD(tip_state); return 1; case HID_DG_CONTACTID: @@ -23730,7 +21850,7 @@ index e936019d21fe..0d5382e965de 100644 MT_STORE_FIELD(contactid); app->touches_by_report++; return 1; -@@ -875,10 +894,6 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, +@@ -883,10 +902,6 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, case HID_DG_CONTACTMAX: /* contact max are global to the report */ return -1; @@ -23741,7 +21861,7 @@ index e936019d21fe..0d5382e965de 100644 } /* let hid-input decide for the others */ return 0; -@@ -1306,6 +1321,10 @@ static int mt_touch_input_configured(struct hid_device *hdev, +@@ -1314,6 +1329,10 @@ static int mt_touch_input_configured(struct hid_device *hdev, struct input_dev *input = hi->input; int ret; @@ -23752,7 +21872,7 @@ index e936019d21fe..0d5382e965de 100644 if (!td->maxcontacts) td->maxcontacts = MT_DEFAULT_MAXCONTACT; -@@ -1313,6 +1332,9 @@ static int mt_touch_input_configured(struct hid_device *hdev, +@@ -1321,6 +1340,9 @@ static int mt_touch_input_configured(struct hid_device *hdev, if (td->serial_maybe) mt_post_parse_default_settings(td, app); @@ -23762,7 +21882,7 @@ index e936019d21fe..0d5382e965de 100644 if (cls->is_indirect) app->mt_flags |= INPUT_MT_POINTER; -@@ -1764,6 +1786,15 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) +@@ -1770,6 +1792,15 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) } } @@ -23778,7 +21898,7 @@ index e936019d21fe..0d5382e965de 100644 td = devm_kzalloc(&hdev->dev, sizeof(struct mt_device), GFP_KERNEL); if (!td) { dev_err(&hdev->dev, "cannot allocate multitouch data\n"); -@@ -1811,10 +1842,6 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) +@@ -1817,10 +1848,6 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) timer_setup(&td->release_timer, mt_expired_timeout, 0); @@ -23789,7 +21909,7 @@ index e936019d21fe..0d5382e965de 100644 if (mtclass->quirks & MT_QUIRK_FIX_CONST_CONTACT_ID) mt_fix_const_fields(hdev, HID_DG_CONTACTID); -@@ -2299,6 +2326,11 @@ static const struct hid_device_id mt_devices[] = { +@@ -2305,6 +2332,11 @@ static const struct hid_device_id mt_devices[] = { MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_CSR2) }, @@ -26032,24 +24152,24 @@ index 1417e230edbd..e69785af8e1d 100644 * Retina MacBook Pros cannot switch the panel's AUX separately * and need eDP pre-calibration. 
They are distinguishable from diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig -index 3fb68d60dfc1..7337f658fe96 100644 +index 075e775d3868..e1cc0d60eeb6 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig -@@ -64,4 +64,6 @@ source "drivers/staging/fieldbus/Kconfig" +@@ -50,4 +50,6 @@ source "drivers/staging/vme_user/Kconfig" - source "drivers/staging/vme_user/Kconfig" + source "drivers/staging/gpib/Kconfig" +source "drivers/staging/apple-bce/Kconfig" + endif # STAGING diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile -index c977aa13fad4..241ea7562045 100644 +index e681e403509c..4045c588b3b4 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile -@@ -21,3 +21,4 @@ obj-$(CONFIG_GREYBUS) += greybus/ +@@ -14,3 +14,4 @@ obj-$(CONFIG_GREYBUS) += greybus/ obj-$(CONFIG_BCM2835_VCHIQ) += vc04_services/ obj-$(CONFIG_XIL_AXIS_FIFO) += axis-fifo/ - obj-$(CONFIG_FIELDBUS_DEV) += fieldbus/ + obj-$(CONFIG_GPIB) += gpib/ +obj-$(CONFIG_APPLE_BCE) += apple-bce/ diff --git a/drivers/staging/apple-bce/Kconfig b/drivers/staging/apple-bce/Kconfig new file mode 100644 @@ -31884,10 +30004,10 @@ index 428d81afe215..aa1604d92c1a 100644 const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state); diff --git a/lib/test_printf.c b/lib/test_printf.c -index 8448b6d02bd9..f63591b3ee69 100644 +index 59dbe4f9a4cb..6fc82cb0b4cf 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c -@@ -719,18 +719,26 @@ static void __init fwnode_pointer(void) +@@ -779,18 +779,26 @@ static void __init fwnode_pointer(void) static void __init fourcc_pointer(void) { struct { @@ -31921,10 +30041,10 @@ index 8448b6d02bd9..f63591b3ee69 100644 static void __init diff --git a/lib/vsprintf.c b/lib/vsprintf.c -index c5e2ec9303c5..874e3af8104c 100644 +index 9d3dac38a3f4..17926ad7863c 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c -@@ -1760,27 +1760,50 @@ char *fourcc_string(char *buf, char *end, const u32 *fourcc, +@@ -1795,27 +1795,50 @@ char *fourcc_string(char *buf, char *end, const u32 *fourcc, char output[sizeof("0123 little-endian (0x01234567)")]; char *p = output; unsigned int i; @@ -31981,7 +30101,7 @@ index c5e2ec9303c5..874e3af8104c 100644 *p++ = ' '; *p++ = '('; -@@ -2334,6 +2357,7 @@ char *rust_fmt_argument(char *buf, char *end, void *ptr); +@@ -2379,6 +2402,7 @@ char *rust_fmt_argument(char *buf, char *end, void *ptr); * read the documentation (path below) first. * - 'NF' For a netdev_features_t * - '4cc' V4L2 or DRM FourCC code, with endianness and raw numerical value. @@ -31990,7 +30110,7 @@ index c5e2ec9303c5..874e3af8104c 100644 * a certain separator (' ' by default): * C colon diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl -index b03d526e4c45..66d09cbec5a8 100755 +index 9eed3683ad76..7ddbf75f4c26 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -6912,7 +6912,7 @@ sub process { @@ -32005,10 +30125,10 @@ index b03d526e4c45..66d09cbec5a8 100755 -- 2.48.0.rc1 -From b5fd8ae7f5b5a06d48fed1a23aa1aa147fbdcb66 Mon Sep 17 00:00:00 2001 +From 6f96c228cd968c7f47eb90d9e7ad6d679bf5a7f0 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 2 Jan 2025 12:36:58 +0100 -Subject: [PATCH 13/13] zstd +Date: Mon, 20 Jan 2025 13:23:20 +0100 +Subject: [PATCH 12/12] zstd Signed-off-by: Peter Jung ---