From c348d3c2caf426969c51c8f2f737d7921f613c19 Mon Sep 17 00:00:00 2001
From: ferrreo 
Date: Thu, 27 Apr 2023 18:06:03 +0100
Subject: [PATCH] Update all patches from cachy

---
 patches/0001-cachy-all.patch | 15038 ++-------------------------------
 1 file changed, 800 insertions(+), 14238 deletions(-)

diff --git a/patches/0001-cachy-all.patch b/patches/0001-cachy-all.patch
index 87683ef..b307560 100644
--- a/patches/0001-cachy-all.patch
+++ b/patches/0001-cachy-all.patch
@@ -1,7 +1,7 @@
-From 9a8d83b362088f3eca0fbde0b2cc1a66ee142103 Mon Sep 17 00:00:00 2001
+From a2522409b71cfd3a4f7fc95effca4c322adaf7b0 Mon Sep 17 00:00:00 2001
 From: Peter Jung 
 Date: Mon, 6 Mar 2023 18:43:03 +0100
-Subject: [PATCH 01/12] bbr2
+Subject: [PATCH 1/8] bbr2
 
 Signed-off-by: Peter Jung 
 ---
@@ -3281,12 +3281,12 @@ index cb79127f45c3..70e4de876a7f 100644
 		event = icsk->icsk_pending;
 -- 
-2.40.0
+2.40.1
 
-From 44f2b9d76af75aab59d14b879403aa02cecb2b32 Mon Sep 17 00:00:00 2001
+From 0d9e557b60746641c464bab65aae86fd78cb9024 Mon Sep 17 00:00:00 2001
 From: Peter Jung 
 Date: Mon, 17 Apr 2023 18:21:50 +0200
-Subject: [PATCH 02/12] bfq
+Subject: [PATCH 2/8] bfq
 
 Signed-off-by: Peter Jung 
 ---
@@ -3327,75 +3327,81 @@ index d9ed3108c17a..66146bbcd4af 100644
 slab_kill:
 -- 
-2.40.0
+2.40.1
 
-From f96cbff0d52f3343956e5fb0f8e481ac33ad47fa Mon Sep 17 00:00:00 2001
+From 7b6e9ae435973f69a18f51d226879b128fa6026f Mon Sep 17 00:00:00 2001
 From: Peter Jung 
-Date: Sat, 22 Apr 2023 11:43:07 +0200
-Subject: [PATCH 03/12] cachy
+Date: Wed, 26 Apr 2023 22:04:07 +0200
+Subject: [PATCH 3/8] cachy
 
 Signed-off-by: Peter Jung 
 ---
- .gitignore                                 |    1 +
- .../admin-guide/kernel-parameters.txt      |    9 +
- Documentation/dontdiff                     |    1 +
- Makefile                                   |    8 +-
- arch/arc/configs/axs101_defconfig          |    1 +
- arch/arc/configs/axs103_defconfig          |    1 +
- arch/arc/configs/axs103_smp_defconfig      |    1 +
- arch/arc/configs/haps_hs_defconfig         |    1 +
- arch/arc/configs/haps_hs_smp_defconfig     |    1 +
- arch/arc/configs/hsdk_defconfig            |    1 +
- arch/arc/configs/nsim_700_defconfig        |    1 +
- arch/arc/configs/nsimosci_defconfig        |    1 +
- arch/arc/configs/nsimosci_hs_defconfig     |    1 +
- arch/arc/configs/nsimosci_hs_smp_defconfig |    1 +
- arch/arc/configs/tb10x_defconfig           |    1 +
- arch/arc/configs/vdk_hs38_defconfig        |    1 +
- arch/arc/configs/vdk_hs38_smp_defconfig    |    1 +
- arch/x86/Kconfig.cpu                       |  416 ++-
- arch/x86/Makefile                          |   45 +-
- arch/x86/Makefile.postlink                 |   41 +
- arch/x86/boot/compressed/.gitignore        |    1 -
- arch/x86/boot/compressed/Makefile          |   10 +-
- arch/x86/include/asm/vermagic.h            |   72 +
- drivers/Makefile                           |   15 +-
- drivers/i2c/busses/Kconfig                 |    9 +
- drivers/i2c/busses/Makefile                |    1 +
- drivers/i2c/busses/i2c-nct6775.c           |  647 ++++
- drivers/i2c/busses/i2c-piix4.c             |    4 +-
- drivers/md/dm-crypt.c                      |    5 +
- drivers/pci/quirks.c                       |  101 +
- drivers/platform/x86/Kconfig               |   24 +
- drivers/platform/x86/Makefile              |    4 +
- drivers/platform/x86/legion-laptop.c       | 2783 +++++++++++++++++
- drivers/platform/x86/steamdeck.c           |  523 ++++
- include/linux/pagemap.h                    |    2 +-
- include/linux/user_namespace.h             |    4 +
- include/net/netns/ipv4.h                   |    1 +
- include/trace/events/tcp.h                 |    7 +
- init/Kconfig                               |   39 +
- kernel/Kconfig.hz                          |   24 +
- kernel/fork.c                              |   14 +
- kernel/module/Kconfig                      |   25 +
- kernel/sched/fair.c                        |   20 +-
- kernel/sysctl.c                            |   12 +
- kernel/user_namespace.c                    |    7 +
- mm/Kconfig                                 |    2 +-
- mm/compaction.c                            |    4 +
- mm/page-writeback.c                        |    8 +
- mm/swap.c                                  |    5 +
- mm/vmpressure.c                            |    4 +
- mm/vmscan.c                                |    8 +
- net/ipv4/sysctl_net_ipv4.c                 |    7 +
- net/ipv4/tcp_input.c                       |   36 +
- net/ipv4/tcp_ipv4.c                        |    2 +
- scripts/Makefile.lib                       |   13 +-
- scripts/Makefile.modinst                   |    7 +-
- 56 files changed, 4938 insertions(+), 46 deletions(-)
+ .gitignore                                    |    1 +
+ .../admin-guide/kernel-parameters.txt         |   12 +
+ Documentation/dontdiff                        |    1 +
+ Makefile                                      |    8 +-
+ arch/arc/configs/axs101_defconfig             |    1 +
+ arch/arc/configs/axs103_defconfig             |    1 +
+ arch/arc/configs/axs103_smp_defconfig         |    1 +
+ arch/arc/configs/haps_hs_defconfig            |    1 +
+ arch/arc/configs/haps_hs_smp_defconfig        |    1 +
+ arch/arc/configs/hsdk_defconfig               |    1 +
+ arch/arc/configs/nsim_700_defconfig           |    1 +
+ arch/arc/configs/nsimosci_defconfig           |    1 +
+ arch/arc/configs/nsimosci_hs_defconfig        |    1 +
+ arch/arc/configs/nsimosci_hs_smp_defconfig    |    1 +
+ arch/arc/configs/tb10x_defconfig              |    1 +
+ arch/arc/configs/vdk_hs38_defconfig           |    1 +
+ arch/arc/configs/vdk_hs38_smp_defconfig       |    1 +
+ arch/x86/Kconfig.cpu                          |  416 ++++++++++-
+ arch/x86/Makefile                             |   45 +-
+ arch/x86/Makefile.postlink                    |   41 ++
+ arch/x86/boot/compressed/.gitignore           |    1 -
+ arch/x86/boot/compressed/Makefile             |   10 +-
+ arch/x86/include/asm/pci.h                    |    6 +
+ arch/x86/include/asm/vermagic.h               |   72 ++
+ arch/x86/pci/common.c                         |    7 +-
+ drivers/Makefile                              |   15 +-
+ drivers/ata/ahci.c                            |   23 +-
+ drivers/cpufreq/Kconfig.x86                   |    2 -
+ drivers/cpufreq/intel_pstate.c                |    2 +
+ drivers/i2c/busses/Kconfig                    |    9 +
+ drivers/i2c/busses/Makefile                   |    1 +
+ drivers/i2c/busses/i2c-nct6775.c              |  647 ++++++++++++++++++
+ drivers/i2c/busses/i2c-piix4.c                |    4 +-
+ drivers/md/dm-crypt.c                         |    5 +
+ drivers/pci/controller/Makefile               |    6 +
+ drivers/pci/controller/intel-nvme-remap.c     |  462 +++++++++++++
+ drivers/pci/quirks.c                          |  101 +++
+ drivers/platform/x86/Kconfig                  |   14 +
+ drivers/platform/x86/Makefile                 |    3 +
+ drivers/platform/x86/steamdeck.c              |  523 ++++++++++++++
+ include/linux/pagemap.h                       |    2 +-
+ include/linux/user_namespace.h                |    4 +
+ include/net/netns/ipv4.h                      |    1 +
+ include/trace/events/tcp.h                    |    7 +
+ init/Kconfig                                  |   39 ++
+ kernel/Kconfig.hz                             |   24 +
+ kernel/fork.c                                 |   14 +
+ kernel/module/Kconfig                         |   25 +
+ kernel/sched/fair.c                           |   20 +-
+ kernel/sysctl.c                               |   12 +
+ kernel/user_namespace.c                       |    7 +
+ mm/Kconfig                                    |    2 +-
+ mm/compaction.c                               |    4 +
+ mm/page-writeback.c                           |    8 +
+ mm/swap.c                                     |    5 +
+ mm/vmpressure.c                               |    4 +
+ mm/vmscan.c                                   |    8 +
+ net/ipv4/sysctl_net_ipv4.c                    |    7 +
+ net/ipv4/tcp_input.c                          |   36 +
+ net/ipv4/tcp_ipv4.c                           |    2 +
+ scripts/Makefile.lib                          |   13 +-
+ scripts/Makefile.modinst                      |    7 +-
+ 62 files changed, 2637 insertions(+), 64 deletions(-)
 create mode 100644 arch/x86/Makefile.postlink
 create mode 100644 drivers/i2c/busses/i2c-nct6775.c
- create mode 100644 drivers/platform/x86/legion-laptop.c
+ create mode 100644 drivers/pci/controller/intel-nvme-remap.c
 create mode 100644 drivers/platform/x86/steamdeck.c
 
 diff --git a/.gitignore b/.gitignore
@@ -3411,10 +3417,20 @@ index 70ec6037fa7a..9bafd3c6bb5f 100644
  /vmlinux-gdb.py
  /vmlinuz
 diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
-index 7016cb12dc4e..736233f95d59 100644
+index 7016cb12dc4e..97303fa40350 100644
 --- a/Documentation/admin-guide/kernel-parameters.txt
 +++ b/Documentation/admin-guide/kernel-parameters.txt
-@@ -4190,6 +4190,15 @@
+@@ -2084,6 +2084,9 @@
+ 			disable
+ 			  Do not enable intel_pstate as the default
+ 			  scaling driver for the supported processors
++			enable
++			  Enable intel_pstate in case "disable" was passed
++			  previously in the kernel boot parameters
+ 			passive
+ 			  Use intel_pstate as a scaling driver, but configure it
+ 			  to work with generic cpufreq governors (instead of
+@@ -4190,6 +4193,15 @@
 	nomsi		[MSI] If the PCI_MSI kernel config parameter is
 			 enabled, this kernel boot option can be used to
 			 disable the use of MSI
interrupts system-wide. @@ -4291,6 +4307,30 @@ index 6b6cfe607bdb..19d1fb601796 100644 $(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE $(call if_changed,gzip) +diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h +index b40c462b4af3..c4e66e60d559 100644 +--- a/arch/x86/include/asm/pci.h ++++ b/arch/x86/include/asm/pci.h +@@ -27,6 +27,7 @@ struct pci_sysdata { + #if IS_ENABLED(CONFIG_VMD) + struct pci_dev *vmd_dev; /* VMD Device if in Intel VMD domain */ + #endif ++ struct pci_dev *nvme_remap_dev; /* AHCI Device if NVME remapped bus */ + }; + + extern int pci_routeirq; +@@ -70,6 +71,11 @@ static inline bool is_vmd(struct pci_bus *bus) + #define is_vmd(bus) false + #endif /* CONFIG_VMD */ + ++static inline bool is_nvme_remap(struct pci_bus *bus) ++{ ++ return to_pci_sysdata(bus)->nvme_remap_dev != NULL; ++} ++ + /* Can be used to override the logic in pci_scan_bus for skipping + already-configured bus numbers - to be used for buggy BIOSes + or architectures with incomplete PCI setup by the loader */ diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h index 75884d2cdec3..18021e8c0c28 100644 --- a/arch/x86/include/asm/vermagic.h @@ -4381,6 +4421,28 @@ index 75884d2cdec3..18021e8c0c28 100644 #elif defined CONFIG_MELAN #define MODULE_PROC_FAMILY "ELAN " #elif defined CONFIG_MCRUSOE +diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c +index ddb798603201..7c20387d8202 100644 +--- a/arch/x86/pci/common.c ++++ b/arch/x86/pci/common.c +@@ -723,12 +723,15 @@ int pci_ext_cfg_avail(void) + return 0; + } + +-#if IS_ENABLED(CONFIG_VMD) + struct pci_dev *pci_real_dma_dev(struct pci_dev *dev) + { ++#if IS_ENABLED(CONFIG_VMD) + if (is_vmd(dev->bus)) + return to_pci_sysdata(dev->bus)->vmd_dev; ++#endif ++ ++ if (is_nvme_remap(dev->bus)) ++ return to_pci_sysdata(dev->bus)->nvme_remap_dev; + + return dev; + } +-#endif diff --git a/drivers/Makefile b/drivers/Makefile index 20b118dca999..c19dee206e53 100644 --- a/drivers/Makefile @@ -4416,6 +4478,95 @@ index 20b118dca999..c19dee206e53 100644 obj-$(CONFIG_TARGET_CORE) += target/ obj-$(CONFIG_MTD) += mtd/ obj-$(CONFIG_SPI) += spi/ +diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c +index 14a1c0d14916..7f5a77ddc7d4 100644 +--- a/drivers/ata/ahci.c ++++ b/drivers/ata/ahci.c +@@ -1522,7 +1522,7 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance) + } + #endif + +-static void ahci_remap_check(struct pci_dev *pdev, int bar, ++static int ahci_remap_check(struct pci_dev *pdev, int bar, + struct ahci_host_priv *hpriv) + { + int i; +@@ -1535,7 +1535,7 @@ static void ahci_remap_check(struct pci_dev *pdev, int bar, + pci_resource_len(pdev, bar) < SZ_512K || + bar != AHCI_PCI_BAR_STANDARD || + !(readl(hpriv->mmio + AHCI_VSCAP) & 1)) +- return; ++ return 0; + + cap = readq(hpriv->mmio + AHCI_REMAP_CAP); + for (i = 0; i < AHCI_MAX_REMAP; i++) { +@@ -1550,18 +1550,11 @@ static void ahci_remap_check(struct pci_dev *pdev, int bar, + } + + if (!hpriv->remapped_nvme) +- return; +- +- dev_warn(&pdev->dev, "Found %u remapped NVMe devices.\n", +- hpriv->remapped_nvme); +- dev_warn(&pdev->dev, +- "Switch your BIOS from RAID to AHCI mode to use them.\n"); ++ return 0; + +- /* +- * Don't rely on the msi-x capability in the remap case, +- * share the legacy interrupt across ahci and remapped devices. 
+- */ +- hpriv->flags |= AHCI_HFLAG_NO_MSI; ++ /* Abort probe, allowing intel-nvme-remap to step in when available */ ++ dev_info(&pdev->dev, "Device will be handled by intel-nvme-remap.\n"); ++ return -ENODEV; + } + + static int ahci_get_irq_vector(struct ata_host *host, int port) +@@ -1781,7 +1774,9 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) + hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar]; + + /* detect remapped nvme devices */ +- ahci_remap_check(pdev, ahci_pci_bar, hpriv); ++ rc = ahci_remap_check(pdev, ahci_pci_bar, hpriv); ++ if (rc) ++ return rc; + + sysfs_add_file_to_group(&pdev->dev.kobj, + &dev_attr_remapped_nvme.attr, +diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 +index 00476e94db90..c3a219218fac 100644 +--- a/drivers/cpufreq/Kconfig.x86 ++++ b/drivers/cpufreq/Kconfig.x86 +@@ -9,7 +9,6 @@ config X86_INTEL_PSTATE + select ACPI_PROCESSOR if ACPI + select ACPI_CPPC_LIB if X86_64 && ACPI && SCHED_MC_PRIO + select CPU_FREQ_GOV_PERFORMANCE +- select CPU_FREQ_GOV_SCHEDUTIL if SMP + help + This driver provides a P state for Intel core processors. + The driver implements an internal governor and will become +@@ -39,7 +38,6 @@ config X86_AMD_PSTATE + depends on X86 && ACPI + select ACPI_PROCESSOR + select ACPI_CPPC_LIB if X86_64 +- select CPU_FREQ_GOV_SCHEDUTIL if SMP + help + This driver adds a CPUFreq driver which utilizes a fine grain + processor performance frequency control range instead of legacy +diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c +index 48a4613cef1e..ad9414c32060 100644 +--- a/drivers/cpufreq/intel_pstate.c ++++ b/drivers/cpufreq/intel_pstate.c +@@ -3491,6 +3491,8 @@ static int __init intel_pstate_setup(char *str) + + if (!strcmp(str, "disable")) + no_load = 1; ++ else if (!strcmp(str, "enable")) ++ no_load = 0; + else if (!strcmp(str, "active")) + default_driver = &intel_pstate; + else if (!strcmp(str, "passive")) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 25eb4e8fd22f..2f95d74ad0b4 100644 --- a/drivers/i2c/busses/Kconfig @@ -5135,6 +5286,489 @@ index 3ba53dc3cc3f..0fde1b3ced78 100644 ret = crypt_ctr_cipher(ti, argv[0], argv[1]); if (ret < 0) goto bad; +diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile +index 37c8663de7fe..897d19f92ede 100644 +--- a/drivers/pci/controller/Makefile ++++ b/drivers/pci/controller/Makefile +@@ -1,4 +1,10 @@ + # SPDX-License-Identifier: GPL-2.0 ++ifdef CONFIG_X86_64 ++ifdef CONFIG_SATA_AHCI ++obj-y += intel-nvme-remap.o ++endif ++endif ++ + obj-$(CONFIG_PCIE_CADENCE) += cadence/ + obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o + obj-$(CONFIG_PCI_IXP4XX) += pci-ixp4xx.o +diff --git a/drivers/pci/controller/intel-nvme-remap.c b/drivers/pci/controller/intel-nvme-remap.c +new file mode 100644 +index 000000000000..e105e6f5cc91 +--- /dev/null ++++ b/drivers/pci/controller/intel-nvme-remap.c +@@ -0,0 +1,462 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Intel remapped NVMe device support. ++ * ++ * Copyright (c) 2019 Endless Mobile, Inc. ++ * Author: Daniel Drake ++ * ++ * Some products ship by default with the SATA controller in "RAID" or ++ * "Intel RST Premium With Intel Optane System Acceleration" mode. Under this ++ * mode, which we refer to as "remapped NVMe" mode, any installed NVMe ++ * devices disappear from the PCI bus, and instead their I/O memory becomes ++ * available within the AHCI device BARs. 
++ *
++ * This scheme is understood to be a way of avoiding usage of the standard
++ * Windows NVMe driver under that OS, instead mandating usage of Intel's
++ * driver, which has better power management, and presumably offers
++ * some RAID/disk-caching solutions too.
++ *
++ * Here in this driver, we support the remapped NVMe mode by claiming the
++ * AHCI device and creating a fake PCIe root port. On the new bus, the
++ * original AHCI device is exposed with only minor tweaks. Then, fake PCI
++ * devices corresponding to the remapped NVMe devices are created. The usual
++ * ahci and nvme drivers are then expected to bind to these devices and
++ * operate as normal.
++ *
++ * The PCI configuration space for the NVMe devices is completely
++ * unavailable, so we fake a minimal one and hope for the best.
++ *
++ * Interrupts are shared between the AHCI and NVMe devices. For simplicity,
++ * we only support the legacy interrupt here, although MSI support
++ * could potentially be added later.
++ */
++
++#define MODULE_NAME "intel-nvme-remap"
++
++#include <linux/ahci-remap.h>
++#include <linux/irq.h>
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/pci.h>
++
++#define AHCI_PCI_BAR_STANDARD 5
++
++struct nvme_remap_dev {
++	struct pci_dev		*dev;		/* AHCI device */
++	struct pci_bus		*bus;		/* our fake PCI bus */
++	struct pci_sysdata	sysdata;
++	int			irq_base;	/* our fake interrupts */
++
++	/*
++	 * When we detect an all-ones write to a BAR register, this flag
++	 * is set, so that we return the BAR size on the next read (a
++	 * standard PCI behaviour).
++	 * This includes the assumption that an all-ones BAR write is
++	 * immediately followed by a read of the same register.
++	 */
++	bool			bar_sizing;
++
++	/*
++	 * Resources copied from the AHCI device, to be regarded as
++	 * resources on our fake bus.
++	 */
++	struct resource		ahci_resources[PCI_NUM_RESOURCES];
++
++	/* Resources corresponding to the NVMe devices. */
++	struct resource		remapped_dev_mem[AHCI_MAX_REMAP];
++
++	/* Number of remapped NVMe devices found. */
++	int			num_remapped_devices;
++};
++
++static inline struct nvme_remap_dev *nrdev_from_bus(struct pci_bus *bus)
++{
++	return container_of(bus->sysdata, struct nvme_remap_dev, sysdata);
++}
++
++
++/******** PCI configuration space **********/
++
++/*
++ * Helper macros for tweaking returned contents of PCI configuration space.
++ *
++ * value contains len bytes of data read from reg.
++ * If fixup_reg is included in that range, fix up the contents of that
++ * register to fixed_value.
++ */
++#define NR_FIX8(fixup_reg, fixed_value) do { \
++		if (reg <= fixup_reg && fixup_reg < reg + len) \
++			((u8 *) value)[fixup_reg - reg] = (u8) (fixed_value); \
++	} while (0)
++
++#define NR_FIX16(fixup_reg, fixed_value) do { \
++		NR_FIX8(fixup_reg, fixed_value); \
++		NR_FIX8(fixup_reg + 1, fixed_value >> 8); \
++	} while (0)
++
++#define NR_FIX24(fixup_reg, fixed_value) do { \
++		NR_FIX8(fixup_reg, fixed_value); \
++		NR_FIX8(fixup_reg + 1, fixed_value >> 8); \
++		NR_FIX8(fixup_reg + 2, fixed_value >> 16); \
++	} while (0)
++
++#define NR_FIX32(fixup_reg, fixed_value) do { \
++		NR_FIX16(fixup_reg, (u16) fixed_value); \
++		NR_FIX16(fixup_reg + 2, fixed_value >> 16); \
++	} while (0)
++
++/*
++ * Read PCI config space of the slot 0 (AHCI) device.
++ * We pass through the read request to the underlying device, but
++ * tweak the results in some cases.
++ */
++static int nvme_remap_pci_read_slot0(struct pci_bus *bus, int reg,
++				     int len, u32 *value)
++{
++	struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
++	struct pci_bus *ahci_dev_bus = nrdev->dev->bus;
++	int ret;
++
++	ret = ahci_dev_bus->ops->read(ahci_dev_bus, nrdev->dev->devfn,
++				      reg, len, value);
++	if (ret)
++		return ret;
++
++	/*
++	 * Adjust the device class, to prevent this driver from attempting to
++	 * additionally probe the device we're simulating here.
++	 */
++	NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_SATA_AHCI);
++
++	/*
++	 * Unset interrupt pin, otherwise ACPI tries to find routing
++	 * info for our virtual IRQ, fails, and complains.
++	 */
++	NR_FIX8(PCI_INTERRUPT_PIN, 0);
++
++	/*
++	 * Truncate the AHCI BAR to not include the region that covers the
++	 * hidden devices. This will cause the ahci driver to successfully
++	 * probe the new device (instead of handing it over to this driver).
++	 */
++	if (nrdev->bar_sizing) {
++		NR_FIX32(PCI_BASE_ADDRESS_5, ~(SZ_16K - 1));
++		nrdev->bar_sizing = false;
++	}
++
++	return PCIBIOS_SUCCESSFUL;
++}
++
++/*
++ * Read PCI config space of a remapped device.
++ * Since the original PCI config space is inaccessible, we provide a minimal,
++ * fake config space instead.
++ */
++static int nvme_remap_pci_read_remapped(struct pci_bus *bus, unsigned int port,
++					int reg, int len, u32 *value)
++{
++	struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
++	struct resource *remapped_mem;
++
++	if (port > nrdev->num_remapped_devices)
++		return PCIBIOS_DEVICE_NOT_FOUND;
++
++	*value = 0;
++	remapped_mem = &nrdev->remapped_dev_mem[port - 1];
++
++	/* Set a Vendor ID, otherwise Linux assumes no device is present */
++	NR_FIX16(PCI_VENDOR_ID, PCI_VENDOR_ID_INTEL);
++
++	/* Always appear on & bus mastering */
++	NR_FIX16(PCI_COMMAND, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
++
++	/* Set class so that nvme driver probes us */
++	NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_EXPRESS);
++
++	if (nrdev->bar_sizing) {
++		NR_FIX32(PCI_BASE_ADDRESS_0,
++			 ~(resource_size(remapped_mem) - 1));
++		nrdev->bar_sizing = false;
++	} else {
++		resource_size_t mem_start = remapped_mem->start;
++
++		mem_start |= PCI_BASE_ADDRESS_MEM_TYPE_64;
++		NR_FIX32(PCI_BASE_ADDRESS_0, mem_start);
++		mem_start >>= 32;
++		NR_FIX32(PCI_BASE_ADDRESS_1, mem_start);
++	}
++
++	return PCIBIOS_SUCCESSFUL;
++}
++
++/* Read PCI configuration space. */
++static int nvme_remap_pci_read(struct pci_bus *bus, unsigned int devfn,
++			       int reg, int len, u32 *value)
++{
++	if (PCI_SLOT(devfn) == 0)
++		return nvme_remap_pci_read_slot0(bus, reg, len, value);
++	else
++		return nvme_remap_pci_read_remapped(bus, PCI_SLOT(devfn),
++						    reg, len, value);
++}
++
++/*
++ * Write PCI config space of the slot 0 (AHCI) device.
++ * Apart from the special case of BAR sizing, we disable all writes.
++ * Otherwise, the ahci driver could make changes (e.g. unset PCI bus master)
++ * that would affect the operation of the NVMe devices.
++ */
++static int nvme_remap_pci_write_slot0(struct pci_bus *bus, int reg,
++				      int len, u32 value)
++{
++	struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
++	struct pci_bus *ahci_dev_bus = nrdev->dev->bus;
++
++	if (reg >= PCI_BASE_ADDRESS_0 && reg <= PCI_BASE_ADDRESS_5) {
++		/*
++		 * Writing all-ones to a BAR means that the size of the
++		 * memory region is being checked. Flag this so that we can
++		 * reply with an appropriate size on the next read.
++ */ ++ if (value == ~0) ++ nrdev->bar_sizing = true; ++ ++ return ahci_dev_bus->ops->write(ahci_dev_bus, ++ nrdev->dev->devfn, ++ reg, len, value); ++ } ++ ++ return PCIBIOS_SET_FAILED; ++} ++ ++/* ++ * Write PCI config space of a remapped device. ++ * Since the original PCI config space is inaccessible, we reject all ++ * writes, except for the special case of BAR probing. ++ */ ++static int nvme_remap_pci_write_remapped(struct pci_bus *bus, ++ unsigned int port, ++ int reg, int len, u32 value) ++{ ++ struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); ++ ++ if (port > nrdev->num_remapped_devices) ++ return PCIBIOS_DEVICE_NOT_FOUND; ++ ++ /* ++ * Writing all-ones to a BAR means that the size of the memory ++ * region is being checked. Flag this so that we can reply with ++ * an appropriate size on the next read. ++ */ ++ if (value == ~0 && reg >= PCI_BASE_ADDRESS_0 ++ && reg <= PCI_BASE_ADDRESS_5) { ++ nrdev->bar_sizing = true; ++ return PCIBIOS_SUCCESSFUL; ++ } ++ ++ return PCIBIOS_SET_FAILED; ++} ++ ++/* Write PCI configuration space. */ ++static int nvme_remap_pci_write(struct pci_bus *bus, unsigned int devfn, ++ int reg, int len, u32 value) ++{ ++ if (PCI_SLOT(devfn) == 0) ++ return nvme_remap_pci_write_slot0(bus, reg, len, value); ++ else ++ return nvme_remap_pci_write_remapped(bus, PCI_SLOT(devfn), ++ reg, len, value); ++} ++ ++static struct pci_ops nvme_remap_pci_ops = { ++ .read = nvme_remap_pci_read, ++ .write = nvme_remap_pci_write, ++}; ++ ++ ++/******** Initialization & exit **********/ ++ ++/* ++ * Find a PCI domain ID to use for our fake bus. ++ * Start at 0x10000 to not clash with ACPI _SEG domains (16 bits). ++ */ ++static int find_free_domain(void) ++{ ++ int domain = 0xffff; ++ struct pci_bus *bus = NULL; ++ ++ while ((bus = pci_find_next_bus(bus)) != NULL) ++ domain = max_t(int, domain, pci_domain_nr(bus)); ++ ++ return domain + 1; ++} ++ ++static int find_remapped_devices(struct nvme_remap_dev *nrdev, ++ struct list_head *resources) ++{ ++ void __iomem *mmio; ++ int i, count = 0; ++ u32 cap; ++ ++ mmio = pcim_iomap(nrdev->dev, AHCI_PCI_BAR_STANDARD, ++ pci_resource_len(nrdev->dev, ++ AHCI_PCI_BAR_STANDARD)); ++ if (!mmio) ++ return -ENODEV; ++ ++ /* Check if this device might have remapped nvme devices. 
*/ ++ if (pci_resource_len(nrdev->dev, AHCI_PCI_BAR_STANDARD) < SZ_512K || ++ !(readl(mmio + AHCI_VSCAP) & 1)) ++ return -ENODEV; ++ ++ cap = readq(mmio + AHCI_REMAP_CAP); ++ for (i = AHCI_MAX_REMAP-1; i >= 0; i--) { ++ struct resource *remapped_mem; ++ ++ if ((cap & (1 << i)) == 0) ++ continue; ++ if (readl(mmio + ahci_remap_dcc(i)) ++ != PCI_CLASS_STORAGE_EXPRESS) ++ continue; ++ ++ /* We've found a remapped device */ ++ remapped_mem = &nrdev->remapped_dev_mem[count++]; ++ remapped_mem->start = ++ pci_resource_start(nrdev->dev, AHCI_PCI_BAR_STANDARD) ++ + ahci_remap_base(i); ++ remapped_mem->end = remapped_mem->start ++ + AHCI_REMAP_N_SIZE - 1; ++ remapped_mem->flags = IORESOURCE_MEM | IORESOURCE_PCI_FIXED; ++ pci_add_resource(resources, remapped_mem); ++ } ++ ++ pcim_iounmap(nrdev->dev, mmio); ++ ++ if (count == 0) ++ return -ENODEV; ++ ++ nrdev->num_remapped_devices = count; ++ dev_info(&nrdev->dev->dev, "Found %d remapped NVMe devices\n", ++ nrdev->num_remapped_devices); ++ return 0; ++} ++ ++static void nvme_remap_remove_root_bus(void *data) ++{ ++ struct pci_bus *bus = data; ++ ++ pci_stop_root_bus(bus); ++ pci_remove_root_bus(bus); ++} ++ ++static int nvme_remap_probe(struct pci_dev *dev, ++ const struct pci_device_id *id) ++{ ++ struct nvme_remap_dev *nrdev; ++ LIST_HEAD(resources); ++ int i; ++ int ret; ++ struct pci_dev *child; ++ ++ nrdev = devm_kzalloc(&dev->dev, sizeof(*nrdev), GFP_KERNEL); ++ nrdev->sysdata.domain = find_free_domain(); ++ nrdev->sysdata.nvme_remap_dev = dev; ++ nrdev->dev = dev; ++ pci_set_drvdata(dev, nrdev); ++ ++ ret = pcim_enable_device(dev); ++ if (ret < 0) ++ return ret; ++ ++ pci_set_master(dev); ++ ++ ret = find_remapped_devices(nrdev, &resources); ++ if (ret) ++ return ret; ++ ++ /* Add resources from the original AHCI device */ ++ for (i = 0; i < PCI_NUM_RESOURCES; i++) { ++ struct resource *res = &dev->resource[i]; ++ ++ if (res->start) { ++ struct resource *nr_res = &nrdev->ahci_resources[i]; ++ ++ nr_res->start = res->start; ++ nr_res->end = res->end; ++ nr_res->flags = res->flags; ++ pci_add_resource(&resources, nr_res); ++ } ++ } ++ ++ /* Create virtual interrupts */ ++ nrdev->irq_base = devm_irq_alloc_descs(&dev->dev, -1, 0, ++ nrdev->num_remapped_devices + 1, ++ 0); ++ if (nrdev->irq_base < 0) ++ return nrdev->irq_base; ++ ++ /* Create and populate PCI bus */ ++ nrdev->bus = pci_create_root_bus(&dev->dev, 0, &nvme_remap_pci_ops, ++ &nrdev->sysdata, &resources); ++ if (!nrdev->bus) ++ return -ENODEV; ++ ++ if (devm_add_action_or_reset(&dev->dev, nvme_remap_remove_root_bus, ++ nrdev->bus)) ++ return -ENOMEM; ++ ++ /* We don't support sharing MSI interrupts between these devices */ ++ nrdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_MSI; ++ ++ pci_scan_child_bus(nrdev->bus); ++ ++ list_for_each_entry(child, &nrdev->bus->devices, bus_list) { ++ /* ++ * Prevent PCI core from trying to move memory BARs around. ++ * The hidden NVMe devices are at fixed locations. ++ */ ++ for (i = 0; i < PCI_NUM_RESOURCES; i++) { ++ struct resource *res = &child->resource[i]; ++ ++ if (res->flags & IORESOURCE_MEM) ++ res->flags |= IORESOURCE_PCI_FIXED; ++ } ++ ++ /* Share the legacy IRQ between all devices */ ++ child->irq = dev->irq; ++ } ++ ++ pci_assign_unassigned_bus_resources(nrdev->bus); ++ pci_bus_add_devices(nrdev->bus); ++ ++ return 0; ++} ++ ++static const struct pci_device_id nvme_remap_ids[] = { ++ /* ++ * Match all Intel RAID controllers. 
++ * ++ * There's overlap here with the set of devices detected by the ahci ++ * driver, but ahci will only successfully probe when there ++ * *aren't* any remapped NVMe devices, and this driver will only ++ * successfully probe when there *are* remapped NVMe devices that ++ * need handling. ++ */ ++ { ++ PCI_VDEVICE(INTEL, PCI_ANY_ID), ++ .class = PCI_CLASS_STORAGE_RAID << 8, ++ .class_mask = 0xffffff00, ++ }, ++ {0,} ++}; ++MODULE_DEVICE_TABLE(pci, nvme_remap_ids); ++ ++static struct pci_driver nvme_remap_drv = { ++ .name = MODULE_NAME, ++ .id_table = nvme_remap_ids, ++ .probe = nvme_remap_probe, ++}; ++module_pci_driver(nvme_remap_drv); ++ ++MODULE_AUTHOR("Daniel Drake "); ++MODULE_LICENSE("GPL v2"); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 44cab813bf95..25edf55de985 100644 --- a/drivers/pci/quirks.c @@ -5255,27 +5889,10 @@ index 44cab813bf95..25edf55de985 100644 }; diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig -index 4a01b315e0a9..e4a6c31a80df 100644 +index 4a01b315e0a9..e9ddf76b8b57 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig -@@ -641,6 +641,16 @@ config THINKPAD_LMI - To compile this driver as a module, choose M here: the module will - be called think-lmi. - -+config LEGION_LAPTOP -+ tristate "Lenovo Legion Laptop Extras" -+ depends on ACPI -+ depends on ACPI_WMI || ACPI_WMI = n -+ depends on HWMON || HWMON = n -+ select ACPI_PLATFORM_PROFILE -+ help -+ This is a driver for Lenovo Legion laptops and contains drivers for -+ hotkey, fan control, and power mode. -+ - source "drivers/platform/x86/intel/Kconfig" - - config MSI_LAPTOP -@@ -1099,6 +1109,20 @@ config WINMATE_FM07_KEYS +@@ -1099,6 +1099,20 @@ config WINMATE_FM07_KEYS buttons below the display. This module adds an input device that delivers key events when these buttons are pressed. @@ -5297,2813 +5914,16 @@ index 4a01b315e0a9..e4a6c31a80df 100644 config P2SB diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile -index 1d3d1b02541b..fde9a683103e 100644 +index 1d3d1b02541b..75b30a3face9 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile -@@ -66,6 +66,7 @@ obj-$(CONFIG_IDEAPAD_LAPTOP) += ideapad-laptop.o - obj-$(CONFIG_SENSORS_HDAPS) += hdaps.o - obj-$(CONFIG_THINKPAD_ACPI) += thinkpad_acpi.o - obj-$(CONFIG_THINKPAD_LMI) += think-lmi.o -+obj-$(CONFIG_LEGION_LAPTOP) += legion-laptop.o - - # Intel - obj-y += intel/ -@@ -134,3 +135,6 @@ obj-$(CONFIG_SIEMENS_SIMATIC_IPC) += simatic-ipc.o +@@ -134,3 +134,6 @@ obj-$(CONFIG_SIEMENS_SIMATIC_IPC) += simatic-ipc.o # Winmate obj-$(CONFIG_WINMATE_FM07_KEYS) += winmate-fm07-keys.o + +# Steam Deck +obj-$(CONFIG_STEAMDECK) += steamdeck.o -diff --git a/drivers/platform/x86/legion-laptop.c b/drivers/platform/x86/legion-laptop.c -new file mode 100644 -index 000000000000..d1268d239cc5 ---- /dev/null -+++ b/drivers/platform/x86/legion-laptop.c -@@ -0,0 +1,2783 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * legion-laptop.c - Extra Lenovo Legion laptop support, in -+ * particular for fan curve control and power mode. -+ * -+ * Copyright (C) 2022 johnfan -+ * -+ * -+ * This driver might work on other Lenovo Legion models. If you -+ * want to try it you can pass force=1 as argument -+ * to the module which will force it to load even when the DMI -+ * data doesn't match the model AND FIRMWARE. -+ * -+ * Support for other hardware of this model is already partially -+ * provided by the module ideapd-laptop. 
-+ * -+ * The development page for this driver is located at -+ * https://github.com/johnfanv2/LenovoLegionLinux -+ * -+ * This driver exports the files: -+ * - /sys/kernel/debug/legion/fancurve (ro) -+ * The fan curve in the form stored in the firmware in an -+ * human readable table. -+ * -+ * - /sys/module/legion_laptop/drivers/platform\:legion/PNP0C09\:00/powermode (rw) -+ * 0: balanced mode (white) -+ * 1: performance mode (red) -+ * 2: quiet mode (blue) -+ * ?: custom mode (pink) -+ * -+ * NOTE: Writing to this will load the default fan curve from -+ * the firmware for this mode, so the fan curve might -+ * have to be reconfigured if needed. -+ * -+ * It implements the usual hwmon interface to monitor fan speed and temmperature -+ * and allows to set the fan curve inside the firware. -+ * -+ * - /sys/class/hwmon/X/fan1_input or /sys/class/hwmon/X/fan2_input (ro) -+ * Current fan speed of fan1/fan2. -+ * - /sys/class/hwmon/X/temp1_input (ro) -+ * - /sys/class/hwmon/X/temp2_input (ro) -+ * - /sys/class/hwmon/X/temp3_input (ro) -+ * Temperature (Celsius) of CPU, GPU, and IC used for fan control. -+ * - /sys/class/hwmon/X/pwmY_auto_pointZ_pwm (rw) -+ * PWM (0-255) of the fan at the Y-level in the fan curve -+ * - /sys/class/hwmon/X/pwmY_auto_pointZ_temp (rw) -+ * upper temperature of tempZ (CPU, GPU, or IC) at the Y-level in the fan curve -+ * - /sys/class/hwmon/X/pwmY_auto_pointZ_temp_hyst (rw) -+ * hysteris (CPU, GPU, or IC) at the Y-level in the fan curve. The lower -+ * temperatue of the level is the upper temperature minus the hysteris -+ * -+ * -+ * Credits for reverse engineering the firmware to: -+ * - David Woodhouse: heavily inspired by lenovo_laptop.c -+ * - Luke Cama: Windows version "LegionFanControl" -+ * - SmokelessCPU: reverse engineering of custom registers in EC -+ * and commincation method with EC via ports -+ * - 0x1F9F1: additional reverse engineering for complete fan curve -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("johnfan"); -+MODULE_DESCRIPTION("Lenovo Legion laptop extras"); -+ -+static bool force; -+module_param(force, bool, 0440); -+MODULE_PARM_DESC( -+ force, -+ "Force loading this module even if model or BIOS does not match."); -+ -+static bool ec_readonly; -+module_param(ec_readonly, bool, 0440); -+MODULE_PARM_DESC( -+ ec_readonly, -+ "Only read from embedded controller but do not write or change settings."); -+ -+#define LEGIONFEATURES \ -+ "fancurve powermode platformprofile platformprofilenotify minifancurve" -+ -+//Size of fancurve stored in embedded controller -+#define MAXFANCURVESIZE 10 -+ -+#define LEGION_DRVR_SHORTNAME "legion" -+#define LEGION_HWMON_NAME LEGION_DRVR_SHORTNAME "_hwmon" -+ -+/* =============================== */ -+/* Embedded Controller Description */ -+/* =============================== */ -+ -+/* The configuration and registers to access the embedded controller -+ * depending on different the version of the software on the -+ * embedded controller or and the BIOS/UEFI firmware. -+ * -+ * To control fan curve in the embedded controller (EC) one has to -+ * write to its "RAM". There are different possibilities: -+ * - EC RAM is memory mapped (write to it with ioremap) -+ * - access EC RAM via ported mapped IO (outb/inb) -+ * - access EC RAM via ACPI methods. It is only possible to write -+ * to part of it (first 0xFF bytes?) 
-+ * -+ * In later models the firmware directly exposes ACPI methods to -+ * set the fan curve direclty, without writing to EC RAM. This -+ * is done inside the ACPI method. -+ */ -+ -+/** -+ * Offsets for interseting values inside the EC RAM (0 = start of -+ * EC RAM. These might change depending on the software inside of -+ * the EC, which can be updated by a BIOS update from Lenovo. -+ */ -+// TODO: same order as in initialization -+struct ec_register_offsets { -+ // Super I/O Configuration Registers -+ // 7.15 General Control (GCTRL) -+ // General Control (GCTRL) -+ // (see EC Interface Registers and 6.2 Plug and Play Configuration (PNPCFG)) in datasheet -+ // note: these are in two places saved -+ // in EC Interface Registers and in super io configuraion registers -+ // Chip ID -+ u16 ECHIPID1; -+ u16 ECHIPID2; -+ // Chip Version -+ u16 ECHIPVER; -+ u16 ECDEBUG; -+ -+ // Lenovo Custom OEM extension -+ // Firmware of ITE can be extended by -+ // custom program using its own "variables" -+ // These are the offsets to these "variables" -+ u16 EXT_FAN_CUR_POINT; -+ u16 EXT_FAN_POINTS_SIZE; -+ u16 EXT_FAN1_BASE; -+ u16 EXT_FAN2_BASE; -+ u16 EXT_FAN_ACC_BASE; -+ u16 EXT_FAN_DEC_BASE; -+ u16 EXT_CPU_TEMP; -+ u16 EXT_CPU_TEMP_HYST; -+ u16 EXT_GPU_TEMP; -+ u16 EXT_GPU_TEMP_HYST; -+ u16 EXT_VRM_TEMP; -+ u16 EXT_VRM_TEMP_HYST; -+ u16 EXT_FAN1_RPM_LSB; -+ u16 EXT_FAN1_RPM_MSB; -+ u16 EXT_FAN2_RPM_LSB; -+ u16 EXT_FAN2_RPM_MSB; -+ u16 EXT_FAN1_TARGET_RPM; -+ u16 EXT_FAN2_TARGET_RPM; -+ u16 EXT_POWERMODE; -+ u16 EXT_MINIFANCURVE_ON_COOL; -+ // values -+ // 0x04: enable mini fan curve if very long on cool level -+ // - this might be due to potential temp failure -+ // - or just because really so cool -+ // 0xA0: disable it -+ u16 EXT_LOCKFANCONTROLLER; -+ u16 EXT_MAXIMUMFANSPEED; -+ u16 EXT_WHITE_KEYBOARD_BACKLIGHT; -+ u16 EXT_IC_TEMP_INPUT; -+ u16 EXT_CPU_TEMP_INPUT; -+ u16 EXT_GPU_TEMP_INPUT; -+}; -+ -+struct model_config { -+ const struct ec_register_offsets *registers; -+ bool check_embedded_controller_id; -+ u16 embedded_controller_id; -+ -+ // first addr in EC we access/scan -+ phys_addr_t memoryio_physical_ec_start; -+ size_t memoryio_size; -+ -+ // TODO: maybe use bitfield -+ bool has_minifancurve; -+}; -+ -+/* =================================== */ -+/* Coinfiguration for different models */ -+/* =================================== */ -+ -+// Idea by SmokelesssCPU (modified) -+// - all default names and register addresses are supported by datasheet -+// - register addresses for custom firmware by SmokelesssCPU -+static const struct ec_register_offsets ec_register_offsets_v0 = { -+ .ECHIPID1 = 0x2000, -+ .ECHIPID2 = 0x2001, -+ .ECHIPVER = 0x2002, -+ .ECDEBUG = 0x2003, -+ .EXT_FAN_CUR_POINT = 0xC534, -+ .EXT_FAN_POINTS_SIZE = 0xC535, -+ .EXT_FAN1_BASE = 0xC540, -+ .EXT_FAN2_BASE = 0xC550, -+ .EXT_FAN_ACC_BASE = 0xC560, -+ .EXT_FAN_DEC_BASE = 0xC570, -+ .EXT_CPU_TEMP = 0xC580, -+ .EXT_CPU_TEMP_HYST = 0xC590, -+ .EXT_GPU_TEMP = 0xC5A0, -+ .EXT_GPU_TEMP_HYST = 0xC5B0, -+ .EXT_VRM_TEMP = 0xC5C0, -+ .EXT_VRM_TEMP_HYST = 0xC5D0, -+ .EXT_FAN1_RPM_LSB = 0xC5E0, -+ .EXT_FAN1_RPM_MSB = 0xC5E1, -+ .EXT_FAN2_RPM_LSB = 0xC5E2, -+ .EXT_FAN2_RPM_MSB = 0xC5E3, -+ .EXT_MINIFANCURVE_ON_COOL = 0xC536, -+ .EXT_LOCKFANCONTROLLER = 0xc4AB, -+ .EXT_CPU_TEMP_INPUT = 0xc538, -+ .EXT_GPU_TEMP_INPUT = 0xc539, -+ .EXT_IC_TEMP_INPUT = 0xC5E8, -+ .EXT_POWERMODE = 0xc420, -+ .EXT_FAN1_TARGET_RPM = 0xc600, -+ .EXT_FAN2_TARGET_RPM = 0xc601, -+ .EXT_MAXIMUMFANSPEED = 0xBD, -+ .EXT_WHITE_KEYBOARD_BACKLIGHT = (0x3B + 0xC400) 
-+}; -+ -+static const struct model_config model_v0 = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = true -+}; -+ -+static const struct model_config model_kfcn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = false -+}; -+ -+static const struct model_config model_hacn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = false, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = false -+}; -+ -+ -+static const struct model_config model_k9cn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = false, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, // or replace 0xC400 by 0x0400 ? -+ .memoryio_size = 0x300, -+ .has_minifancurve = false -+}; -+ -+ -+ -+static const struct dmi_system_id denylist[] = { {} }; -+ -+static const struct dmi_system_id optimistic_allowlist[] = { -+ { -+ // modelyear: 2021 -+ // generation: 6 -+ // name: Legion 5, Legion 5 pro, Legion 7 -+ // Family: Legion 5 15ACH6H, ... -+ .ident = "GKCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "GKCN"), -+ }, -+ .driver_data = (void *)&model_v0 -+ }, -+ { -+ // modelyear: 2020 -+ .ident = "EUCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "EUCN"), -+ }, -+ .driver_data = (void *)&model_v0 -+ }, -+ { -+ // modelyear: 2020 -+ .ident = "EFCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "EFCN"), -+ }, -+ .driver_data = (void *)&model_v0 -+ }, -+ { -+ // modelyear: 2020 -+ .ident = "FSCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "FSCN"), -+ }, -+ .driver_data = (void *)&model_v0 -+ }, -+ { -+ // modelyear: 2021 -+ .ident = "HHCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "HHCN"), -+ }, -+ .driver_data = (void *)&model_v0 -+ }, -+ { -+ // modelyear: 2022 -+ .ident = "H1CN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "H1CN"), -+ }, -+ .driver_data = (void *)&model_v0 -+ }, -+ { -+ // modelyear: 2022 -+ .ident = "J2CN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "J2CN"), -+ }, -+ .driver_data = (void *)&model_v0 -+ }, -+ { -+ // modelyear: 2022 -+ .ident = "JUCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "JUCN"), -+ }, -+ .driver_data = (void *)&model_v0 -+ }, -+ { -+ // modelyear: 2022 -+ .ident = "KFCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "KFCN"), -+ }, -+ .driver_data = (void *)&model_kfcn -+ }, -+ { -+ // modelyear: 2021 -+ .ident = "HACN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "HACN"), -+ }, -+ .driver_data = (void *)&model_hacn -+ }, -+ { -+ // modelyear: 2021 -+ .ident = "G9CN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "G9CN"), -+ }, -+ .driver_data = (void *)&model_v0 -+ }, -+ { -+ // modelyear: 2022 -+ .ident = "K9CN", -+ .matches = { -+ 
DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "K9CN"), -+ }, -+ .driver_data = (void *)&model_k9cn -+ }, -+ {} -+}; -+ -+/* ================================= */ -+/* ACPI access */ -+/* ================================= */ -+ -+// function from ideapad-laptop.c -+static int eval_int(acpi_handle handle, const char *name, unsigned long *res) -+{ -+ unsigned long long result; -+ acpi_status status; -+ -+ status = acpi_evaluate_integer(handle, (char *)name, NULL, &result); -+ if (ACPI_FAILURE(status)) -+ return -EIO; -+ -+ *res = result; -+ -+ return 0; -+} -+ -+// function from ideapad-laptop.c -+static int exec_simple_method(acpi_handle handle, const char *name, -+ unsigned long arg) -+{ -+ acpi_status status = -+ acpi_execute_simple_method(handle, (char *)name, arg); -+ -+ return ACPI_FAILURE(status) ? -EIO : 0; -+} -+ -+// function from ideapad-laptop.c -+static int exec_sbmc(acpi_handle handle, unsigned long arg) -+{ -+ // \_SB.PCI0.LPC0.EC0.VPC0.SBMC -+ return exec_simple_method(handle, "SBMC", arg); -+} -+ -+static int eval_qcho(acpi_handle handle, unsigned long *res) -+{ -+ // \_SB.PCI0.LPC0.EC0.QCHO -+ return eval_int(handle, "QCHO", res); -+} -+ -+/* ================================= */ -+/* EC RAM Access with port-mapped IO */ -+/* ================================= */ -+ -+/* -+ * See datasheet of e.g. IT8502E/F/G, e.g. -+ * 6.2 Plug and Play Configuration (PNPCFG) -+ * -+ * Depending on configured BARDSEL register -+ * the ports -+ * ECRAM_PORTIO_ADDR_PORT and -+ * ECRAM_PORTIO_DATA_PORT -+ * are configured. -+ * -+ * By performing IO on these ports one can -+ * read/write to registers in the EC. -+ * -+ * "To access a register of PNPCFG, write target index to -+ * address port and access this PNPCFG register via -+ * data port" [datasheet, 6.2 Plug and Play Configuration] -+ */ -+ -+// IO ports used to write to communicate with embedded controller -+// Start of used ports -+#define ECRAM_PORTIO_START_PORT 0x4E -+// Number of used ports -+#define ECRAM_PORTIO_PORTS_SIZE 2 -+// Port used to specify address in EC RAM to read/write -+// 0x4E/0x4F is the usual port for IO super controler -+// 0x2E/0x2F also common (ITE can also be configure to use these) -+#define ECRAM_PORTIO_ADDR_PORT 0x4E -+// Port to send/receive the value to write/read -+#define ECRAM_PORTIO_DATA_PORT 0x4F -+// Name used to request ports -+#define ECRAM_PORTIO_NAME "legion" -+ -+struct ecram_portio { -+ /* protects read/write to EC RAM performed -+ * as a certain sequence of outb, inb -+ * commands on the IO ports. There can -+ * be at most one. -+ */ -+ struct mutex io_port_mutex; -+}; -+ -+ssize_t ecram_portio_init(struct ecram_portio *ec_portio) -+{ -+ if (!request_region(ECRAM_PORTIO_START_PORT, ECRAM_PORTIO_PORTS_SIZE, -+ ECRAM_PORTIO_NAME)) { -+ pr_info("Cannot init ecram_portio the %x ports starting at %x\n", -+ ECRAM_PORTIO_PORTS_SIZE, ECRAM_PORTIO_START_PORT); -+ return -ENODEV; -+ } -+ //pr_info("Reserved %x ports starting at %x\n", ECRAM_PORTIO_PORTS_SIZE, ECRAM_PORTIO_START_PORT); -+ mutex_init(&ec_portio->io_port_mutex); -+ return 0; -+} -+ -+void ecram_portio_exit(struct ecram_portio *ec_portio) -+{ -+ release_region(ECRAM_PORTIO_START_PORT, ECRAM_PORTIO_PORTS_SIZE); -+} -+ -+/* Read a byte from the EC RAM. -+ * -+ * Return status because of commong signature for alle -+ * methods to access EC RAM. 
-+ */ -+ssize_t ecram_portio_read(struct ecram_portio *ec_portio, u16 offset, u8 *value) -+{ -+ mutex_lock(&ec_portio->io_port_mutex); -+ -+ outb(0x2E, ECRAM_PORTIO_ADDR_PORT); -+ outb(0x11, ECRAM_PORTIO_DATA_PORT); -+ outb(0x2F, ECRAM_PORTIO_ADDR_PORT); -+ // TODO: no explicit cast between types seems to be sometimes -+ // done and sometimes not -+ outb((u8)((offset >> 8) & 0xFF), ECRAM_PORTIO_DATA_PORT); -+ -+ outb(0x2E, ECRAM_PORTIO_ADDR_PORT); -+ outb(0x10, ECRAM_PORTIO_DATA_PORT); -+ outb(0x2F, ECRAM_PORTIO_ADDR_PORT); -+ outb((u8)(offset & 0xFF), ECRAM_PORTIO_DATA_PORT); -+ -+ outb(0x2E, ECRAM_PORTIO_ADDR_PORT); -+ outb(0x12, ECRAM_PORTIO_DATA_PORT); -+ outb(0x2F, ECRAM_PORTIO_ADDR_PORT); -+ *value = inb(ECRAM_PORTIO_DATA_PORT); -+ -+ mutex_unlock(&ec_portio->io_port_mutex); -+ return 0; -+} -+ -+/* Write a byte to the EC RAM. -+ * -+ * Return status because of commong signature for alle -+ * methods to access EC RAM. -+ */ -+ssize_t ecram_portio_write(struct ecram_portio *ec_portio, u16 offset, u8 value) -+{ -+ mutex_lock(&ec_portio->io_port_mutex); -+ -+ outb(0x2E, ECRAM_PORTIO_ADDR_PORT); -+ outb(0x11, ECRAM_PORTIO_DATA_PORT); -+ outb(0x2F, ECRAM_PORTIO_ADDR_PORT); -+ // TODO: no explicit cast between types seems to be sometimes -+ // done and sometimes not -+ outb((u8)((offset >> 8) & 0xFF), ECRAM_PORTIO_DATA_PORT); -+ -+ outb(0x2E, ECRAM_PORTIO_ADDR_PORT); -+ outb(0x10, ECRAM_PORTIO_DATA_PORT); -+ outb(0x2F, ECRAM_PORTIO_ADDR_PORT); -+ outb((u8)(offset & 0xFF), ECRAM_PORTIO_DATA_PORT); -+ -+ outb(0x2E, ECRAM_PORTIO_ADDR_PORT); -+ outb(0x12, ECRAM_PORTIO_DATA_PORT); -+ outb(0x2F, ECRAM_PORTIO_ADDR_PORT); -+ outb(value, ECRAM_PORTIO_DATA_PORT); -+ -+ mutex_unlock(&ec_portio->io_port_mutex); -+ return 0; -+} -+ -+/* =================================== */ -+/* EC RAM Access */ -+/* =================================== */ -+ -+struct ecram { -+ struct ecram_portio portio; -+}; -+ -+ssize_t ecram_init(struct ecram *ecram, phys_addr_t memoryio_ec_physical_start, -+ size_t region_size) -+{ -+ ssize_t err; -+ -+ err = ecram_portio_init(&ecram->portio); -+ if (err) { -+ pr_info("Failed ecram_portio_init\n"); -+ goto err_ecram_portio_init; -+ } -+ -+ return 0; -+ -+err_ecram_portio_init: -+ return err; -+} -+ -+void ecram_exit(struct ecram *ecram) -+{ -+ pr_info("Unloading legion ecram\n"); -+ ecram_portio_exit(&ecram->portio); -+ pr_info("Unloading legion ecram done\n"); -+} -+ -+/** -+ * ecram_offset address on the EC -+ */ -+static u8 ecram_read(struct ecram *ecram, u16 ecram_offset) -+{ -+ u8 value; -+ int err; -+ -+ err = ecram_portio_read(&ecram->portio, ecram_offset, &value); -+ if (err) -+ pr_info("Error reading EC RAM at 0x%x\n", ecram_offset); -+ return value; -+} -+ -+static void ecram_write(struct ecram *ecram, u16 ecram_offset, u8 value) -+{ -+ int err; -+ -+ if (ec_readonly) { -+ pr_info("Skipping writing EC RAM at 0x%x because readonly.\n", -+ ecram_offset); -+ return; -+ } -+ err = ecram_portio_write(&ecram->portio, ecram_offset, value); -+ if (err) -+ pr_info("Error writing EC RAM at 0x%x\n", ecram_offset); -+} -+ -+/* =============================== */ -+/* Reads from EC */ -+/* =============================== */ -+ -+u16 read_ec_id(struct ecram *ecram, const struct model_config *model) -+{ -+ u8 id1 = ecram_read(ecram, model->registers->ECHIPID1); -+ u8 id2 = ecram_read(ecram, model->registers->ECHIPID2); -+ -+ return (id1 << 8) + id2; -+} -+ -+u16 read_ec_version(struct ecram *ecram, const struct model_config *model) -+{ -+ u8 vers = ecram_read(ecram, 
model->registers->ECHIPVER); -+ u8 debug = ecram_read(ecram, model->registers->ECDEBUG); -+ -+ return (vers << 8) + debug; -+} -+ -+/* ============================= */ -+/* Data model for sensor values */ -+/* ============================ */ -+ -+struct sensor_values { -+ u16 fan1_rpm; // current speed in rpm of fan 1 -+ u16 fan2_rpm; // current speed in rpm of fan2 -+ u16 fan1_target_rpm; // target speed in rpm of fan 1 -+ u16 fan2_target_rpm; // target speed in rpm of fan 2 -+ u8 cpu_temp_celsius; // cpu temperature in celcius -+ u8 gpu_temp_celsius; // gpu temperature in celcius -+ u8 ic_temp_celsius; // ic temperature in celcius -+}; -+ -+enum SENSOR_ATTR { -+ SENSOR_CPU_TEMP_ID = 1, -+ SENSOR_GPU_TEMP_ID = 2, -+ SENSOR_IC_TEMP_ID = 3, -+ SENSOR_FAN1_RPM_ID = 4, -+ SENSOR_FAN2_RPM_ID = 5, -+ SENSOR_FAN1_TARGET_RPM_ID = 6, -+ SENSOR_FAN2_TARGET_RPM_ID = 7 -+}; -+ -+static int read_sensor_values(struct ecram *ecram, -+ const struct model_config *model, -+ struct sensor_values *values) -+{ -+ values->fan1_target_rpm = -+ 100 * ecram_read(ecram, model->registers->EXT_FAN1_TARGET_RPM); -+ values->fan2_target_rpm = -+ 100 * ecram_read(ecram, model->registers->EXT_FAN2_TARGET_RPM); -+ -+ values->fan1_rpm = -+ ecram_read(ecram, model->registers->EXT_FAN1_RPM_LSB) + -+ (((int)ecram_read(ecram, model->registers->EXT_FAN1_RPM_MSB)) -+ << 8); -+ values->fan2_rpm = -+ ecram_read(ecram, model->registers->EXT_FAN2_RPM_LSB) + -+ (((int)ecram_read(ecram, model->registers->EXT_FAN2_RPM_MSB)) -+ << 8); -+ -+ values->cpu_temp_celsius = -+ ecram_read(ecram, model->registers->EXT_CPU_TEMP_INPUT); -+ values->gpu_temp_celsius = -+ ecram_read(ecram, model->registers->EXT_GPU_TEMP_INPUT); -+ values->ic_temp_celsius = -+ ecram_read(ecram, model->registers->EXT_IC_TEMP_INPUT); -+ -+ values->cpu_temp_celsius = ecram_read(ecram, 0xC5E6); -+ values->gpu_temp_celsius = ecram_read(ecram, 0xC5E7); -+ values->ic_temp_celsius = ecram_read(ecram, 0xC5E8); -+ -+ return 0; -+} -+ -+/* =============================== */ -+/* Behaviour changing functions */ -+/* =============================== */ -+ -+int read_powermode(struct ecram *ecram, const struct model_config *model) -+{ -+ return ecram_read(ecram, model->registers->EXT_POWERMODE); -+} -+ -+ssize_t write_powermode(struct ecram *ecram, const struct model_config *model, -+ u8 value) -+{ -+ if (!(value >= 0 && value <= 2)) { -+ pr_info("Unexpected power mode value ignored: %d\n", value); -+ return -ENOMEM; -+ } -+ ecram_write(ecram, model->registers->EXT_POWERMODE, value); -+ return 0; -+} -+ -+/** -+ * Shortly toggle powermode to a different mode -+ * and switch back, e.g. to reset fan curve. -+ */ -+void toggle_powermode(struct ecram *ecram, const struct model_config *model) -+{ -+ int old_powermode = read_powermode(ecram, model); -+ int next_powermode = old_powermode == 0 ? 1 : 0; -+ -+ write_powermode(ecram, model, next_powermode); -+ mdelay(1500); -+ write_powermode(ecram, model, old_powermode); -+} -+ -+#define lockfancontroller_ON 8 -+#define lockfancontroller_OFF 0 -+ -+ssize_t write_lockfancontroller(struct ecram *ecram, -+ const struct model_config *model, bool state) -+{ -+ u8 val = state ? 
lockfancontroller_ON : lockfancontroller_OFF; -+ -+ ecram_write(ecram, model->registers->EXT_LOCKFANCONTROLLER, val); -+ return 0; -+} -+ -+int read_lockfancontroller(struct ecram *ecram, -+ const struct model_config *model, bool *state) -+{ -+ int value = ecram_read(ecram, model->registers->EXT_LOCKFANCONTROLLER); -+ -+ switch (value) { -+ case lockfancontroller_ON: -+ *state = true; -+ break; -+ case lockfancontroller_OFF: -+ *state = false; -+ break; -+ default: -+ pr_info("Unexpected value in lockfanspeed register:%d\n", -+ value); -+ return -1; -+ } -+ return 0; -+} -+ -+#define MAXIMUMFANSPEED_ON 0x40 -+#define MAXIMUMFANSPEED_OFF 0x00 -+ -+int read_maximumfanspeed(struct ecram *ecram, const struct model_config *model, -+ bool *state) -+{ -+ int value = ecram_read(ecram, model->registers->EXT_MAXIMUMFANSPEED); -+ -+ switch (value) { -+ case MAXIMUMFANSPEED_ON: -+ *state = true; -+ break; -+ case MAXIMUMFANSPEED_OFF: -+ *state = false; -+ break; -+ default: -+ pr_info("Unexpected value in maximumfanspeed register:%d\n", -+ value); -+ return -1; -+ } -+ return 0; -+} -+ -+ssize_t write_maximumfanspeed(struct ecram *ecram, -+ const struct model_config *model, bool state) -+{ -+ u8 val = state ? MAXIMUMFANSPEED_ON : MAXIMUMFANSPEED_OFF; -+ -+ ecram_write(ecram, model->registers->EXT_MAXIMUMFANSPEED, val); -+ return 0; -+} -+ -+#define MINIFANCUVE_ON_COOL_ON 0x04 -+#define MINIFANCUVE_ON_COOL_OFF 0xA0 -+ -+int read_minifancurve(struct ecram *ecram, const struct model_config *model, -+ bool *state) -+{ -+ int value = -+ ecram_read(ecram, model->registers->EXT_MINIFANCURVE_ON_COOL); -+ -+ switch (value) { -+ case MINIFANCUVE_ON_COOL_ON: -+ *state = true; -+ break; -+ case MINIFANCUVE_ON_COOL_OFF: -+ *state = false; -+ break; -+ default: -+ pr_info("Unexpected value in MINIFANCURVE register:%d\n", -+ value); -+ return -1; -+ } -+ return 0; -+} -+ -+ssize_t write_minifancurve(struct ecram *ecram, -+ const struct model_config *model, bool state) -+{ -+ u8 val = state ? MINIFANCUVE_ON_COOL_ON : MINIFANCUVE_ON_COOL_OFF; -+ -+ ecram_write(ecram, model->registers->EXT_MINIFANCURVE_ON_COOL, val); -+ return 0; -+} -+ -+#define KEYBOARD_BACKLIGHT_OFF 18 -+#define KEYBOARD_BACKLIGHT_ON1 21 -+#define KEYBOARD_BACKLIGHT_ON2 23 -+ -+int read_keyboard_backlight(struct ecram *ecram, -+ const struct model_config *model, int *state) -+{ -+ int value = ecram_read(ecram, -+ model->registers->EXT_WHITE_KEYBOARD_BACKLIGHT); -+ -+ //switch (value) { -+ //case MINIFANCUVE_ON_COOL_ON: -+ // *state = true; -+ // break; -+ //case MINIFANCUVE_ON_COOL_OFF: -+ // *state = false; -+ // break; -+ //default: -+ // pr_info("Unexpected value in MINIFANCURVE register:%d\n", -+ // value); -+ // return -1; -+ //} -+ *state = value; -+ return 0; -+} -+ -+int write_keyboard_backlight(struct ecram *ecram, -+ const struct model_config *model, int state) -+{ -+ u8 val = state > 0 ? KEYBOARD_BACKLIGHT_ON1 : KEYBOARD_BACKLIGHT_OFF; -+ -+ ecram_write(ecram, model->registers->EXT_WHITE_KEYBOARD_BACKLIGHT, val); -+ return 0; -+} -+ -+#define FCT_RAPID_CHARGE_ON 0x07 -+#define FCT_RAPID_CHARGE_OFF 0x08 -+#define RAPID_CHARGE_ON 0x0 -+#define RAPID_CHARGE_OFF 0x1 -+ -+int read_rapidcharge(acpi_handle acpihandle, int *state) -+{ -+ unsigned long result; -+ int err; -+ -+ err = eval_qcho(acpihandle, &result); -+ if (err) -+ return err; -+ -+ *state = result; -+ return 0; -+} -+ -+int write_rapidcharge(acpi_handle acpihandle, bool state) -+{ -+ unsigned long fct_nr = state > 0 ? 
FCT_RAPID_CHARGE_ON : -+ FCT_RAPID_CHARGE_OFF; -+ return exec_sbmc(acpihandle, fct_nr); -+} -+ -+/* ============================= */ -+/* Data model for fan curve */ -+/* ============================ */ -+ -+struct fancurve_point { -+ // rpm1 devided by 100 -+ u8 rpm1_raw; -+ // rpm2 devided by 100 -+ u8 rpm2_raw; -+ // >=2 , <=5 (lower is faster); must be increasing by level -+ u8 accel; -+ // >=2 , <=5 (lower is faster); must be increasing by level -+ u8 decel; -+ -+ // min must be lower or equal than max -+ // last level max must be 127 -+ // <=127 cpu max temp for this level; must be increasing by level -+ u8 cpu_max_temp_celsius; -+ // <=127 cpu min temp for this level; must be increasing by level -+ u8 cpu_min_temp_celsius; -+ // <=127 gpu min temp for this level; must be increasing by level -+ u8 gpu_max_temp_celsius; -+ // <=127 gpu max temp for this level; must be increasing by level -+ u8 gpu_min_temp_celsius; -+ // <=127 ic max temp for this level; must be increasing by level -+ u8 ic_max_temp_celsius; -+ // <=127 ic max temp for this level; must be increasing by level -+ u8 ic_min_temp_celsius; -+}; -+ -+enum FANCURVE_ATTR { -+ FANCURVE_ATTR_PWM1 = 1, -+ FANCURVE_ATTR_PWM2 = 2, -+ FANCURVE_ATTR_CPU_TEMP = 3, -+ FANCURVE_ATTR_CPU_HYST = 4, -+ FANCURVE_ATTR_GPU_TEMP = 5, -+ FANCURVE_ATTR_GPU_HYST = 6, -+ FANCURVE_ATTR_IC_TEMP = 7, -+ FANCURVE_ATTR_IC_HYST = 8, -+ FANCURVE_ATTR_ACCEL = 9, -+ FANCURVE_ATTR_DECEL = 10, -+ FANCURVE_SIZE = 11, -+ FANCURVE_MINIFANCURVE_ON_COOL = 12 -+}; -+ -+// used for clearing table entries -+static const struct fancurve_point fancurve_point_zero = { 0, 0, 0, 0, 0, -+ 0, 0, 0, 0, 0 }; -+ -+struct fancurve { -+ struct fancurve_point points[MAXFANCURVESIZE]; -+ // number of points used; must be <= MAXFANCURVESIZE -+ size_t size; -+ // the point that at which fans are run currently -+ size_t current_point_i; -+}; -+ -+// calculate derived values -+ -+int fancurve_get_cpu_deltahyst(struct fancurve_point *point) -+{ -+ return ((int)point->cpu_max_temp_celsius) - -+ ((int)point->cpu_min_temp_celsius); -+} -+ -+int fancurve_get_gpu_deltahyst(struct fancurve_point *point) -+{ -+ return ((int)point->gpu_max_temp_celsius) - -+ ((int)point->gpu_min_temp_celsius); -+} -+ -+int fancurve_get_ic_deltahyst(struct fancurve_point *point) -+{ -+ return ((int)point->ic_max_temp_celsius) - -+ ((int)point->ic_min_temp_celsius); -+} -+ -+// validation functions -+ -+bool fancurve_is_valid_min_temp(int min_temp) -+{ -+ return min_temp >= 0 && min_temp <= 127; -+} -+ -+bool fancurve_is_valid_max_temp(int max_temp) -+{ -+ return max_temp >= 0 && max_temp <= 127; -+} -+ -+// setters with validation -+// - make hwmon implementation easier -+// - keep fancurve valid, otherwise EC will not properly control fan -+ -+bool fancurve_set_rpm1(struct fancurve *fancurve, int point_id, int rpm) -+{ -+ bool valid = point_id == 0 ? rpm == 0 : (rpm >= 0 && rpm <= 4500); -+ -+ if (valid) -+ fancurve->points[point_id].rpm1_raw = rpm / 100; -+ return valid; -+} -+ -+bool fancurve_set_rpm2(struct fancurve *fancurve, int point_id, int rpm) -+{ -+ bool valid = point_id == 0 ? rpm == 0 : (rpm >= 0 && rpm <= 4500); -+ -+ if (valid) -+ fancurve->points[point_id].rpm2_raw = rpm / 100; -+ return valid; -+} -+ -+// TODO: remove { ... 
} from single line if body -+ -+bool fancurve_set_accel(struct fancurve *fancurve, int point_id, int accel) -+{ -+ bool valid = accel >= 2 && accel <= 5; -+ -+ if (valid) -+ fancurve->points[point_id].accel = accel; -+ return valid; -+} -+ -+bool fancurve_set_decel(struct fancurve *fancurve, int point_id, int decel) -+{ -+ bool valid = decel >= 2 && decel <= 5; -+ -+ if (valid) -+ fancurve->points[point_id].decel = decel; -+ return valid; -+} -+ -+bool fancurve_set_cpu_temp_max(struct fancurve *fancurve, int point_id, -+ int value) -+{ -+ bool valid = fancurve_is_valid_max_temp(value); -+ -+ if (valid) -+ fancurve->points[point_id].cpu_max_temp_celsius = value; -+ -+ return valid; -+} -+ -+bool fancurve_set_gpu_temp_max(struct fancurve *fancurve, int point_id, -+ int value) -+{ -+ bool valid = fancurve_is_valid_max_temp(value); -+ -+ if (valid) -+ fancurve->points[point_id].gpu_max_temp_celsius = value; -+ return valid; -+} -+ -+bool fancurve_set_ic_temp_max(struct fancurve *fancurve, int point_id, -+ int value) -+{ -+ bool valid = fancurve_is_valid_max_temp(value); -+ -+ if (valid) -+ fancurve->points[point_id].ic_max_temp_celsius = value; -+ return valid; -+} -+ -+bool fancurve_set_cpu_temp_min(struct fancurve *fancurve, int point_id, -+ int value) -+{ -+ bool valid = fancurve_is_valid_min_temp(value); -+ -+ if (valid) -+ fancurve->points[point_id].cpu_min_temp_celsius = value; -+ return valid; -+} -+ -+bool fancurve_set_gpu_temp_min(struct fancurve *fancurve, int point_id, -+ int value) -+{ -+ bool valid = fancurve_is_valid_min_temp(value); -+ -+ if (valid) -+ fancurve->points[point_id].gpu_min_temp_celsius = value; -+ return valid; -+} -+ -+bool fancurve_set_ic_temp_min(struct fancurve *fancurve, int point_id, -+ int value) -+{ -+ bool valid = fancurve_is_valid_min_temp(value); -+ -+ if (valid) -+ fancurve->points[point_id].ic_min_temp_celsius = value; -+ return valid; -+} -+ -+bool fancurve_set_size(struct fancurve *fancurve, int size, bool init_values) -+{ -+ bool valid = size >= 1 && size <= MAXFANCURVESIZE; -+ -+ if (!valid) -+ return false; -+ if (init_values && size < fancurve->size) { -+ // fancurve size is decreased, but the last entry always needs 127 temperatures -+ // Note: size >=1 -+ fancurve->points[size - 1].cpu_max_temp_celsius = 127; -+ fancurve->points[size - 1].ic_max_temp_celsius = 127; -+ fancurve->points[size - 1].gpu_max_temp_celsius = 127; -+ } -+ if (init_values && size > fancurve->size) { -+ // fancurve size is increased, so new entries need valid values -+ int i; -+ int last = fancurve->size > 0 ? fancurve->size - 1 : 0; -+ -+ for (i = fancurve->size; i < size; ++i) -+ fancurve->points[i] = fancurve->points[last]; -+ } -+ return true; -+} -+ -+/* Read the fan curve from the EC. -+ * -+ * In newer models (>=2022) there is an ACPI/WMI method to read the fan -+ * curve as a whole. So read/write the fan table as a whole to use the -+ * same interface for both cases. -+ * -+ * It reads all points from EC memory, even if the stored fancurve is -+ * smaller, so it can contain 0 entries. 
-+ */ -+static int read_fancurve(struct ecram *ecram, const struct model_config *model, -+ struct fancurve *fancurve) -+{ -+ size_t i = 0; -+ -+ for (i = 0; i < MAXFANCURVESIZE; ++i) { -+ struct fancurve_point *point = &fancurve->points[i]; -+ -+ point->rpm1_raw = -+ ecram_read(ecram, model->registers->EXT_FAN1_BASE + i); -+ point->rpm2_raw = -+ ecram_read(ecram, model->registers->EXT_FAN2_BASE + i); -+ -+ point->accel = ecram_read( -+ ecram, model->registers->EXT_FAN_ACC_BASE + i); -+ point->decel = ecram_read( -+ ecram, model->registers->EXT_FAN_DEC_BASE + i); -+ point->cpu_max_temp_celsius = -+ ecram_read(ecram, model->registers->EXT_CPU_TEMP + i); -+ point->cpu_min_temp_celsius = ecram_read( -+ ecram, model->registers->EXT_CPU_TEMP_HYST + i); -+ point->gpu_max_temp_celsius = -+ ecram_read(ecram, model->registers->EXT_GPU_TEMP + i); -+ point->gpu_min_temp_celsius = ecram_read( -+ ecram, model->registers->EXT_GPU_TEMP_HYST + i); -+ point->ic_max_temp_celsius = -+ ecram_read(ecram, model->registers->EXT_VRM_TEMP + i); -+ point->ic_min_temp_celsius = ecram_read( -+ ecram, model->registers->EXT_VRM_TEMP_HYST + i); -+ } -+ -+ // Do not trust the hardware; it might suddenly report -+ // a larger size, so clamp it. -+ fancurve->size = -+ ecram_read(ecram, model->registers->EXT_FAN_POINTS_SIZE); -+ fancurve->size = -+ min(fancurve->size, (typeof(fancurve->size))(MAXFANCURVESIZE)); -+ fancurve->current_point_i = -+ ecram_read(ecram, model->registers->EXT_FAN_CUR_POINT); -+ fancurve->current_point_i = -+ min(fancurve->current_point_i, fancurve->size); -+ return 0; -+} -+ -+static int write_fancurve(struct ecram *ecram, const struct model_config *model, -+ const struct fancurve *fancurve, bool write_size) -+{ -+ size_t i; -+ // Reset fan update counters (try to avoid any race conditions) -+ ecram_write(ecram, 0xC5FE, 0); -+ ecram_write(ecram, 0xC5FF, 0); -+ for (i = 0; i < MAXFANCURVESIZE; ++i) { -+ // Entries for points larger than fancurve size should be cleared -+ // to 0 -+ const struct fancurve_point *point = -+ i < fancurve->size ? 
&fancurve->points[i] : -+ &fancurve_point_zero; -+ -+ ecram_write(ecram, model->registers->EXT_FAN1_BASE + i, -+ point->rpm1_raw); -+ ecram_write(ecram, model->registers->EXT_FAN2_BASE + i, -+ point->rpm2_raw); -+ -+ ecram_write(ecram, model->registers->EXT_FAN_ACC_BASE + i, -+ point->accel); -+ ecram_write(ecram, model->registers->EXT_FAN_DEC_BASE + i, -+ point->decel); -+ -+ ecram_write(ecram, model->registers->EXT_CPU_TEMP + i, -+ point->cpu_max_temp_celsius); -+ ecram_write(ecram, model->registers->EXT_CPU_TEMP_HYST + i, -+ point->cpu_min_temp_celsius); -+ ecram_write(ecram, model->registers->EXT_GPU_TEMP + i, -+ point->gpu_max_temp_celsius); -+ ecram_write(ecram, model->registers->EXT_GPU_TEMP_HYST + i, -+ point->gpu_min_temp_celsius); -+ ecram_write(ecram, model->registers->EXT_VRM_TEMP + i, -+ point->ic_max_temp_celsius); -+ ecram_write(ecram, model->registers->EXT_VRM_TEMP_HYST + i, -+ point->ic_min_temp_celsius); -+ } -+ -+ if (write_size) { -+ ecram_write(ecram, model->registers->EXT_FAN_POINTS_SIZE, -+ fancurve->size); -+ } -+ -+ // Reset the current fan level to 0, so the algorithm in the EC -+ // selects a fan curve point again, resetting hysteresis -+ // effects -+ ecram_write(ecram, model->registers->EXT_FAN_CUR_POINT, 0); -+ -+ // Reset internal fan levels -+ ecram_write(ecram, 0xC634, 0); // CPU -+ ecram_write(ecram, 0xC635, 0); // GPU -+ ecram_write(ecram, 0xC636, 0); // SENSOR -+ -+ return 0; -+} -+ -+static ssize_t fancurve_print_seqfile(const struct fancurve *fancurve, -+ struct seq_file *s) -+{ -+ int i; -+ -+ seq_printf( -+ s, -+ "rpm1|rpm2|acceleration|deceleration|cpu_min_temp|cpu_max_temp|gpu_min_temp|gpu_max_temp|ic_min_temp|ic_max_temp\n"); -+ for (i = 0; i < fancurve->size; ++i) { -+ const struct fancurve_point *point = &fancurve->points[i]; -+ -+ seq_printf( -+ s, "%d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\n", -+ point->rpm1_raw * 100, point->rpm2_raw * 100, -+ point->accel, point->decel, point->cpu_min_temp_celsius, -+ point->cpu_max_temp_celsius, -+ point->gpu_min_temp_celsius, -+ point->gpu_max_temp_celsius, point->ic_min_temp_celsius, -+ point->ic_max_temp_celsius); -+ } -+ return 0; -+} -+ -+/* ============================= */ -+/* Global and shared data between */ -+/* all calls to this module */ -+/* ============================ */ -+// Implemented like ideapad-laptop.c but currently still -+// without dynamic memory allocation (instead a global _priv) -+ -+struct legion_private { -+ struct platform_device *platform_device; -+ // TODO: remove or keep? init? -+ // struct acpi_device *adev; -+ -+ // Method to access ECRAM -+ struct ecram ecram; -+ // Configuration with registers and ECRAM access method -+ const struct model_config *conf; -+ -+ // TODO: maybe refactor and keep only local to each function -+ // last known fan curve -+ struct fancurve fancurve; -+ // configured fan curve from user space -+ struct fancurve fancurve_configured; -+ -+ // update lock, when partial values of fancurve are changed -+ struct mutex fancurve_mutex; -+ -+ //interfaces -+ struct dentry *debugfs_dir; -+ struct device *hwmon_dev; -+ struct platform_profile_handler platform_profile_handler; -+ -+ // TODO: remove? 
-+ bool loaded; -+}; -+ -+// shared between different drivers: WMI, platform and proteced by mutex -+static struct legion_private *legion_shared; -+static struct legion_private _priv; -+static DEFINE_MUTEX(legion_shared_mutex); -+ -+static int legion_shared_init(struct legion_private *priv) -+{ -+ int ret; -+ -+ mutex_lock(&legion_shared_mutex); -+ -+ if (!legion_shared) { -+ legion_shared = priv; -+ mutex_init(&legion_shared->fancurve_mutex); -+ ret = 0; -+ } else { -+ pr_warn("Found multiple platform devices\n"); -+ ret = -EINVAL; -+ } -+ -+ priv->loaded = true; -+ mutex_unlock(&legion_shared_mutex); -+ -+ return ret; -+} -+ -+static void legion_shared_exit(struct legion_private *priv) -+{ -+ pr_info("Unloading legion shared\n"); -+ mutex_lock(&legion_shared_mutex); -+ -+ if (legion_shared == priv) -+ legion_shared = NULL; -+ -+ mutex_unlock(&legion_shared_mutex); -+ pr_info("Unloading legion shared done\n"); -+} -+ -+/* ============================= */ -+/* debugfs interface */ -+/* ============================ */ -+ -+static int debugfs_ecmemory_show(struct seq_file *s, void *unused) -+{ -+ struct legion_private *priv = s->private; -+ size_t offset; -+ -+ for (offset = 0; offset < priv->conf->memoryio_size; ++offset) { -+ char value = ecram_read(&priv->ecram, -+ priv->conf->memoryio_physical_ec_start + -+ offset); -+ -+ seq_write(s, &value, 1); -+ } -+ return 0; -+} -+ -+DEFINE_SHOW_ATTRIBUTE(debugfs_ecmemory); -+ -+static int debugfs_fancurve_show(struct seq_file *s, void *unused) -+{ -+ struct legion_private *priv = s->private; -+ bool is_minifancurve; -+ bool is_lockfancontroller; -+ bool is_maximumfanspeed; -+ int err; -+ -+ seq_printf(s, "EC Chip ID: %x\n", read_ec_id(&priv->ecram, priv->conf)); -+ seq_printf(s, "EC Chip Version: %x\n", -+ read_ec_version(&priv->ecram, priv->conf)); -+ seq_printf(s, "legion_laptop features: %s\n", LEGIONFEATURES); -+ seq_printf(s, "legion_laptop ec_readonly: %d\n", ec_readonly); -+ read_fancurve(&priv->ecram, priv->conf, &priv->fancurve); -+ -+ seq_printf(s, "minifancurve feature enabled: %d\n", -+ priv->conf->has_minifancurve); -+ err = read_minifancurve(&priv->ecram, priv->conf, &is_minifancurve); -+ seq_printf(s, "minifancurve on cool: %s\n", -+ err ? "error" : (is_minifancurve ? "true" : "false")); -+ err = read_lockfancontroller(&priv->ecram, priv->conf, -+ &is_lockfancontroller); -+ seq_printf(s, "lock fan controller: %s\n", -+ err ? "error" : (is_lockfancontroller ? "true" : "false")); -+ err = read_maximumfanspeed(&priv->ecram, priv->conf, -+ &is_maximumfanspeed); -+ seq_printf(s, "enable maximumfanspeed: %s\n", -+ err ? "error" : (is_maximumfanspeed ? 
"true" : "false")); -+ seq_printf(s, "enable maximumfanspeed status: %d\n", err); -+ -+ seq_printf(s, "fan curve current point id: %ld\n", -+ priv->fancurve.current_point_i); -+ seq_printf(s, "fan curve points size: %ld\n", priv->fancurve.size); -+ -+ seq_puts(s, "Current fan curve in hardware (embedded controller):\n"); -+ fancurve_print_seqfile(&priv->fancurve, s); -+ seq_puts(s, "=====================\n"); -+ return 0; -+} -+ -+DEFINE_SHOW_ATTRIBUTE(debugfs_fancurve); -+ -+static void legion_debugfs_init(struct legion_private *priv) -+{ -+ struct dentry *dir; -+ -+ // TODO: remove this note -+ // Note: as other kernel modules, do not catch errors here -+ // because if kernel is build without debugfs this -+ // will return an error but module still has to -+ // work, just without debugfs -+ // TODO: what permissions; some modules do 400 -+ // other do 444 -+ dir = debugfs_create_dir(LEGION_DRVR_SHORTNAME, NULL); -+ debugfs_create_file("fancurve", 0444, dir, priv, -+ &debugfs_fancurve_fops); -+ debugfs_create_file("ecmemory", 0444, dir, priv, -+ &debugfs_ecmemory_fops); -+ -+ priv->debugfs_dir = dir; -+} -+ -+static void legion_debugfs_exit(struct legion_private *priv) -+{ -+ pr_info("Unloading legion dubugfs\n"); -+ // The following is does nothing if pointer is NULL -+ debugfs_remove_recursive(priv->debugfs_dir); -+ priv->debugfs_dir = NULL; -+ pr_info("Unloading legion dubugfs done\n"); -+} -+ -+/* ============================= */ -+/* sysfs interface */ -+/* ============================ */ -+ -+static ssize_t powermode_show(struct device *dev, struct device_attribute *attr, -+ char *buf) -+{ -+ struct legion_private *priv = dev_get_drvdata(dev); -+ int power_mode = read_powermode(&priv->ecram, priv->conf); -+ -+ return sysfs_emit(buf, "%d\n", power_mode); -+} -+ -+static ssize_t powermode_store(struct device *dev, -+ struct device_attribute *attr, const char *buf, -+ size_t count) -+{ -+ struct legion_private *priv = dev_get_drvdata(dev); -+ int powermode; -+ int err; -+ -+ err = kstrtouint(buf, 0, &powermode); -+ if (err) -+ return err; -+ -+ err = write_powermode(&priv->ecram, priv->conf, powermode); -+ if (err) -+ return -EINVAL; -+ -+ // TODO: better? 
-+ // we have to wait a bit before change is done in hardware and -+ // readback done after notifying returns correct value, otherwise -+ // the notified reader will read old value -+ msleep(500); -+ platform_profile_notify(); -+ -+ return count; -+} -+ -+static DEVICE_ATTR_RW(powermode); -+ -+static ssize_t lockfancontroller_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ struct legion_private *priv = dev_get_drvdata(dev); -+ bool is_lockfancontroller; -+ int err; -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = read_lockfancontroller(&priv->ecram, priv->conf, -+ &is_lockfancontroller); -+ mutex_unlock(&priv->fancurve_mutex); -+ if (err) -+ return -EINVAL; -+ -+ return sysfs_emit(buf, "%d\n", is_lockfancontroller); -+} -+ -+static ssize_t lockfancontroller_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ struct legion_private *priv = dev_get_drvdata(dev); -+ bool is_lockfancontroller; -+ int err; -+ -+ err = kstrtobool(buf, &is_lockfancontroller); -+ if (err) -+ return err; -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = write_lockfancontroller(&priv->ecram, priv->conf, -+ is_lockfancontroller); -+ mutex_unlock(&priv->fancurve_mutex); -+ if (err) -+ return -EINVAL; -+ -+ return count; -+} -+ -+static DEVICE_ATTR_RW(lockfancontroller); -+ -+static ssize_t keyboard_backlight_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ int state; -+ struct legion_private *priv = dev_get_drvdata(dev); -+ -+ read_keyboard_backlight(&priv->ecram, priv->conf, &state); -+ return sysfs_emit(buf, "%d\n", state); -+} -+ -+static ssize_t keyboard_backlight_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ struct legion_private *priv = dev_get_drvdata(dev); -+ int state; -+ int err; -+ -+ err = kstrtouint(buf, 0, &state); -+ if (err) -+ return err; -+ -+ err = write_keyboard_backlight(&priv->ecram, priv->conf, state); -+ if (err) -+ return -EINVAL; -+ -+ return count; -+} -+ -+static DEVICE_ATTR_RW(keyboard_backlight); -+ -+static struct attribute *legion_sysfs_attributes[] = { -+ &dev_attr_powermode.attr, &dev_attr_lockfancontroller.attr, -+ &dev_attr_keyboard_backlight.attr, NULL -+}; -+ -+static const struct attribute_group legion_attribute_group = { -+ .attrs = legion_sysfs_attributes -+}; -+ -+static int legion_sysfs_init(struct legion_private *priv) -+{ -+ return device_add_group(&priv->platform_device->dev, -+ &legion_attribute_group); -+} -+ -+static void legion_sysfs_exit(struct legion_private *priv) -+{ -+ pr_info("Unloading legion sysfs\n"); -+ device_remove_group(&priv->platform_device->dev, -+ &legion_attribute_group); -+ pr_info("Unloading legion sysfs done\n"); -+} -+ -+/* ============================= */ -+/* WMI + ACPI */ -+/* ============================ */ -+// heavily based on ideapad_laptop.c -+ -+// TODO: proper names if meaning of all events is clear -+enum LEGION_WMI_EVENT { -+ LEGION_WMI_EVENT_GAMEZONE = 1, -+ LEGION_EVENT_A, -+ LEGION_EVENT_B, -+ LEGION_EVENT_C, -+ LEGION_EVENT_D, -+ LEGION_EVENT_E, -+ LEGION_EVENT_F, -+ LEGION_EVENT_G -+}; -+ -+struct legion_wmi_private { -+ enum LEGION_WMI_EVENT event; -+}; -+ -+//static void legion_wmi_notify2(u32 value, void *context) -+// { -+// pr_info("WMI notify\n" ); -+// } -+ -+static void legion_wmi_notify(struct wmi_device *wdev, union acpi_object *data) -+{ -+ struct legion_wmi_private *wpriv; -+ struct legion_private *priv; -+ -+ mutex_lock(&legion_shared_mutex); -+ priv = 
legion_shared; -+ if (!priv || !priv->loaded) { -+ pr_info("Received WMI event while not initialized!\n"); -+ goto unlock; -+ } -+ -+ wpriv = dev_get_drvdata(&wdev->dev); -+ switch (wpriv->event) { -+ case LEGION_EVENT_A: -+ pr_info("Fan event: legion type: %d; acpi type: %d (%d=integer)", -+ wpriv->event, data->type, ACPI_TYPE_INTEGER); -+ // TODO: here it is too early (first unlock mutex, then wait a bit) -+ //platform_profile_notify(); -+ break; -+ default: -+ pr_info("Event: legion type: %d; acpi type: %d (%d=integer)", -+ wpriv->event, data->type, ACPI_TYPE_INTEGER); -+ break; -+ } -+ -+unlock: -+ mutex_unlock(&legion_shared_mutex); -+ // TODO: fix this! -+ // problem: we get an event just before the powermode change (from the key?), -+ // so if we notify too early, it will read the old power mode/platform profile -+ msleep(500); -+ platform_profile_notify(); -+} -+ -+static int legion_wmi_probe(struct wmi_device *wdev, const void *context) -+{ -+ struct legion_wmi_private *wpriv; -+ -+ wpriv = devm_kzalloc(&wdev->dev, sizeof(*wpriv), GFP_KERNEL); -+ if (!wpriv) -+ return -ENOMEM; -+ -+ *wpriv = *(const struct legion_wmi_private *)context; -+ -+ dev_set_drvdata(&wdev->dev, wpriv); -+ dev_info(&wdev->dev, "Register after probing for WMI.\n"); -+ return 0; -+} -+ -+static const struct legion_wmi_private legion_wmi_context_gamezone = { -+ .event = LEGION_WMI_EVENT_GAMEZONE -+}; -+static const struct legion_wmi_private legion_wmi_context_a = { -+ .event = LEGION_EVENT_A -+}; -+static const struct legion_wmi_private legion_wmi_context_b = { -+ .event = LEGION_EVENT_B -+}; -+static const struct legion_wmi_private legion_wmi_context_c = { -+ .event = LEGION_EVENT_C -+}; -+static const struct legion_wmi_private legion_wmi_context_d = { -+ .event = LEGION_EVENT_D -+}; -+static const struct legion_wmi_private legion_wmi_context_e = { -+ .event = LEGION_EVENT_E -+}; -+static const struct legion_wmi_private legion_wmi_context_f = { -+ .event = LEGION_EVENT_F -+}; -+ -+// check if really a method -+#define LEGION_WMI_GAMEZONE_GUID "887B54E3-DDDC-4B2C-8B88-68A26A8835D0" -+ -+#define LEGION_WMI_GUID_FAN_EVENT "D320289E-8FEA-41E0-86F9-611D83151B5F" -+#define LEGION_WMI_GUID_FAN2_EVENT "bc72a435-e8c1-4275-b3e2-d8b8074aba59" -+#define LEGION_WMI_GUID_GAMEZONE_KEY_EVENT \ -+ "10afc6d9-ea8b-4590-a2e7-1cd3c84bb4b1" -+#define LEGION_WMI_GUID_GAMEZONE_GPU_EVENT \ -+ "bfd42481-aee3-4502-a107-afb68425c5f8" -+#define LEGION_WMI_GUID_GAMEZONE_OC_EVENT "d062906b-12d4-4510-999d-4831ee80e985" -+#define LEGION_WMI_GUID_GAMEZONE_TEMP_EVENT \ -+ "bfd42481-aee3-4501-a107-afb68425c5f8" -+//#define LEGION_WMI_GUID_GAMEZONE_DATA_EVENT "887b54e3-dddc-4b2c-8b88-68a26a8835d0" -+ -+static const struct wmi_device_id legion_wmi_ids[] = { -+ { LEGION_WMI_GAMEZONE_GUID, &legion_wmi_context_gamezone }, -+ { LEGION_WMI_GUID_FAN_EVENT, &legion_wmi_context_a }, -+ { LEGION_WMI_GUID_FAN2_EVENT, &legion_wmi_context_b }, -+ { LEGION_WMI_GUID_GAMEZONE_KEY_EVENT, &legion_wmi_context_c }, -+ { LEGION_WMI_GUID_GAMEZONE_GPU_EVENT, &legion_wmi_context_d }, -+ { LEGION_WMI_GUID_GAMEZONE_OC_EVENT, &legion_wmi_context_e }, -+ { LEGION_WMI_GUID_GAMEZONE_TEMP_EVENT, &legion_wmi_context_f }, -+ { "8FC0DE0C-B4E4-43FD-B0F3-8871711C1294", -+ &legion_wmi_context_gamezone }, /* Legion 5 */ -+ {}, -+}; -+MODULE_DEVICE_TABLE(wmi, legion_wmi_ids); -+ -+static struct wmi_driver legion_wmi_driver = { -+ .driver = { -+ .name = "legion_wmi", -+ }, -+ .id_table = legion_wmi_ids, -+ .probe = legion_wmi_probe, -+ .notify = legion_wmi_notify, -+}; -+ 
-+//acpi_status status = wmi_install_notify_handler(LEGION_WMI_GAMEZONE_GUID, -+// legion_wmi_notify2, NULL); -+//if (ACPI_FAILURE(status)) { -+// return -ENODEV; -+//} -+//return 0; -+ -+static int legion_wmi_init(void) -+{ -+ return wmi_driver_register(&legion_wmi_driver); -+} -+ -+static void legion_wmi_exit(void) -+{ -+ // TODO: remove this -+ pr_info("Unloading legion WMI\n"); -+ -+ //wmi_remove_notify_handler(LEGION_WMI_GAMEZONE_GUID); -+ wmi_driver_unregister(&legion_wmi_driver); -+ pr_info("Unloading legion WMI done\n"); -+} -+ -+/* ============================= */ -+/* Platform profile */ -+/* ============================ */ -+ -+enum LEGION_POWERMODE { -+ LEGION_POWERMODE_BALANCED = 0, -+ LEGION_POWERMODE_PERFORMANCE = 1, -+ LEGION_POWERMODE_QUIET = 2, -+}; -+ -+static int legion_platform_profile_get(struct platform_profile_handler *pprof, -+ enum platform_profile_option *profile) -+{ -+ int powermode; -+ struct legion_private *priv; -+ -+ priv = container_of(pprof, struct legion_private, -+ platform_profile_handler); -+ powermode = read_powermode(&priv->ecram, priv->conf); -+ -+ switch (powermode) { -+ case LEGION_POWERMODE_BALANCED: -+ *profile = PLATFORM_PROFILE_BALANCED; -+ break; -+ case LEGION_POWERMODE_PERFORMANCE: -+ *profile = PLATFORM_PROFILE_PERFORMANCE; -+ break; -+ case LEGION_POWERMODE_QUIET: -+ *profile = PLATFORM_PROFILE_QUIET; -+ break; -+ default: -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+static int legion_platform_profile_set(struct platform_profile_handler *pprof, -+ enum platform_profile_option profile) -+{ -+ int powermode; -+ struct legion_private *priv; -+ -+ priv = container_of(pprof, struct legion_private, -+ platform_profile_handler); -+ -+ switch (profile) { -+ case PLATFORM_PROFILE_BALANCED: -+ powermode = LEGION_POWERMODE_BALANCED; -+ break; -+ case PLATFORM_PROFILE_PERFORMANCE: -+ powermode = LEGION_POWERMODE_PERFORMANCE; -+ break; -+ case PLATFORM_PROFILE_QUIET: -+ powermode = LEGION_POWERMODE_QUIET; -+ break; -+ default: -+ return -EOPNOTSUPP; -+ } -+ -+ return write_powermode(&priv->ecram, priv->conf, powermode); -+} -+ -+static int legion_platform_profile_init(struct legion_private *priv) -+{ -+ int err; -+ -+ priv->platform_profile_handler.profile_get = -+ legion_platform_profile_get; -+ priv->platform_profile_handler.profile_set = -+ legion_platform_profile_set; -+ -+ set_bit(PLATFORM_PROFILE_QUIET, priv->platform_profile_handler.choices); -+ set_bit(PLATFORM_PROFILE_BALANCED, -+ priv->platform_profile_handler.choices); -+ set_bit(PLATFORM_PROFILE_PERFORMANCE, -+ priv->platform_profile_handler.choices); -+ -+ err = platform_profile_register(&priv->platform_profile_handler); -+ if (err) -+ return err; -+ -+ return 0; -+} -+ -+static void legion_platform_profile_exit(struct legion_private *priv) -+{ -+ pr_info("Unloading legion platform profile\n"); -+ platform_profile_remove(); -+ pr_info("Unloading legion platform profile done\n"); -+} -+ -+/* ============================= */ -+/* hwom interface */ -+/* ============================ */ -+ -+// hw-mon interface -+ -+// todo: register_group or register_info? -+ -+// TODO: use one common function (like here) or one function per attribute? 
-+static ssize_t sensor_label_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ int sensor_id = (to_sensor_dev_attr(attr))->index; -+ const char *label; -+ -+ switch (sensor_id) { -+ case SENSOR_CPU_TEMP_ID: -+ label = "CPU Temperature\n"; -+ break; -+ case SENSOR_GPU_TEMP_ID: -+ label = "GPU Temperature\n"; -+ break; -+ case SENSOR_IC_TEMP_ID: -+ label = "IC Temperature\n"; -+ break; -+ case SENSOR_FAN1_RPM_ID: -+ label = "Fan 1\n"; -+ break; -+ case SENSOR_FAN2_RPM_ID: -+ label = "Fan 2\n"; -+ break; -+ case SENSOR_FAN1_TARGET_RPM_ID: -+ label = "Fan 1 Target\n"; -+ break; -+ case SENSOR_FAN2_TARGET_RPM_ID: -+ label = "Fan 2 Target\n"; -+ break; -+ default: -+ return -EOPNOTSUPP; -+ } -+ -+ return sprintf(buf, label); -+} -+ -+// TODO: use one common function (like here) or one function per attribute? -+static ssize_t sensor_show(struct device *dev, struct device_attribute *devattr, -+ char *buf) -+{ -+ struct legion_private *priv = dev_get_drvdata(dev); -+ int sensor_id = (to_sensor_dev_attr(devattr))->index; -+ struct sensor_values values; -+ int outval; -+ -+ read_sensor_values(&priv->ecram, priv->conf, &values); -+ -+ switch (sensor_id) { -+ case SENSOR_CPU_TEMP_ID: -+ outval = 1000 * values.cpu_temp_celsius; -+ break; -+ case SENSOR_GPU_TEMP_ID: -+ outval = 1000 * values.gpu_temp_celsius; -+ break; -+ case SENSOR_IC_TEMP_ID: -+ outval = 1000 * values.ic_temp_celsius; -+ break; -+ case SENSOR_FAN1_RPM_ID: -+ outval = values.fan1_rpm; -+ break; -+ case SENSOR_FAN2_RPM_ID: -+ outval = values.fan2_rpm; -+ break; -+ case SENSOR_FAN1_TARGET_RPM_ID: -+ outval = values.fan1_target_rpm; -+ break; -+ case SENSOR_FAN2_TARGET_RPM_ID: -+ outval = values.fan2_target_rpm; -+ break; -+ default: -+ pr_info("Error reading sensor value with id %d\n", sensor_id); -+ return -EOPNOTSUPP; -+ } -+ -+ return sprintf(buf, "%d\n", outval); -+} -+ -+static SENSOR_DEVICE_ATTR_RO(temp1_input, sensor, SENSOR_CPU_TEMP_ID); -+static SENSOR_DEVICE_ATTR_RO(temp1_label, sensor_label, SENSOR_CPU_TEMP_ID); -+static SENSOR_DEVICE_ATTR_RO(temp2_input, sensor, SENSOR_GPU_TEMP_ID); -+static SENSOR_DEVICE_ATTR_RO(temp2_label, sensor_label, SENSOR_GPU_TEMP_ID); -+static SENSOR_DEVICE_ATTR_RO(temp3_input, sensor, SENSOR_IC_TEMP_ID); -+static SENSOR_DEVICE_ATTR_RO(temp3_label, sensor_label, SENSOR_IC_TEMP_ID); -+static SENSOR_DEVICE_ATTR_RO(fan1_input, sensor, SENSOR_FAN1_RPM_ID); -+static SENSOR_DEVICE_ATTR_RO(fan1_label, sensor_label, SENSOR_FAN1_RPM_ID); -+static SENSOR_DEVICE_ATTR_RO(fan2_input, sensor, SENSOR_FAN2_RPM_ID); -+static SENSOR_DEVICE_ATTR_RO(fan2_label, sensor_label, SENSOR_FAN2_RPM_ID); -+static SENSOR_DEVICE_ATTR_RO(fan1_target, sensor, SENSOR_FAN1_TARGET_RPM_ID); -+static SENSOR_DEVICE_ATTR_RO(fan2_target, sensor, SENSOR_FAN2_TARGET_RPM_ID); -+ -+static struct attribute *sensor_hwmon_attributes[] = { -+ &sensor_dev_attr_temp1_input.dev_attr.attr, -+ &sensor_dev_attr_temp1_label.dev_attr.attr, -+ &sensor_dev_attr_temp2_input.dev_attr.attr, -+ &sensor_dev_attr_temp2_label.dev_attr.attr, -+ &sensor_dev_attr_temp3_input.dev_attr.attr, -+ &sensor_dev_attr_temp3_label.dev_attr.attr, -+ &sensor_dev_attr_fan1_input.dev_attr.attr, -+ &sensor_dev_attr_fan1_label.dev_attr.attr, -+ &sensor_dev_attr_fan2_input.dev_attr.attr, -+ &sensor_dev_attr_fan2_label.dev_attr.attr, -+ &sensor_dev_attr_fan1_target.dev_attr.attr, -+ &sensor_dev_attr_fan2_target.dev_attr.attr, -+ NULL -+}; -+ -+static ssize_t autopoint_show(struct device *dev, -+ struct device_attribute *devattr, char *buf) -+{ -+ struct 
fancurve fancurve; -+ int err; -+ int value; -+ struct legion_private *priv = dev_get_drvdata(dev); -+ int fancurve_attr_id = to_sensor_dev_attr_2(devattr)->nr; -+ int point_id = to_sensor_dev_attr_2(devattr)->index; -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = read_fancurve(&priv->ecram, priv->conf, &fancurve); -+ mutex_unlock(&priv->fancurve_mutex); -+ -+ if (err) { -+ pr_info("Reading fancurve failed\n"); -+ return -EOPNOTSUPP; -+ } -+ if (!(point_id >= 0 && point_id < MAXFANCURVESIZE)) { -+ pr_info("Reading fancurve failed due to wrong point id: %d\n", -+ point_id); -+ return -EOPNOTSUPP; -+ } -+ -+ switch (fancurve_attr_id) { -+ case FANCURVE_ATTR_PWM1: -+ value = fancurve.points[point_id].rpm1_raw * 100; -+ break; -+ case FANCURVE_ATTR_PWM2: -+ value = fancurve.points[point_id].rpm2_raw * 100; -+ break; -+ case FANCURVE_ATTR_CPU_TEMP: -+ value = fancurve.points[point_id].cpu_max_temp_celsius; -+ break; -+ case FANCURVE_ATTR_CPU_HYST: -+ value = fancurve.points[point_id].cpu_min_temp_celsius; -+ break; -+ case FANCURVE_ATTR_GPU_TEMP: -+ value = fancurve.points[point_id].gpu_max_temp_celsius; -+ break; -+ case FANCURVE_ATTR_GPU_HYST: -+ value = fancurve.points[point_id].gpu_min_temp_celsius; -+ break; -+ case FANCURVE_ATTR_IC_TEMP: -+ value = fancurve.points[point_id].ic_max_temp_celsius; -+ break; -+ case FANCURVE_ATTR_IC_HYST: -+ value = fancurve.points[point_id].ic_min_temp_celsius; -+ break; -+ case FANCURVE_ATTR_ACCEL: -+ value = fancurve.points[point_id].accel; -+ break; -+ case FANCURVE_ATTR_DECEL: -+ value = fancurve.points[point_id].decel; -+ break; -+ case FANCURVE_SIZE: -+ value = fancurve.size; -+ break; -+ default: -+ pr_info("Reading fancurve failed due to wrong attribute id: %d\n", -+ fancurve_attr_id); -+ return -EOPNOTSUPP; -+ } -+ -+ return sprintf(buf, "%d\n", value); -+} -+ -+static ssize_t autopoint_store(struct device *dev, -+ struct device_attribute *devattr, -+ const char *buf, size_t count) -+{ -+ struct fancurve fancurve; -+ int err; -+ int value; -+ bool valid; -+ struct legion_private *priv = dev_get_drvdata(dev); -+ int fancurve_attr_id = to_sensor_dev_attr_2(devattr)->nr; -+ int point_id = to_sensor_dev_attr_2(devattr)->index; -+ -+ if (!(point_id >= 0 && point_id < MAXFANCURVESIZE)) { -+ pr_info("Writing fancurve failed due to wrong point id: %d\n", -+ point_id); -+ err = -EOPNOTSUPP; -+ goto error; -+ } -+ -+ err = kstrtoint(buf, 0, &value); -+ if (err) { -+ pr_info("Parsing for hwmon store was not successful: error:%d; point_id: %d; fancurve_attr_id: %d\n", -+ err, point_id, fancurve_attr_id); -+ goto error; -+ } -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = read_fancurve(&priv->ecram, priv->conf, &fancurve); -+ -+ if (err) { -+ pr_info("Reading fancurve failed\n"); -+ err = -EOPNOTSUPP; -+ goto error_mutex; -+ } -+ -+ switch (fancurve_attr_id) { -+ case FANCURVE_ATTR_PWM1: -+ valid = fancurve_set_rpm1(&fancurve, point_id, value); -+ break; -+ case FANCURVE_ATTR_PWM2: -+ valid = fancurve_set_rpm2(&fancurve, point_id, value); -+ break; -+ case FANCURVE_ATTR_CPU_TEMP: -+ valid = fancurve_set_cpu_temp_max(&fancurve, point_id, value); -+ break; -+ case FANCURVE_ATTR_CPU_HYST: -+ valid = fancurve_set_cpu_temp_min(&fancurve, point_id, value); -+ break; -+ case FANCURVE_ATTR_GPU_TEMP: -+ valid = fancurve_set_gpu_temp_max(&fancurve, point_id, value); -+ break; -+ case FANCURVE_ATTR_GPU_HYST: -+ valid = fancurve_set_gpu_temp_min(&fancurve, point_id, value); -+ break; -+ case FANCURVE_ATTR_IC_TEMP: -+ valid = fancurve_set_ic_temp_max(&fancurve, point_id, 
value); -+ break; -+ case FANCURVE_ATTR_IC_HYST: -+ valid = fancurve_set_ic_temp_min(&fancurve, point_id, value); -+ break; -+ case FANCURVE_ATTR_ACCEL: -+ valid = fancurve_set_accel(&fancurve, point_id, value); -+ break; -+ case FANCURVE_ATTR_DECEL: -+ valid = fancurve_set_decel(&fancurve, point_id, value); -+ break; -+ case FANCURVE_SIZE: -+ valid = fancurve_set_size(&fancurve, value, true); -+ break; -+ default: -+ pr_info("Writing fancurve failed due to wrong attribute id: %d\n", -+ fancurve_attr_id); -+ err = -EOPNOTSUPP; -+ goto error_mutex; -+ } -+ -+ if (!valid) { -+ pr_info("Ignoring invalid fancurve value %d for attribute %d at point %d\n", -+ value, fancurve_attr_id, point_id); -+ err = -EOPNOTSUPP; -+ goto error_mutex; -+ } -+ -+ err = write_fancurve(&priv->ecram, priv->conf, &fancurve, false); -+ if (err) { -+ pr_info("Writing fancurve failed for accessing hwmon at point_id: %d\n", -+ point_id); -+ err = -EOPNOTSUPP; -+ goto error_mutex; -+ } -+ -+ mutex_unlock(&priv->fancurve_mutex); -+ return count; -+ -+error_mutex: -+ mutex_unlock(&priv->fancurve_mutex); -+error: -+ return count; -+} -+ -+// rpm1 -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 2); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 4); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 9); -+// rpm2 -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point1_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point2_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point3_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 2); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point4_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point5_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 4); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point6_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point7_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point8_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point9_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point10_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 9); -+// CPU temp -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 2); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 4); -+static 
SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 9); -+// CPU temp hyst -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 2); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 4); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 9); -+// GPU temp -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point1_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point2_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point3_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 2); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point4_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point5_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 4); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point6_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point7_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point8_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point9_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point10_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 9); -+// GPU temp hyst -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point1_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point2_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point3_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 2); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point4_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point5_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 4); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point6_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point7_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point8_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point9_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point10_temp_hyst, autopoint, -+ 
FANCURVE_ATTR_GPU_HYST, 9); -+// IC temp -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point1_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point2_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point3_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 2); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point4_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point5_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 4); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point6_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point7_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point8_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point9_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point10_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 9); -+// IC temp hyst -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point1_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point2_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point3_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 2); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point4_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point5_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 4); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point6_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point7_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point8_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point9_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point10_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 9); -+// accel -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 2); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 4); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 9); -+// decel -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 2); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 4); -+static 
SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 9); -+//size -+static SENSOR_DEVICE_ATTR_2_RW(auto_points_size, autopoint, FANCURVE_SIZE, 0); -+ -+static ssize_t minifancurve_show(struct device *dev, -+ struct device_attribute *devattr, char *buf) -+{ -+ bool value; -+ int err; -+ struct legion_private *priv = dev_get_drvdata(dev); -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = read_minifancurve(&priv->ecram, priv->conf, &value); -+ if (err) { -+ err = -1; -+ pr_info("Reading minifancurve not succesful\n"); -+ goto error_unlock; -+ } -+ mutex_unlock(&priv->fancurve_mutex); -+ return sprintf(buf, "%d\n", value); -+ -+error_unlock: -+ mutex_unlock(&priv->fancurve_mutex); -+ return -1; -+} -+ -+static ssize_t minifancurve_store(struct device *dev, -+ struct device_attribute *devattr, -+ const char *buf, size_t count) -+{ -+ int value; -+ int err; -+ struct legion_private *priv = dev_get_drvdata(dev); -+ -+ err = kstrtoint(buf, 0, &value); -+ if (err) { -+ err = -1; -+ pr_info("Parse for hwmon store is not succesful: error:%d\n", -+ err); -+ goto error; -+ } -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = write_minifancurve(&priv->ecram, priv->conf, value); -+ if (err) { -+ err = -1; -+ pr_info("Writing minifancurve not succesful\n"); -+ goto error_unlock; -+ } -+ mutex_unlock(&priv->fancurve_mutex); -+ return count; -+ -+error_unlock: -+ mutex_unlock(&priv->fancurve_mutex); -+error: -+ return err; -+} -+ -+static SENSOR_DEVICE_ATTR_RW(minifancurve, minifancurve, 0); -+ -+static ssize_t pwm1_mode_show(struct device *dev, -+ struct device_attribute *devattr, char *buf) -+{ -+ bool value; -+ int err; -+ struct legion_private *priv = dev_get_drvdata(dev); -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = read_maximumfanspeed(&priv->ecram, priv->conf, &value); -+ if (err) { -+ err = -1; -+ pr_info("Reading pwm1_mode/maximumfanspeed not succesful\n"); -+ goto error_unlock; -+ } -+ mutex_unlock(&priv->fancurve_mutex); -+ return sprintf(buf, "%d\n", value ? 
0 : 2); -+ -+error_unlock: -+ mutex_unlock(&priv->fancurve_mutex); -+ return -1; -+} -+ -+static ssize_t pwm1_mode_store(struct device *dev, -+ struct device_attribute *devattr, -+ const char *buf, size_t count) -+{ -+ int value; -+ int is_maximumfanspeed; -+ int err; -+ struct legion_private *priv = dev_get_drvdata(dev); -+ -+ err = kstrtoint(buf, 0, &value); -+ if (err) { -+ err = -1; -+ pr_info("Parse for hwmon store is not succesful: error:%d\n", -+ err); -+ goto error; -+ } -+ is_maximumfanspeed = value == 0; -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = write_maximumfanspeed(&priv->ecram, priv->conf, -+ is_maximumfanspeed); -+ if (err) { -+ err = -1; -+ pr_info("Writing pwm1_mode/maximumfanspeed not succesful\n"); -+ goto error_unlock; -+ } -+ mutex_unlock(&priv->fancurve_mutex); -+ return count; -+ -+error_unlock: -+ mutex_unlock(&priv->fancurve_mutex); -+error: -+ return err; -+} -+ -+static SENSOR_DEVICE_ATTR_RW(pwm1_mode, pwm1_mode, 0); -+ -+static struct attribute *fancurve_hwmon_attributes[] = { -+ &sensor_dev_attr_pwm1_auto_point1_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point2_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point3_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point4_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point5_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point6_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point7_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point8_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point9_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point10_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point1_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point2_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point3_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point4_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point5_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point6_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point7_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point8_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point9_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point10_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point1_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point2_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point3_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point4_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point5_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point6_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point7_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point8_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point9_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point10_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point1_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point2_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point3_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point4_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point5_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point6_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point7_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point8_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point9_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point10_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point1_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point2_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point3_temp.dev_attr.attr, -+ 
&sensor_dev_attr_pwm2_auto_point4_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point5_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point6_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point7_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point8_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point9_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point10_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point1_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point2_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point3_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point4_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point5_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point6_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point7_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point8_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point9_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point10_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point1_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point2_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point3_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point4_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point5_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point6_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point7_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point8_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point9_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point10_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point1_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point2_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point3_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point4_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point5_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point6_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point7_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point8_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point9_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point10_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point1_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point2_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point3_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point4_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point5_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point6_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point7_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point8_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point9_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point10_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point1_decel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point2_decel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point3_decel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point4_decel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point5_decel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point6_decel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point7_decel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point8_decel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point9_decel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point10_decel.dev_attr.attr, -+ // -+ &sensor_dev_attr_auto_points_size.dev_attr.attr, -+ &sensor_dev_attr_minifancurve.dev_attr.attr, -+ &sensor_dev_attr_pwm1_mode.dev_attr.attr, NULL -+}; -+ -+static umode_t 
legion_is_visible(struct kobject *kobj, struct attribute *attr, -+ int idx) -+{ -+ bool supported = true; -+ struct device *dev = kobj_to_dev(kobj); -+ struct legion_private *priv = dev_get_drvdata(dev); -+ -+ if (attr == &sensor_dev_attr_minifancurve.dev_attr.attr) -+ supported = priv->conf->has_minifancurve; -+ -+ return supported ? attr->mode : 0; -+} -+ -+static const struct attribute_group legion_hwmon_sensor_group = { -+ .attrs = sensor_hwmon_attributes, -+ .is_visible = NULL -+}; -+ -+static const struct attribute_group legion_hwmon_fancurve_group = { -+ .attrs = fancurve_hwmon_attributes, -+ .is_visible = legion_is_visible, -+}; -+ -+static const struct attribute_group *legion_hwmon_groups[] = { -+ &legion_hwmon_sensor_group, &legion_hwmon_fancurve_group, NULL -+}; -+ -+ssize_t legion_hwmon_init(struct legion_private *priv) -+{ -+ //TODO: use hwmon_device_register_with_groups or -+ // hwmon_device_register_with_info (latter means all hwmon functions have to be -+ // changed) -+ // some laptop drivers do it one way, some the other -+ // TODO: Use devm_hwmon_device_register_with_groups ? -+ // some laptop drivers use this, some do not -+ struct device *hwmon_dev = hwmon_device_register_with_groups( -+ &priv->platform_device->dev, "legion_hwmon", priv, -+ legion_hwmon_groups); -+ if (IS_ERR_OR_NULL(hwmon_dev)) { -+ pr_err("hwmon_device_register failed!\n"); -+ return PTR_ERR(hwmon_dev); -+ } -+ dev_set_drvdata(hwmon_dev, priv); -+ priv->hwmon_dev = hwmon_dev; -+ return 0; -+} -+ -+void legion_hwmon_exit(struct legion_private *priv) -+{ -+ pr_info("Unloading legion hwmon\n"); -+ if (priv->hwmon_dev) { -+ hwmon_device_unregister(priv->hwmon_dev); -+ priv->hwmon_dev = NULL; -+ } -+ pr_info("Unloading legion hwmon done\n"); -+} -+ -+/* ============================= */ -+/* Platform driver */ -+/* ============================ */ -+ -+int legion_add(struct platform_device *pdev) -+{ -+ struct legion_private *priv; -+ const struct dmi_system_id *dmi_sys; -+ int err; -+ u16 ec_read_id; -+ bool is_denied = true; -+ bool is_allowed = false; -+ bool do_load_by_list = false; -+ bool do_load = false; -+ //struct legion_private *priv = dev_get_drvdata(&pdev->dev); -+ dev_info(&pdev->dev, "legion_laptop platform driver probing\n"); -+ -+ dev_info(&pdev->dev, "Read identifying information: DMI_SYS_VENDOR: %s; DMI_PRODUCT_NAME: %s; DMI_BIOS_VERSION:%s\n", -+ dmi_get_system_info(DMI_SYS_VENDOR), -+ dmi_get_system_info(DMI_PRODUCT_NAME), -+ dmi_get_system_info(DMI_BIOS_VERSION)); -+ -+ // TODO: allocate? -+ priv = &_priv; -+ priv->platform_device = pdev; -+ err = legion_shared_init(priv); -+ if (err) { -+ dev_info(&pdev->dev, "legion_shared_init failed\n"); -+ goto err_legion_shared_init; -+ } -+ dev_set_drvdata(&pdev->dev, priv); -+ -+ // TODO: remove -+ pr_info("Read identifying information: DMI_SYS_VENDOR: %s; DMI_PRODUCT_NAME: %s; DMI_BIOS_VERSION:%s\n", -+ dmi_get_system_info(DMI_SYS_VENDOR), -+ dmi_get_system_info(DMI_PRODUCT_NAME), -+ dmi_get_system_info(DMI_BIOS_VERSION)); -+ -+ dmi_sys = dmi_first_match(optimistic_allowlist); -+ is_allowed = dmi_sys != NULL; -+ is_denied = dmi_check_system(denylist); -+ do_load_by_list = is_allowed && !is_denied; -+ do_load = do_load_by_list || force; -+ -+ dev_info( -+ &pdev->dev, -+ "is_denied: %d; is_allowed: %d; do_load_by_list: %d; do_load: %d\n", -+ is_denied, is_allowed, do_load_by_list, do_load); -+ -+ if (!(do_load)) { -+ dev_info( -+ &pdev->dev, -+ "Module not usable for this laptop because it is not in the allowlist. 
Notify maintainer if you want to add your device or force load with param force.\n"); -+ err = -ENOMEM; -+ goto err_model_mismatch; -+ } -+ -+ if (force) -+ dev_info(&pdev->dev, "legion_laptop is forced to load.\n"); -+ -+ if (!do_load_by_list && do_load) { -+ dev_info( -+ &pdev->dev, -+ "legion_laptop is forced to load and would otherwise not be loaded\n"); -+ } -+ -+ // if forced and no model matched, use config for first model -+ if (dmi_sys == NULL) -+ dmi_sys = &optimistic_allowlist[0]; -+ dev_info(&pdev->dev, "Using configuration for system: %s\n", -+ dmi_sys->ident); -+ -+ priv->conf = dmi_sys->driver_data; -+ -+ err = ecram_init(&priv->ecram, priv->conf->memoryio_physical_ec_start, -+ priv->conf->memoryio_size); -+ if (err) { -+ dev_info(&pdev->dev, -+ "Could not init access to embedded controller\n"); -+ goto err_ecram_init; -+ } -+ -+ ec_read_id = read_ec_id(&priv->ecram, priv->conf); -+ dev_info(&pdev->dev, "Read embedded controller ID 0x%x\n", ec_read_id); -+ if (priv->conf->check_embedded_controller_id && -+ !(ec_read_id == priv->conf->embedded_controller_id)) { -+ err = -ENOMEM; -+ dev_info(&pdev->dev, "Expected EC chip id 0x%x but read 0x%x\n", -+ priv->conf->embedded_controller_id, ec_read_id); -+ goto err_ecram_id; -+ } -+ if (!priv->conf->check_embedded_controller_id) { -+ dev_info(&pdev->dev, -+ "Skipped checking embedded controller id\n"); -+ } -+ -+ dev_info(&pdev->dev, "Creating debugfs interface\n"); -+ legion_debugfs_init(priv); -+ -+ pr_info("Creating sysfs interface\n"); -+ err = legion_sysfs_init(priv); -+ if (err) { -+ dev_info(&pdev->dev, "Creating sysfs interface failed\n"); -+ goto err_sysfs_init; -+ } -+ -+ pr_info("Creating hwmon interface\n"); -+ err = legion_hwmon_init(priv); -+ if (err) -+ goto err_hwmon_init; -+ -+ pr_info("Creating platform profile support\n"); -+ err = legion_platform_profile_init(priv); -+ if (err) { -+ dev_info(&pdev->dev, "Creating platform profile failed\n"); -+ goto err_platform_profile; -+ } -+ -+ pr_info("Init WMI driver support\n"); -+ err = legion_wmi_init(); -+ if (err) { -+ dev_info(&pdev->dev, "Init WMI driver failed\n"); -+ goto err_wmi; -+ } -+ -+ dev_info(&pdev->dev, "legion_laptop loaded for this device\n"); -+ return 0; -+ -+ // TODO: remove eventually -+ legion_wmi_exit(); -+err_wmi: -+ legion_platform_profile_exit(priv); -+err_platform_profile: -+ legion_hwmon_exit(priv); -+err_hwmon_init: -+ legion_sysfs_exit(priv); -+err_sysfs_init: -+ legion_debugfs_exit(priv); -+err_ecram_id: -+ ecram_exit(&priv->ecram); -+err_ecram_init: -+ legion_shared_exit(priv); -+err_legion_shared_init: -+err_model_mismatch: -+ dev_info(&pdev->dev, "legion_laptop not loaded for this device\n"); -+ return err; -+} -+ -+int legion_remove(struct platform_device *pdev) -+{ -+ struct legion_private *priv = dev_get_drvdata(&pdev->dev); -+ -+ mutex_lock(&legion_shared_mutex); -+ priv->loaded = false; -+ mutex_unlock(&legion_shared_mutex); -+ -+ // first unregister wmi, so toggling powermode does not -+ // generate events anymore that might even be delayed -+ legion_wmi_exit(); -+ legion_platform_profile_exit(priv); -+ -+ // toggle power mode to load default setting from embedded controller -+ // again -+ toggle_powermode(&priv->ecram, priv->conf); -+ -+ legion_hwmon_exit(priv); -+ legion_sysfs_exit(priv); -+ legion_debugfs_exit(priv); -+ ecram_exit(&priv->ecram); -+ legion_shared_exit(priv); -+ -+ pr_info("Legion platform unloaded\n"); -+ return 0; -+} -+ -+int legion_resume(struct platform_device *pdev) -+{ -+ //struct legion_private *priv = 
dev_get_drvdata(&pdev->dev); -+ dev_info(&pdev->dev, "Resumed in legion-laptop\n"); -+ -+ return 0; -+} -+ -+#ifdef CONFIG_PM_SLEEP -+static int legion_pm_resume(struct device *dev) -+{ -+ //struct legion_private *priv = dev_get_drvdata(dev); -+ dev_info(dev, "Resumed PM in legion-laptop\n"); -+ -+ return 0; -+} -+#endif -+static SIMPLE_DEV_PM_OPS(legion_pm, NULL, legion_pm_resume); -+ -+// same as ideapad -+static const struct acpi_device_id legion_device_ids[] = { -+ { "PNP0C09", 0 }, // todo: change to "VPC2004" -+ { "", 0 }, -+}; -+MODULE_DEVICE_TABLE(acpi, legion_device_ids); -+ -+static struct platform_driver legion_driver = { -+ .probe = legion_add, -+ .remove = legion_remove, -+ .resume = legion_resume, -+ .driver = { -+ .name = "legion", -+ .pm = &legion_pm, -+ .acpi_match_table = ACPI_PTR(legion_device_ids), -+ }, -+}; -+ -+int __init legion_init(void) -+{ -+ int err; -+ -+ pr_info("legion_laptop starts loading\n"); -+ err = platform_driver_register(&legion_driver); -+ if (err) { -+ pr_info("legion_laptop: platform_driver_register failed\n"); -+ return err; -+ } -+ -+ return 0; -+} -+ -+module_init(legion_init); -+ -+void __exit legion_exit(void) -+{ -+ platform_driver_unregister(&legion_driver); -+ pr_info("legion_laptop exit\n"); -+} -+ -+module_exit(legion_exit); diff --git a/drivers/platform/x86/steamdeck.c b/drivers/platform/x86/steamdeck.c new file mode 100644 index 000000000000..77a6677ec19e @@ -9257,72 +7077,36 @@ index ab0c5bd1a60f..f4989f706d7f 100644 $(dst)/%.ko.gz: $(dst)/%.ko FORCE $(call cmd,gzip) -- -2.40.0 +2.40.1 -From d31de1cb3de2457a3d287d96b456e1a3732165e4 Mon Sep 17 00:00:00 2001 +From a6fac309dae53f34208de29f5b82d053ca55eed6 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Sat, 22 Apr 2023 11:43:21 +0200 -Subject: [PATCH 04/12] fixes +Date: Wed, 26 Apr 2023 22:04:18 +0200 +Subject: [PATCH 4/8] fixes Signed-off-by: Peter Jung --- Documentation/ABI/stable/sysfs-block | 10 + .../testing/sysfs-class-led-trigger-blkdev | 78 ++ - Documentation/admin-guide/mm/ksm.rst | 7 + Documentation/leds/index.rst | 1 + Documentation/leds/ledtrig-blkdev.rst | 158 +++ - Documentation/x86/topology.rst | 26 + - arch/x86/include/asm/cacheinfo.h | 1 + - arch/x86/kernel/cpu/amd.c | 1 + - arch/x86/kernel/cpu/cacheinfo.c | 36 + - arch/x86/kernel/cpu/hygon.c | 1 + + arch/x86/kernel/acpi/boot.c | 11 +- arch/x86/net/bpf_jit_comp.c | 5 +- drivers/bluetooth/btusb.c | 2 +- - .../drm/amd/display/dc/bios/bios_parser2.c | 7 +- - .../drm/amd/display/dc/dcn20/dcn20_resource.c | 2 +- - .../drm/amd/display/dc/dcn21/dcn21_resource.c | 2 +- - .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 3 +- drivers/leds/trigger/Kconfig | 9 + drivers/leds/trigger/Makefile | 1 + drivers/leds/trigger/ledtrig-blkdev.c | 1221 +++++++++++++++++ fs/eventpoll.c | 188 ++- - fs/proc/base.c | 1 + - include/linux/atomic/atomic-arch-fallback.h | 208 ++- - include/linux/atomic/atomic-instrumented.h | 68 +- - include/linux/atomic/atomic-long.h | 38 +- - include/linux/mm_types.h | 7 +- include/linux/pageblock-flags.h | 2 +- - include/linux/rcuref.h | 155 +++ - include/linux/types.h | 6 + - include/net/dst.h | 30 +- - include/net/ip6_fib.h | 3 - - include/net/ip6_route.h | 2 +- - include/net/route.h | 3 - - include/net/sock.h | 2 +- kernel/kheaders.c | 10 +- kernel/padata.c | 4 +- - lib/Makefile | 2 +- - lib/rcuref.c | 281 ++++ - mm/ksm.c | 185 ++- - net/bridge/br_nf_core.c | 2 +- - net/core/dst.c | 26 +- - net/core/rtnetlink.c | 2 +- - net/ipv4/route.c | 20 +- - net/ipv4/xfrm4_policy.c | 4 +- - net/ipv6/route.c | 32 +- - 
net/ipv6/xfrm6_policy.c | 4 +- - net/netfilter/ipvs/ip_vs_xmit.c | 4 +- + mm/page_alloc.c | 22 +- scripts/Makefile.vmlinux_o | 2 +- - scripts/atomic/atomics.tbl | 2 +- - scripts/atomic/fallbacks/add_negative | 11 +- sound/pci/hda/cs35l41_hda.c | 2 +- - .../selftests/mm/ksm_functional_tests.c | 96 +- - 51 files changed, 2751 insertions(+), 222 deletions(-) + 17 files changed, 1636 insertions(+), 90 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-class-led-trigger-blkdev create mode 100644 Documentation/leds/ledtrig-blkdev.rst create mode 100644 drivers/leds/trigger/ledtrig-blkdev.c - create mode 100644 include/linux/rcuref.h - create mode 100644 lib/rcuref.c diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block index 282de3680367..ac1dd2fbd855 100644 @@ -9429,24 +7213,6 @@ index 000000000000..28ce8c814fb7 + symbolic links in this directory are *kernel* names, which + may not match the device special file paths written to + link_device and unlink_device.) -diff --git a/Documentation/admin-guide/mm/ksm.rst b/Documentation/admin-guide/mm/ksm.rst -index eed51a910c94..270560fef3b2 100644 ---- a/Documentation/admin-guide/mm/ksm.rst -+++ b/Documentation/admin-guide/mm/ksm.rst -@@ -171,6 +171,13 @@ stable_node_chains - the number of KSM pages that hit the ``max_page_sharing`` limit - stable_node_dups - number of duplicated KSM pages -+zero_pages_sharing -+ how many empty pages are sharing kernel zero page(s) instead of -+ with each other as it would happen normally. Only effective when -+ enabling ``use_zero_pages`` knob. -+ -+When enabling ``use_zero_pages``, the sum of ``pages_sharing`` + -+``zero_pages_sharing`` represents how much really saved by KSM. - - A high ratio of ``pages_sharing`` to ``pages_shared`` indicates good - sharing, but a high ratio of ``pages_unshared`` to ``pages_sharing`` diff --git a/Documentation/leds/index.rst b/Documentation/leds/index.rst index b9ca081fac71..5e37d8e7bd28 100644 --- a/Documentation/leds/index.rst @@ -9623,147 +7389,32 @@ index 000000000000..9ff5b99de451 +* The ``blkdev`` LED trigger supports many-to-many device/LED associations. + A device can be associated with multiple LEDs, and an LED can be associated + with multiple devices. -diff --git a/Documentation/x86/topology.rst b/Documentation/x86/topology.rst -index 7f58010ea86a..9de14f3f7783 100644 ---- a/Documentation/x86/topology.rst -+++ b/Documentation/x86/topology.rst -@@ -33,6 +33,7 @@ historical nature and should be cleaned up. - The topology of a system is described in the units of: +diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c +index 0dac4ab5b55b..21b542a6866c 100644 +--- a/arch/x86/kernel/acpi/boot.c ++++ b/arch/x86/kernel/acpi/boot.c +@@ -1858,13 +1858,18 @@ early_param("acpi_sci", setup_acpi_sci); - - packages -+ - cluster - - cores - - threads + int __acpi_acquire_global_lock(unsigned int *lock) + { +- unsigned int old, new; ++ unsigned int old, new, val; -@@ -90,6 +91,22 @@ Package-related topology information in the kernel: - Cache. In general, it is a number identifying an LLC uniquely on the - system. - -+Clusters -+======== -+A cluster consists of threads of one or more cores sharing the same L2 cache. + old = READ_ONCE(*lock); + do { +- new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1)); ++ val = (old >> 1) & 0x1; ++ new = (old & ~0x3) + 2 + val; + } while (!try_cmpxchg(lock, &old, new)); +- return ((new & 0x3) < 3) ? 
-1 : 0; + -+Cluster-related topology information in the kernel: ++ if (val) ++ return 0; + -+ - cluster_id: -+ -+ A per-CPU variable containing: -+ -+ - Upper bits extracted from the APIC ID. CPUs which have the same value -+ in these bits share an L2 and have the same cluster_id. -+ -+ CPUs for which cluster information is unavailable will show 65535 -+ (BAD_APICID) as the cluster_id. -+ - Cores - ===== - A core consists of 1 or more threads. It does not matter whether the threads -@@ -125,6 +142,11 @@ Thread-related topology information in the kernel: - - The number of online threads is also printed in /proc/cpuinfo "siblings." - -+ - topology_cluster_cpumask(): -+ -+ The cpumask contains all online threads in the cluster to which a thread -+ belongs. -+ - - topology_sibling_cpumask(): - - The cpumask contains all online threads in the core to which a thread -@@ -138,6 +160,10 @@ Thread-related topology information in the kernel: - - The physical package ID to which a thread belongs. - -+ - topology_cluster_id(); -+ -+ The ID of the cluster to which a thread belongs. -+ - - topology_core_id(); - - The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo -diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h -index ce9685fc78d8..2034cd556c07 100644 ---- a/arch/x86/include/asm/cacheinfo.h -+++ b/arch/x86/include/asm/cacheinfo.h -@@ -7,6 +7,7 @@ extern unsigned int memory_caching_control; - #define CACHE_MTRR 0x01 - #define CACHE_PAT 0x02 - -+void cacheinfo_topoext_init_l2c_id(struct cpuinfo_x86 *c, int cpu); - void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu); - void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu); - -diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index 95cdd08c4cbb..d6594727f924 100644 ---- a/arch/x86/kernel/cpu/amd.c -+++ b/arch/x86/kernel/cpu/amd.c -@@ -358,6 +358,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) - if (!err) - c->x86_coreid_bits = get_count_order(c->x86_max_cores); - -+ cacheinfo_topoext_init_l2c_id(c, cpu); - cacheinfo_amd_init_llc_id(c, cpu); - - } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { -diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c -index 4063e8991211..947a1f27278c 100644 ---- a/arch/x86/kernel/cpu/cacheinfo.c -+++ b/arch/x86/kernel/cpu/cacheinfo.c -@@ -659,6 +659,42 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c) - return i; ++ return -1; } -+void cacheinfo_topoext_init_l2c_id(struct cpuinfo_x86 *c, int cpu) -+{ -+ u32 eax, ebx, ecx, edx, num_sharing_cache; -+ int i = 0, bits; -+ -+ /* Check if L2 cache identifiers exists. */ -+ if (!cpuid_ecx(0x80000006)) -+ return; -+ -+ while (true) { -+ u32 level; -+ -+ cpuid_count(0x8000001d, i, &eax, &ebx, &ecx, &edx); -+ if (!eax) -+ return; -+ -+ /* -+ * Check if the current leaf is for L2 cache using -+ * eax[7:5] used to describe the cache level. -+ */ -+ level = (eax >> 5) & 0x7; -+ if (level == 2) -+ break; -+ -+ ++i; -+ } -+ -+ /* -+ * L2 ID is calculated from the number of threads -+ * sharing the L2 cache. 
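
To make the derivation described in the comment above concrete: CPUID leaf 0x8000001d reports how many logical CPUs share the L2, that count is rounded up to a power of two, and that many low APIC-ID bits are shifted away, so all siblings of one L2 collapse to the same identifier. A small user-space sketch of just that arithmetic (count_order() is a hand-rolled stand-in for the kernel's get_count_order(), and the CPUID value is assumed rather than queried):

	#include <stdio.h>

	/* Stand-in for get_count_order(): smallest n with (1 << n) >= x. */
	static int count_order(unsigned int x)
	{
		int n = 0;

		while ((1u << n) < x)
			n++;
		return n;
	}

	int main(void)
	{
		/* Assume CPUID 0x8000001d, EAX[25:14] reported 2 threads per L2. */
		unsigned int num_sharing_cache = 2;
		int bits = count_order(num_sharing_cache);

		/* APIC IDs 0..3: pairs {0,1} and {2,3} collapse to L2 IDs 0 and 1. */
		for (unsigned int apicid = 0; apicid < 4; apicid++)
			printf("apicid %u -> l2c_id %u\n", apicid, apicid >> bits);
		return 0;
	}
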
-+ */ -+ num_sharing_cache = ((eax >> 14) & 0xfff) + 1; -+ bits = get_count_order(num_sharing_cache); -+ per_cpu(cpu_l2c_id, cpu) = c->apicid >> bits; -+} -+ - void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu) - { - /* -diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c -index 5a2962c492d3..cb0025b4a2fd 100644 ---- a/arch/x86/kernel/cpu/hygon.c -+++ b/arch/x86/kernel/cpu/hygon.c -@@ -89,6 +89,7 @@ static void hygon_get_topology(struct cpuinfo_x86 *c) - /* Socket ID is ApicId[6] for these processors. */ - c->phys_proc_id = c->apicid >> APICID_SOCKET_ID_BIT; - -+ cacheinfo_topoext_init_l2c_id(c, cpu); - cacheinfo_hygon_init_llc_id(c, cpu); - } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { - u64 value; + int __acpi_release_global_lock(unsigned int *lock) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 1056bbf55b17..212bfd1517ec 100644 --- a/arch/x86/net/bpf_jit_comp.c @@ -9794,64 +7445,6 @@ index 5c536151ef83..5a80379253a7 100644 gpiod_set_value_cansleep(reset_gpio, 1); return; -diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c -index e381de2429fa..ae3783a7d7f4 100644 ---- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c -+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c -@@ -515,11 +515,8 @@ static enum bp_result get_gpio_i2c_info( - info->i2c_slave_address = record->i2c_slave_addr; - - /* TODO: check how to get register offset for en, Y, etc. */ -- info->gpio_info.clk_a_register_index = -- le16_to_cpu( -- header->gpio_pin[table_index].data_a_reg_index); -- info->gpio_info.clk_a_shift = -- header->gpio_pin[table_index].gpio_bitshift; -+ info->gpio_info.clk_a_register_index = le16_to_cpu(pin->data_a_reg_index); -+ info->gpio_info.clk_a_shift = pin->gpio_bitshift; - - return BP_RESULT_OK; - } -diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c -index 3af24ef9cb2d..51838bef7fb0 100644 ---- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c -+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c -@@ -714,7 +714,7 @@ static const struct dc_debug_options debug_defaults_drv = { - .timing_trace = false, - .clock_trace = true, - .disable_pplib_clock_request = true, -- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, -+ .pipe_split_policy = MPC_SPLIT_DYNAMIC, - .force_single_disp_pipe_split = false, - .disable_dcc = DCC_ENABLE, - .vsr_support = true, -diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c -index 8f9244fe5c86..c10ff621cb1d 100644 ---- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c -+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c -@@ -642,7 +642,7 @@ static const struct dc_debug_options debug_defaults_drv = { - .clock_trace = true, - .disable_pplib_clock_request = true, - .min_disp_clk_khz = 100000, -- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, -+ .pipe_split_policy = MPC_SPLIT_DYNAMIC, - .force_single_disp_pipe_split = false, - .disable_dcc = DCC_ENABLE, - .vsr_support = true, -diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c -index 95da6dd1cc65..c4000518dc56 100644 ---- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c -+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c -@@ -304,7 +304,8 @@ navi10_get_allowed_feature_mask(struct smu_context *smu, - | FEATURE_MASK(FEATURE_GFX_SS_BIT) - | 
FEATURE_MASK(FEATURE_APCC_DFLL_BIT) - | FEATURE_MASK(FEATURE_FW_CTF_BIT) -- | FEATURE_MASK(FEATURE_OUT_OF_BAND_MONITOR_BIT); -+ | FEATURE_MASK(FEATURE_OUT_OF_BAND_MONITOR_BIT) -+ | FEATURE_MASK(FEATURE_TEMP_DEPENDENT_VMIN_BIT); - - if (adev->pm.pp_feature & PP_SCLK_DPM_MASK) - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_GFXCLK_BIT); diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig index dc6816d36d06..bda249068182 100644 --- a/drivers/leds/trigger/Kconfig @@ -11461,454 +9054,6 @@ index 64659b110973..4cad490028ab 100644 break; case EPOLL_CTL_MOD: if (epi) { -diff --git a/fs/proc/base.c b/fs/proc/base.c -index 5e0e0ccd47aa..07463ad4a70a 100644 ---- a/fs/proc/base.c -+++ b/fs/proc/base.c -@@ -3207,6 +3207,7 @@ static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns, - mm = get_task_mm(task); - if (mm) { - seq_printf(m, "ksm_rmap_items %lu\n", mm->ksm_rmap_items); -+ seq_printf(m, "zero_pages_sharing %lu\n", mm->ksm_zero_pages_sharing); - mmput(mm); - } - -diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h -index 77bc5522e61c..4226379a232d 100644 ---- a/include/linux/atomic/atomic-arch-fallback.h -+++ b/include/linux/atomic/atomic-arch-fallback.h -@@ -1208,15 +1208,21 @@ arch_atomic_inc_and_test(atomic_t *v) - #define arch_atomic_inc_and_test arch_atomic_inc_and_test - #endif - -+#ifndef arch_atomic_add_negative_relaxed -+#ifdef arch_atomic_add_negative -+#define arch_atomic_add_negative_acquire arch_atomic_add_negative -+#define arch_atomic_add_negative_release arch_atomic_add_negative -+#define arch_atomic_add_negative_relaxed arch_atomic_add_negative -+#endif /* arch_atomic_add_negative */ -+ - #ifndef arch_atomic_add_negative - /** -- * arch_atomic_add_negative - add and test if negative -+ * arch_atomic_add_negative - Add and test if negative - * @i: integer value to add - * @v: pointer of type atomic_t - * -- * Atomically adds @i to @v and returns true -- * if the result is negative, or false when -- * result is greater than or equal to zero. -+ * Atomically adds @i to @v and returns true if the result is negative, -+ * or false when the result is greater than or equal to zero. - */ - static __always_inline bool - arch_atomic_add_negative(int i, atomic_t *v) -@@ -1226,6 +1232,95 @@ arch_atomic_add_negative(int i, atomic_t *v) - #define arch_atomic_add_negative arch_atomic_add_negative - #endif - -+#ifndef arch_atomic_add_negative_acquire -+/** -+ * arch_atomic_add_negative_acquire - Add and test if negative -+ * @i: integer value to add -+ * @v: pointer of type atomic_t -+ * -+ * Atomically adds @i to @v and returns true if the result is negative, -+ * or false when the result is greater than or equal to zero. -+ */ -+static __always_inline bool -+arch_atomic_add_negative_acquire(int i, atomic_t *v) -+{ -+ return arch_atomic_add_return_acquire(i, v) < 0; -+} -+#define arch_atomic_add_negative_acquire arch_atomic_add_negative_acquire -+#endif -+ -+#ifndef arch_atomic_add_negative_release -+/** -+ * arch_atomic_add_negative_release - Add and test if negative -+ * @i: integer value to add -+ * @v: pointer of type atomic_t -+ * -+ * Atomically adds @i to @v and returns true if the result is negative, -+ * or false when the result is greater than or equal to zero. 
-+ */ -+static __always_inline bool -+arch_atomic_add_negative_release(int i, atomic_t *v) -+{ -+ return arch_atomic_add_return_release(i, v) < 0; -+} -+#define arch_atomic_add_negative_release arch_atomic_add_negative_release -+#endif -+ -+#ifndef arch_atomic_add_negative_relaxed -+/** -+ * arch_atomic_add_negative_relaxed - Add and test if negative -+ * @i: integer value to add -+ * @v: pointer of type atomic_t -+ * -+ * Atomically adds @i to @v and returns true if the result is negative, -+ * or false when the result is greater than or equal to zero. -+ */ -+static __always_inline bool -+arch_atomic_add_negative_relaxed(int i, atomic_t *v) -+{ -+ return arch_atomic_add_return_relaxed(i, v) < 0; -+} -+#define arch_atomic_add_negative_relaxed arch_atomic_add_negative_relaxed -+#endif -+ -+#else /* arch_atomic_add_negative_relaxed */ -+ -+#ifndef arch_atomic_add_negative_acquire -+static __always_inline bool -+arch_atomic_add_negative_acquire(int i, atomic_t *v) -+{ -+ bool ret = arch_atomic_add_negative_relaxed(i, v); -+ __atomic_acquire_fence(); -+ return ret; -+} -+#define arch_atomic_add_negative_acquire arch_atomic_add_negative_acquire -+#endif -+ -+#ifndef arch_atomic_add_negative_release -+static __always_inline bool -+arch_atomic_add_negative_release(int i, atomic_t *v) -+{ -+ __atomic_release_fence(); -+ return arch_atomic_add_negative_relaxed(i, v); -+} -+#define arch_atomic_add_negative_release arch_atomic_add_negative_release -+#endif -+ -+#ifndef arch_atomic_add_negative -+static __always_inline bool -+arch_atomic_add_negative(int i, atomic_t *v) -+{ -+ bool ret; -+ __atomic_pre_full_fence(); -+ ret = arch_atomic_add_negative_relaxed(i, v); -+ __atomic_post_full_fence(); -+ return ret; -+} -+#define arch_atomic_add_negative arch_atomic_add_negative -+#endif -+ -+#endif /* arch_atomic_add_negative_relaxed */ -+ - #ifndef arch_atomic_fetch_add_unless - /** - * arch_atomic_fetch_add_unless - add unless the number is already a given value -@@ -2329,15 +2424,21 @@ arch_atomic64_inc_and_test(atomic64_t *v) - #define arch_atomic64_inc_and_test arch_atomic64_inc_and_test - #endif - -+#ifndef arch_atomic64_add_negative_relaxed -+#ifdef arch_atomic64_add_negative -+#define arch_atomic64_add_negative_acquire arch_atomic64_add_negative -+#define arch_atomic64_add_negative_release arch_atomic64_add_negative -+#define arch_atomic64_add_negative_relaxed arch_atomic64_add_negative -+#endif /* arch_atomic64_add_negative */ -+ - #ifndef arch_atomic64_add_negative - /** -- * arch_atomic64_add_negative - add and test if negative -+ * arch_atomic64_add_negative - Add and test if negative - * @i: integer value to add - * @v: pointer of type atomic64_t - * -- * Atomically adds @i to @v and returns true -- * if the result is negative, or false when -- * result is greater than or equal to zero. -+ * Atomically adds @i to @v and returns true if the result is negative, -+ * or false when the result is greater than or equal to zero. - */ - static __always_inline bool - arch_atomic64_add_negative(s64 i, atomic64_t *v) -@@ -2347,6 +2448,95 @@ arch_atomic64_add_negative(s64 i, atomic64_t *v) - #define arch_atomic64_add_negative arch_atomic64_add_negative - #endif - -+#ifndef arch_atomic64_add_negative_acquire -+/** -+ * arch_atomic64_add_negative_acquire - Add and test if negative -+ * @i: integer value to add -+ * @v: pointer of type atomic64_t -+ * -+ * Atomically adds @i to @v and returns true if the result is negative, -+ * or false when the result is greater than or equal to zero. 
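
All of these fallbacks reduce to "add, then test the sign of the returned sum"; only the fence placement distinguishes the _relaxed, _acquire, _release and fully ordered variants. A minimal user-space rendering of the relaxed form using C11 atomics (illustrative only; the kernel versions add instrumentation and arch-specific fences):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* Relaxed add-and-test-negative: true iff the value after the add is < 0. */
	static bool add_negative_relaxed(atomic_int *v, int i)
	{
		/* fetch_add returns the old value; old + i is the new value. */
		return atomic_fetch_add_explicit(v, i, memory_order_relaxed) + i < 0;
	}

	int main(void)
	{
		atomic_int v = 0;

		printf("%d\n", add_negative_relaxed(&v, -1));	/* 1: 0 - 1 < 0   */
		printf("%d\n", add_negative_relaxed(&v, 2));	/* 0: -1 + 2 >= 0 */
		return 0;
	}
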
-+ */ -+static __always_inline bool -+arch_atomic64_add_negative_acquire(s64 i, atomic64_t *v) -+{ -+ return arch_atomic64_add_return_acquire(i, v) < 0; -+} -+#define arch_atomic64_add_negative_acquire arch_atomic64_add_negative_acquire -+#endif -+ -+#ifndef arch_atomic64_add_negative_release -+/** -+ * arch_atomic64_add_negative_release - Add and test if negative -+ * @i: integer value to add -+ * @v: pointer of type atomic64_t -+ * -+ * Atomically adds @i to @v and returns true if the result is negative, -+ * or false when the result is greater than or equal to zero. -+ */ -+static __always_inline bool -+arch_atomic64_add_negative_release(s64 i, atomic64_t *v) -+{ -+ return arch_atomic64_add_return_release(i, v) < 0; -+} -+#define arch_atomic64_add_negative_release arch_atomic64_add_negative_release -+#endif -+ -+#ifndef arch_atomic64_add_negative_relaxed -+/** -+ * arch_atomic64_add_negative_relaxed - Add and test if negative -+ * @i: integer value to add -+ * @v: pointer of type atomic64_t -+ * -+ * Atomically adds @i to @v and returns true if the result is negative, -+ * or false when the result is greater than or equal to zero. -+ */ -+static __always_inline bool -+arch_atomic64_add_negative_relaxed(s64 i, atomic64_t *v) -+{ -+ return arch_atomic64_add_return_relaxed(i, v) < 0; -+} -+#define arch_atomic64_add_negative_relaxed arch_atomic64_add_negative_relaxed -+#endif -+ -+#else /* arch_atomic64_add_negative_relaxed */ -+ -+#ifndef arch_atomic64_add_negative_acquire -+static __always_inline bool -+arch_atomic64_add_negative_acquire(s64 i, atomic64_t *v) -+{ -+ bool ret = arch_atomic64_add_negative_relaxed(i, v); -+ __atomic_acquire_fence(); -+ return ret; -+} -+#define arch_atomic64_add_negative_acquire arch_atomic64_add_negative_acquire -+#endif -+ -+#ifndef arch_atomic64_add_negative_release -+static __always_inline bool -+arch_atomic64_add_negative_release(s64 i, atomic64_t *v) -+{ -+ __atomic_release_fence(); -+ return arch_atomic64_add_negative_relaxed(i, v); -+} -+#define arch_atomic64_add_negative_release arch_atomic64_add_negative_release -+#endif -+ -+#ifndef arch_atomic64_add_negative -+static __always_inline bool -+arch_atomic64_add_negative(s64 i, atomic64_t *v) -+{ -+ bool ret; -+ __atomic_pre_full_fence(); -+ ret = arch_atomic64_add_negative_relaxed(i, v); -+ __atomic_post_full_fence(); -+ return ret; -+} -+#define arch_atomic64_add_negative arch_atomic64_add_negative -+#endif -+ -+#endif /* arch_atomic64_add_negative_relaxed */ -+ - #ifndef arch_atomic64_fetch_add_unless - /** - * arch_atomic64_fetch_add_unless - add unless the number is already a given value -@@ -2456,4 +2646,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v) - #endif - - #endif /* _LINUX_ATOMIC_FALLBACK_H */ --// b5e87bdd5ede61470c29f7a7e4de781af3770f09 -+// 00071fffa021cec66f6290d706d69c91df87bade -diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h -index 7a139ec030b0..0496816738ca 100644 ---- a/include/linux/atomic/atomic-instrumented.h -+++ b/include/linux/atomic/atomic-instrumented.h -@@ -592,6 +592,28 @@ atomic_add_negative(int i, atomic_t *v) - return arch_atomic_add_negative(i, v); - } - -+static __always_inline bool -+atomic_add_negative_acquire(int i, atomic_t *v) -+{ -+ instrument_atomic_read_write(v, sizeof(*v)); -+ return arch_atomic_add_negative_acquire(i, v); -+} -+ -+static __always_inline bool -+atomic_add_negative_release(int i, atomic_t *v) -+{ -+ kcsan_release(); -+ instrument_atomic_read_write(v, sizeof(*v)); -+ return 
arch_atomic_add_negative_release(i, v); -+} -+ -+static __always_inline bool -+atomic_add_negative_relaxed(int i, atomic_t *v) -+{ -+ instrument_atomic_read_write(v, sizeof(*v)); -+ return arch_atomic_add_negative_relaxed(i, v); -+} -+ - static __always_inline int - atomic_fetch_add_unless(atomic_t *v, int a, int u) - { -@@ -1211,6 +1233,28 @@ atomic64_add_negative(s64 i, atomic64_t *v) - return arch_atomic64_add_negative(i, v); - } - -+static __always_inline bool -+atomic64_add_negative_acquire(s64 i, atomic64_t *v) -+{ -+ instrument_atomic_read_write(v, sizeof(*v)); -+ return arch_atomic64_add_negative_acquire(i, v); -+} -+ -+static __always_inline bool -+atomic64_add_negative_release(s64 i, atomic64_t *v) -+{ -+ kcsan_release(); -+ instrument_atomic_read_write(v, sizeof(*v)); -+ return arch_atomic64_add_negative_release(i, v); -+} -+ -+static __always_inline bool -+atomic64_add_negative_relaxed(s64 i, atomic64_t *v) -+{ -+ instrument_atomic_read_write(v, sizeof(*v)); -+ return arch_atomic64_add_negative_relaxed(i, v); -+} -+ - static __always_inline s64 - atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) - { -@@ -1830,6 +1874,28 @@ atomic_long_add_negative(long i, atomic_long_t *v) - return arch_atomic_long_add_negative(i, v); - } - -+static __always_inline bool -+atomic_long_add_negative_acquire(long i, atomic_long_t *v) -+{ -+ instrument_atomic_read_write(v, sizeof(*v)); -+ return arch_atomic_long_add_negative_acquire(i, v); -+} -+ -+static __always_inline bool -+atomic_long_add_negative_release(long i, atomic_long_t *v) -+{ -+ kcsan_release(); -+ instrument_atomic_read_write(v, sizeof(*v)); -+ return arch_atomic_long_add_negative_release(i, v); -+} -+ -+static __always_inline bool -+atomic_long_add_negative_relaxed(long i, atomic_long_t *v) -+{ -+ instrument_atomic_read_write(v, sizeof(*v)); -+ return arch_atomic_long_add_negative_relaxed(i, v); -+} -+ - static __always_inline long - atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) - { -@@ -2083,4 +2149,4 @@ atomic_long_dec_if_positive(atomic_long_t *v) - }) - - #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ --// 764f741eb77a7ad565dc8d99ce2837d5542e8aee -+// 1b485de9cbaa4900de59e14ee2084357eaeb1c3a -diff --git a/include/linux/atomic/atomic-long.h b/include/linux/atomic/atomic-long.h -index 800b8c35992d..2fc51ba66beb 100644 ---- a/include/linux/atomic/atomic-long.h -+++ b/include/linux/atomic/atomic-long.h -@@ -479,6 +479,24 @@ arch_atomic_long_add_negative(long i, atomic_long_t *v) - return arch_atomic64_add_negative(i, v); - } - -+static __always_inline bool -+arch_atomic_long_add_negative_acquire(long i, atomic_long_t *v) -+{ -+ return arch_atomic64_add_negative_acquire(i, v); -+} -+ -+static __always_inline bool -+arch_atomic_long_add_negative_release(long i, atomic_long_t *v) -+{ -+ return arch_atomic64_add_negative_release(i, v); -+} -+ -+static __always_inline bool -+arch_atomic_long_add_negative_relaxed(long i, atomic_long_t *v) -+{ -+ return arch_atomic64_add_negative_relaxed(i, v); -+} -+ - static __always_inline long - arch_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) - { -@@ -973,6 +991,24 @@ arch_atomic_long_add_negative(long i, atomic_long_t *v) - return arch_atomic_add_negative(i, v); - } - -+static __always_inline bool -+arch_atomic_long_add_negative_acquire(long i, atomic_long_t *v) -+{ -+ return arch_atomic_add_negative_acquire(i, v); -+} -+ -+static __always_inline bool -+arch_atomic_long_add_negative_release(long i, atomic_long_t *v) -+{ -+ return arch_atomic_add_negative_release(i, 
v); -+} -+ -+static __always_inline bool -+arch_atomic_long_add_negative_relaxed(long i, atomic_long_t *v) -+{ -+ return arch_atomic_add_negative_relaxed(i, v); -+} -+ - static __always_inline long - arch_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) - { -@@ -1011,4 +1047,4 @@ arch_atomic_long_dec_if_positive(atomic_long_t *v) - - #endif /* CONFIG_64BIT */ - #endif /* _LINUX_ATOMIC_LONG_H */ --// e8f0e08ff072b74d180eabe2ad001282b38c2c88 -+// a194c07d7d2f4b0e178d3c118c919775d5d65f50 -diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h -index a57e6ae78e65..22b2ac82bffd 100644 ---- a/include/linux/mm_types.h -+++ b/include/linux/mm_types.h -@@ -740,7 +740,7 @@ struct mm_struct { - #ifdef CONFIG_KSM - /* - * Represent how many pages of this process are involved in KSM -- * merging. -+ * merging (not including ksm_zero_pages_sharing). - */ - unsigned long ksm_merging_pages; - /* -@@ -748,6 +748,11 @@ struct mm_struct { - * including merged and not merged. - */ - unsigned long ksm_rmap_items; -+ /* -+ * Represent how many empty pages are merged with kernel zero -+ * pages when enabling KSM use_zero_pages. -+ */ -+ unsigned long ksm_zero_pages_sharing; - #endif - #ifdef CONFIG_LRU_GEN - struct { diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 5f1ae07d724b..97cda629c9e9 100644 --- a/include/linux/pageblock-flags.h @@ -11922,314 +9067,6 @@ index 5f1ae07d724b..97cda629c9e9 100644 #endif /* CONFIG_HUGETLB_PAGE */ -diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h -new file mode 100644 -index 000000000000..2c8bfd0f1b6b ---- /dev/null -+++ b/include/linux/rcuref.h -@@ -0,0 +1,155 @@ -+/* SPDX-License-Identifier: GPL-2.0-only */ -+#ifndef _LINUX_RCUREF_H -+#define _LINUX_RCUREF_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define RCUREF_ONEREF 0x00000000U -+#define RCUREF_MAXREF 0x7FFFFFFFU -+#define RCUREF_SATURATED 0xA0000000U -+#define RCUREF_RELEASED 0xC0000000U -+#define RCUREF_DEAD 0xE0000000U -+#define RCUREF_NOREF 0xFFFFFFFFU -+ -+/** -+ * rcuref_init - Initialize a rcuref reference count with the given reference count -+ * @ref: Pointer to the reference count -+ * @cnt: The initial reference count typically '1' -+ */ -+static inline void rcuref_init(rcuref_t *ref, unsigned int cnt) -+{ -+ atomic_set(&ref->refcnt, cnt - 1); -+} -+ -+/** -+ * rcuref_read - Read the number of held reference counts of a rcuref -+ * @ref: Pointer to the reference count -+ * -+ * Return: The number of held references (0 ... N) -+ */ -+static inline unsigned int rcuref_read(rcuref_t *ref) -+{ -+ unsigned int c = atomic_read(&ref->refcnt); -+ -+ /* Return 0 if within the DEAD zone. */ -+ return c >= RCUREF_RELEASED ? 0 : c + 1; -+} -+ -+extern __must_check bool rcuref_get_slowpath(rcuref_t *ref); -+ -+/** -+ * rcuref_get - Acquire one reference on a rcuref reference count -+ * @ref: Pointer to the reference count -+ * -+ * Similar to atomic_inc_not_zero() but saturates at RCUREF_MAXREF. -+ * -+ * Provides no memory ordering, it is assumed the caller has guaranteed the -+ * object memory to be stable (RCU, etc.). It does provide a control dependency -+ * and thereby orders future stores. See documentation in lib/rcuref.c -+ * -+ * Return: -+ * False if the attempt to acquire a reference failed. 
This happens -+ * when the last reference has been put already -+ * -+ * True if a reference was successfully acquired -+ */ -+static inline __must_check bool rcuref_get(rcuref_t *ref) -+{ -+ /* -+ * Unconditionally increase the reference count. The saturation and -+ * dead zones provide enough tolerance for this. -+ */ -+ if (likely(!atomic_add_negative_relaxed(1, &ref->refcnt))) -+ return true; -+ -+ /* Handle the cases inside the saturation and dead zones */ -+ return rcuref_get_slowpath(ref); -+} -+ -+extern __must_check bool rcuref_put_slowpath(rcuref_t *ref); -+ -+/* -+ * Internal helper. Do not invoke directly. -+ */ -+static __always_inline __must_check bool __rcuref_put(rcuref_t *ref) -+{ -+ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && preemptible(), -+ "suspicious rcuref_put_rcusafe() usage"); -+ /* -+ * Unconditionally decrease the reference count. The saturation and -+ * dead zones provide enough tolerance for this. -+ */ -+ if (likely(!atomic_add_negative_release(-1, &ref->refcnt))) -+ return false; -+ -+ /* -+ * Handle the last reference drop and cases inside the saturation -+ * and dead zones. -+ */ -+ return rcuref_put_slowpath(ref); -+} -+ -+/** -+ * rcuref_put_rcusafe -- Release one reference for a rcuref reference count RCU safe -+ * @ref: Pointer to the reference count -+ * -+ * Provides release memory ordering, such that prior loads and stores are done -+ * before, and provides an acquire ordering on success such that free() -+ * must come after. -+ * -+ * Can be invoked from contexts, which guarantee that no grace period can -+ * happen which would free the object concurrently if the decrement drops -+ * the last reference and the slowpath races against a concurrent get() and -+ * put() pair. rcu_read_lock()'ed and atomic contexts qualify. -+ * -+ * Return: -+ * True if this was the last reference with no future references -+ * possible. This signals the caller that it can safely release the -+ * object which is protected by the reference counter. -+ * -+ * False if there are still active references or the put() raced -+ * with a concurrent get()/put() pair. Caller is not allowed to -+ * release the protected object. -+ */ -+static inline __must_check bool rcuref_put_rcusafe(rcuref_t *ref) -+{ -+ return __rcuref_put(ref); -+} -+ -+/** -+ * rcuref_put -- Release one reference for a rcuref reference count -+ * @ref: Pointer to the reference count -+ * -+ * Can be invoked from any context. -+ * -+ * Provides release memory ordering, such that prior loads and stores are done -+ * before, and provides an acquire ordering on success such that free() -+ * must come after. -+ * -+ * Return: -+ * -+ * True if this was the last reference with no future references -+ * possible. This signals the caller that it can safely schedule the -+ * object, which is protected by the reference counter, for -+ * deconstruction. -+ * -+ * False if there are still active references or the put() raced -+ * with a concurrent get()/put() pair. Caller is not allowed to -+ * deconstruct the protected object. 
-+ */ -+static inline __must_check bool rcuref_put(rcuref_t *ref) -+{ -+ bool released; -+ -+ preempt_disable(); -+ released = __rcuref_put(ref); -+ preempt_enable(); -+ return released; -+} -+ -+#endif -diff --git a/include/linux/types.h b/include/linux/types.h -index ea8cf60a8a79..688fb943556a 100644 ---- a/include/linux/types.h -+++ b/include/linux/types.h -@@ -175,6 +175,12 @@ typedef struct { - } atomic64_t; - #endif - -+typedef struct { -+ atomic_t refcnt; -+} rcuref_t; -+ -+#define RCUREF_INIT(i) { .refcnt = ATOMIC_INIT(i - 1) } -+ - struct list_head { - struct list_head *next, *prev; - }; -diff --git a/include/net/dst.h b/include/net/dst.h -index d67fda89cd0f..78884429deed 100644 ---- a/include/net/dst.h -+++ b/include/net/dst.h -@@ -16,6 +16,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -61,23 +62,36 @@ struct dst_entry { - unsigned short trailer_len; /* space to reserve at tail */ - - /* -- * __refcnt wants to be on a different cache line from -+ * __rcuref wants to be on a different cache line from - * input/output/ops or performance tanks badly - */ - #ifdef CONFIG_64BIT -- atomic_t __refcnt; /* 64-bit offset 64 */ -+ rcuref_t __rcuref; /* 64-bit offset 64 */ - #endif - int __use; - unsigned long lastuse; -- struct lwtunnel_state *lwtstate; - struct rcu_head rcu_head; - short error; - short __pad; - __u32 tclassid; - #ifndef CONFIG_64BIT -- atomic_t __refcnt; /* 32-bit offset 64 */ -+ struct lwtunnel_state *lwtstate; -+ rcuref_t __rcuref; /* 32-bit offset 64 */ - #endif - netdevice_tracker dev_tracker; -+ -+ /* -+ * Used by rtable and rt6_info. Moves lwtstate into the next cache -+ * line on 64bit so that lwtstate does not cause false sharing with -+ * __rcuref under contention of __rcuref. This also puts the -+ * frequently accessed members of rtable and rt6_info out of the -+ * __rcuref cache line. 
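
The placement this comment describes is enforced at compile time a few hunks below, in dst_hold(), via BUILD_BUG_ON(offsetof(struct dst_entry, __rcuref) & 63). A user-space miniature of the same compile-time guarantee (the struct layout here is invented purely for illustration):

	#include <stddef.h>
	#include <stdio.h>

	struct demo_dst {
		char hot_path[64];	/* stands in for ops/input/output      */
		unsigned int refcnt;	/* must begin the next 64-byte line    */
		char cold_tail[60];	/* uncached-list members live past it  */
	};

	/* Same idea as the BUILD_BUG_ON: refuse to compile if the refcount
	 * does not start a cache line. */
	_Static_assert((offsetof(struct demo_dst, refcnt) & 63) == 0,
		       "refcnt must be cache-line aligned");

	int main(void)
	{
		printf("refcnt offset: %zu\n", offsetof(struct demo_dst, refcnt));
		return 0;
	}
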
-+ */ -+ struct list_head rt_uncached; -+ struct uncached_list *rt_uncached_list; -+#ifdef CONFIG_64BIT -+ struct lwtunnel_state *lwtstate; -+#endif - }; - - struct dst_metrics { -@@ -225,10 +239,10 @@ static inline void dst_hold(struct dst_entry *dst) - { - /* - * If your kernel compilation stops here, please check -- * the placement of __refcnt in struct dst_entry -+ * the placement of __rcuref in struct dst_entry - */ -- BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63); -- WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0); -+ BUILD_BUG_ON(offsetof(struct dst_entry, __rcuref) & 63); -+ WARN_ON(!rcuref_get(&dst->__rcuref)); - } - - static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) -@@ -292,7 +306,7 @@ static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb - */ - static inline bool dst_hold_safe(struct dst_entry *dst) - { -- return atomic_inc_not_zero(&dst->__refcnt); -+ return rcuref_get(&dst->__rcuref); - } - - /** -diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h -index 6268963d9599..79570cb4ea9c 100644 ---- a/include/net/ip6_fib.h -+++ b/include/net/ip6_fib.h -@@ -217,9 +217,6 @@ struct rt6_info { - struct inet6_dev *rt6i_idev; - u32 rt6i_flags; - -- struct list_head rt6i_uncached; -- struct uncached_list *rt6i_uncached_list; -- - /* more non-fragment space at head required */ - unsigned short rt6i_nfheader_len; - }; -diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h -index 81ee387a1fc4..3556595ce59a 100644 ---- a/include/net/ip6_route.h -+++ b/include/net/ip6_route.h -@@ -100,7 +100,7 @@ static inline struct dst_entry *ip6_route_output(struct net *net, - static inline void ip6_rt_put_flags(struct rt6_info *rt, int flags) - { - if (!(flags & RT6_LOOKUP_F_DST_NOREF) || -- !list_empty(&rt->rt6i_uncached)) -+ !list_empty(&rt->dst.rt_uncached)) - ip6_rt_put(rt); - } - -diff --git a/include/net/route.h b/include/net/route.h -index fe00b0a2e475..bcc367cf3aa2 100644 ---- a/include/net/route.h -+++ b/include/net/route.h -@@ -78,9 +78,6 @@ struct rtable { - /* Miscellaneous cached information */ - u32 rt_mtu_locked:1, - rt_pmtu:31; -- -- struct list_head rt_uncached; -- struct uncached_list *rt_uncached_list; - }; - - static inline bool rt_is_input_route(const struct rtable *rt) -diff --git a/include/net/sock.h b/include/net/sock.h -index 573f2bf7e0de..5edf0038867c 100644 ---- a/include/net/sock.h -+++ b/include/net/sock.h -@@ -2131,7 +2131,7 @@ sk_dst_get(struct sock *sk) - - rcu_read_lock(); - dst = rcu_dereference(sk->sk_dst_cache); -- if (dst && !atomic_inc_not_zero(&dst->__refcnt)) -+ if (dst && !rcuref_get(&dst->__rcuref)) - dst = NULL; - rcu_read_unlock(); - return dst; diff --git a/kernel/kheaders.c b/kernel/kheaders.c index 8f69772af77b..42163c9e94e5 100644 --- a/kernel/kheaders.c @@ -12286,992 +9123,60 @@ index e007b8a4b738..7c80301ab084 100644 { struct padata_work *pw = container_of(w, struct padata_work, pw_work); struct padata_mt_job_state *ps = pw->pw_data; -diff --git a/lib/Makefile b/lib/Makefile -index baf2821f7a00..31a3a257fd49 100644 ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -47,7 +47,7 @@ obj-y += bcd.o sort.o parser.o debug_locks.o random32.o \ - list_sort.o uuid.o iov_iter.o clz_ctz.o \ - bsearch.o find_bit.o llist.o memweight.o kfifo.o \ - percpu-refcount.o rhashtable.o base64.o \ -- once.o refcount.o usercopy.o errseq.o bucket_locks.o \ -+ once.o refcount.o rcuref.o usercopy.o errseq.o bucket_locks.o \ - generic-radix-tree.o - obj-$(CONFIG_STRING_SELFTEST) += test_string.o - obj-y += 
string_helpers.o
-diff --git a/lib/rcuref.c b/lib/rcuref.c
-new file mode 100644
-index 000000000000..5ec00a4a64d1
---- /dev/null
-+++ b/lib/rcuref.c
-@@ -0,0 +1,281 @@
-+// SPDX-License-Identifier: GPL-2.0-only
-+
-+/*
-+ * rcuref - A scalable reference count implementation for RCU managed objects
-+ *
-+ * rcuref is provided to replace open coded reference count implementations
-+ * based on atomic_t. It protects explicitly RCU managed objects which can
-+ * be visible even after the last reference has been dropped and the object
-+ * is heading towards destruction.
-+ *
-+ * A common usage pattern is:
-+ *
-+ *	get()
-+ *		rcu_read_lock();
-+ *		p = get_ptr();
-+ *		if (p && !atomic_inc_not_zero(&p->refcnt))
-+ *			p = NULL;
-+ *		rcu_read_unlock();
-+ *		return p;
-+ *
-+ *	put()
-+ *		if (!atomic_dec_return(&p->refcnt)) {
-+ *			remove_ptr(p);
-+ *			kfree_rcu(p, rcu);
-+ *		}
-+ *
-+ * atomic_inc_not_zero() is implemented with a try_cmpxchg() loop which has
-+ * O(N^2) behaviour under contention with N concurrent operations.
-+ *
-+ * rcuref uses atomic_add_negative_relaxed() for the fast path, which scales
-+ * better under contention.
-+ *
-+ * Why not refcount?
-+ * =================
-+ *
-+ * In principle it should be possible to make refcount use the rcuref
-+ * scheme, but the destruction race described below cannot be prevented
-+ * unless the protected object is RCU managed.
-+ *
-+ * Theory of operation
-+ * ===================
-+ *
-+ * rcuref uses an unsigned integer reference counter. As long as the
-+ * counter value is greater than or equal to RCUREF_ONEREF and not larger
-+ * than RCUREF_MAXREF the reference is alive:
-+ *
-+ *	ONEREF      MAXREF      SATURATED               RELEASED    DEAD        NOREF
-+ *	0x00000000  0x7FFFFFFF  0x80000000  0xA0000000  0xBFFFFFFF  0xC0000000  0xE0000000  0xFFFFFFFF
-+ *	<-------- valid --------><------ saturation zone ------><-------- dead zone -------->
-+ *
-+ * The get() and put() operations do unconditional increments and
-+ * decrements. The result is checked after the operation. This optimizes
-+ * for the fast path.
-+ *
-+ * If the reference count is saturated or dead, then the increments and
-+ * decrements are not harmful as the reference count still stays in the
-+ * respective zones and is always set back to SATURATED resp. DEAD. The
-+ * zones have room for 2^28 racing operations in each direction, which
-+ * makes it practically impossible to escape the zones.
-+ *
-+ * Once the last reference is dropped the reference count becomes
-+ * RCUREF_NOREF which forces rcuref_put() into the slowpath operation. The
-+ * slowpath then tries to set the reference count from RCUREF_NOREF to
-+ * RCUREF_DEAD via a cmpxchg(). This opens a small window where a
-+ * concurrent rcuref_get() can acquire the reference count and bring it
-+ * back to RCUREF_ONEREF or even drop the reference again and mark it DEAD.
-+ *
-+ * If the cmpxchg() succeeds then a concurrent rcuref_get() will result in
-+ * DEAD + 1, which is inside the dead zone. If that happens the reference
-+ * count is put back to DEAD.
-+ *
-+ * The actual race is possible due to the unconditional increments and
-+ * decrements in rcuref_get() and rcuref_put():
-+ *
-+ *	T1				T2
-+ *	get()				put()
-+ *					if (atomic_add_negative(-1, &ref->refcnt))
-+ *		succeeds->		atomic_cmpxchg(&ref->refcnt, NOREF, DEAD);
-+ *
-+ *	atomic_add_negative(1, &ref->refcnt);	<- Elevates refcount to DEAD + 1
-+ *
-+ * As the result of T1's add is negative, the get() goes into the slow path
-+ * and observes refcnt being in the dead zone which makes the operation fail.
-+ *
-+ * Possible critical states:
-+ *
-+ *	Context		Counter	References	Operation
-+ *	T1		0	1		init()
-+ *	T2		1	2		get()
-+ *	T1		0	1		put()
-+ *	T2		-1	0		put() tries to mark dead
-+ *	T1		0	1		get()
-+ *	T2		0	1		put() mark dead fails
-+ *	T1		-1	0		put() tries to mark dead
-+ *	T1		DEAD	0		put() mark dead succeeds
-+ *	T2		DEAD+1	0		get() fails and puts it back to DEAD
-+ *
-+ * Of course there are more complex scenarios, but the above illustrates
-+ * the working principle. The rest is left to the imagination of the
-+ * reader.
-+ *
-+ * Deconstruction race
-+ * ===================
-+ *
-+ * The release operation must be protected by prohibiting a grace period in
-+ * order to prevent a possible use after free:
-+ *
-+ *	T1				T2
-+ *	put()				get()
-+ *	// ref->refcnt = ONEREF
-+ *	if (!atomic_add_negative(-1, &ref->refcnt))
-+ *		return false;				<- Not taken
-+ *
-+ *	// ref->refcnt == NOREF
-+ *	--> preemption
-+ *					// Elevates ref->refcnt to ONEREF
-+ *					if (!atomic_add_negative(1, &ref->refcnt))
-+ *						return true;	<- taken
-+ *
-+ *					if (put(&p->ref)) {	<-- Succeeds
-+ *						remove_pointer(p);
-+ *						kfree_rcu(p, rcu);
-+ *					}
-+ *
-+ *	RCU grace period ends, object is freed
-+ *
-+ *	atomic_cmpxchg(&ref->refcnt, NOREF, DEAD);	<- UAF
-+ *
-+ * This is prevented by disabling preemption around the put() operation as
-+ * that's in most kernel configurations cheaper than a rcu_read_lock() /
-+ * rcu_read_unlock() pair and in many cases even a NOOP. In any case it
-+ * prevents the grace period which keeps the object alive until all put()
-+ * operations complete.
-+ *
-+ * Saturation protection
-+ * =====================
-+ *
-+ * The reference count has a saturation limit RCUREF_MAXREF (INT_MAX).
-+ * Once this is exceeded the reference count becomes stale by setting it
-+ * to RCUREF_SATURATED, which will cause a memory leak, but it prevents
-+ * wrap arounds which obviously cause worse problems than a memory
-+ * leak. When saturation is reached a warning is emitted.
-+ *
-+ * Race conditions
-+ * ===============
-+ *
-+ * All reference count increment/decrement operations are unconditional and
-+ * only verified after the fact. This optimizes for the good case and takes
-+ * the occasional race vs. a dead or already saturated refcount into
-+ * account. The saturation and dead zones are large enough to accommodate
-+ * for that.
-+ *
-+ * Memory ordering
-+ * ===============
-+ *
-+ * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
-+ * and provide only what is strictly required for refcounts.
-+ *
-+ * The increments are fully relaxed; these will not provide ordering. The
-+ * rationale is that whatever is used to obtain the object to increase the
-+ * reference count on will provide the ordering. For locked data
-+ * structures, it's the lock acquire, for RCU/lockless data structures it's
-+ * the dependent load.
-+ *
-+ * rcuref_get() provides a control dependency ordering future stores which
-+ * ensures that the object is not modified when acquiring a reference
-+ * fails.
-+ *
-+ * rcuref_put() provides release order, i.e. all prior loads and stores
-+ * will be issued before. It also provides a control dependency ordering
-+ * against the subsequent destruction of the object.
-+ *
-+ * If rcuref_put() successfully dropped the last reference and marked the
-+ * object DEAD it also provides acquire ordering.
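
To make the zone arithmetic above concrete, here is a small user-space model that only classifies a raw counter value into the valid, saturation and dead zones using the constants quoted from rcuref.h earlier in this patch; everything else about the real API (fences, slowpaths, cmpxchg) is deliberately elided:

	#include <stdio.h>

	#define RCUREF_MAXREF		0x7FFFFFFFU
	#define RCUREF_SATURATED	0xA0000000U
	#define RCUREF_RELEASED		0xC0000000U
	#define RCUREF_DEAD		0xE0000000U
	#define RCUREF_NOREF		0xFFFFFFFFU

	/* Classify a raw counter value into the zones drawn in the diagram. */
	static const char *zone(unsigned int c)
	{
		if (c <= RCUREF_MAXREF)
			return "valid";
		if (c < RCUREF_RELEASED)
			return "saturation";
		if (c == RCUREF_NOREF)
			return "noref (last reference dropped)";
		return "dead";
	}

	int main(void)
	{
		unsigned int samples[] = { 0, RCUREF_MAXREF, RCUREF_SATURATED,
					   RCUREF_DEAD, RCUREF_NOREF };

		for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
			printf("0x%08X -> %s\n", samples[i], zone(samples[i]));
		return 0;
	}
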
-+ */ -+ -+#include -+#include -+ -+/** -+ * rcuref_get_slowpath - Slowpath of rcuref_get() -+ * @ref: Pointer to the reference count -+ * -+ * Invoked when the reference count is outside of the valid zone. -+ * -+ * Return: -+ * False if the reference count was already marked dead -+ * -+ * True if the reference count is saturated, which prevents the -+ * object from being deconstructed ever. -+ */ -+bool rcuref_get_slowpath(rcuref_t *ref) -+{ -+ unsigned int cnt = atomic_read(&ref->refcnt); -+ -+ /* -+ * If the reference count was already marked dead, undo the -+ * increment so it stays in the middle of the dead zone and return -+ * fail. -+ */ -+ if (cnt >= RCUREF_RELEASED) { -+ atomic_set(&ref->refcnt, RCUREF_DEAD); -+ return false; -+ } -+ -+ /* -+ * If it was saturated, warn and mark it so. In case the increment -+ * was already on a saturated value restore the saturation -+ * marker. This keeps it in the middle of the saturation zone and -+ * prevents the reference count from overflowing. This leaks the -+ * object memory, but prevents the obvious reference count overflow -+ * damage. -+ */ -+ if (WARN_ONCE(cnt > RCUREF_MAXREF, "rcuref saturated - leaking memory")) -+ atomic_set(&ref->refcnt, RCUREF_SATURATED); -+ return true; -+} -+EXPORT_SYMBOL_GPL(rcuref_get_slowpath); -+ -+/** -+ * rcuref_put_slowpath - Slowpath of __rcuref_put() -+ * @ref: Pointer to the reference count -+ * -+ * Invoked when the reference count is outside of the valid zone. -+ * -+ * Return: -+ * True if this was the last reference with no future references -+ * possible. This signals the caller that it can safely schedule the -+ * object, which is protected by the reference counter, for -+ * deconstruction. -+ * -+ * False if there are still active references or the put() raced -+ * with a concurrent get()/put() pair. Caller is not allowed to -+ * deconstruct the protected object. -+ */ -+bool rcuref_put_slowpath(rcuref_t *ref) -+{ -+ unsigned int cnt = atomic_read(&ref->refcnt); -+ -+ /* Did this drop the last reference? */ -+ if (likely(cnt == RCUREF_NOREF)) { -+ /* -+ * Carefully try to set the reference count to RCUREF_DEAD. -+ * -+ * This can fail if a concurrent get() operation has -+ * elevated it again or the corresponding put() even marked -+ * it dead already. Both are valid situations and do not -+ * require a retry. If this fails the caller is not -+ * allowed to deconstruct the object. -+ */ -+ if (atomic_cmpxchg_release(&ref->refcnt, RCUREF_NOREF, RCUREF_DEAD) != RCUREF_NOREF) -+ return false; -+ -+ /* -+ * The caller can safely schedule the object for -+ * deconstruction. Provide acquire ordering. -+ */ -+ smp_acquire__after_ctrl_dep(); -+ return true; -+ } -+ -+ /* -+ * If the reference count was already in the dead zone, then this -+ * put() operation is imbalanced. Warn, put the reference count back to -+ * DEAD and tell the caller to not deconstruct the object. -+ */ -+ if (WARN_ONCE(cnt >= RCUREF_RELEASED, "rcuref - imbalanced put()")) { -+ atomic_set(&ref->refcnt, RCUREF_DEAD); -+ return false; -+ } -+ -+ /* -+ * This is a put() operation on a saturated refcount. Restore the -+ * mean saturation value and tell the caller to not deconstruct the -+ * object. 
-+ */ -+ if (cnt > RCUREF_MAXREF) -+ atomic_set(&ref->refcnt, RCUREF_SATURATED); -+ return false; -+} -+EXPORT_SYMBOL_GPL(rcuref_put_slowpath); -diff --git a/mm/ksm.c b/mm/ksm.c -index 2b8d30068cbb..82029f1d454b 100644 ---- a/mm/ksm.c -+++ b/mm/ksm.c -@@ -214,6 +214,7 @@ struct ksm_rmap_item { - #define SEQNR_MASK 0x0ff /* low bits of unstable tree seqnr */ - #define UNSTABLE_FLAG 0x100 /* is a node of the unstable tree */ - #define STABLE_FLAG 0x200 /* is listed from the stable tree */ -+#define ZERO_PAGE_FLAG 0x400 /* is zero page placed by KSM */ - - /* The stable and unstable tree heads */ - static struct rb_root one_stable_tree[1] = { RB_ROOT }; -@@ -275,6 +276,9 @@ static unsigned int zero_checksum __read_mostly; - /* Whether to merge empty (zeroed) pages with actual zero pages */ - static bool ksm_use_zero_pages __read_mostly; - -+/* The number of zero pages placed by KSM use_zero_pages */ -+static unsigned long ksm_zero_pages_sharing; -+ - #ifdef CONFIG_NUMA - /* Zeroed when merging across nodes is not allowed */ - static unsigned int ksm_merge_across_nodes = 1; -@@ -420,6 +424,11 @@ static inline bool ksm_test_exit(struct mm_struct *mm) - return atomic_read(&mm->mm_users) == 0; - } - -+enum break_ksm_pmd_entry_return_flag { -+ HAVE_KSM_PAGE = 1, -+ HAVE_ZERO_PAGE -+}; -+ - static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next, - struct mm_walk *walk) +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 8e39705c7bdc..68d86fd93ef6 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3138,6 +3138,8 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, { -@@ -427,6 +436,7 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex - spinlock_t *ptl; - pte_t *pte; - int ret; -+ bool is_zero_page = false; + unsigned long flags; + int i, allocated = 0; ++ struct list_head *prev_tail = list->prev; ++ struct page *pos, *n; - if (pmd_leaf(*pmd) || !pmd_present(*pmd)) - return 0; -@@ -434,6 +444,8 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex - pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); - if (pte_present(*pte)) { - page = vm_normal_page(walk->vma, addr, *pte); -+ if (!page) -+ is_zero_page = is_zero_pfn(pte_pfn(*pte)); - } else if (!pte_none(*pte)) { - swp_entry_t entry = pte_to_swp_entry(*pte); - -@@ -444,7 +456,14 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex - if (is_migration_entry(entry)) - page = pfn_swap_entry_to_page(entry); - } -- ret = page && PageKsm(page); -+ -+ if (page && PageKsm(page)) -+ ret = HAVE_KSM_PAGE; -+ else if (is_zero_page) -+ ret = HAVE_ZERO_PAGE; -+ else -+ ret = 0; -+ - pte_unmap_unlock(pte, ptl); - return ret; - } -@@ -466,19 +485,22 @@ static const struct mm_walk_ops break_ksm_ops = { - * of the process that owns 'vma'. We also do not want to enforce - * protection keys here anyway. 
- */ --static int break_ksm(struct vm_area_struct *vma, unsigned long addr) -+static int break_ksm(struct vm_area_struct *vma, unsigned long addr, -+ bool unshare_zero_page) - { - vm_fault_t ret = 0; - - do { -- int ksm_page; -+ int walk_result; - - cond_resched(); -- ksm_page = walk_page_range_vma(vma, addr, addr + 1, -+ walk_result = walk_page_range_vma(vma, addr, addr + 1, - &break_ksm_ops, NULL); -- if (WARN_ON_ONCE(ksm_page < 0)) -- return ksm_page; -- if (!ksm_page) -+ if (WARN_ON_ONCE(walk_result < 0)) -+ return walk_result; -+ if (!walk_result) -+ return 0; -+ if (walk_result == HAVE_ZERO_PAGE && !unshare_zero_page) - return 0; - ret = handle_mm_fault(vma, addr, - FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE, -@@ -539,7 +561,7 @@ static void break_cow(struct ksm_rmap_item *rmap_item) - mmap_read_lock(mm); - vma = find_mergeable_vma(mm, addr); - if (vma) -- break_ksm(vma, addr); -+ break_ksm(vma, addr, false); - mmap_read_unlock(mm); - } - -@@ -764,6 +786,33 @@ static struct page *get_ksm_page(struct ksm_stable_node *stable_node, - return NULL; - } - -+/* -+ * Cleaning the rmap_item's ZERO_PAGE_FLAG -+ * This function will be called when unshare or writing on zero pages. -+ */ -+static inline void clean_rmap_item_zero_flag(struct ksm_rmap_item *rmap_item) -+{ -+ if (rmap_item->address & ZERO_PAGE_FLAG) { -+ ksm_zero_pages_sharing--; -+ rmap_item->mm->ksm_zero_pages_sharing--; -+ rmap_item->address &= PAGE_MASK; -+ } -+} -+ -+/* Only called when rmap_item is going to be freed */ -+static inline void unshare_zero_pages(struct ksm_rmap_item *rmap_item) -+{ -+ struct vm_area_struct *vma; -+ -+ if (rmap_item->address & ZERO_PAGE_FLAG) { -+ vma = vma_lookup(rmap_item->mm, rmap_item->address); -+ if (vma && !ksm_test_exit(rmap_item->mm)) -+ break_ksm(vma, rmap_item->address, true); -+ } -+ /* Put at last. */ -+ clean_rmap_item_zero_flag(rmap_item); -+} -+ - /* - * Removing rmap_item from stable or unstable tree. - * This function will clean the information from the stable/unstable tree. -@@ -824,6 +873,7 @@ static void remove_trailing_rmap_items(struct ksm_rmap_item **rmap_list) - struct ksm_rmap_item *rmap_item = *rmap_list; - *rmap_list = rmap_item->rmap_list; - remove_rmap_item_from_tree(rmap_item); -+ unshare_zero_pages(rmap_item); - free_rmap_item(rmap_item); - } - } -@@ -853,7 +903,7 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma, - if (signal_pending(current)) - err = -ERESTARTSYS; - else -- err = break_ksm(vma, addr); -+ err = break_ksm(vma, addr, false); - } - return err; - } -@@ -2050,6 +2100,42 @@ static void stable_tree_append(struct ksm_rmap_item *rmap_item, - rmap_item->mm->ksm_merging_pages++; - } - -+static int try_to_merge_with_kernel_zero_page(struct ksm_rmap_item *rmap_item, -+ struct page *page) -+{ -+ struct mm_struct *mm = rmap_item->mm; -+ int err = 0; -+ -+ /* -+ * It should not take ZERO_PAGE_FLAG because on one hand, -+ * get_next_rmap_item don't return zero pages' rmap_item. 
-+ * On the other hand, even if zero page was writen as -+ * anonymous page, rmap_item has been cleaned after -+ * stable_tree_search -+ */ -+ if (!WARN_ON_ONCE(rmap_item->address & ZERO_PAGE_FLAG)) { -+ struct vm_area_struct *vma; -+ -+ mmap_read_lock(mm); -+ vma = find_mergeable_vma(mm, rmap_item->address); -+ if (vma) { -+ err = try_to_merge_one_page(vma, page, -+ ZERO_PAGE(rmap_item->address)); -+ if (!err) { -+ rmap_item->address |= ZERO_PAGE_FLAG; -+ ksm_zero_pages_sharing++; -+ rmap_item->mm->ksm_zero_pages_sharing++; -+ } -+ } else { -+ /* If the vma is out of date, we do not need to continue. */ -+ err = 0; -+ } -+ mmap_read_unlock(mm); -+ } -+ -+ return err; -+} -+ - /* - * cmp_and_merge_page - first see if page can be merged into the stable tree; - * if not, compare checksum to previous and if it's the same, see if page can -@@ -2061,7 +2147,6 @@ static void stable_tree_append(struct ksm_rmap_item *rmap_item, - */ - static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_item) - { -- struct mm_struct *mm = rmap_item->mm; - struct ksm_rmap_item *tree_rmap_item; - struct page *tree_page = NULL; - struct ksm_stable_node *stable_node; -@@ -2098,6 +2183,7 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite - } - - remove_rmap_item_from_tree(rmap_item); -+ clean_rmap_item_zero_flag(rmap_item); - - if (kpage) { - if (PTR_ERR(kpage) == -EBUSY) -@@ -2134,29 +2220,16 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite - * Same checksum as an empty page. We attempt to merge it with the - * appropriate zero page if the user enabled this via sysfs. - */ -- if (ksm_use_zero_pages && (checksum == zero_checksum)) { -- struct vm_area_struct *vma; -- -- mmap_read_lock(mm); -- vma = find_mergeable_vma(mm, rmap_item->address); -- if (vma) { -- err = try_to_merge_one_page(vma, page, -- ZERO_PAGE(rmap_item->address)); -- } else { -+ if (ksm_use_zero_pages) { -+ if (checksum == zero_checksum) - /* -- * If the vma is out of date, we do not need to -- * continue. -+ * In case of failure, the page was not really empty, so we -+ * need to continue. Otherwise we're done. - */ -- err = 0; -- } -- mmap_read_unlock(mm); -- /* -- * In case of failure, the page was not really empty, so we -- * need to continue. Otherwise we're done. -- */ -- if (!err) -- return; -+ if (!try_to_merge_with_kernel_zero_page(rmap_item, page)) -+ return; - } -+ - tree_rmap_item = - unstable_tree_search_insert(rmap_item, page, &tree_page); - if (tree_rmap_item) { -@@ -2220,23 +2293,39 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite - } - } - --static struct ksm_rmap_item *get_next_rmap_item(struct ksm_mm_slot *mm_slot, -- struct ksm_rmap_item **rmap_list, -- unsigned long addr) -+static struct ksm_rmap_item *try_to_get_old_rmap_item(unsigned long addr, -+ struct ksm_rmap_item **rmap_list) - { -- struct ksm_rmap_item *rmap_item; -- - while (*rmap_list) { -- rmap_item = *rmap_list; -+ struct ksm_rmap_item *rmap_item = *rmap_list; -+ - if ((rmap_item->address & PAGE_MASK) == addr) - return rmap_item; - if (rmap_item->address > addr) + spin_lock_irqsave(&zone->lock, flags); + for (i = 0; i < count; ++i) { +@@ -3146,9 +3148,6 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, + if (unlikely(page == NULL)) break; - *rmap_list = rmap_item->rmap_list; -+ /* -+ * If we end up here, the VMA is MADV_UNMERGEABLE or its page -+ * is ineligible or discarded, e.g. MADV_DONTNEED. 
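
The zero-page merge in cmp_and_merge_page() above hinges on a single test: a candidate page whose checksum equals the precomputed checksum of an empty page is mapped to the kernel zero page instead of a KSM page, and counted in zero_pages_sharing. A hedged sketch of just that comparison (plain C; the toy digest stands in for KSM's calc_checksum(), which really uses xxhash):

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdio.h>

	#define PAGE_SIZE 4096

	/* Toy digest standing in for calc_checksum(); any stable hash works here. */
	static unsigned int checksum(const unsigned char *page)
	{
		unsigned int h = 0;

		for (size_t i = 0; i < PAGE_SIZE; i++)
			h = h * 31 + page[i];
		return h;
	}

	int main(void)
	{
		static unsigned char zero_page[PAGE_SIZE];	/* all zeroes */
		static unsigned char candidate[PAGE_SIZE];
		unsigned int zero_checksum = checksum(zero_page);

		/* Empty candidate: checksums match, so it would be merged with
		 * the kernel zero page rather than a KSM stable-tree page. */
		bool merge = checksum(candidate) == zero_checksum;
		printf("empty page merges: %d\n", merge);

		candidate[0] = 1;	/* any byte of content breaks the match */
		printf("dirty page merges: %d\n",
		       checksum(candidate) == zero_checksum);
		return 0;
	}
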
-+ */ - remove_rmap_item_from_tree(rmap_item); -+ unshare_zero_pages(rmap_item); - free_rmap_item(rmap_item); - } -+ return NULL; -+} -+ -+static struct ksm_rmap_item *get_next_rmap_item(struct ksm_mm_slot *mm_slot, -+ struct ksm_rmap_item **rmap_list, -+ unsigned long addr) -+{ -+ struct ksm_rmap_item *rmap_item; -+ -+ rmap_item = try_to_get_old_rmap_item(addr, rmap_list); -+ if (rmap_item) -+ return rmap_item; -+ - rmap_item = alloc_rmap_item(); - if (rmap_item) { - /* It has already been zeroed */ -@@ -2343,6 +2432,22 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page) - } - if (is_zone_device_page(*page)) - goto next_page; -+ if (is_zero_pfn(page_to_pfn(*page))) { -+ /* -+ * To monitor ksm zero pages which becomes non-anonymous, -+ * we have to save each rmap_item of zero pages by -+ * try_to_get_old_rmap_item() walking on -+ * ksm_scan.rmap_list, otherwise their rmap_items will be -+ * freed by the next turn of get_next_rmap_item(). The -+ * function get_next_rmap_item() will free all "skipped" -+ * rmap_items because it thinks its areas as UNMERGEABLE. -+ */ -+ rmap_item = try_to_get_old_rmap_item(ksm_scan.address, -+ ksm_scan.rmap_list); -+ if (rmap_item && (rmap_item->address & ZERO_PAGE_FLAG)) -+ ksm_scan.rmap_list = &rmap_item->rmap_list; -+ goto next_page; -+ } - if (PageAnon(*page)) { - flush_anon_page(vma, *page, ksm_scan.address); - flush_dcache_page(*page); -@@ -3139,6 +3244,13 @@ static ssize_t pages_volatile_show(struct kobject *kobj, - } - KSM_ATTR_RO(pages_volatile); - -+static ssize_t zero_pages_sharing_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sysfs_emit(buf, "%ld\n", ksm_zero_pages_sharing); -+} -+KSM_ATTR_RO(zero_pages_sharing); -+ - static ssize_t stable_node_dups_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) - { -@@ -3194,6 +3306,7 @@ static struct attribute *ksm_attrs[] = { - &pages_sharing_attr.attr, - &pages_unshared_attr.attr, - &pages_volatile_attr.attr, -+ &zero_pages_sharing_attr.attr, - &full_scans_attr.attr, - #ifdef CONFIG_NUMA - &merge_across_nodes_attr.attr, -diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c -index 8c69f0c95a8e..98aea5485aae 100644 ---- a/net/bridge/br_nf_core.c -+++ b/net/bridge/br_nf_core.c -@@ -73,7 +73,7 @@ void br_netfilter_rtable_init(struct net_bridge *br) - { - struct rtable *rt = &br->fake_rtable; - -- atomic_set(&rt->dst.__refcnt, 1); -+ rcuref_init(&rt->dst.__rcuref, 1); - rt->dst.dev = br->dev; - dst_init_metrics(&rt->dst, br_dst_default_metrics, true); - rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE; -diff --git a/net/core/dst.c b/net/core/dst.c -index 31c08a3386d3..3247e84045ca 100644 ---- a/net/core/dst.c -+++ b/net/core/dst.c -@@ -66,7 +66,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, - dst->tclassid = 0; - #endif - dst->lwtstate = NULL; -- atomic_set(&dst->__refcnt, initial_ref); -+ rcuref_init(&dst->__rcuref, initial_ref); - dst->__use = 0; - dst->lastuse = jiffies; - dst->flags = flags; -@@ -162,31 +162,15 @@ EXPORT_SYMBOL(dst_dev_put); - - void dst_release(struct dst_entry *dst) - { -- if (dst) { -- int newrefcnt; +- if (unlikely(check_pcp_refill(page, order))) +- continue; - -- newrefcnt = atomic_dec_return(&dst->__refcnt); -- if (WARN_ONCE(newrefcnt < 0, "dst_release underflow")) -- net_warn_ratelimited("%s: dst:%p refcnt:%d\n", -- __func__, dst, newrefcnt); -- if (!newrefcnt) -- call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu); -- } -+ if (dst && rcuref_put(&dst->__rcuref)) -+ 
call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu); - } - EXPORT_SYMBOL(dst_release); - - void dst_release_immediate(struct dst_entry *dst) - { -- if (dst) { -- int newrefcnt; -- -- newrefcnt = atomic_dec_return(&dst->__refcnt); -- if (WARN_ONCE(newrefcnt < 0, "dst_release_immediate underflow")) -- net_warn_ratelimited("%s: dst:%p refcnt:%d\n", -- __func__, dst, newrefcnt); -- if (!newrefcnt) -- dst_destroy(dst); -- } -+ if (dst && rcuref_put(&dst->__rcuref)) -+ dst_destroy(dst); - } - EXPORT_SYMBOL(dst_release_immediate); - -diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c -index 6e44e92ebdf5..a6380dd47e7f 100644 ---- a/net/core/rtnetlink.c -+++ b/net/core/rtnetlink.c -@@ -840,7 +840,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, - if (dst) { - ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse); - ci.rta_used = dst->__use; -- ci.rta_clntref = atomic_read(&dst->__refcnt); -+ ci.rta_clntref = rcuref_read(&dst->__rcuref); - } - if (expires) { - unsigned long clock; -diff --git a/net/ipv4/route.c b/net/ipv4/route.c -index de6e3515ab4f..0f0cb629e0ad 100644 ---- a/net/ipv4/route.c -+++ b/net/ipv4/route.c -@@ -1508,20 +1508,20 @@ void rt_add_uncached_list(struct rtable *rt) - { - struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); - -- rt->rt_uncached_list = ul; -+ rt->dst.rt_uncached_list = ul; - - spin_lock_bh(&ul->lock); -- list_add_tail(&rt->rt_uncached, &ul->head); -+ list_add_tail(&rt->dst.rt_uncached, &ul->head); - spin_unlock_bh(&ul->lock); + /* + * Split buddy pages returned by expand() are received here in + * physical page order. The page is added to the tail of +@@ -3160,7 +3159,6 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, + * pages are ordered properly. + */ + list_add_tail(&page->pcp_list, list); +- allocated++; + if (is_migrate_cma(get_pcppage_migratetype(page))) + __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, + -(1 << order)); +@@ -3174,6 +3172,22 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, + */ + __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); + spin_unlock_irqrestore(&zone->lock, flags); ++ ++ /* ++ * Pages are appended to the pcp list without checking to reduce the ++ * time holding the zone lock. Checking the appended pages happens right ++ * after the critical section while still holding the pcp lock. 
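/*
 * The loop that follows implements the deferred check described in the
 * comment above. As a standalone userspace analog of the same pattern
 * (append under the contended lock, validate after dropping it); every
 * name here is illustrative, none of this is a kernel API:
 */
#include <pthread.h>
#include <stdio.h>

struct item { struct item *next; int bad; };

static pthread_mutex_t hot_lock = PTHREAD_MUTEX_INITIALIZER;

static int bulk_fill(struct item *src, int count, struct item **head)
{
	struct item **tail = head;
	int i, kept = 0;

	pthread_mutex_lock(&hot_lock);
	for (i = 0; i < count; i++) {	/* no per-item validation here */
		src[i].next = NULL;
		*tail = &src[i];
		tail = &src[i].next;
	}
	pthread_mutex_unlock(&hot_lock);

	for (struct item **pp = head; *pp; ) {	/* checks after unlock */
		if ((*pp)->bad) {
			*pp = (*pp)->next;	/* unlink bad items */
			continue;
		}
		kept++;
		pp = &(*pp)->next;
	}
	return kept;
}

int main(void)
{
	struct item items[4] = { {0, 0}, {0, 1}, {0, 0}, {0, 0} };
	struct item *list = NULL;

	printf("kept %d of 4\n", bulk_fill(items, 4, &list));	/* kept 3 */
	return 0;
}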
++ */ ++ pos = list_first_entry(prev_tail, struct page, pcp_list); ++ list_for_each_entry_safe_from(pos, n, list, pcp_list) { ++ if (unlikely(check_pcp_refill(pos, order))) { ++ list_del(&pos->pcp_list); ++ continue; ++ } ++ ++ allocated++; ++ } ++ + return allocated; } - void rt_del_uncached_list(struct rtable *rt) - { -- if (!list_empty(&rt->rt_uncached)) { -- struct uncached_list *ul = rt->rt_uncached_list; -+ if (!list_empty(&rt->dst.rt_uncached)) { -+ struct uncached_list *ul = rt->dst.rt_uncached_list; - - spin_lock_bh(&ul->lock); -- list_del_init(&rt->rt_uncached); -+ list_del_init(&rt->dst.rt_uncached); - spin_unlock_bh(&ul->lock); - } - } -@@ -1546,13 +1546,13 @@ void rt_flush_dev(struct net_device *dev) - continue; - - spin_lock_bh(&ul->lock); -- list_for_each_entry_safe(rt, safe, &ul->head, rt_uncached) { -+ list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) { - if (rt->dst.dev != dev) - continue; - rt->dst.dev = blackhole_netdev; - netdev_ref_replace(dev, blackhole_netdev, - &rt->dst.dev_tracker, GFP_ATOMIC); -- list_move(&rt->rt_uncached, &ul->quarantine); -+ list_move(&rt->dst.rt_uncached, &ul->quarantine); - } - spin_unlock_bh(&ul->lock); - } -@@ -1644,7 +1644,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev, - rt->rt_uses_gateway = 0; - rt->rt_gw_family = 0; - rt->rt_gw4 = 0; -- INIT_LIST_HEAD(&rt->rt_uncached); -+ INIT_LIST_HEAD(&rt->dst.rt_uncached); - - rt->dst.output = ip_output; - if (flags & RTCF_LOCAL) -@@ -1675,7 +1675,7 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) - new_rt->rt_gw4 = rt->rt_gw4; - else if (rt->rt_gw_family == AF_INET6) - new_rt->rt_gw6 = rt->rt_gw6; -- INIT_LIST_HEAD(&new_rt->rt_uncached); -+ INIT_LIST_HEAD(&new_rt->dst.rt_uncached); - - new_rt->dst.input = rt->dst.input; - new_rt->dst.output = rt->dst.output; -@@ -2859,7 +2859,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or - else if (rt->rt_gw_family == AF_INET6) - rt->rt_gw6 = ort->rt_gw6; - -- INIT_LIST_HEAD(&rt->rt_uncached); -+ INIT_LIST_HEAD(&rt->dst.rt_uncached); - } - - dst_release(dst_orig); -diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c -index 3d0dfa6cf9f9..47861c8b7340 100644 ---- a/net/ipv4/xfrm4_policy.c -+++ b/net/ipv4/xfrm4_policy.c -@@ -91,7 +91,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, - xdst->u.rt.rt_gw6 = rt->rt_gw6; - xdst->u.rt.rt_pmtu = rt->rt_pmtu; - xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked; -- INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); -+ INIT_LIST_HEAD(&xdst->u.rt.dst.rt_uncached); - rt_add_uncached_list(&xdst->u.rt); - - return 0; -@@ -121,7 +121,7 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) - struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - - dst_destroy_metrics_generic(dst); -- if (xdst->u.rt.rt_uncached_list) -+ if (xdst->u.rt.dst.rt_uncached_list) - rt_del_uncached_list(&xdst->u.rt); - xfrm_dst_destroy(xdst); - } -diff --git a/net/ipv6/route.c b/net/ipv6/route.c -index 0fdb03df2287..b9d22a0a6c09 100644 ---- a/net/ipv6/route.c -+++ b/net/ipv6/route.c -@@ -139,20 +139,20 @@ void rt6_uncached_list_add(struct rt6_info *rt) - { - struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list); - -- rt->rt6i_uncached_list = ul; -+ rt->dst.rt_uncached_list = ul; - - spin_lock_bh(&ul->lock); -- list_add_tail(&rt->rt6i_uncached, &ul->head); -+ list_add_tail(&rt->dst.rt_uncached, &ul->head); - spin_unlock_bh(&ul->lock); - } - - void rt6_uncached_list_del(struct rt6_info *rt) - { -- if (!list_empty(&rt->rt6i_uncached)) { -- struct 
uncached_list *ul = rt->rt6i_uncached_list; -+ if (!list_empty(&rt->dst.rt_uncached)) { -+ struct uncached_list *ul = rt->dst.rt_uncached_list; - - spin_lock_bh(&ul->lock); -- list_del_init(&rt->rt6i_uncached); -+ list_del_init(&rt->dst.rt_uncached); - spin_unlock_bh(&ul->lock); - } - } -@@ -169,7 +169,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev) - continue; - - spin_lock_bh(&ul->lock); -- list_for_each_entry_safe(rt, safe, &ul->head, rt6i_uncached) { -+ list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) { - struct inet6_dev *rt_idev = rt->rt6i_idev; - struct net_device *rt_dev = rt->dst.dev; - bool handled = false; -@@ -188,7 +188,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev) - handled = true; - } - if (handled) -- list_move(&rt->rt6i_uncached, -+ list_move(&rt->dst.rt_uncached, - &ul->quarantine); - } - spin_unlock_bh(&ul->lock); -@@ -293,7 +293,7 @@ static const struct fib6_info fib6_null_entry_template = { - - static const struct rt6_info ip6_null_entry_template = { - .dst = { -- .__refcnt = ATOMIC_INIT(1), -+ .__rcuref = RCUREF_INIT(1), - .__use = 1, - .obsolete = DST_OBSOLETE_FORCE_CHK, - .error = -ENETUNREACH, -@@ -307,7 +307,7 @@ static const struct rt6_info ip6_null_entry_template = { - - static const struct rt6_info ip6_prohibit_entry_template = { - .dst = { -- .__refcnt = ATOMIC_INIT(1), -+ .__rcuref = RCUREF_INIT(1), - .__use = 1, - .obsolete = DST_OBSOLETE_FORCE_CHK, - .error = -EACCES, -@@ -319,7 +319,7 @@ static const struct rt6_info ip6_prohibit_entry_template = { - - static const struct rt6_info ip6_blk_hole_entry_template = { - .dst = { -- .__refcnt = ATOMIC_INIT(1), -+ .__rcuref = RCUREF_INIT(1), - .__use = 1, - .obsolete = DST_OBSOLETE_FORCE_CHK, - .error = -EINVAL, -@@ -334,7 +334,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = { - static void rt6_info_init(struct rt6_info *rt) - { - memset_after(rt, 0, dst); -- INIT_LIST_HEAD(&rt->rt6i_uncached); -+ INIT_LIST_HEAD(&rt->dst.rt_uncached); - } - - /* allocate dst with ip6_dst_ops */ -@@ -2638,7 +2638,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, - dst = ip6_route_output_flags_noref(net, sk, fl6, flags); - rt6 = (struct rt6_info *)dst; - /* For dst cached in uncached_list, refcnt is already taken. 
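/*
 * Throughout this patch, open-coded atomic_dec_return() checks become
 * rcuref_put(), which returns true only on the final put, so the
 * release paths collapse to a single branch. A stand-in model using
 * C11 atomics (the real rcuref_t additionally copes with racing
 * lockless lookups, which this sketch does not attempt):
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj { atomic_uint refcnt; };

static bool obj_put(struct obj *o)		/* models rcuref_put() */
{
	return atomic_fetch_sub(&o->refcnt, 1) == 1;	/* true on last ref */
}

static void obj_release(struct obj *o)		/* models dst_release() */
{
	if (o && obj_put(o))
		printf("last reference gone, destroy %p\n", (void *)o);
}

int main(void)
{
	struct obj o;

	atomic_init(&o.refcnt, 2);		/* models rcuref_init() */
	obj_release(&o);			/* 2 -> 1: nothing to do */
	obj_release(&o);			/* 1 -> 0: destroy */
	return 0;
}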
*/ -- if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) { -+ if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) { - dst = &net->ipv6.ip6_null_entry->dst; - dst_hold(dst); - } -@@ -2748,7 +2748,7 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst, - from = rcu_dereference(rt->from); - - if (from && (rt->rt6i_flags & RTF_PCPU || -- unlikely(!list_empty(&rt->rt6i_uncached)))) -+ unlikely(!list_empty(&rt->dst.rt_uncached)))) - dst_ret = rt6_dst_from_check(rt, from, cookie); - else - dst_ret = rt6_check(rt, from, cookie); -@@ -6477,7 +6477,7 @@ static int __net_init ip6_route_net_init(struct net *net) - net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; - dst_init_metrics(&net->ipv6.ip6_null_entry->dst, - ip6_template_metrics, true); -- INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->rt6i_uncached); -+ INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->dst.rt_uncached); - - #ifdef CONFIG_IPV6_MULTIPLE_TABLES - net->ipv6.fib6_has_custom_rules = false; -@@ -6489,7 +6489,7 @@ static int __net_init ip6_route_net_init(struct net *net) - net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; - dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, - ip6_template_metrics, true); -- INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->rt6i_uncached); -+ INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->dst.rt_uncached); - - net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, - sizeof(*net->ipv6.ip6_blk_hole_entry), -@@ -6499,7 +6499,7 @@ static int __net_init ip6_route_net_init(struct net *net) - net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; - dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, - ip6_template_metrics, true); -- INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->rt6i_uncached); -+ INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->dst.rt_uncached); - #ifdef CONFIG_IPV6_SUBTREES - net->ipv6.fib6_routes_require_src = 0; - #endif -diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c -index ea435eba3053..2b493f8d0091 100644 ---- a/net/ipv6/xfrm6_policy.c -+++ b/net/ipv6/xfrm6_policy.c -@@ -89,7 +89,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, - xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; - xdst->u.rt6.rt6i_dst = rt->rt6i_dst; - xdst->u.rt6.rt6i_src = rt->rt6i_src; -- INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached); -+ INIT_LIST_HEAD(&xdst->u.rt6.dst.rt_uncached); - rt6_uncached_list_add(&xdst->u.rt6); - - return 0; -@@ -121,7 +121,7 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) - if (likely(xdst->u.rt6.rt6i_idev)) - in6_dev_put(xdst->u.rt6.rt6i_idev); - dst_destroy_metrics_generic(dst); -- if (xdst->u.rt6.rt6i_uncached_list) -+ if (xdst->u.rt6.dst.rt_uncached_list) - rt6_uncached_list_del(&xdst->u.rt6); - xfrm_dst_destroy(xdst); - } -diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c -index 80448885c3d7..99c349c0d968 100644 ---- a/net/netfilter/ipvs/ip_vs_xmit.c -+++ b/net/netfilter/ipvs/ip_vs_xmit.c -@@ -339,7 +339,7 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, - spin_unlock_bh(&dest->dst_lock); - IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", - &dest->addr.ip, &dest_dst->dst_saddr.ip, -- atomic_read(&rt->dst.__refcnt)); -+ rcuref_read(&rt->dst.__rcuref)); - } - if (ret_saddr) - *ret_saddr = dest_dst->dst_saddr.ip; -@@ -507,7 +507,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, - spin_unlock_bh(&dest->dst_lock); - IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", - 
&dest->addr.in6, &dest_dst->dst_saddr.in6, -- atomic_read(&rt->dst.__refcnt)); -+ rcuref_read(&rt->dst.__rcuref)); - } - if (ret_saddr) - *ret_saddr = dest_dst->dst_saddr.in6; diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 0edfdb40364b..ae52d3b3f063 100644 --- a/scripts/Makefile.vmlinux_o @@ -13285,45 +9190,6 @@ index 0edfdb40364b..ae52d3b3f063 100644 targets := .tmp_initcalls.lds -diff --git a/scripts/atomic/atomics.tbl b/scripts/atomic/atomics.tbl -index fbee2f6190d9..85ca8d9b5c27 100644 ---- a/scripts/atomic/atomics.tbl -+++ b/scripts/atomic/atomics.tbl -@@ -33,7 +33,7 @@ try_cmpxchg B v p:old i:new - sub_and_test b i v - dec_and_test b v - inc_and_test b v --add_negative b i v -+add_negative B i v - add_unless fb v i:a i:u - inc_not_zero b v - inc_unless_negative b v -diff --git a/scripts/atomic/fallbacks/add_negative b/scripts/atomic/fallbacks/add_negative -index 15caa2eb2371..e5980abf5904 100755 ---- a/scripts/atomic/fallbacks/add_negative -+++ b/scripts/atomic/fallbacks/add_negative -@@ -1,16 +1,15 @@ - cat <bst_type = CS35L41_EXT_BOOST; hw_cfg->gpio1.func = CS35l41_VSPK_SWITCH; hw_cfg->gpio1.valid = true; -diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c -index d8b5b4930412..05048ebc24d8 100644 ---- a/tools/testing/selftests/mm/ksm_functional_tests.c -+++ b/tools/testing/selftests/mm/ksm_functional_tests.c -@@ -24,9 +24,12 @@ - - #define KiB 1024u - #define MiB (1024 * KiB) -+#define PageSize (4 * KiB) - - static int ksm_fd; - static int ksm_full_scans_fd; -+static int ksm_zero_pages_fd; -+static int ksm_use_zero_pages_fd; - static int pagemap_fd; - static size_t pagesize; - -@@ -57,6 +60,21 @@ static bool range_maps_duplicates(char *addr, unsigned long size) - return false; - } - -+static long ksm_get_zero_pages(void) -+{ -+ char buf[20]; -+ ssize_t read_size; -+ unsigned long ksm_zero_pages; -+ -+ read_size = pread(ksm_zero_pages_fd, buf, sizeof(buf) - 1, 0); -+ if (read_size < 0) -+ return -errno; -+ buf[read_size] = 0; -+ ksm_zero_pages = strtol(buf, NULL, 10); -+ -+ return ksm_zero_pages; -+} -+ - static long ksm_get_full_scans(void) - { - char buf[10]; -@@ -70,15 +88,12 @@ static long ksm_get_full_scans(void) - return strtol(buf, NULL, 10); - } - --static int ksm_merge(void) -+static int wait_two_full_scans(void) - { - long start_scans, end_scans; - -- /* Wait for two full scans such that any possible merging happened. */ - start_scans = ksm_get_full_scans(); - if (start_scans < 0) -- return start_scans; -- if (write(ksm_fd, "1", 1) != 1) - return -errno; - do { - end_scans = ksm_get_full_scans(); -@@ -89,6 +104,34 @@ static int ksm_merge(void) - return 0; - } - -+static inline int ksm_merge(void) -+{ -+ /* Wait for two full scans such that any possible merging happened. */ -+ if (write(ksm_fd, "1", 1) != 1) -+ return -errno; -+ -+ return wait_two_full_scans(); -+} -+ -+static int unmerge_zero_page(char *start, unsigned long size) -+{ -+ int ret; -+ -+ ret = madvise(start, size, MADV_UNMERGEABLE); -+ if (ret) { -+ ksft_test_result_fail("MADV_UNMERGEABLE failed\n"); -+ return ret; -+ } -+ -+ /* -+ * Wait for two full scans such that any possible unmerging of zero -+ * pages happened. Why? Because the unmerge action of zero pages is not -+ * done in the context of madvise(), but in the context of -+ * unshare_zero_pages() of the ksmd thread. 
-+ */ -+ return wait_two_full_scans(); -+} -+ - static char *mmap_and_merge_range(char val, unsigned long size) - { - char *map; -@@ -146,6 +189,48 @@ static void test_unmerge(void) - munmap(map, size); - } - -+static void test_unmerge_zero_pages(void) -+{ -+ const unsigned int size = 2 * MiB; -+ char *map; -+ unsigned long pages_expected; -+ -+ ksft_print_msg("[RUN] %s\n", __func__); -+ -+ /* Confirm the interfaces*/ -+ if (ksm_zero_pages_fd < 0) { -+ ksft_test_result_skip("open(\"/sys/kernel/mm/ksm/zero_pages_sharing\") failed\n"); -+ return; -+ } -+ if (ksm_use_zero_pages_fd < 0) { -+ ksft_test_result_skip("open \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n"); -+ return; -+ } -+ if (write(ksm_use_zero_pages_fd, "1", 1) != 1) { -+ ksft_test_result_skip("write \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n"); -+ return; -+ } -+ -+ /* Mmap zero pages*/ -+ map = mmap_and_merge_range(0x00, size); -+ if (map == MAP_FAILED) -+ return; -+ -+ if (unmerge_zero_page(map + size / 2, size / 2)) -+ goto unmap; -+ -+ /* Check if zero_pages_sharing can be update correctly when unmerge */ -+ pages_expected = (size / 2) / PageSize; -+ ksft_test_result(pages_expected == ksm_get_zero_pages(), -+ "zero page count react to unmerge\n"); -+ -+ /* Check if ksm zero pages are really unmerged */ -+ ksft_test_result(!range_maps_duplicates(map + size / 2, size / 2), -+ "KSM zero pages were unmerged\n"); -+unmap: -+ munmap(map, size); -+} -+ - static void test_unmerge_discarded(void) - { - const unsigned int size = 2 * MiB; -@@ -264,8 +349,11 @@ int main(int argc, char **argv) - pagemap_fd = open("/proc/self/pagemap", O_RDONLY); - if (pagemap_fd < 0) - ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); -+ ksm_zero_pages_fd = open("/sys/kernel/mm/ksm/zero_pages_sharing", O_RDONLY); -+ ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); - - test_unmerge(); -+ test_unmerge_zero_pages(); - test_unmerge_discarded(); - #ifdef __NR_userfaultfd - test_unmerge_uffd_wp(); -- -2.40.0 +2.40.1 -From 7fef8f4cdc6f7d630f4d11b805f4a7707b9b5e7b Mon Sep 17 00:00:00 2001 +From 75780f643d87d4f249b25a14bcc99b767209fa2b Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 17 Apr 2023 18:32:06 +0200 -Subject: [PATCH 05/12] Implement amd-pstate guided driver +Subject: [PATCH 5/8] Implement amd-pstate guided driver Signed-off-by: Peter Jung --- @@ -13508,7 +9222,7 @@ Signed-off-by: Peter Jung 6 files changed, 312 insertions(+), 92 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index 736233f95d59..cb4d7e74c71a 100644 +index 97303fa40350..dddaba21a9a7 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -339,6 +339,29 @@ @@ -13541,7 +9255,7 @@ index 736233f95d59..cb4d7e74c71a 100644 amijoy.map= [HW,JOY] Amiga joystick support Map of devices attached to JOY0DAT and JOY1DAT Format: , -@@ -7074,20 +7097,3 @@ +@@ -7077,20 +7100,3 @@ xmon commands. off xmon is disabled. 
@@ -14157,12 +9871,12 @@ index f5f22418e64b..c10ebf8c42e6 100644 }; #endif /* _LINUX_AMD_PSTATE_H */ -- -2.40.0 +2.40.1 -From d2c339f6d5b8f4f030e6f4cfcf7fe12277dd5e39 Mon Sep 17 00:00:00 2001 +From bf906393dd0d9e24858f3cfd6a9a5d890817cbf6 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 17 Apr 2023 18:28:52 +0200 -Subject: [PATCH 06/12] ksm +Subject: [PATCH 6/8] ksm Signed-off-by: Peter Jung --- @@ -14422,10 +10136,10 @@ index 860b2dcf3ac4..810e1fcaff94 100644 COND_SYSCALL(mbind); COND_SYSCALL(get_mempolicy); diff --git a/mm/ksm.c b/mm/ksm.c -index 82029f1d454b..0c206bd8007d 100644 +index 2b8d30068cbb..ab9a157873f4 100644 --- a/mm/ksm.c +++ b/mm/ksm.c -@@ -2576,52 +2576,76 @@ static int ksm_scan_thread(void *nothing) +@@ -2471,52 +2471,76 @@ static int ksm_scan_thread(void *nothing) return 0; } @@ -14657,2812 +10371,12 @@ index 340125d08c03..36e756355f04 100644 +subsys_initcall(pmadv_sysfs_init); +#endif /* CONFIG_KSM */ -- -2.40.0 +2.40.1 -From f64a9cb164da867b7437208dd63cf58a4faa33f2 Mon Sep 17 00:00:00 2001 +From 2f73f41267f19f290a306fde77bc648cc321f8d6 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 24 Apr 2023 12:49:39 +0200 -Subject: [PATCH 07/12] maple-lru - -Signed-off-by: Peter Jung ---- - Documentation/mm/multigen_lru.rst | 44 ++++++++-- - include/linux/mmzone.h | 10 +-- - lib/maple_tree.c | 78 ++++++----------- - lib/test_maple_tree.c | 27 ++++-- - mm/vmscan.c | 136 +++++++++++------------------- - tools/testing/radix-tree/maple.c | 24 ++++++ - 6 files changed, 163 insertions(+), 156 deletions(-) - -diff --git a/Documentation/mm/multigen_lru.rst b/Documentation/mm/multigen_lru.rst -index 5f1f6ecbb79b..52ed5092022f 100644 ---- a/Documentation/mm/multigen_lru.rst -+++ b/Documentation/mm/multigen_lru.rst -@@ -103,7 +103,8 @@ moving across tiers only involves atomic operations on - ``folio->flags`` and therefore has a negligible cost. A feedback loop - modeled after the PID controller monitors refaults over all the tiers - from anon and file types and decides which tiers from which types to --evict or protect. -+evict or protect. The desired effect is to balance refault percentages -+between anon and file types proportional to the swappiness level. - - There are two conceptually independent procedures: the aging and the - eviction. They form a closed-loop system, i.e., the page reclaim. -@@ -156,6 +157,27 @@ This time-based approach has the following advantages: - and memory sizes. - 2. It is more reliable because it is directly wired to the OOM killer. - -+``mm_struct`` list -+------------------ -+An ``mm_struct`` list is maintained for each memcg, and an -+``mm_struct`` follows its owner task to the new memcg when this task -+is migrated. -+ -+A page table walker iterates ``lruvec_memcg()->mm_list`` and calls -+``walk_page_range()`` with each ``mm_struct`` on this list to scan -+PTEs. When multiple page table walkers iterate the same list, each of -+them gets a unique ``mm_struct``, and therefore they can run in -+parallel. -+ -+Page table walkers ignore any misplaced pages, e.g., if an -+``mm_struct`` was migrated, pages left in the previous memcg will be -+ignored when the current memcg is under reclaim. Similarly, page table -+walkers will ignore pages from nodes other than the one under reclaim. -+ -+This infrastructure also tracks the usage of ``mm_struct`` between -+context switches so that page table walkers can skip processes that -+have been sleeping since the last iteration. 
-+ - Rmap/PT walk feedback - --------------------- - Searching the rmap for PTEs mapping each page on an LRU list (to test -@@ -170,7 +192,7 @@ promotes hot pages. If the scan was done cacheline efficiently, it - adds the PMD entry pointing to the PTE table to the Bloom filter. This - forms a feedback loop between the eviction and the aging. - --Bloom Filters -+Bloom filters - ------------- - Bloom filters are a space and memory efficient data structure for set - membership test, i.e., test if an element is not in the set or may be -@@ -186,6 +208,18 @@ is false positive, the cost is an additional scan of a range of PTEs, - which may yield hot pages anyway. Parameters of the filter itself can - control the false positive rate in the limit. - -+PID controller -+-------------- -+A feedback loop modeled after the Proportional-Integral-Derivative -+(PID) controller monitors refaults over anon and file types and -+decides which type to evict when both types are available from the -+same generation. -+ -+The PID controller uses generations rather than the wall clock as the -+time domain because a CPU can scan pages at different rates under -+varying memory pressure. It calculates a moving average for each new -+generation to avoid being permanently locked in a suboptimal state. -+ - Memcg LRU - --------- - An memcg LRU is a per-node LRU of memcgs. It is also an LRU of LRUs, -@@ -223,9 +257,9 @@ parts: - - * Generations - * Rmap walks --* Page table walks --* Bloom filters --* PID controller -+* Page table walks via ``mm_struct`` list -+* Bloom filters for rmap/PT walk feedback -+* PID controller for refault feedback - - The aging and the eviction form a producer-consumer model; - specifically, the latter drives the former by the sliding window over -diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h -index 9fb1b03b83b2..cabe7f51ea66 100644 ---- a/include/linux/mmzone.h -+++ b/include/linux/mmzone.h -@@ -453,18 +453,14 @@ enum { - struct lru_gen_mm_state { - /* set to max_seq after each iteration */ - unsigned long seq; -- /* where the current iteration continues (inclusive) */ -+ /* where the current iteration continues after */ - struct list_head *head; -- /* where the last iteration ended (exclusive) */ -+ /* where the last iteration ended before */ - struct list_head *tail; -- /* to wait for the last page table walker to finish */ -- struct wait_queue_head wait; - /* Bloom filters flip after each iteration */ - unsigned long *filters[NR_BLOOM_FILTERS]; - /* the mm stats for debugging */ - unsigned long stats[NR_HIST_GENS][NR_MM_STATS]; -- /* the number of concurrent page table walkers */ -- int nr_walkers; - }; - - struct lru_gen_mm_walk { -@@ -1369,7 +1365,7 @@ typedef struct pglist_data { - - #ifdef CONFIG_LRU_GEN - /* kswap mm walk data */ -- struct lru_gen_mm_walk mm_walk; -+ struct lru_gen_mm_walk mm_walk; - /* lru_gen_folio list */ - struct lru_gen_memcg memcg_lru; - #endif -diff --git a/lib/maple_tree.c b/lib/maple_tree.c -index 1281a40d5735..110a36479dce 100644 ---- a/lib/maple_tree.c -+++ b/lib/maple_tree.c -@@ -2312,9 +2312,7 @@ static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode) - static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) - { - struct ma_state *mas = wr_mas->mas; -- unsigned char count; -- unsigned char offset; -- unsigned long index, min, max; -+ unsigned char count, offset; - - if (unlikely(ma_is_dense(wr_mas->type))) { - wr_mas->r_max = wr_mas->r_min = mas->index; -@@ -2327,34 +2325,12 @@ static inline void 
mas_wr_node_walk(struct ma_wr_state *wr_mas) - count = wr_mas->node_end = ma_data_end(wr_mas->node, wr_mas->type, - wr_mas->pivots, mas->max); - offset = mas->offset; -- min = mas_safe_min(mas, wr_mas->pivots, offset); -- if (unlikely(offset == count)) -- goto max; -- -- max = wr_mas->pivots[offset]; -- index = mas->index; -- if (unlikely(index <= max)) -- goto done; -- -- if (unlikely(!max && offset)) -- goto max; -- -- min = max + 1; -- while (++offset < count) { -- max = wr_mas->pivots[offset]; -- if (index <= max) -- goto done; -- else if (unlikely(!max)) -- break; - -- min = max + 1; -- } -+ while (offset < count && mas->index > wr_mas->pivots[offset]) -+ offset++; - --max: -- max = mas->max; --done: -- wr_mas->r_max = max; -- wr_mas->r_min = min; -+ wr_mas->r_max = offset < count ? wr_mas->pivots[offset] : mas->max; -+ wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset); - wr_mas->offset_end = mas->offset = offset; - } - -@@ -3282,7 +3258,7 @@ static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end - - if (tmp < max_p) - memset(pivs + tmp, 0, -- sizeof(unsigned long *) * (max_p - tmp)); -+ sizeof(unsigned long) * (max_p - tmp)); - - if (tmp < mt_slots[mt]) - memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp)); -@@ -5274,25 +5250,28 @@ static inline void mas_fill_gap(struct ma_state *mas, void *entry, - * @size: The size of the gap - * @fwd: Searching forward or back - */ --static inline void mas_sparse_area(struct ma_state *mas, unsigned long min, -+static inline int mas_sparse_area(struct ma_state *mas, unsigned long min, - unsigned long max, unsigned long size, bool fwd) - { -- unsigned long start = 0; -- -- if (!unlikely(mas_is_none(mas))) -- start++; -+ if (!unlikely(mas_is_none(mas)) && min == 0) { -+ min++; -+ /* -+ * At this time, min is increased, we need to recheck whether -+ * the size is satisfied. -+ */ -+ if (min > max || max - min + 1 < size) -+ return -EBUSY; -+ } - /* mas_is_ptr */ - -- if (start < min) -- start = min; -- - if (fwd) { -- mas->index = start; -- mas->last = start + size - 1; -- return; -+ mas->index = min; -+ mas->last = min + size - 1; -+ } else { -+ mas->last = max; -+ mas->index = max - size + 1; - } -- -- mas->index = max; -+ return 0; - } - - /* -@@ -5321,10 +5300,8 @@ int mas_empty_area(struct ma_state *mas, unsigned long min, - return -EBUSY; - - /* Empty set */ -- if (mas_is_none(mas) || mas_is_ptr(mas)) { -- mas_sparse_area(mas, min, max, size, true); -- return 0; -- } -+ if (mas_is_none(mas) || mas_is_ptr(mas)) -+ return mas_sparse_area(mas, min, max, size, true); - - /* The start of the window can only be within these values */ - mas->index = min; -@@ -5380,10 +5357,8 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min, - } - - /* Empty set. */ -- if (mas_is_none(mas) || mas_is_ptr(mas)) { -- mas_sparse_area(mas, min, max, size, false); -- return 0; -- } -+ if (mas_is_none(mas) || mas_is_ptr(mas)) -+ return mas_sparse_area(mas, min, max, size, false); - - /* The start of the window can only be within these values. */ - mas->index = min; -@@ -5815,6 +5790,7 @@ int mas_preallocate(struct ma_state *mas, gfp_t gfp) - mas_reset(mas); - return ret; - } -+EXPORT_SYMBOL_GPL(mas_preallocate); - - /* - * mas_destroy() - destroy a maple state. 
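The mas_wr_node_walk() hunk above replaces an unrolled pivot search
with a single scan. A standalone illustration of the resulting
semantics, using made-up values: pivots[] holds each slot's inclusive
upper bound, the first pivot >= index selects the slot, and the node
maximum bounds the last slot.

	#include <stdio.h>

	int main(void)
	{
		unsigned long pivots[] = { 9, 19, 39 };	/* slot upper bounds */
		unsigned long node_max = 99;		/* max of the whole node */
		unsigned long index = 25;		/* key being written */
		unsigned int offset = 0, count = 3;

		while (offset < count && index > pivots[offset])
			offset++;

		printf("index %lu -> slot %u, r_max %lu\n", index, offset,
		       offset < count ? pivots[offset] : node_max);
		return 0;
	}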
-diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c -index f1db333270e9..4d85d04b26f8 100644 ---- a/lib/test_maple_tree.c -+++ b/lib/test_maple_tree.c -@@ -102,7 +102,7 @@ static noinline void check_mtree_alloc_rrange(struct maple_tree *mt, - unsigned long result = expected + 1; - int ret; - -- ret = mtree_alloc_rrange(mt, &result, ptr, size, start, end - 1, -+ ret = mtree_alloc_rrange(mt, &result, ptr, size, start, end, - GFP_KERNEL); - MT_BUG_ON(mt, ret != eret); - if (ret) -@@ -680,7 +680,7 @@ static noinline void check_alloc_rev_range(struct maple_tree *mt) - 0, /* Return value success. */ - - 0x0, /* Min */ -- 0x565234AF1 << 12, /* Max */ -+ 0x565234AF0 << 12, /* Max */ - 0x3000, /* Size */ - 0x565234AEE << 12, /* max - 3. */ - 0, /* Return value success. */ -@@ -692,14 +692,14 @@ static noinline void check_alloc_rev_range(struct maple_tree *mt) - 0, /* Return value success. */ - - 0x0, /* Min */ -- 0x7F36D510A << 12, /* Max */ -+ 0x7F36D5109 << 12, /* Max */ - 0x4000, /* Size */ - 0x7F36D5106 << 12, /* First rev hole of size 0x4000 */ - 0, /* Return value success. */ - - /* Ascend test. */ - 0x0, -- 34148798629 << 12, -+ 34148798628 << 12, - 19 << 12, - 34148797418 << 12, - 0x0, -@@ -711,6 +711,12 @@ static noinline void check_alloc_rev_range(struct maple_tree *mt) - 0x0, - -EBUSY, - -+ /* Single space test. */ -+ 34148798725 << 12, -+ 34148798725 << 12, -+ 1 << 12, -+ 34148798725 << 12, -+ 0, - }; - - int i, range_count = ARRAY_SIZE(range); -@@ -759,9 +765,9 @@ static noinline void check_alloc_rev_range(struct maple_tree *mt) - mas_unlock(&mas); - for (i = 0; i < req_range_count; i += 5) { - #if DEBUG_REV_RANGE -- pr_debug("\tReverse request between %lu-%lu size %lu, should get %lu\n", -- req_range[i] >> 12, -- (req_range[i + 1] >> 12) - 1, -+ pr_debug("\tReverse request %d between %lu-%lu size %lu, should get %lu\n", -+ i, req_range[i] >> 12, -+ (req_range[i + 1] >> 12), - req_range[i+2] >> 12, - req_range[i+3] >> 12); - #endif -@@ -880,6 +886,13 @@ static noinline void check_alloc_range(struct maple_tree *mt) - 4503599618982063UL << 12, /* Size */ - 34359052178 << 12, /* Expected location */ - -EBUSY, /* Return failure. 
*/ -+ -+ /* Test a single entry */ -+ 34148798648 << 12, /* Min */ -+ 34148798648 << 12, /* Max */ -+ 4096, /* Size of 1 */ -+ 34148798648 << 12, /* Location is the same as min/max */ -+ 0, /* Success */ - }; - int i, range_count = ARRAY_SIZE(range); - int req_range_count = ARRAY_SIZE(req_range); -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 71a7f4517e5a..ae60ddff831a 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -3398,18 +3398,13 @@ void lru_gen_del_mm(struct mm_struct *mm) - for_each_node(nid) { - struct lruvec *lruvec = get_lruvec(memcg, nid); - -- /* where the last iteration ended (exclusive) */ -+ /* where the current iteration continues after */ -+ if (lruvec->mm_state.head == &mm->lru_gen.list) -+ lruvec->mm_state.head = lruvec->mm_state.head->prev; -+ -+ /* where the last iteration ended before */ - if (lruvec->mm_state.tail == &mm->lru_gen.list) - lruvec->mm_state.tail = lruvec->mm_state.tail->next; -- -- /* where the current iteration continues (inclusive) */ -- if (lruvec->mm_state.head != &mm->lru_gen.list) -- continue; -- -- lruvec->mm_state.head = lruvec->mm_state.head->next; -- /* the deletion ends the current iteration */ -- if (lruvec->mm_state.head == &mm_list->fifo) -- WRITE_ONCE(lruvec->mm_state.seq, lruvec->mm_state.seq + 1); - } - - list_del_init(&mm->lru_gen.list); -@@ -3505,68 +3500,54 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, - struct mm_struct **iter) - { - bool first = false; -- bool last = true; -+ bool last = false; - struct mm_struct *mm = NULL; - struct mem_cgroup *memcg = lruvec_memcg(lruvec); - struct lru_gen_mm_list *mm_list = get_mm_list(memcg); - struct lru_gen_mm_state *mm_state = &lruvec->mm_state; - - /* -- * There are four interesting cases for this page table walker: -- * 1. It tries to start a new iteration of mm_list with a stale max_seq; -- * there is nothing left to do. -- * 2. It's the first of the current generation, and it needs to reset -- * the Bloom filter for the next generation. -- * 3. It reaches the end of mm_list, and it needs to increment -- * mm_state->seq; the iteration is done. -- * 4. It's the last of the current generation, and it needs to reset the -- * mm stats counters for the next generation. -+ * mm_state->seq is incremented after each iteration of mm_list. There -+ * are three interesting cases for this page table walker: -+ * 1. It tries to start a new iteration with a stale max_seq: there is -+ * nothing left to do. -+ * 2. It started the next iteration: it needs to reset the Bloom filter -+ * so that a fresh set of PTE tables can be recorded. -+ * 3. It ended the current iteration: it needs to reset the mm stats -+ * counters and tell its caller to increment max_seq. 
- */ - spin_lock(&mm_list->lock); - - VM_WARN_ON_ONCE(mm_state->seq + 1 < walk->max_seq); -- VM_WARN_ON_ONCE(*iter && mm_state->seq > walk->max_seq); -- VM_WARN_ON_ONCE(*iter && !mm_state->nr_walkers); - -- if (walk->max_seq <= mm_state->seq) { -- if (!*iter) -- last = false; -+ if (walk->max_seq <= mm_state->seq) - goto done; -- } - -- if (!mm_state->nr_walkers) { -- VM_WARN_ON_ONCE(mm_state->head && mm_state->head != &mm_list->fifo); -+ if (!mm_state->head) -+ mm_state->head = &mm_list->fifo; - -- mm_state->head = mm_list->fifo.next; -+ if (mm_state->head == &mm_list->fifo) - first = true; -- } -- -- while (!mm && mm_state->head != &mm_list->fifo) { -- mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list); - -+ do { - mm_state->head = mm_state->head->next; -+ if (mm_state->head == &mm_list->fifo) { -+ WRITE_ONCE(mm_state->seq, mm_state->seq + 1); -+ last = true; -+ break; -+ } - - /* force scan for those added after the last iteration */ -- if (!mm_state->tail || mm_state->tail == &mm->lru_gen.list) { -- mm_state->tail = mm_state->head; -+ if (!mm_state->tail || mm_state->tail == mm_state->head) { -+ mm_state->tail = mm_state->head->next; - walk->force_scan = true; - } - -+ mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list); - if (should_skip_mm(mm, walk)) - mm = NULL; -- } -- -- if (mm_state->head == &mm_list->fifo) -- WRITE_ONCE(mm_state->seq, mm_state->seq + 1); -+ } while (!mm); - done: -- if (*iter && !mm) -- mm_state->nr_walkers--; -- if (!*iter && mm) -- mm_state->nr_walkers++; -- -- if (mm_state->nr_walkers) -- last = false; -- - if (*iter || last) - reset_mm_stats(lruvec, walk, last); - -@@ -3594,9 +3575,9 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq) - - VM_WARN_ON_ONCE(mm_state->seq + 1 < max_seq); - -- if (max_seq > mm_state->seq && !mm_state->nr_walkers) { -- VM_WARN_ON_ONCE(mm_state->head && mm_state->head != &mm_list->fifo); -- -+ if (max_seq > mm_state->seq) { -+ mm_state->head = NULL; -+ mm_state->tail = NULL; - WRITE_ONCE(mm_state->seq, mm_state->seq + 1); - reset_mm_stats(lruvec, NULL, true); - success = true; -@@ -3608,7 +3589,7 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq) - } - - /****************************************************************************** -- * refault feedback loop -+ * PID controller - ******************************************************************************/ - - /* -@@ -4196,10 +4177,6 @@ static int walk_pud_range(p4d_t *p4d, unsigned long start, unsigned long end, - - walk_pmd_range(&val, addr, next, args); - -- /* a racy check to curtail the waiting time */ -- if (wq_has_sleeper(&walk->lruvec->mm_state.wait)) -- return 1; -- - if (need_resched() || walk->batched >= MAX_LRU_BATCH) { - end = (addr | ~PUD_MASK) + 1; - goto done; -@@ -4232,8 +4209,14 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_ - walk->next_addr = FIRST_USER_ADDRESS; - - do { -+ DEFINE_MAX_SEQ(lruvec); -+ - err = -EBUSY; - -+ /* another thread might have called inc_max_seq() */ -+ if (walk->max_seq != max_seq) -+ break; -+ - /* folio_update_gen() requires stable folio_memcg() */ - if (!mem_cgroup_trylock_pages(memcg)) - break; -@@ -4466,25 +4449,12 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, - success = iterate_mm_list(lruvec, walk, &mm); - if (mm) - walk_mm(lruvec, mm, walk); -- -- cond_resched(); - } while (mm); - done: -- if (!success) { -- if (sc->priority <= DEF_PRIORITY - 2) -- 
wait_event_killable(lruvec->mm_state.wait, -- max_seq < READ_ONCE(lrugen->max_seq)); -- return false; -- } -- -- VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq)); -+ if (success) -+ inc_max_seq(lruvec, can_swap, force_scan); - -- inc_max_seq(lruvec, can_swap, force_scan); -- /* either this sees any waiters or they will see updated max_seq */ -- if (wq_has_sleeper(&lruvec->mm_state.wait)) -- wake_up_all(&lruvec->mm_state.wait); -- -- return true; -+ return success; - } - - /****************************************************************************** -@@ -5671,14 +5641,14 @@ static void lru_gen_change_state(bool enabled) - * sysfs interface - ******************************************************************************/ - --static ssize_t show_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, char *buf) -+static ssize_t min_ttl_ms_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) - { -- return sprintf(buf, "%u\n", jiffies_to_msecs(READ_ONCE(lru_gen_min_ttl))); -+ return sysfs_emit(buf, "%u\n", jiffies_to_msecs(READ_ONCE(lru_gen_min_ttl))); - } - - /* see Documentation/admin-guide/mm/multigen_lru.rst for details */ --static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, -- const char *buf, size_t len) -+static ssize_t min_ttl_ms_store(struct kobject *kobj, struct kobj_attribute *attr, -+ const char *buf, size_t len) - { - unsigned int msecs; - -@@ -5690,11 +5660,9 @@ static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, - return len; - } - --static struct kobj_attribute lru_gen_min_ttl_attr = __ATTR( -- min_ttl_ms, 0644, show_min_ttl, store_min_ttl --); -+static struct kobj_attribute lru_gen_min_ttl_attr = __ATTR_RW(min_ttl_ms); - --static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, char *buf) -+static ssize_t enabled_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) - { - unsigned int caps = 0; - -@@ -5711,7 +5679,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c - } - - /* see Documentation/admin-guide/mm/multigen_lru.rst for details */ --static ssize_t store_enabled(struct kobject *kobj, struct kobj_attribute *attr, -+static ssize_t enabled_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t len) - { - int i; -@@ -5738,9 +5706,7 @@ static ssize_t store_enabled(struct kobject *kobj, struct kobj_attribute *attr, - return len; - } - --static struct kobj_attribute lru_gen_enabled_attr = __ATTR( -- enabled, 0644, show_enabled, store_enabled --); -+static struct kobj_attribute lru_gen_enabled_attr = __ATTR_RW(enabled); - - static struct attribute *lru_gen_attrs[] = { - &lru_gen_min_ttl_attr.attr, -@@ -5748,7 +5714,7 @@ static struct attribute *lru_gen_attrs[] = { - NULL - }; - --static struct attribute_group lru_gen_attr_group = { -+static const struct attribute_group lru_gen_attr_group = { - .name = "lru_gen", - .attrs = lru_gen_attrs, - }; -@@ -6130,7 +6096,6 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) - INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]); - - lruvec->mm_state.seq = MIN_NR_GENS; -- init_waitqueue_head(&lruvec->mm_state.wait); - } - - #ifdef CONFIG_MEMCG -@@ -6163,7 +6128,6 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg) - for_each_node(nid) { - struct lruvec *lruvec = get_lruvec(memcg, nid); - -- VM_WARN_ON_ONCE(lruvec->mm_state.nr_walkers); - VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0, - sizeof(lruvec->lrugen.nr_pages))); - -diff --git 
a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c -index 4c89ff333f6f..9286d3baa12d 100644 ---- a/tools/testing/radix-tree/maple.c -+++ b/tools/testing/radix-tree/maple.c -@@ -55,6 +55,28 @@ struct rcu_reader_struct { - struct rcu_test_struct2 *test; - }; - -+static int get_alloc_node_count(struct ma_state *mas) -+{ -+ int count = 1; -+ struct maple_alloc *node = mas->alloc; -+ -+ if (!node || ((unsigned long)node & 0x1)) -+ return 0; -+ while (node->node_count) { -+ count += node->node_count; -+ node = node->slot[0]; -+ } -+ return count; -+} -+ -+static void check_mas_alloc_node_count(struct ma_state *mas) -+{ -+ mas_node_count_gfp(mas, MAPLE_ALLOC_SLOTS + 1, GFP_KERNEL); -+ mas_node_count_gfp(mas, MAPLE_ALLOC_SLOTS + 3, GFP_KERNEL); -+ MT_BUG_ON(mas->tree, get_alloc_node_count(mas) != mas->alloc->total); -+ mas_destroy(mas); -+} -+ - /* - * check_new_node() - Check the creation of new nodes and error path - * verification. -@@ -69,6 +91,8 @@ static noinline void check_new_node(struct maple_tree *mt) - - MA_STATE(mas, mt, 0, 0); - -+ check_mas_alloc_node_count(&mas); -+ - /* Try allocating 3 nodes */ - mtree_lock(mt); - mt_set_non_kernel(0); --- -2.40.0 - -From 3d3a131234eb5f74bcd6bd84c60aa0c9ccd97eac Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Mon, 24 Apr 2023 12:50:36 +0200 -Subject: [PATCH 08/12] Per-VMA locks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Previous versions: -v3: https://lore.kernel.org/all/20230216051750.3125598-1-surenb@google.com/ -v2: https://lore.kernel.org/lkml/20230127194110.533103-1-surenb@google.com/ -v1: https://lore.kernel.org/all/20230109205336.3665937-1-surenb@google.com/ -RFC: https://lore.kernel.org/all/20220901173516.702122-1-surenb@google.com/ - -LWN article describing the feature: -https://lwn.net/Articles/906852/ - -Per-vma locks idea that was discussed during SPF [1] discussion at LSF/MM -last year [2], which concluded with suggestion that “a reader/writer -semaphore could be put into the VMA itself; that would have the effect of -using the VMA as a sort of range lock. There would still be contention at -the VMA level, but it would be an improvement.” This patchset implements -this suggested approach. - -When handling page faults we lookup the VMA that contains the faulting -page under RCU protection and try to acquire its lock. If that fails we -fall back to using mmap_lock, similar to how SPF handled this situation. - -One notable way the implementation deviates from the proposal is the way -VMAs are read-locked. During some of mm updates, multiple VMAs need to be -locked until the end of the update (e.g. vma_merge, split_vma, etc). -Tracking all the locked VMAs, avoiding recursive locks, figuring out when -it's safe to unlock previously locked VMAs would make the code more -complex. So, instead of the usual lock/unlock pattern, the proposed -solution marks a VMA as locked and provides an efficient way to: -1. Identify locked VMAs. -2. Unlock all locked VMAs in bulk. -We also postpone unlocking the locked VMAs until the end of the update, -when we do mmap_write_unlock. Potentially this keeps a VMA locked for -longer than is absolutely necessary but it results in a big reduction of -code complexity. -Read-locking a VMA is done using two sequence numbers - one in the -vm_area_struct and one in the mm_struct. VMA is considered read-locked -when these sequence numbers are equal. 
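In outline, with illustrative names rather than the patchset's actual
fields (and eliding the memory ordering a real implementation needs),
the scheme described by the following paragraphs looks like this:

	struct mm  { unsigned long mm_lock_seq; };
	struct vma { struct mm *mm; unsigned long vm_lock_seq; };

	/* mark one VMA: copy the mm-wide sequence number into it */
	static void vma_mark_locked(struct vma *vma)
	{
		vma->vm_lock_seq = vma->mm->mm_lock_seq;
	}

	/* equal sequence numbers mean "locked" */
	static int vma_is_locked(const struct vma *vma)
	{
		return vma->vm_lock_seq == vma->mm->mm_lock_seq;
	}

	/* one increment releases every marked VMA at once */
	static void mm_drop_all_locks(struct mm *mm)
	{
		mm->mm_lock_seq++;
	}

	int main(void)
	{
		struct mm mm = { 0 };
		struct vma vma = { &mm, 0 };

		vma_mark_locked(&vma);		/* vma now "locked" */
		mm_drop_all_locks(&mm);		/* ...and now it is not */
		return vma_is_locked(&vma);	/* exits 0: unlocked */
	}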
To read-lock a VMA we set the
-sequence number in vm_area_struct to be equal to the sequence number in
-mm_struct. To unlock all VMAs we increment mm_struct's seq number. This
-allows for an efficient way to track locked VMAs and to drop the locks on
-all VMAs at the end of the update.
-
-The patchset implements per-VMA locking only for anonymous pages which
-are not in swap and avoids userfaultfd as its implementation is more
-complex. Additional support for file-backed page faults, swapped and user
-pages can be added incrementally.
-
-Performance benchmarks show similar, although slightly smaller, benefits
-than the SPF patchset (~75% of SPF benefits). Still, with lower complexity
-this approach might be more desirable.
-
-Since the RFC was posted in September 2022, two separate Google teams
-outside of Android evaluated the patchset and confirmed positive results.
-Here are the known use cases where per-VMA locks show benefits:
-
-Android:
-Apps with a high number of threads (~100) see launch times improve by up
-to 20%. Each thread mmaps several areas upon startup (Stack and
-Thread-local storage (TLS), thread signal stack, indirect ref table),
-which requires taking mmap_lock in write mode. Page faults take mmap_lock
-in read mode. During app launch, both thread creation and the page faults
-establishing the active working set happen in parallel, and that causes
-lock contention between mm writers and readers even if updates and page
-faults are happening in different VMAs. Per-VMA locks prevent this
-contention by providing a more granular lock.
-
-Google Fibers:
-We have several dynamically sized thread pools that spawn new threads
-under increased load and reduce their number when idling. For example,
-Google's in-process scheduling/threading framework, UMCG/Fibers, is backed
-by such a thread pool. When idling, only a small number of idle worker
-threads are available; when a spike of incoming requests arrives, each
-request is handled in its own "fiber", which is a work item posted onto a
-UMCG worker thread; quite often these spikes lead to a number of new
-threads spawning. Each new thread needs to allocate and register an RSEQ
-section on its TLS, then register itself with the kernel as a UMCG worker
-thread, and only after that can it be considered by the in-process
-UMCG/Fiber scheduler as available to do useful work. In short, during an
-incoming workload spike new threads have to be spawned, and they perform
-several syscalls (RSEQ registration, UMCG worker registration, memory
-allocations) before they can actually start doing useful work. Removing
-any bottlenecks on this thread startup path will greatly improve our
-services' latencies when faced with request/workload spikes.
-At high scale, mmap_lock contention during thread creation and stack page
-faults leads to user-visible multi-second serving latencies in a similar
-pattern to Android app startup. The per-VMA locking patchset has been run
-successfully in limited experiments with user-facing production workloads.
-In these experiments, we observed that the peak thread creation rate was
-high enough that thread creation is no longer a bottleneck.
-
-TCP zerocopy receive:
-From the point of view of TCP zerocopy receive, the per-VMA lock patch is
-massively beneficial.
-In today's implementation, a process with N threads where N - 1 are
-performing zerocopy receive and 1 thread is performing madvise() with the
-write lock taken (e.g. needs to change vm_flags) will result in all N - 1
-receive threads blocking until the madvise is done.
Conversely, on a busy -process receiving a lot of data, an madvise operation that does need to -take the mmap lock in write mode will need to wait for all of the receives -to be done - a lose:lose proposition. Per-VMA locking _removes_ by -definition this source of contention entirely. -There are other benefits for receive as well, chiefly a reduction in -cacheline bouncing across receiving threads for locking/unlocking the -single mmap lock. On an RPC style synthetic workload with 4KB RPCs: -1a) The find+lock+unlock VMA path in the base case, without the per-vma -lock patchset, is about 0.7% of cycles as measured by perf. -1b) mmap_read_lock + mmap_read_unlock in the base case is about 0.5% -cycles overall - most of this is within the TCP read hotpath (a small -fraction is 'other' usage in the system). -2a) The find+lock+unlock VMA path, with the per-vma patchset and a trivial -patch written to take advantage of it in TCP, is about 0.4% of cycles -(down from 0.7% above) -2b) mmap_read_lock + mmap_read_unlock in the per-vma patchset is < 0.1% -cycles and is out of the TCP read hotpath entirely (down from 0.5% before, -the remaining usage is the 'other' usage in the system). -So, in addition to entirely removing an onerous source of contention, it -also reduces the CPU cycles of TCP receive zerocopy by about 0.5%+ -(compared to overall cycles in perf) for the 'small' RPC scenario. - -The patchset structure is: -0001-0008: Enable maple-tree RCU mode -0009-0031: Main per-vma locks patchset -0032-0033: Performance optimizations - -Changes since v3: -- Changed patch [3] to move vma_prepare before vma_adjust_trans_huge -- Dropped patch [4] from the set as unnecessary, per Hyeonggon Yoo -- Changed patch [5] to do VMA locking inside vma_prepare, per Liam Howlett -- Dropped patch [6] from the set as unnecessary, per Liam Howlett - -[1] https://lore.kernel.org/all/20220128131006.67712-1-michel@lespinasse.org/ -[2] https://lwn.net/Articles/893906/ -[3] https://lore.kernel.org/all/20230216051750.3125598-15-surenb@google.com/ -[4] https://lore.kernel.org/all/20230216051750.3125598-17-surenb@google.com/ -[5] https://lore.kernel.org/all/20230216051750.3125598-18-surenb@google.com/ -[6] https://lore.kernel.org/all/20230216051750.3125598-22-surenb@google.com/ - -The patchset applies cleanly over mm-unstable branch. - -Laurent Dufour (1): - powerc/mm: try VMA lock-based page fault handling first - -Liam Howlett (4): - maple_tree: Be more cautious about dead nodes - maple_tree: Detect dead nodes in mas_start() - maple_tree: Fix freeing of nodes in rcu mode - maple_tree: remove extra smp_wmb() from mas_dead_leaves() - -Liam R. Howlett (4): - maple_tree: Fix write memory barrier of nodes once dead for RCU mode - maple_tree: Add smp_rmb() to dead node detection - maple_tree: Add RCU lock checking to rcu callback functions - mm: Enable maple tree RCU mode by default. 
- -Michel Lespinasse (1): - mm: rcu safe VMA freeing - -Suren Baghdasaryan (23): - mm: introduce CONFIG_PER_VMA_LOCK - mm: move mmap_lock assert function definitions - mm: add per-VMA lock and helper functions to control it - mm: mark VMA as being written when changing vm_flags - mm/mmap: move vma_prepare before vma_adjust_trans_huge - mm/khugepaged: write-lock VMA while collapsing a huge page - mm/mmap: write-lock VMAs in vma_prepare before modifying them - mm/mremap: write-lock VMA while remapping it to a new address range - mm: write-lock VMAs before removing them from VMA tree - mm: conditionally write-lock VMA in free_pgtables - kernel/fork: assert no VMA readers during its destruction - mm/mmap: prevent pagefault handler from racing with mmu_notifier - registration - mm: introduce vma detached flag - mm: introduce lock_vma_under_rcu to be used from arch-specific code - mm: fall back to mmap_lock if vma->anon_vma is not yet set - mm: add FAULT_FLAG_VMA_LOCK flag - mm: prevent do_swap_page from handling page faults under VMA lock - mm: prevent userfaults to be handled under per-vma lock - mm: introduce per-VMA lock statistics - x86/mm: try VMA lock-based page fault handling first - arm64/mm: try VMA lock-based page fault handling first - mm/mmap: free vm_area_struct without call_rcu in exit_mmap - mm: separate vma->lock from vm_area_struct - -Signed-off-by: Peter Jung ---- - Documentation/admin-guide/mm/userfaultfd.rst | 17 ++ - arch/arm64/Kconfig | 1 + - arch/arm64/mm/fault.c | 36 ++++ - arch/powerpc/mm/fault.c | 37 ++++ - arch/powerpc/platforms/powernv/Kconfig | 1 + - arch/powerpc/platforms/pseries/Kconfig | 1 + - arch/s390/Kconfig | 1 + - arch/s390/mm/fault.c | 24 +++ - arch/x86/Kconfig | 1 + - arch/x86/mm/fault.c | 36 ++++ - fs/userfaultfd.c | 16 ++ - include/linux/mm.h | 127 ++++++++++++- - include/linux/mm_inline.h | 6 + - include/linux/mm_types.h | 30 ++- - include/linux/mmap_lock.h | 37 ++-- - include/linux/userfaultfd_k.h | 23 +++ - include/linux/vm_event_item.h | 6 + - include/linux/vmstat.h | 6 + - include/uapi/linux/userfaultfd.h | 10 +- - kernel/fork.c | 96 ++++++++-- - mm/Kconfig | 12 ++ - mm/Kconfig.debug | 6 + - mm/filemap.c | 6 + - mm/hugetlb.c | 4 + - mm/init-mm.c | 3 + - mm/internal.h | 2 +- - mm/khugepaged.c | 10 +- - mm/memory.c | 187 +++++++++++++++---- - mm/mmap.c | 48 +++-- - mm/mprotect.c | 51 ++++- - mm/mremap.c | 1 + - mm/rmap.c | 31 +-- - mm/vmstat.c | 6 + - tools/testing/selftests/mm/userfaultfd.c | 45 ++++- - 34 files changed, 811 insertions(+), 113 deletions(-) - -diff --git a/Documentation/admin-guide/mm/userfaultfd.rst b/Documentation/admin-guide/mm/userfaultfd.rst -index 7dc823b56ca4..bd2226299583 100644 ---- a/Documentation/admin-guide/mm/userfaultfd.rst -+++ b/Documentation/admin-guide/mm/userfaultfd.rst -@@ -219,6 +219,23 @@ former will have ``UFFD_PAGEFAULT_FLAG_WP`` set, the latter - you still need to supply a page when ``UFFDIO_REGISTER_MODE_MISSING`` was - used. - -+Userfaultfd write-protect mode currently behave differently on none ptes -+(when e.g. page is missing) over different types of memories. -+ -+For anonymous memory, ``ioctl(UFFDIO_WRITEPROTECT)`` will ignore none ptes -+(e.g. when pages are missing and not populated). For file-backed memories -+like shmem and hugetlbfs, none ptes will be write protected just like a -+present pte. In other words, there will be a userfaultfd write fault -+message generated when writing to a missing page on file typed memories, -+as long as the page range was write-protected before. 
Such a message will -+not be generated on anonymous memories by default. -+ -+If the application wants to be able to write protect none ptes on anonymous -+memory, one can pre-populate the memory with e.g. MADV_POPULATE_READ. On -+newer kernels, one can also detect the feature UFFD_FEATURE_WP_UNPOPULATED -+and set the feature bit in advance to make sure none ptes will also be -+write protected even upon anonymous memory. -+ - QEMU/KVM - ======== - -diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig -index 1023e896d46b..6f104c829731 100644 ---- a/arch/arm64/Kconfig -+++ b/arch/arm64/Kconfig -@@ -95,6 +95,7 @@ config ARM64 - select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 - select ARCH_SUPPORTS_NUMA_BALANCING - select ARCH_SUPPORTS_PAGE_TABLE_CHECK -+ select ARCH_SUPPORTS_PER_VMA_LOCK - select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT - select ARCH_WANT_DEFAULT_BPF_JIT - select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT -diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c -index f4cb0f85ccf4..9e0db5c387e3 100644 ---- a/arch/arm64/mm/fault.c -+++ b/arch/arm64/mm/fault.c -@@ -535,6 +535,9 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, - unsigned long vm_flags; - unsigned int mm_flags = FAULT_FLAG_DEFAULT; - unsigned long addr = untagged_addr(far); -+#ifdef CONFIG_PER_VMA_LOCK -+ struct vm_area_struct *vma; -+#endif - - if (kprobe_page_fault(regs, esr)) - return 0; -@@ -585,6 +588,36 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, - - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); - -+#ifdef CONFIG_PER_VMA_LOCK -+ if (!(mm_flags & FAULT_FLAG_USER)) -+ goto lock_mmap; -+ -+ vma = lock_vma_under_rcu(mm, addr); -+ if (!vma) -+ goto lock_mmap; -+ -+ if (!(vma->vm_flags & vm_flags)) { -+ vma_end_read(vma); -+ goto lock_mmap; -+ } -+ fault = handle_mm_fault(vma, addr & PAGE_MASK, -+ mm_flags | FAULT_FLAG_VMA_LOCK, regs); -+ vma_end_read(vma); -+ -+ if (!(fault & VM_FAULT_RETRY)) { -+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS); -+ goto done; -+ } -+ count_vm_vma_lock_event(VMA_LOCK_RETRY); -+ -+ /* Quick path to respond to signals */ -+ if (fault_signal_pending(fault, regs)) { -+ if (!user_mode(regs)) -+ goto no_context; -+ return 0; -+ } -+lock_mmap: -+#endif /* CONFIG_PER_VMA_LOCK */ - /* - * As per x86, we may deadlock here. However, since the kernel only - * validly references user space from well defined areas of the code, -@@ -628,6 +661,9 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, - } - mmap_read_unlock(mm); - -+#ifdef CONFIG_PER_VMA_LOCK -+done: -+#endif - /* - * Handle the "normal" (no error) case first. 
- */ -diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c -index af46aa88422b..531177a4ee08 100644 ---- a/arch/powerpc/mm/fault.c -+++ b/arch/powerpc/mm/fault.c -@@ -474,6 +474,40 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, - if (is_exec) - flags |= FAULT_FLAG_INSTRUCTION; - -+#ifdef CONFIG_PER_VMA_LOCK -+ if (!(flags & FAULT_FLAG_USER)) -+ goto lock_mmap; -+ -+ vma = lock_vma_under_rcu(mm, address); -+ if (!vma) -+ goto lock_mmap; -+ -+ if (unlikely(access_pkey_error(is_write, is_exec, -+ (error_code & DSISR_KEYFAULT), vma))) { -+ vma_end_read(vma); -+ goto lock_mmap; -+ } -+ -+ if (unlikely(access_error(is_write, is_exec, vma))) { -+ vma_end_read(vma); -+ goto lock_mmap; -+ } -+ -+ fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs); -+ vma_end_read(vma); -+ -+ if (!(fault & VM_FAULT_RETRY)) { -+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS); -+ goto done; -+ } -+ count_vm_vma_lock_event(VMA_LOCK_RETRY); -+ -+ if (fault_signal_pending(fault, regs)) -+ return user_mode(regs) ? 0 : SIGBUS; -+ -+lock_mmap: -+#endif /* CONFIG_PER_VMA_LOCK */ -+ - /* When running in the kernel we expect faults to occur only to - * addresses in user space. All other faults represent errors in the - * kernel and should generate an OOPS. Unfortunately, in the case of an -@@ -550,6 +584,9 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, - - mmap_read_unlock(current->mm); - -+#ifdef CONFIG_PER_VMA_LOCK -+done: -+#endif - if (unlikely(fault & VM_FAULT_ERROR)) - return mm_fault_error(regs, address, fault); - -diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig -index ae248a161b43..70a46acc70d6 100644 ---- a/arch/powerpc/platforms/powernv/Kconfig -+++ b/arch/powerpc/platforms/powernv/Kconfig -@@ -16,6 +16,7 @@ config PPC_POWERNV - select PPC_DOORBELL - select MMU_NOTIFIER - select FORCE_SMP -+ select ARCH_SUPPORTS_PER_VMA_LOCK - default y - - config OPAL_PRD -diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig -index 21b22bf16ce6..4ebf2ef2845d 100644 ---- a/arch/powerpc/platforms/pseries/Kconfig -+++ b/arch/powerpc/platforms/pseries/Kconfig -@@ -22,6 +22,7 @@ config PPC_PSERIES - select HOTPLUG_CPU - select FORCE_SMP - select SWIOTLB -+ select ARCH_SUPPORTS_PER_VMA_LOCK - default y - - config PARAVIRT -diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig -index 9809c74e1240..548b5b587003 100644 ---- a/arch/s390/Kconfig -+++ b/arch/s390/Kconfig -@@ -120,6 +120,7 @@ config S390 - select ARCH_SUPPORTS_DEBUG_PAGEALLOC - select ARCH_SUPPORTS_HUGETLBFS - select ARCH_SUPPORTS_NUMA_BALANCING -+ select ARCH_SUPPORTS_PER_VMA_LOCK - select ARCH_USE_BUILTIN_BSWAP - select ARCH_USE_CMPXCHG_LOCKREF - select ARCH_WANTS_DYNAMIC_TASK_STRUCT -diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c -index a2632fd97d00..b65144c392b0 100644 ---- a/arch/s390/mm/fault.c -+++ b/arch/s390/mm/fault.c -@@ -407,6 +407,30 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) - access = VM_WRITE; - if (access == VM_WRITE) - flags |= FAULT_FLAG_WRITE; -+#ifdef CONFIG_PER_VMA_LOCK -+ if (!(flags & FAULT_FLAG_USER)) -+ goto lock_mmap; -+ vma = lock_vma_under_rcu(mm, address); -+ if (!vma) -+ goto lock_mmap; -+ if (!(vma->vm_flags & access)) { -+ vma_end_read(vma); -+ goto lock_mmap; -+ } -+ fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs); -+ vma_end_read(vma); -+ if (!(fault & VM_FAULT_RETRY)) { -+ 
count_vm_vma_lock_event(VMA_LOCK_SUCCESS); -+ goto out; -+ } -+ count_vm_vma_lock_event(VMA_LOCK_RETRY); -+ /* Quick path to respond to signals */ -+ if (fault_signal_pending(fault, regs)) { -+ fault = VM_FAULT_SIGNAL; -+ goto out; -+ } -+lock_mmap: -+#endif /* CONFIG_PER_VMA_LOCK */ - mmap_read_lock(mm); - - gmap = NULL; -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index a825bf031f49..df21fba77db1 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -27,6 +27,7 @@ config X86_64 - # Options that are inherently 64-bit kernel only: - select ARCH_HAS_GIGANTIC_PAGE - select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 -+ select ARCH_SUPPORTS_PER_VMA_LOCK - select ARCH_USE_CMPXCHG_LOCKREF - select HAVE_ARCH_SOFT_DIRTY - select MODULES_USE_ELF_RELA -diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c -index a498ae1fbe66..e4399983c50c 100644 ---- a/arch/x86/mm/fault.c -+++ b/arch/x86/mm/fault.c -@@ -19,6 +19,7 @@ - #include /* faulthandler_disabled() */ - #include /* efi_crash_gracefully_on_page_fault()*/ - #include -+#include /* find_and_lock_vma() */ - - #include /* boot_cpu_has, ... */ - #include /* dotraplinkage, ... */ -@@ -1333,6 +1334,38 @@ void do_user_addr_fault(struct pt_regs *regs, - } - #endif - -+#ifdef CONFIG_PER_VMA_LOCK -+ if (!(flags & FAULT_FLAG_USER)) -+ goto lock_mmap; -+ -+ vma = lock_vma_under_rcu(mm, address); -+ if (!vma) -+ goto lock_mmap; -+ -+ if (unlikely(access_error(error_code, vma))) { -+ vma_end_read(vma); -+ goto lock_mmap; -+ } -+ fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs); -+ vma_end_read(vma); -+ -+ if (!(fault & VM_FAULT_RETRY)) { -+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS); -+ goto done; -+ } -+ count_vm_vma_lock_event(VMA_LOCK_RETRY); -+ -+ /* Quick path to respond to signals */ -+ if (fault_signal_pending(fault, regs)) { -+ if (!user_mode(regs)) -+ kernelmode_fixup_or_oops(regs, error_code, address, -+ SIGBUS, BUS_ADRERR, -+ ARCH_DEFAULT_PKEY); -+ return; -+ } -+lock_mmap: -+#endif /* CONFIG_PER_VMA_LOCK */ -+ - /* - * Kernel-mode access to the user address space should only occur - * on well-defined single instructions listed in the exception -@@ -1433,6 +1466,9 @@ void do_user_addr_fault(struct pt_regs *regs, - } - - mmap_read_unlock(mm); -+#ifdef CONFIG_PER_VMA_LOCK -+done: -+#endif - if (likely(!(fault & VM_FAULT_ERROR))) - return; - -diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c -index 40f9e1a2ebdd..7cecde5026dd 100644 ---- a/fs/userfaultfd.c -+++ b/fs/userfaultfd.c -@@ -108,6 +108,21 @@ static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx) - return ctx->features & UFFD_FEATURE_INITIALIZED; - } - -+/* -+ * Whether WP_UNPOPULATED is enabled on the uffd context. It is only -+ * meaningful when userfaultfd_wp()==true on the vma and when it's -+ * anonymous. 
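-+ *
-+ * Editor's illustration (not part of the original patch): userspace
-+ * opts in by negotiating the feature through UFFDIO_API before any
-+ * UFFDIO_REGISTER, along these lines (wp_unpop is a placeholder
-+ * flag):
-+ *
-+ *   struct uffdio_api api = {
-+ *       .api = UFFD_API,
-+ *       .features = UFFD_FEATURE_WP_UNPOPULATED,
-+ *   };
-+ *   if (!ioctl(uffd, UFFDIO_API, &api) &&
-+ *       (api.features & UFFD_FEATURE_WP_UNPOPULATED))
-+ *       wp_unpop = true;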
-+ */ -+bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma) -+{ -+ struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx; -+ -+ if (!ctx) -+ return false; -+ -+ return ctx->features & UFFD_FEATURE_WP_UNPOPULATED; -+} -+ - static void userfaultfd_set_vm_flags(struct vm_area_struct *vma, - vm_flags_t flags) - { -@@ -1973,6 +1988,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, - #endif - #ifndef CONFIG_PTE_MARKER_UFFD_WP - uffdio_api.features &= ~UFFD_FEATURE_WP_HUGETLBFS_SHMEM; -+ uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED; - #endif - uffdio_api.ioctls = UFFD_API_IOCTLS; - ret = -EFAULT; -diff --git a/include/linux/mm.h b/include/linux/mm.h -index 1f79667824eb..c4c9de7d1916 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -256,6 +256,8 @@ void setup_initial_init_mm(void *start_code, void *end_code, - struct vm_area_struct *vm_area_alloc(struct mm_struct *); - struct vm_area_struct *vm_area_dup(struct vm_area_struct *); - void vm_area_free(struct vm_area_struct *); -+/* Use only if VMA has no other users */ -+void __vm_area_free(struct vm_area_struct *vma); - - #ifndef CONFIG_MMU - extern struct rb_root nommu_region_tree; -@@ -478,7 +480,8 @@ static inline bool fault_flag_allow_retry_first(enum fault_flag flags) - { FAULT_FLAG_USER, "USER" }, \ - { FAULT_FLAG_REMOTE, "REMOTE" }, \ - { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }, \ -- { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" } -+ { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" }, \ -+ { FAULT_FLAG_VMA_LOCK, "VMA_LOCK" } - - /* - * vm_fault is filled by the pagefault handler and passed to the vma's -@@ -623,6 +626,117 @@ struct vm_operations_struct { - unsigned long addr); - }; - -+#ifdef CONFIG_PER_VMA_LOCK -+/* -+ * Try to read-lock a vma. The function is allowed to occasionally yield false -+ * locked result to avoid performance overhead, in which case we fall back to -+ * using mmap_lock. The function should never yield false unlocked result. -+ */ -+static inline bool vma_start_read(struct vm_area_struct *vma) -+{ -+ /* Check before locking. A race might cause false locked result. */ -+ if (vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq)) -+ return false; -+ -+ if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0)) -+ return false; -+ -+ /* -+ * Overflow might produce false locked result. -+ * False unlocked result is impossible because we modify and check -+ * vma->vm_lock_seq under vma->vm_lock protection and mm->mm_lock_seq -+ * modification invalidates all existing locks. -+ */ -+ if (unlikely(vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))) { -+ up_read(&vma->vm_lock->lock); -+ return false; -+ } -+ return true; -+} -+ -+static inline void vma_end_read(struct vm_area_struct *vma) -+{ -+ rcu_read_lock(); /* keeps vma alive till the end of up_read */ -+ up_read(&vma->vm_lock->lock); -+ rcu_read_unlock(); -+} -+ -+static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) -+{ -+ mmap_assert_write_locked(vma->vm_mm); -+ -+ /* -+ * current task is holding mmap_write_lock, both vma->vm_lock_seq and -+ * mm->mm_lock_seq can't be concurrently modified. 
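-+ *
-+ * Editor's sketch (not in the original patch) of the protocol this
-+ * checks against: vma_start_write() marks a vma write-locked via
-+ *
-+ *   down_write(&vma->vm_lock->lock);
-+ *   vma->vm_lock_seq = mm->mm_lock_seq;
-+ *   up_write(&vma->vm_lock->lock);
-+ *
-+ * and vma_end_write_all(), called from mmap_write_unlock(), bumps
-+ * mm->mm_lock_seq, unlocking every vma so marked in one step.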
-+ */ -+ *mm_lock_seq = READ_ONCE(vma->vm_mm->mm_lock_seq); -+ return (vma->vm_lock_seq == *mm_lock_seq); -+} -+ -+static inline void vma_start_write(struct vm_area_struct *vma) -+{ -+ int mm_lock_seq; -+ -+ if (__is_vma_write_locked(vma, &mm_lock_seq)) -+ return; -+ -+ down_write(&vma->vm_lock->lock); -+ vma->vm_lock_seq = mm_lock_seq; -+ up_write(&vma->vm_lock->lock); -+} -+ -+static inline bool vma_try_start_write(struct vm_area_struct *vma) -+{ -+ int mm_lock_seq; -+ -+ if (__is_vma_write_locked(vma, &mm_lock_seq)) -+ return true; -+ -+ if (!down_write_trylock(&vma->vm_lock->lock)) -+ return false; -+ -+ vma->vm_lock_seq = mm_lock_seq; -+ up_write(&vma->vm_lock->lock); -+ return true; -+} -+ -+static inline void vma_assert_write_locked(struct vm_area_struct *vma) -+{ -+ int mm_lock_seq; -+ -+ VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma); -+} -+ -+static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached) -+{ -+ /* When detaching vma should be write-locked */ -+ if (detached) -+ vma_assert_write_locked(vma); -+ vma->detached = detached; -+} -+ -+struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, -+ unsigned long address); -+ -+#else /* CONFIG_PER_VMA_LOCK */ -+ -+static inline void vma_init_lock(struct vm_area_struct *vma) {} -+static inline bool vma_start_read(struct vm_area_struct *vma) -+ { return false; } -+static inline void vma_end_read(struct vm_area_struct *vma) {} -+static inline void vma_start_write(struct vm_area_struct *vma) {} -+static inline bool vma_try_start_write(struct vm_area_struct *vma) -+ { return true; } -+static inline void vma_assert_write_locked(struct vm_area_struct *vma) {} -+static inline void vma_mark_detached(struct vm_area_struct *vma, -+ bool detached) {} -+ -+#endif /* CONFIG_PER_VMA_LOCK */ -+ -+/* -+ * WARNING: vma_init does not initialize vma->vm_lock. -+ * Use vm_area_alloc()/vm_area_free() if vma needs locking. 
-+ */ - static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) - { - static const struct vm_operations_struct dummy_vm_ops = {}; -@@ -631,6 +745,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) - vma->vm_mm = mm; - vma->vm_ops = &dummy_vm_ops; - INIT_LIST_HEAD(&vma->anon_vma_chain); -+ vma_mark_detached(vma, false); - } - - /* Use when VMA is not part of the VMA tree and needs no locking */ -@@ -644,28 +759,28 @@ static inline void vm_flags_init(struct vm_area_struct *vma, - static inline void vm_flags_reset(struct vm_area_struct *vma, - vm_flags_t flags) - { -- mmap_assert_write_locked(vma->vm_mm); -+ vma_start_write(vma); - vm_flags_init(vma, flags); - } - - static inline void vm_flags_reset_once(struct vm_area_struct *vma, - vm_flags_t flags) - { -- mmap_assert_write_locked(vma->vm_mm); -+ vma_start_write(vma); - WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags); - } - - static inline void vm_flags_set(struct vm_area_struct *vma, - vm_flags_t flags) - { -- mmap_assert_write_locked(vma->vm_mm); -+ vma_start_write(vma); - ACCESS_PRIVATE(vma, __vm_flags) |= flags; - } - - static inline void vm_flags_clear(struct vm_area_struct *vma, - vm_flags_t flags) - { -- mmap_assert_write_locked(vma->vm_mm); -+ vma_start_write(vma); - ACCESS_PRIVATE(vma, __vm_flags) &= ~flags; - } - -@@ -686,7 +801,7 @@ static inline void __vm_flags_mod(struct vm_area_struct *vma, - static inline void vm_flags_mod(struct vm_area_struct *vma, - vm_flags_t set, vm_flags_t clear) - { -- mmap_assert_write_locked(vma->vm_mm); -+ vma_start_write(vma); - __vm_flags_mod(vma, set, clear); - } - -diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h -index de1e622dd366..0e1d239a882c 100644 ---- a/include/linux/mm_inline.h -+++ b/include/linux/mm_inline.h -@@ -557,6 +557,12 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, - /* The current status of the pte should be "cleared" before calling */ - WARN_ON_ONCE(!pte_none(*pte)); - -+ /* -+ * NOTE: userfaultfd_wp_unpopulated() doesn't need this whole -+ * thing, because when zapping either it means it's dropping the -+ * page, or in TTU where the present pte will be quickly replaced -+ * with a swap pte. There's no way of leaking the bit. -+ */ - if (vma_is_anonymous(vma) || !userfaultfd_wp(vma)) - return; - -diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h -index 22b2ac82bffd..ef74ea892c5b 100644 ---- a/include/linux/mm_types.h -+++ b/include/linux/mm_types.h -@@ -471,6 +471,10 @@ struct anon_vma_name { - char name[]; - }; - -+struct vma_lock { -+ struct rw_semaphore lock; -+}; -+ - /* - * This struct describes a virtual memory area. There is one of these - * per VM-area/task. A VM area is any part of the process virtual memory -@@ -480,9 +484,16 @@ struct anon_vma_name { - struct vm_area_struct { - /* The first cache line has the info for VMA tree walking. */ - -- unsigned long vm_start; /* Our start address within vm_mm. */ -- unsigned long vm_end; /* The first byte after our end address -- within vm_mm. */ -+ union { -+ struct { -+ /* VMA covers [vm_start; vm_end) addresses within mm */ -+ unsigned long vm_start; -+ unsigned long vm_end; -+ }; -+#ifdef CONFIG_PER_VMA_LOCK -+ struct rcu_head vm_rcu; /* Used for deferred freeing. */ -+#endif -+ }; - - struct mm_struct *vm_mm; /* The address space we belong to. 
*/ - -@@ -501,6 +512,14 @@ struct vm_area_struct { - vm_flags_t __private __vm_flags; - }; - -+#ifdef CONFIG_PER_VMA_LOCK -+ int vm_lock_seq; -+ struct vma_lock *vm_lock; -+ -+ /* Flag to indicate areas detached from the mm->mm_mt tree */ -+ bool detached; -+#endif -+ - /* - * For areas with an address space and backing store, - * linkage into the address_space->i_mmap interval tree. -@@ -637,6 +656,9 @@ struct mm_struct { - * init_mm.mmlist, and are protected - * by mmlist_lock - */ -+#ifdef CONFIG_PER_VMA_LOCK -+ int mm_lock_seq; -+#endif - - - unsigned long hiwater_rss; /* High-watermark of RSS usage */ -@@ -1042,6 +1064,7 @@ typedef struct { - * mapped after the fault. - * @FAULT_FLAG_ORIG_PTE_VALID: whether the fault has vmf->orig_pte cached. - * We should only access orig_pte if this flag set. -+ * @FAULT_FLAG_VMA_LOCK: The fault is handled under VMA lock. - * - * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify - * whether we would allow page faults to retry by specifying these two -@@ -1079,6 +1102,7 @@ enum fault_flag { - FAULT_FLAG_INTERRUPTIBLE = 1 << 9, - FAULT_FLAG_UNSHARE = 1 << 10, - FAULT_FLAG_ORIG_PTE_VALID = 1 << 11, -+ FAULT_FLAG_VMA_LOCK = 1 << 12, - }; - - typedef unsigned int __bitwise zap_flags_t; -diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h -index 96e113e23d04..aab8f1b28d26 100644 ---- a/include/linux/mmap_lock.h -+++ b/include/linux/mmap_lock.h -@@ -60,6 +60,29 @@ static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write) - - #endif /* CONFIG_TRACING */ - -+static inline void mmap_assert_locked(struct mm_struct *mm) -+{ -+ lockdep_assert_held(&mm->mmap_lock); -+ VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); -+} -+ -+static inline void mmap_assert_write_locked(struct mm_struct *mm) -+{ -+ lockdep_assert_held_write(&mm->mmap_lock); -+ VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); -+} -+ -+#ifdef CONFIG_PER_VMA_LOCK -+static inline void vma_end_write_all(struct mm_struct *mm) -+{ -+ mmap_assert_write_locked(mm); -+ /* No races during update due to exclusive mmap_lock being held */ -+ WRITE_ONCE(mm->mm_lock_seq, mm->mm_lock_seq + 1); -+} -+#else -+static inline void vma_end_write_all(struct mm_struct *mm) {} -+#endif -+ - static inline void mmap_init_lock(struct mm_struct *mm) - { - init_rwsem(&mm->mmap_lock); -@@ -102,12 +125,14 @@ static inline bool mmap_write_trylock(struct mm_struct *mm) - static inline void mmap_write_unlock(struct mm_struct *mm) - { - __mmap_lock_trace_released(mm, true); -+ vma_end_write_all(mm); - up_write(&mm->mmap_lock); - } - - static inline void mmap_write_downgrade(struct mm_struct *mm) - { - __mmap_lock_trace_acquire_returned(mm, false, true); -+ vma_end_write_all(mm); - downgrade_write(&mm->mmap_lock); - } - -@@ -150,18 +175,6 @@ static inline void mmap_read_unlock_non_owner(struct mm_struct *mm) - up_read_non_owner(&mm->mmap_lock); - } - --static inline void mmap_assert_locked(struct mm_struct *mm) --{ -- lockdep_assert_held(&mm->mmap_lock); -- VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); --} -- --static inline void mmap_assert_write_locked(struct mm_struct *mm) --{ -- lockdep_assert_held_write(&mm->mmap_lock); -- VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); --} -- - static inline int mmap_lock_is_contended(struct mm_struct *mm) - { - return rwsem_is_contended(&mm->mmap_lock); -diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h -index 3767f18114ef..0cf8880219da 100644 ---- a/include/linux/userfaultfd_k.h -+++ 
b/include/linux/userfaultfd_k.h -@@ -179,6 +179,7 @@ extern int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start, - unsigned long end, struct list_head *uf); - extern void userfaultfd_unmap_complete(struct mm_struct *mm, - struct list_head *uf); -+extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma); - - #else /* CONFIG_USERFAULTFD */ - -@@ -274,8 +275,30 @@ static inline bool uffd_disable_fault_around(struct vm_area_struct *vma) - return false; - } - -+static inline bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma) -+{ -+ return false; -+} -+ - #endif /* CONFIG_USERFAULTFD */ - -+static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma) -+{ -+ /* Only wr-protect mode uses pte markers */ -+ if (!userfaultfd_wp(vma)) -+ return false; -+ -+ /* File-based uffd-wp always need markers */ -+ if (!vma_is_anonymous(vma)) -+ return true; -+ -+ /* -+ * Anonymous uffd-wp only needs the markers if WP_UNPOPULATED -+ * enabled (to apply markers on zero pages). -+ */ -+ return userfaultfd_wp_unpopulated(vma); -+} -+ - static inline bool pte_marker_entry_uffd_wp(swp_entry_t entry) - { - #ifdef CONFIG_PTE_MARKER_UFFD_WP -diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h -index 7f5d1caf5890..8abfa1240040 100644 ---- a/include/linux/vm_event_item.h -+++ b/include/linux/vm_event_item.h -@@ -149,6 +149,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, - #ifdef CONFIG_X86 - DIRECT_MAP_LEVEL2_SPLIT, - DIRECT_MAP_LEVEL3_SPLIT, -+#endif -+#ifdef CONFIG_PER_VMA_LOCK_STATS -+ VMA_LOCK_SUCCESS, -+ VMA_LOCK_ABORT, -+ VMA_LOCK_RETRY, -+ VMA_LOCK_MISS, - #endif - NR_VM_EVENT_ITEMS - }; -diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h -index 19cf5b6892ce..fed855bae6d8 100644 ---- a/include/linux/vmstat.h -+++ b/include/linux/vmstat.h -@@ -125,6 +125,12 @@ static inline void vm_events_fold_cpu(int cpu) - #define count_vm_tlb_events(x, y) do { (void)(y); } while (0) - #endif - -+#ifdef CONFIG_PER_VMA_LOCK_STATS -+#define count_vm_vma_lock_event(x) count_vm_event(x) -+#else -+#define count_vm_vma_lock_event(x) do {} while (0) -+#endif -+ - #define __count_zid_vm_events(item, zid, delta) \ - __count_vm_events(item##_NORMAL - ZONE_NORMAL + zid, delta) - -diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h -index 005e5e306266..90c958952bfc 100644 ---- a/include/uapi/linux/userfaultfd.h -+++ b/include/uapi/linux/userfaultfd.h -@@ -38,7 +38,8 @@ - UFFD_FEATURE_MINOR_HUGETLBFS | \ - UFFD_FEATURE_MINOR_SHMEM | \ - UFFD_FEATURE_EXACT_ADDRESS | \ -- UFFD_FEATURE_WP_HUGETLBFS_SHMEM) -+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \ -+ UFFD_FEATURE_WP_UNPOPULATED) - #define UFFD_API_IOCTLS \ - ((__u64)1 << _UFFDIO_REGISTER | \ - (__u64)1 << _UFFDIO_UNREGISTER | \ -@@ -203,6 +204,12 @@ struct uffdio_api { - * - * UFFD_FEATURE_WP_HUGETLBFS_SHMEM indicates that userfaultfd - * write-protection mode is supported on both shmem and hugetlbfs. -+ * -+ * UFFD_FEATURE_WP_UNPOPULATED indicates that userfaultfd -+ * write-protection mode will always apply to unpopulated pages -+ * (i.e. empty ptes). This will be the default behavior for shmem -+ * & hugetlbfs, so this flag only affects anonymous memory behavior -+ * when userfault write-protection mode is registered. 
- */ - #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) - #define UFFD_FEATURE_EVENT_FORK (1<<1) -@@ -217,6 +224,7 @@ struct uffdio_api { - #define UFFD_FEATURE_MINOR_SHMEM (1<<10) - #define UFFD_FEATURE_EXACT_ADDRESS (1<<11) - #define UFFD_FEATURE_WP_HUGETLBFS_SHMEM (1<<12) -+#define UFFD_FEATURE_WP_UNPOPULATED (1<<13) - __u64 features; - - __u64 ioctls; -diff --git a/kernel/fork.c b/kernel/fork.c -index 349945168239..ebd353730887 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -455,13 +455,49 @@ static struct kmem_cache *vm_area_cachep; - /* SLAB cache for mm_struct structures (tsk->mm) */ - static struct kmem_cache *mm_cachep; - -+#ifdef CONFIG_PER_VMA_LOCK -+ -+/* SLAB cache for vm_area_struct.lock */ -+static struct kmem_cache *vma_lock_cachep; -+ -+static bool vma_lock_alloc(struct vm_area_struct *vma) -+{ -+ vma->vm_lock = kmem_cache_alloc(vma_lock_cachep, GFP_KERNEL); -+ if (!vma->vm_lock) -+ return false; -+ -+ init_rwsem(&vma->vm_lock->lock); -+ vma->vm_lock_seq = -1; -+ -+ return true; -+} -+ -+static inline void vma_lock_free(struct vm_area_struct *vma) -+{ -+ kmem_cache_free(vma_lock_cachep, vma->vm_lock); -+} -+ -+#else /* CONFIG_PER_VMA_LOCK */ -+ -+static inline bool vma_lock_alloc(struct vm_area_struct *vma) { return true; } -+static inline void vma_lock_free(struct vm_area_struct *vma) {} -+ -+#endif /* CONFIG_PER_VMA_LOCK */ -+ - struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) - { - struct vm_area_struct *vma; - - vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); -- if (vma) -- vma_init(vma, mm); -+ if (!vma) -+ return NULL; -+ -+ vma_init(vma, mm); -+ if (!vma_lock_alloc(vma)) { -+ kmem_cache_free(vm_area_cachep, vma); -+ return NULL; -+ } -+ - return vma; - } - -@@ -469,26 +505,54 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) - { - struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); - -- if (new) { -- ASSERT_EXCLUSIVE_WRITER(orig->vm_flags); -- ASSERT_EXCLUSIVE_WRITER(orig->vm_file); -- /* -- * orig->shared.rb may be modified concurrently, but the clone -- * will be reinitialized. -- */ -- data_race(memcpy(new, orig, sizeof(*new))); -- INIT_LIST_HEAD(&new->anon_vma_chain); -- dup_anon_vma_name(orig, new); -+ if (!new) -+ return NULL; -+ -+ ASSERT_EXCLUSIVE_WRITER(orig->vm_flags); -+ ASSERT_EXCLUSIVE_WRITER(orig->vm_file); -+ /* -+ * orig->shared.rb may be modified concurrently, but the clone -+ * will be reinitialized. -+ */ -+ data_race(memcpy(new, orig, sizeof(*new))); -+ if (!vma_lock_alloc(new)) { -+ kmem_cache_free(vm_area_cachep, new); -+ return NULL; - } -+ INIT_LIST_HEAD(&new->anon_vma_chain); -+ dup_anon_vma_name(orig, new); -+ - return new; - } - --void vm_area_free(struct vm_area_struct *vma) -+void __vm_area_free(struct vm_area_struct *vma) - { - free_anon_vma_name(vma); -+ vma_lock_free(vma); - kmem_cache_free(vm_area_cachep, vma); - } - -+#ifdef CONFIG_PER_VMA_LOCK -+static void vm_area_free_rcu_cb(struct rcu_head *head) -+{ -+ struct vm_area_struct *vma = container_of(head, struct vm_area_struct, -+ vm_rcu); -+ -+ /* The vma should not be locked while being destroyed. 
*/ -+ VM_BUG_ON_VMA(rwsem_is_locked(&vma->vm_lock->lock), vma); -+ __vm_area_free(vma); -+} -+#endif -+ -+void vm_area_free(struct vm_area_struct *vma) -+{ -+#ifdef CONFIG_PER_VMA_LOCK -+ call_rcu(&vma->vm_rcu, vm_area_free_rcu_cb); -+#else -+ __vm_area_free(vma); -+#endif -+} -+ - static void account_kernel_stack(struct task_struct *tsk, int account) - { - if (IS_ENABLED(CONFIG_VMAP_STACK)) { -@@ -1132,6 +1196,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, - seqcount_init(&mm->write_protect_seq); - mmap_init_lock(mm); - INIT_LIST_HEAD(&mm->mmlist); -+#ifdef CONFIG_PER_VMA_LOCK -+ mm->mm_lock_seq = 0; -+#endif - mm_pgtables_bytes_init(mm); - mm->map_count = 0; - mm->locked_vm = 0; -@@ -3074,6 +3141,9 @@ void __init proc_caches_init(void) - NULL); - - vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); -+#ifdef CONFIG_PER_VMA_LOCK -+ vma_lock_cachep = KMEM_CACHE(vma_lock, SLAB_PANIC|SLAB_ACCOUNT); -+#endif - mmap_init(); - nsproxy_cache_init(); - } -diff --git a/mm/Kconfig b/mm/Kconfig -index cf2e47030fe8..459af2123189 100644 ---- a/mm/Kconfig -+++ b/mm/Kconfig -@@ -1202,6 +1202,18 @@ config LRU_GEN_STATS - This option has a per-memcg and per-node memory overhead. - # } - -+config ARCH_SUPPORTS_PER_VMA_LOCK -+ def_bool n -+ -+config PER_VMA_LOCK -+ def_bool y -+ depends on ARCH_SUPPORTS_PER_VMA_LOCK && MMU && SMP -+ help -+ Allow per-vma locking during page fault handling. -+ -+ This feature allows locking each virtual memory area separately when -+ handling page faults instead of taking mmap_lock. -+ - source "mm/damon/Kconfig" - - endmenu -diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug -index c3547a373c9c..4965a7333a3f 100644 ---- a/mm/Kconfig.debug -+++ b/mm/Kconfig.debug -@@ -279,3 +279,9 @@ config DEBUG_KMEMLEAK_AUTO_SCAN - - If unsure, say Y. - -+config PER_VMA_LOCK_STATS -+ bool "Statistics for per-vma locks" -+ depends on PER_VMA_LOCK -+ default y -+ help -+ Statistics for per-vma locks. -diff --git a/mm/filemap.c b/mm/filemap.c -index 2723104cc06a..7d898f26755b 100644 ---- a/mm/filemap.c -+++ b/mm/filemap.c -@@ -1706,6 +1706,8 @@ static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait) - * mmap_lock has been released (mmap_read_unlock(), unless flags had both - * FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in - * which case mmap_lock is still held. -+ * If flags had FAULT_FLAG_VMA_LOCK set, meaning the operation is performed -+ * with VMA lock only, the VMA lock is still held. - * - * If neither ALLOW_RETRY nor KILLABLE are set, will always return true - * with the folio locked and the mmap_lock unperturbed. -@@ -1713,6 +1715,10 @@ static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait) - bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm, - unsigned int flags) - { -+ /* Can't do this if not holding mmap_lock */ -+ if (flags & FAULT_FLAG_VMA_LOCK) -+ return false; -+ - if (fault_flag_allow_retry_first(flags)) { - /* - * CAUTION! 
In this case, mmap_lock is not released -diff --git a/mm/hugetlb.c b/mm/hugetlb.c -index 245038a9fe4e..4d860b53a14a 100644 ---- a/mm/hugetlb.c -+++ b/mm/hugetlb.c -@@ -6004,6 +6004,10 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, - int need_wait_lock = 0; - unsigned long haddr = address & huge_page_mask(h); - -+ /* TODO: Handle faults under the VMA lock */ -+ if (flags & FAULT_FLAG_VMA_LOCK) -+ return VM_FAULT_RETRY; -+ - /* - * Serialize hugepage allocation and instantiation, so that we don't - * get spurious allocation failures if two CPUs race to instantiate -diff --git a/mm/init-mm.c b/mm/init-mm.c -index c9327abb771c..33269314e060 100644 ---- a/mm/init-mm.c -+++ b/mm/init-mm.c -@@ -37,6 +37,9 @@ struct mm_struct init_mm = { - .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), - .arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock), - .mmlist = LIST_HEAD_INIT(init_mm.mmlist), -+#ifdef CONFIG_PER_VMA_LOCK -+ .mm_lock_seq = 0, -+#endif - .user_ns = &init_user_ns, - .cpu_bitmap = CPU_BITS_NONE, - #ifdef CONFIG_IOMMU_SVA -diff --git a/mm/internal.h b/mm/internal.h -index 7920a8b7982e..0c455d6e4e3e 100644 ---- a/mm/internal.h -+++ b/mm/internal.h -@@ -105,7 +105,7 @@ void folio_activate(struct folio *folio); - - void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt, - struct vm_area_struct *start_vma, unsigned long floor, -- unsigned long ceiling); -+ unsigned long ceiling, bool mm_wr_locked); - void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte); - - struct zap_details; -diff --git a/mm/khugepaged.c b/mm/khugepaged.c -index 0ec69b96b497..624acc3f116a 100644 ---- a/mm/khugepaged.c -+++ b/mm/khugepaged.c -@@ -1053,6 +1053,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, - if (result != SCAN_SUCCEED) - goto out_up_write; - -+ vma_start_write(vma); - anon_vma_lock_write(vma->anon_vma); - - mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, address, -@@ -1176,7 +1177,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, - * enabled swap entries. Please see - * comment below for pte_uffd_wp(). - */ -- if (pte_swp_uffd_wp(pteval)) { -+ if (pte_swp_uffd_wp_any(pteval)) { - result = SCAN_PTE_UFFD_WP; - goto out_unmap; - } -@@ -1516,6 +1517,9 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, - goto drop_hpage; - } - -+ /* Lock the vma before taking i_mmap and page table locks */ -+ vma_start_write(vma); -+ - /* - * We need to lock the mapping so that from here on, only GUP-fast and - * hardware page walks can access the parts of the page tables that -@@ -1693,6 +1697,10 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff, - result = SCAN_PTE_MAPPED_HUGEPAGE; - if ((cc->is_khugepaged || is_target) && - mmap_write_trylock(mm)) { -+ /* trylock for the same lock inversion as above */ -+ if (!vma_try_start_write(vma)) -+ goto unlock_next; -+ - /* - * Re-check whether we have an ->anon_vma, because - * collapse_and_free_pmd() requires that either no -diff --git a/mm/memory.c b/mm/memory.c -index 01a23ad48a04..9deb0d0f3f7f 100644 ---- a/mm/memory.c -+++ b/mm/memory.c -@@ -104,6 +104,20 @@ EXPORT_SYMBOL(mem_map); - #endif - - static vm_fault_t do_fault(struct vm_fault *vmf); -+static vm_fault_t do_anonymous_page(struct vm_fault *vmf); -+static bool vmf_pte_changed(struct vm_fault *vmf); -+ -+/* -+ * Return true if the original pte was a uffd-wp pte marker (so the pte was -+ * wr-protected). 
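-+ *
-+ * Editor's note: a pte marker is a non-present, swap-style entry,
-+ * so pte_marker_uffd_wp() below effectively decodes (sketch,
-+ * assuming CONFIG_PTE_MARKER_UFFD_WP):
-+ *
-+ *   is_swap_pte(pte) &&
-+ *   is_pte_marker_entry(pte_to_swp_entry(pte)) &&
-+ *   (pte_marker_get(pte_to_swp_entry(pte)) & PTE_MARKER_UFFD_WP)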
-+ */ -+static bool vmf_orig_pte_uffd_wp(struct vm_fault *vmf) -+{ -+ if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID)) -+ return false; -+ -+ return pte_marker_uffd_wp(vmf->orig_pte); -+} - - /* - * A number of key systems in x86 including ioremap() rely on the assumption -@@ -348,7 +362,7 @@ void free_pgd_range(struct mmu_gather *tlb, - - void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt, - struct vm_area_struct *vma, unsigned long floor, -- unsigned long ceiling) -+ unsigned long ceiling, bool mm_wr_locked) - { - MA_STATE(mas, mt, vma->vm_end, vma->vm_end); - -@@ -366,6 +380,8 @@ void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt, - * Hide vma from rmap and truncate_pagecache before freeing - * pgtables - */ -+ if (mm_wr_locked) -+ vma_start_write(vma); - unlink_anon_vmas(vma); - unlink_file_vma(vma); - -@@ -380,6 +396,8 @@ void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt, - && !is_vm_hugetlb_page(next)) { - vma = next; - next = mas_find(&mas, ceiling - 1); -+ if (mm_wr_locked) -+ vma_start_write(vma); - unlink_anon_vmas(vma); - unlink_file_vma(vma); - } -@@ -1345,6 +1363,10 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma, - unsigned long addr, pte_t *pte, - struct zap_details *details, pte_t pteval) - { -+ /* Zap on anonymous always means dropping everything */ -+ if (vma_is_anonymous(vma)) -+ return; -+ - if (zap_drop_file_uffd_wp(details)) - return; - -@@ -1451,8 +1473,12 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, - continue; - rss[mm_counter(page)]--; - } else if (pte_marker_entry_uffd_wp(entry)) { -- /* Only drop the uffd-wp marker if explicitly requested */ -- if (!zap_drop_file_uffd_wp(details)) -+ /* -+ * For anon: always drop the marker; for file: only -+ * drop the marker if explicitly requested. -+ */ -+ if (!vma_is_anonymous(vma) && -+ !zap_drop_file_uffd_wp(details)) - continue; - } else if (is_hwpoison_entry(entry) || - is_swapin_error_entry(entry)) { -@@ -3322,6 +3348,9 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) - struct vm_area_struct *vma = vmf->vma; - struct folio *folio = NULL; - -+ if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vma)) -+ return VM_FAULT_RETRY; -+ - if (likely(!unshare)) { - if (userfaultfd_pte_wp(vma, *vmf->pte)) { - pte_unmap_unlock(vmf->pte, vmf->ptl); -@@ -3633,6 +3662,14 @@ static vm_fault_t pte_marker_clear(struct vm_fault *vmf) - return 0; - } - -+static vm_fault_t do_pte_missing(struct vm_fault *vmf) -+{ -+ if (vma_is_anonymous(vmf->vma)) -+ return do_anonymous_page(vmf); -+ else -+ return do_fault(vmf); -+} -+ - /* - * This is actually a page-missing access, but with uffd-wp special pte - * installed. It means this pte was wr-protected before being unmapped. -@@ -3643,11 +3680,10 @@ static vm_fault_t pte_marker_handle_uffd_wp(struct vm_fault *vmf) - * Just in case there're leftover special ptes even after the region - * got unregistered - we can simply clear them. 
- */ -- if (unlikely(!userfaultfd_wp(vmf->vma) || vma_is_anonymous(vmf->vma))) -+ if (unlikely(!userfaultfd_wp(vmf->vma))) - return pte_marker_clear(vmf); - -- /* do_fault() can handle pte markers too like none pte */ -- return do_fault(vmf); -+ return do_pte_missing(vmf); - } - - static vm_fault_t handle_pte_marker(struct vm_fault *vmf) -@@ -4012,6 +4048,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) - */ - static vm_fault_t do_anonymous_page(struct vm_fault *vmf) - { -+ bool uffd_wp = vmf_orig_pte_uffd_wp(vmf); - struct vm_area_struct *vma = vmf->vma; - struct folio *folio; - vm_fault_t ret = 0; -@@ -4045,7 +4082,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) - vma->vm_page_prot)); - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, - vmf->address, &vmf->ptl); -- if (!pte_none(*vmf->pte)) { -+ if (vmf_pte_changed(vmf)) { - update_mmu_tlb(vma, vmf->address, vmf->pte); - goto unlock; - } -@@ -4085,7 +4122,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) - - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, - &vmf->ptl); -- if (!pte_none(*vmf->pte)) { -+ if (vmf_pte_changed(vmf)) { - update_mmu_tlb(vma, vmf->address, vmf->pte); - goto release; - } -@@ -4105,6 +4142,8 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) - folio_add_new_anon_rmap(folio, vma, vmf->address); - folio_add_lru_vma(folio, vma); - setpte: -+ if (uffd_wp) -+ entry = pte_mkuffd_wp(entry); - set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); - - /* No need to invalidate - it was non-present before */ -@@ -4272,7 +4311,7 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) - void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr) - { - struct vm_area_struct *vma = vmf->vma; -- bool uffd_wp = pte_marker_uffd_wp(vmf->orig_pte); -+ bool uffd_wp = vmf_orig_pte_uffd_wp(vmf); - bool write = vmf->flags & FAULT_FLAG_WRITE; - bool prefault = vmf->address != addr; - pte_t entry; -@@ -4503,6 +4542,8 @@ static vm_fault_t do_read_fault(struct vm_fault *vmf) - return ret; - } - -+ if (vmf->flags & FAULT_FLAG_VMA_LOCK) -+ return VM_FAULT_RETRY; - ret = __do_fault(vmf); - if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) - return ret; -@@ -4519,6 +4560,9 @@ static vm_fault_t do_cow_fault(struct vm_fault *vmf) - struct vm_area_struct *vma = vmf->vma; - vm_fault_t ret; - -+ if (vmf->flags & FAULT_FLAG_VMA_LOCK) -+ return VM_FAULT_RETRY; -+ - if (unlikely(anon_vma_prepare(vma))) - return VM_FAULT_OOM; - -@@ -4558,6 +4602,9 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf) - struct vm_area_struct *vma = vmf->vma; - vm_fault_t ret, tmp; - -+ if (vmf->flags & FAULT_FLAG_VMA_LOCK) -+ return VM_FAULT_RETRY; -+ - ret = __do_fault(vmf); - if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) - return ret; -@@ -4916,12 +4963,8 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) - } - } - -- if (!vmf->pte) { -- if (vma_is_anonymous(vmf->vma)) -- return do_anonymous_page(vmf); -- else -- return do_fault(vmf); -- } -+ if (!vmf->pte) -+ return do_pte_missing(vmf); - - if (!pte_present(vmf->orig_pte)) - return do_swap_page(vmf); -@@ -4929,6 +4972,9 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) - if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) - return do_numa_page(vmf); - -+ if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vmf->vma)) -+ return VM_FAULT_RETRY; -+ - vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd); - spin_lock(vmf->ptl); - entry = 
vmf->orig_pte; -@@ -4965,10 +5011,10 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) - } - - /* -- * By the time we get here, we already hold the mm semaphore -- * -- * The mmap_lock may have been released depending on flags and our -- * return value. See filemap_fault() and __folio_lock_or_retry(). -+ * On entry, we hold either the VMA lock or the mmap_lock -+ * (FAULT_FLAG_VMA_LOCK tells you which). If VM_FAULT_RETRY is set in -+ * the result, the mmap_lock is not held on exit. See filemap_fault() -+ * and __folio_lock_or_retry(). - */ - static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, - unsigned long address, unsigned int flags) -@@ -5080,24 +5126,31 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, - * updates. However, note that the handling of PERF_COUNT_SW_PAGE_FAULTS should - * still be in per-arch page fault handlers at the entry of page fault. - */ --static inline void mm_account_fault(struct pt_regs *regs, -+static inline void mm_account_fault(struct mm_struct *mm, struct pt_regs *regs, - unsigned long address, unsigned int flags, - vm_fault_t ret) - { - bool major; - -+ /* Incomplete faults will be accounted upon completion. */ -+ if (ret & VM_FAULT_RETRY) -+ return; -+ - /* -- * We don't do accounting for some specific faults: -- * -- * - Unsuccessful faults (e.g. when the address wasn't valid). That -- * includes arch_vma_access_permitted() failing before reaching here. -- * So this is not a "this many hardware page faults" counter. We -- * should use the hw profiling for that. -- * -- * - Incomplete faults (VM_FAULT_RETRY). They will only be counted -- * once they're completed. -+ * To preserve the behavior of older kernels, PGFAULT counters record -+ * both successful and failed faults, as opposed to perf counters, -+ * which ignore failed cases. -+ */ -+ count_vm_event(PGFAULT); -+ count_memcg_event_mm(mm, PGFAULT); -+ -+ /* -+ * Do not account for unsuccessful faults (e.g. when the address wasn't -+ * valid). That includes arch_vma_access_permitted() failing before -+ * reaching here. So this is not a "this many hardware page faults" -+ * counter. We should use the hw profiling for that. 
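-+ *
-+ * Editor's note: because VM_FAULT_RETRY now returns before the
-+ * PGFAULT bump above, a fault that aborts under the per-vma lock
-+ * and is retried under mmap_lock is counted once on completion
-+ * rather than twice.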
- */ -- if (ret & (VM_FAULT_ERROR | VM_FAULT_RETRY)) -+ if (ret & VM_FAULT_ERROR) - return; - - /* -@@ -5180,21 +5233,22 @@ static vm_fault_t sanitize_fault_flags(struct vm_area_struct *vma, - vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, - unsigned int flags, struct pt_regs *regs) - { -+ /* If the fault handler drops the mmap_lock, vma may be freed */ -+ struct mm_struct *mm = vma->vm_mm; - vm_fault_t ret; - - __set_current_state(TASK_RUNNING); - -- count_vm_event(PGFAULT); -- count_memcg_event_mm(vma->vm_mm, PGFAULT); -- - ret = sanitize_fault_flags(vma, &flags); - if (ret) -- return ret; -+ goto out; - - if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, - flags & FAULT_FLAG_INSTRUCTION, -- flags & FAULT_FLAG_REMOTE)) -- return VM_FAULT_SIGSEGV; -+ flags & FAULT_FLAG_REMOTE)) { -+ ret = VM_FAULT_SIGSEGV; -+ goto out; -+ } - - /* - * Enable the memcg OOM handling for faults triggered in user -@@ -5223,13 +5277,70 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, - if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)) - mem_cgroup_oom_synchronize(false); - } -- -- mm_account_fault(regs, address, flags, ret); -+out: -+ mm_account_fault(mm, regs, address, flags, ret); - - return ret; - } - EXPORT_SYMBOL_GPL(handle_mm_fault); - -+#ifdef CONFIG_PER_VMA_LOCK -+/* -+ * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be -+ * stable and not isolated. If the VMA is not found or is being modified the -+ * function returns NULL. -+ */ -+struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, -+ unsigned long address) -+{ -+ MA_STATE(mas, &mm->mm_mt, address, address); -+ struct vm_area_struct *vma; -+ -+ rcu_read_lock(); -+retry: -+ vma = mas_walk(&mas); -+ if (!vma) -+ goto inval; -+ -+ /* find_mergeable_anon_vma uses adjacent vmas which are not locked */ -+ if (vma_is_anonymous(vma) && !vma->anon_vma) -+ goto inval; -+ -+ if (!vma_start_read(vma)) -+ goto inval; -+ -+ /* -+ * Due to the possibility of userfault handler dropping mmap_lock, avoid -+ * it for now and fall back to page fault handling under mmap_lock. -+ */ -+ if (userfaultfd_armed(vma)) { -+ vma_end_read(vma); -+ goto inval; -+ } -+ -+ /* Check since vm_start/vm_end might change before we lock the VMA */ -+ if (unlikely(address < vma->vm_start || address >= vma->vm_end)) { -+ vma_end_read(vma); -+ goto inval; -+ } -+ -+ /* Check if the VMA got isolated after we found it */ -+ if (vma->detached) { -+ vma_end_read(vma); -+ count_vm_vma_lock_event(VMA_LOCK_MISS); -+ /* The area was replaced with another one */ -+ goto retry; -+ } -+ -+ rcu_read_unlock(); -+ return vma; -+inval: -+ rcu_read_unlock(); -+ count_vm_vma_lock_event(VMA_LOCK_ABORT); -+ return NULL; -+} -+#endif /* CONFIG_PER_VMA_LOCK */ -+ - #ifndef __PAGETABLE_P4D_FOLDED - /* - * Allocate p4d page table. -diff --git a/mm/mmap.c b/mm/mmap.c -index d5475fbf5729..cbac45aa39ae 100644 ---- a/mm/mmap.c -+++ b/mm/mmap.c -@@ -133,7 +133,7 @@ void unlink_file_vma(struct vm_area_struct *vma) - /* - * Close a vm structure and free it. 
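- *
- * Editor's note: the "unreachable" flag added below separates vmas
- * torn down in exit_mmap(), which no racing reader can reach and may
- * be freed immediately with __vm_area_free(), from vmas that
- * lock_vma_under_rcu() might still see and therefore need the
- * RCU-deferred vm_area_free().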
- */ --static void remove_vma(struct vm_area_struct *vma) -+static void remove_vma(struct vm_area_struct *vma, bool unreachable) - { - might_sleep(); - if (vma->vm_ops && vma->vm_ops->close) -@@ -141,7 +141,10 @@ static void remove_vma(struct vm_area_struct *vma) - if (vma->vm_file) - fput(vma->vm_file); - mpol_put(vma_policy(vma)); -- vm_area_free(vma); -+ if (unreachable) -+ __vm_area_free(vma); -+ else -+ vm_area_free(vma); - } - - static inline struct vm_area_struct *vma_prev_limit(struct vma_iterator *vmi, -@@ -502,6 +505,15 @@ static inline void init_vma_prep(struct vma_prepare *vp, - */ - static inline void vma_prepare(struct vma_prepare *vp) - { -+ vma_start_write(vp->vma); -+ if (vp->adj_next) -+ vma_start_write(vp->adj_next); -+ /* vp->insert is always a newly created VMA, no need for locking */ -+ if (vp->remove) -+ vma_start_write(vp->remove); -+ if (vp->remove2) -+ vma_start_write(vp->remove2); -+ - if (vp->file) { - uprobe_munmap(vp->vma, vp->vma->vm_start, vp->vma->vm_end); - -@@ -590,6 +602,7 @@ static inline void vma_complete(struct vma_prepare *vp, - - if (vp->remove) { - again: -+ vma_mark_detached(vp->remove, true); - if (vp->file) { - uprobe_munmap(vp->remove, vp->remove->vm_start, - vp->remove->vm_end); -@@ -683,12 +696,12 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, - if (vma_iter_prealloc(vmi)) - goto nomem; - -+ vma_prepare(&vp); - vma_adjust_trans_huge(vma, start, end, 0); - /* VMA iterator points to previous, so set to start if necessary */ - if (vma_iter_addr(vmi) != start) - vma_iter_set(vmi, start); - -- vma_prepare(&vp); - vma->vm_start = start; - vma->vm_end = end; - vma->vm_pgoff = pgoff; -@@ -723,8 +736,8 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, - return -ENOMEM; - - init_vma_prep(&vp, vma); -- vma_adjust_trans_huge(vma, start, end, 0); - vma_prepare(&vp); -+ vma_adjust_trans_huge(vma, start, end, 0); - - if (vma->vm_start < start) - vma_iter_clear(vmi, vma->vm_start, start); -@@ -994,12 +1007,12 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, - if (vma_iter_prealloc(vmi)) - return NULL; - -- vma_adjust_trans_huge(vma, vma_start, vma_end, adj_next); - init_multi_vma_prep(&vp, vma, adjust, remove, remove2); - VM_WARN_ON(vp.anon_vma && adjust && adjust->anon_vma && - vp.anon_vma != adjust->anon_vma); - - vma_prepare(&vp); -+ vma_adjust_trans_huge(vma, vma_start, vma_end, adj_next); - if (vma_start < vma->vm_start || vma_end > vma->vm_end) - vma_expanded = true; - -@@ -2157,7 +2170,7 @@ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas) - if (vma->vm_flags & VM_ACCOUNT) - nr_accounted += nrpages; - vm_stat_account(mm, vma->vm_flags, -nrpages); -- remove_vma(vma); -+ remove_vma(vma, false); - } - vm_unacct_memory(nr_accounted); - validate_mm(mm); -@@ -2180,7 +2193,8 @@ static void unmap_region(struct mm_struct *mm, struct maple_tree *mt, - update_hiwater_rss(mm); - unmap_vmas(&tlb, mt, vma, start, end, mm_wr_locked); - free_pgtables(&tlb, mt, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, -- next ? next->vm_start : USER_PGTABLES_CEILING); -+ next ? 
next->vm_start : USER_PGTABLES_CEILING, -+ mm_wr_locked); - tlb_finish_mmu(&tlb); - } - -@@ -2236,10 +2250,10 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, - if (new->vm_ops && new->vm_ops->open) - new->vm_ops->open(new); - -- vma_adjust_trans_huge(vma, vma->vm_start, addr, 0); - init_vma_prep(&vp, vma); - vp.insert = new; - vma_prepare(&vp); -+ vma_adjust_trans_huge(vma, vma->vm_start, addr, 0); - - if (new_below) { - vma->vm_start = addr; -@@ -2283,10 +2297,12 @@ int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, - static inline int munmap_sidetree(struct vm_area_struct *vma, - struct ma_state *mas_detach) - { -+ vma_start_write(vma); - mas_set_range(mas_detach, vma->vm_start, vma->vm_end - 1); - if (mas_store_gfp(mas_detach, vma, GFP_KERNEL)) - return -ENOMEM; - -+ vma_mark_detached(vma, true); - if (vma->vm_flags & VM_LOCKED) - vma->vm_mm->locked_vm -= vma_pages(vma); - -@@ -2942,9 +2958,9 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma, - if (vma_iter_prealloc(vmi)) - goto unacct_fail; - -- vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0); - init_vma_prep(&vp, vma); - vma_prepare(&vp); -+ vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0); - vma->vm_end = addr + len; - vm_flags_set(vma, VM_SOFTDIRTY); - vma_iter_store(vmi, vma); -@@ -3077,7 +3093,7 @@ void exit_mmap(struct mm_struct *mm) - mmap_write_lock(mm); - mt_clear_in_rcu(&mm->mm_mt); - free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS, -- USER_PGTABLES_CEILING); -+ USER_PGTABLES_CEILING, true); - tlb_finish_mmu(&tlb); - - /* -@@ -3088,7 +3104,7 @@ void exit_mmap(struct mm_struct *mm) - do { - if (vma->vm_flags & VM_ACCOUNT) - nr_accounted += vma_pages(vma); -- remove_vma(vma); -+ remove_vma(vma, true); - count++; - cond_resched(); - } while ((vma = mas_find(&mas, ULONG_MAX)) != NULL); -@@ -3211,6 +3227,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, - get_file(new_vma->vm_file); - if (new_vma->vm_ops && new_vma->vm_ops->open) - new_vma->vm_ops->open(new_vma); -+ vma_start_write(new_vma); - if (vma_link(mm, new_vma)) - goto out_vma_link; - *need_rmap_locks = false; -@@ -3505,6 +3522,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) - * of mm/rmap.c: - * - all hugetlbfs_i_mmap_rwsem_key locks (aka mapping->i_mmap_rwsem for - * hugetlb mapping); -+ * - all vmas marked locked - * - all i_mmap_rwsem locks; - * - all anon_vma->rwseml - * -@@ -3527,6 +3545,13 @@ int mm_take_all_locks(struct mm_struct *mm) - - mutex_lock(&mm_all_locks_mutex); - -+ mas_for_each(&mas, vma, ULONG_MAX) { -+ if (signal_pending(current)) -+ goto out_unlock; -+ vma_start_write(vma); -+ } -+ -+ mas_set(&mas, 0); - mas_for_each(&mas, vma, ULONG_MAX) { - if (signal_pending(current)) - goto out_unlock; -@@ -3616,6 +3641,7 @@ void mm_drop_all_locks(struct mm_struct *mm) - if (vma->vm_file && vma->vm_file->f_mapping) - vm_unlock_mapping(vma->vm_file->f_mapping); - } -+ vma_end_write_all(mm); - - mutex_unlock(&mm_all_locks_mutex); - } -diff --git a/mm/mprotect.c b/mm/mprotect.c -index 36351a00c0e8..204194155863 100644 ---- a/mm/mprotect.c -+++ b/mm/mprotect.c -@@ -276,7 +276,15 @@ static long change_pte_range(struct mmu_gather *tlb, - } else { - /* It must be an none page, or what else?.. */ - WARN_ON_ONCE(!pte_none(oldpte)); -- if (unlikely(uffd_wp && !vma_is_anonymous(vma))) { -+ -+ /* -+ * Nobody plays with any none ptes besides -+ * userfaultfd when applying the protections. 
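-+ *
-+ * Editor's illustration (not part of the patch; addr/len are
-+ * placeholders): a none pte is only reached here with uffd_wp
-+ * set during something like
-+ *
-+ *   struct uffdio_writeprotect wp = {
-+ *       .range = { .start = addr, .len = len },
-+ *       .mode  = UFFDIO_WRITEPROTECT_MODE_WP,
-+ *   };
-+ *   ioctl(uffd, UFFDIO_WRITEPROTECT, &wp);
-+ *
-+ * over an unpopulated range; whether a marker is then installed
-+ * is what userfaultfd_wp_use_markers() decides.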
-+ */ -+ if (likely(!uffd_wp)) -+ continue; -+ -+ if (userfaultfd_wp_use_markers(vma)) { - /* - * For file-backed mem, we need to be able to - * wr-protect a none pte, because even if the -@@ -320,23 +328,46 @@ static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd) - return 0; - } - --/* Return true if we're uffd wr-protecting file-backed memory, or false */ -+/* -+ * Return true if we want to split THPs into PTE mappings in change -+ * protection procedure, false otherwise. -+ */ - static inline bool --uffd_wp_protect_file(struct vm_area_struct *vma, unsigned long cp_flags) -+pgtable_split_needed(struct vm_area_struct *vma, unsigned long cp_flags) - { -+ /* -+ * pte markers only resides in pte level, if we need pte markers, -+ * we need to split. We cannot wr-protect shmem thp because file -+ * thp is handled differently when split by erasing the pmd so far. -+ */ - return (cp_flags & MM_CP_UFFD_WP) && !vma_is_anonymous(vma); - } - - /* -- * If wr-protecting the range for file-backed, populate pgtable for the case -- * when pgtable is empty but page cache exists. When {pte|pmd|...}_alloc() -- * failed we treat it the same way as pgtable allocation failures during -- * page faults by kicking OOM and returning error. -+ * Return true if we want to populate pgtables in change protection -+ * procedure, false otherwise -+ */ -+static inline bool -+pgtable_populate_needed(struct vm_area_struct *vma, unsigned long cp_flags) -+{ -+ /* If not within ioctl(UFFDIO_WRITEPROTECT), then don't bother */ -+ if (!(cp_flags & MM_CP_UFFD_WP)) -+ return false; -+ -+ /* Populate if the userfaultfd mode requires pte markers */ -+ return userfaultfd_wp_use_markers(vma); -+} -+ -+/* -+ * Populate the pgtable underneath for whatever reason if requested. -+ * When {pte|pmd|...}_alloc() failed we treat it the same way as pgtable -+ * allocation failures during page faults by kicking OOM and returning -+ * error. 
- */ - #define change_pmd_prepare(vma, pmd, cp_flags) \ - ({ \ - long err = 0; \ -- if (unlikely(uffd_wp_protect_file(vma, cp_flags))) { \ -+ if (unlikely(pgtable_populate_needed(vma, cp_flags))) { \ - if (pte_alloc(vma->vm_mm, pmd)) \ - err = -ENOMEM; \ - } \ -@@ -351,7 +382,7 @@ uffd_wp_protect_file(struct vm_area_struct *vma, unsigned long cp_flags) - #define change_prepare(vma, high, low, addr, cp_flags) \ - ({ \ - long err = 0; \ -- if (unlikely(uffd_wp_protect_file(vma, cp_flags))) { \ -+ if (unlikely(pgtable_populate_needed(vma, cp_flags))) { \ - low##_t *p = low##_alloc(vma->vm_mm, high, addr); \ - if (p == NULL) \ - err = -ENOMEM; \ -@@ -404,7 +435,7 @@ static inline long change_pmd_range(struct mmu_gather *tlb, - - if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { - if ((next - addr != HPAGE_PMD_SIZE) || -- uffd_wp_protect_file(vma, cp_flags)) { -+ pgtable_split_needed(vma, cp_flags)) { - __split_huge_pmd(vma, pmd, addr, false, NULL); - /* - * For file-backed, the pmd could have been -diff --git a/mm/mremap.c b/mm/mremap.c -index 411a85682b58..dd541e59edda 100644 ---- a/mm/mremap.c -+++ b/mm/mremap.c -@@ -623,6 +623,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, - return -ENOMEM; - } - -+ vma_start_write(vma); - new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT); - new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff, - &need_rmap_locks); -diff --git a/mm/rmap.c b/mm/rmap.c -index 8632e02661ac..cfdaa56cad3e 100644 ---- a/mm/rmap.c -+++ b/mm/rmap.c -@@ -25,21 +25,22 @@ - * mapping->invalidate_lock (in filemap_fault) - * page->flags PG_locked (lock_page) - * hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share, see hugetlbfs below) -- * mapping->i_mmap_rwsem -- * anon_vma->rwsem -- * mm->page_table_lock or pte_lock -- * swap_lock (in swap_duplicate, swap_info_get) -- * mmlist_lock (in mmput, drain_mmlist and others) -- * mapping->private_lock (in block_dirty_folio) -- * folio_lock_memcg move_lock (in block_dirty_folio) -- * i_pages lock (widely used) -- * lruvec->lru_lock (in folio_lruvec_lock_irq) -- * inode->i_lock (in set_page_dirty's __mark_inode_dirty) -- * bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty) -- * sb_lock (within inode_lock in fs/fs-writeback.c) -- * i_pages lock (widely used, in set_page_dirty, -- * in arch-dependent flush_dcache_mmap_lock, -- * within bdi.wb->list_lock in __sync_single_inode) -+ * vma_start_write -+ * mapping->i_mmap_rwsem -+ * anon_vma->rwsem -+ * mm->page_table_lock or pte_lock -+ * swap_lock (in swap_duplicate, swap_info_get) -+ * mmlist_lock (in mmput, drain_mmlist and others) -+ * mapping->private_lock (in block_dirty_folio) -+ * folio_lock_memcg move_lock (in block_dirty_folio) -+ * i_pages lock (widely used) -+ * lruvec->lru_lock (in folio_lruvec_lock_irq) -+ * inode->i_lock (in set_page_dirty's __mark_inode_dirty) -+ * bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty) -+ * sb_lock (within inode_lock in fs/fs-writeback.c) -+ * i_pages lock (widely used, in set_page_dirty, -+ * in arch-dependent flush_dcache_mmap_lock, -+ * within bdi.wb->list_lock in __sync_single_inode) - * - * anon_vma->rwsem,mapping->i_mmap_rwsem (memory_failure, collect_procs_anon) - * ->tasklist_lock -diff --git a/mm/vmstat.c b/mm/vmstat.c -index 1ea6a5ce1c41..4f1089a1860e 100644 ---- a/mm/vmstat.c -+++ b/mm/vmstat.c -@@ -1399,6 +1399,12 @@ const char * const vmstat_text[] = { - "direct_map_level2_splits", - "direct_map_level3_splits", - #endif -+#ifdef CONFIG_PER_VMA_LOCK_STATS -+ 
"vma_lock_success", -+ "vma_lock_abort", -+ "vma_lock_retry", -+ "vma_lock_miss", -+#endif - #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ - }; - #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */ -diff --git a/tools/testing/selftests/mm/userfaultfd.c b/tools/testing/selftests/mm/userfaultfd.c -index 7f22844ed704..e030d63c031a 100644 ---- a/tools/testing/selftests/mm/userfaultfd.c -+++ b/tools/testing/selftests/mm/userfaultfd.c -@@ -1444,6 +1444,43 @@ static int pagemap_test_fork(bool present) - return result; - } - -+static void userfaultfd_wp_unpopulated_test(int pagemap_fd) -+{ -+ uint64_t value; -+ -+ /* Test applying pte marker to anon unpopulated */ -+ wp_range(uffd, (uint64_t)area_dst, page_size, true); -+ value = pagemap_read_vaddr(pagemap_fd, area_dst); -+ pagemap_check_wp(value, true); -+ -+ /* Test unprotect on anon pte marker */ -+ wp_range(uffd, (uint64_t)area_dst, page_size, false); -+ value = pagemap_read_vaddr(pagemap_fd, area_dst); -+ pagemap_check_wp(value, false); -+ -+ /* Test zap on anon marker */ -+ wp_range(uffd, (uint64_t)area_dst, page_size, true); -+ if (madvise(area_dst, page_size, MADV_DONTNEED)) -+ err("madvise(MADV_DONTNEED) failed"); -+ value = pagemap_read_vaddr(pagemap_fd, area_dst); -+ pagemap_check_wp(value, false); -+ -+ /* Test fault in after marker removed */ -+ *area_dst = 1; -+ value = pagemap_read_vaddr(pagemap_fd, area_dst); -+ pagemap_check_wp(value, false); -+ /* Drop it to make pte none again */ -+ if (madvise(area_dst, page_size, MADV_DONTNEED)) -+ err("madvise(MADV_DONTNEED) failed"); -+ -+ /* Test read-zero-page upon pte marker */ -+ wp_range(uffd, (uint64_t)area_dst, page_size, true); -+ *(volatile char *)area_dst; -+ /* Drop it to make pte none again */ -+ if (madvise(area_dst, page_size, MADV_DONTNEED)) -+ err("madvise(MADV_DONTNEED) failed"); -+} -+ - static void userfaultfd_pagemap_test(unsigned int test_pgsize) - { - struct uffdio_register uffdio_register; -@@ -1462,7 +1499,7 @@ static void userfaultfd_pagemap_test(unsigned int test_pgsize) - /* Flush so it doesn't flush twice in parent/child later */ - fflush(stdout); - -- uffd_test_ctx_init(0); -+ uffd_test_ctx_init(UFFD_FEATURE_WP_UNPOPULATED); - - if (test_pgsize > page_size) { - /* This is a thp test */ -@@ -1482,6 +1519,10 @@ static void userfaultfd_pagemap_test(unsigned int test_pgsize) - - pagemap_fd = pagemap_open(); - -+ /* Smoke test WP_UNPOPULATED first when it's still empty */ -+ if (test_pgsize == page_size) -+ userfaultfd_wp_unpopulated_test(pagemap_fd); -+ - /* Touch the page */ - *area_dst = 1; - wp_range(uffd, (uint64_t)area_dst, test_pgsize, true); -@@ -1526,7 +1567,7 @@ static int userfaultfd_stress(void) - struct uffdio_register uffdio_register; - struct uffd_stats uffd_stats[nr_cpus]; - -- uffd_test_ctx_init(0); -+ uffd_test_ctx_init(UFFD_FEATURE_WP_UNPOPULATED); - - if (posix_memalign(&area, page_size, page_size)) - err("out of memory"); --- -2.40.0 - -From 888661765419ab8a18ee6597356b0a0b79c2de90 Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Mon, 24 Apr 2023 15:36:39 +0200 -Subject: [PATCH 09/12] sched +Date: Tue, 25 Apr 2023 17:19:06 +0200 +Subject: [PATCH 7/8] sched Signed-off-by: Peter Jung --- @@ -17470,17 +10384,14 @@ Signed-off-by: Peter Jung arch/x86/kernel/smpboot.c | 4 +- include/linux/sched.h | 3 + include/linux/sched/sd_flags.h | 5 +- - kernel/sched/clock.c | 3 + kernel/sched/core.c | 4 +- - kernel/sched/deadline.c | 1 + kernel/sched/debug.c | 1 + kernel/sched/fair.c | 265 ++++++++++++++++++++------------- 
kernel/sched/features.h | 1 + kernel/sched/pelt.c | 60 ++++++++ kernel/sched/pelt.h | 42 +++++- - kernel/sched/rt.c | 4 + kernel/sched/sched.h | 23 ++- - 14 files changed, 302 insertions(+), 137 deletions(-) + 11 files changed, 294 insertions(+), 137 deletions(-) diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index 9ff480e94511..6510883c5e81 100644 @@ -17578,20 +10489,6 @@ index 57bde66d95f7..fad77b5172e2 100644 /* * Prefer to place tasks in a sibling domain -diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c -index 5732fa75ebab..b5cc2b53464d 100644 ---- a/kernel/sched/clock.c -+++ b/kernel/sched/clock.c -@@ -300,6 +300,9 @@ noinstr u64 local_clock(void) - if (static_branch_likely(&__sched_clock_stable)) - return sched_clock() + __sched_clock_offset; - -+ if (!static_branch_likely(&sched_clock_running)) -+ return sched_clock(); -+ - preempt_disable_notrace(); - clock = sched_clock_local(this_scd()); - preempt_enable_notrace(); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0d18c3969f90..17bb9637f314 100644 --- a/kernel/sched/core.c @@ -17614,18 +10511,6 @@ index 0d18c3969f90..17bb9637f314 100644 INIT_LIST_HEAD(&p->se.group_node); #ifdef CONFIG_FAIR_GROUP_SCHED -diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c -index 71b24371a6f7..ac8010f6f3a2 100644 ---- a/kernel/sched/deadline.c -+++ b/kernel/sched/deadline.c -@@ -2246,6 +2246,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) - !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) || - task_on_cpu(rq, task) || - !dl_task(task) || -+ is_migration_disabled(task) || - !task_on_rq_queued(task))) { - double_unlock_balance(rq, later_rq); - later_rq = NULL; diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 1637b65ba07a..8d64fba16cfe 100644 --- a/kernel/sched/debug.c @@ -18199,26 +11084,6 @@ index 3a0e0dc28721..9b35b5072bae 100644 static inline void update_idle_rq_clock_pelt(struct rq *rq) { } -diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c -index 0a11f44adee5..4f5796dd26a5 100644 ---- a/kernel/sched/rt.c -+++ b/kernel/sched/rt.c -@@ -2000,11 +2000,15 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) - * the mean time, task could have - * migrated already or had its affinity changed. - * Also make sure that it wasn't scheduled on its rq. -+ * It is possible the task was scheduled, set -+ * "migrate_disabled" and then got preempted, so we must -+ * check the task migration disable flag here too. 
- */ - if (unlikely(task_rq(task) != rq || - !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) || - task_on_cpu(rq, task) || - !rt_task(task) || -+ is_migration_disabled(task) || - !task_on_rq_queued(task))) { - - double_unlock_balance(rq, lowest_rq); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 3e8df6d31c1e..7331d436ebc4 100644 --- a/kernel/sched/sched.h @@ -18275,6315 +11140,12 @@ index 3e8df6d31c1e..7331d436ebc4 100644 return hsd; -- -2.40.0 +2.40.1 -From 7ea2532fa27ecd8a5b0300c93bfc66cf5b0aadf1 Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Sat, 22 Apr 2023 11:46:19 +0200 -Subject: [PATCH 10/12] Surface - -Signed-off-by: Peter Jung ---- - arch/x86/kernel/acpi/boot.c | 24 + - drivers/acpi/acpi_tad.c | 35 +- - drivers/bluetooth/btusb.c | 15 + - drivers/hid/Kconfig | 4 + - drivers/hid/Makefile | 3 + - drivers/hid/hid-multitouch.c | 196 ++++++- - drivers/hid/ipts/Kconfig | 14 + - drivers/hid/ipts/Makefile | 14 + - drivers/hid/ipts/cmd.c | 62 ++ - drivers/hid/ipts/cmd.h | 61 ++ - drivers/hid/ipts/context.h | 51 ++ - drivers/hid/ipts/control.c | 495 ++++++++++++++++ - drivers/hid/ipts/control.h | 127 +++++ - drivers/hid/ipts/desc.h | 81 +++ - drivers/hid/ipts/hid.c | 348 ++++++++++++ - drivers/hid/ipts/hid.h | 22 + - drivers/hid/ipts/main.c | 127 +++++ - drivers/hid/ipts/mei.c | 189 +++++++ - drivers/hid/ipts/mei.h | 67 +++ - drivers/hid/ipts/receiver.c | 249 ++++++++ - drivers/hid/ipts/receiver.h | 17 + - drivers/hid/ipts/resources.c | 108 ++++ - drivers/hid/ipts/resources.h | 39 ++ - drivers/hid/ipts/spec-data.h | 100 ++++ - drivers/hid/ipts/spec-device.h | 285 ++++++++++ - drivers/hid/ipts/spec-hid.h | 35 ++ - drivers/hid/ipts/thread.c | 85 +++ - drivers/hid/ipts/thread.h | 60 ++ - drivers/hid/ithc/Kbuild | 6 + - drivers/hid/ithc/Kconfig | 12 + - drivers/hid/ithc/ithc-debug.c | 96 ++++ - drivers/hid/ithc/ithc-dma.c | 258 +++++++++ - drivers/hid/ithc/ithc-dma.h | 67 +++ - drivers/hid/ithc/ithc-main.c | 534 ++++++++++++++++++ - drivers/hid/ithc/ithc-regs.c | 64 +++ - drivers/hid/ithc/ithc-regs.h | 186 ++++++ - drivers/hid/ithc/ithc.h | 60 ++ - drivers/i2c/i2c-core-acpi.c | 35 ++ - drivers/input/misc/soc_button_array.c | 33 +- - drivers/iommu/intel/iommu.c | 24 + - drivers/iommu/intel/irq_remapping.c | 16 + - drivers/misc/mei/hw-me-regs.h | 1 + - drivers/misc/mei/pci-me.c | 1 + - drivers/net/wireless/ath/ath10k/core.c | 58 ++ - drivers/net/wireless/marvell/mwifiex/pcie.c | 19 + - .../wireless/marvell/mwifiex/pcie_quirks.c | 37 +- - .../wireless/marvell/mwifiex/pcie_quirks.h | 2 + - drivers/pci/pci-driver.c | 3 + - drivers/pci/quirks.c | 36 ++ - drivers/platform/surface/Kconfig | 7 + - drivers/platform/surface/Makefile | 1 + - drivers/platform/surface/surface3-wmi.c | 7 + - drivers/platform/surface/surface_gpe.c | 17 + - .../surface/surfacebook1_dgpu_switch.c | 162 ++++++ - drivers/platform/surface/surfacepro3_button.c | 30 +- - drivers/usb/core/quirks.c | 3 + - include/linux/pci.h | 1 + - sound/soc/codecs/rt5645.c | 9 + - .../intel/common/soc-acpi-intel-cht-match.c | 8 + - 59 files changed, 4636 insertions(+), 70 deletions(-) - create mode 100644 drivers/hid/ipts/Kconfig - create mode 100644 drivers/hid/ipts/Makefile - create mode 100644 drivers/hid/ipts/cmd.c - create mode 100644 drivers/hid/ipts/cmd.h - create mode 100644 drivers/hid/ipts/context.h - create mode 100644 drivers/hid/ipts/control.c - create mode 100644 drivers/hid/ipts/control.h - create mode 100644 drivers/hid/ipts/desc.h - create mode 100644 drivers/hid/ipts/hid.c - create mode 100644 
drivers/hid/ipts/hid.h - create mode 100644 drivers/hid/ipts/main.c - create mode 100644 drivers/hid/ipts/mei.c - create mode 100644 drivers/hid/ipts/mei.h - create mode 100644 drivers/hid/ipts/receiver.c - create mode 100644 drivers/hid/ipts/receiver.h - create mode 100644 drivers/hid/ipts/resources.c - create mode 100644 drivers/hid/ipts/resources.h - create mode 100644 drivers/hid/ipts/spec-data.h - create mode 100644 drivers/hid/ipts/spec-device.h - create mode 100644 drivers/hid/ipts/spec-hid.h - create mode 100644 drivers/hid/ipts/thread.c - create mode 100644 drivers/hid/ipts/thread.h - create mode 100644 drivers/hid/ithc/Kbuild - create mode 100644 drivers/hid/ithc/Kconfig - create mode 100644 drivers/hid/ithc/ithc-debug.c - create mode 100644 drivers/hid/ithc/ithc-dma.c - create mode 100644 drivers/hid/ithc/ithc-dma.h - create mode 100644 drivers/hid/ithc/ithc-main.c - create mode 100644 drivers/hid/ithc/ithc-regs.c - create mode 100644 drivers/hid/ithc/ithc-regs.h - create mode 100644 drivers/hid/ithc/ithc.h - create mode 100644 drivers/platform/surface/surfacebook1_dgpu_switch.c - -diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c -index 0dac4ab5b55b..623d94a9cb86 100644 ---- a/arch/x86/kernel/acpi/boot.c -+++ b/arch/x86/kernel/acpi/boot.c -@@ -22,6 +22,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -1252,6 +1253,24 @@ static void __init mp_config_acpi_legacy_irqs(void) - } - } - -+static const struct dmi_system_id surface_quirk[] __initconst = { -+ { -+ .ident = "Microsoft Surface Laptop 4 (AMD 15\")", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), -+ DMI_MATCH(DMI_PRODUCT_SKU, "Surface_Laptop_4_1952:1953") -+ }, -+ }, -+ { -+ .ident = "Microsoft Surface Laptop 4 (AMD 13\")", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), -+ DMI_MATCH(DMI_PRODUCT_SKU, "Surface_Laptop_4_1958:1959") -+ }, -+ }, -+ {} -+}; -+ - /* - * Parse IOAPIC related entries in MADT - * returns 0 on success, < 0 on error -@@ -1307,6 +1326,11 @@ static int __init acpi_parse_madt_ioapic_entries(void) - acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0, - acpi_gbl_FADT.sci_interrupt); - -+ if (dmi_check_system(surface_quirk)) { -+ pr_warn("Surface hack: Override irq 7\n"); -+ mp_override_legacy_irq(7, 3, 3, 7); -+ } -+ - /* Fill in identity legacy mappings where no override */ - mp_config_acpi_legacy_irqs(); - -diff --git a/drivers/acpi/acpi_tad.c b/drivers/acpi/acpi_tad.c -index e9b8e8305e23..944276934e7e 100644 ---- a/drivers/acpi/acpi_tad.c -+++ b/drivers/acpi/acpi_tad.c -@@ -432,6 +432,14 @@ static ssize_t caps_show(struct device *dev, struct device_attribute *attr, - - static DEVICE_ATTR_RO(caps); - -+static struct attribute *acpi_tad_attrs[] = { -+ &dev_attr_caps.attr, -+ NULL, -+}; -+static const struct attribute_group acpi_tad_attr_group = { -+ .attrs = acpi_tad_attrs, -+}; -+ - static ssize_t ac_alarm_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) - { -@@ -480,15 +488,14 @@ static ssize_t ac_status_show(struct device *dev, struct device_attribute *attr, - - static DEVICE_ATTR_RW(ac_status); - --static struct attribute *acpi_tad_attrs[] = { -- &dev_attr_caps.attr, -+static struct attribute *acpi_tad_ac_attrs[] = { - &dev_attr_ac_alarm.attr, - &dev_attr_ac_policy.attr, - &dev_attr_ac_status.attr, - NULL, - }; --static const struct attribute_group acpi_tad_attr_group = { -- .attrs = acpi_tad_attrs, -+static const struct attribute_group acpi_tad_ac_attr_group = 
{
-+ .attrs = acpi_tad_ac_attrs,
- };
- 
- static ssize_t dc_alarm_store(struct device *dev, struct device_attribute *attr,
-@@ -563,13 +570,18 @@ static int acpi_tad_remove(struct platform_device *pdev)
- 
- pm_runtime_get_sync(dev);
- 
-+ if (dd->capabilities & ACPI_TAD_AC_WAKE)
-+ sysfs_remove_group(&dev->kobj, &acpi_tad_ac_attr_group);
-+
- if (dd->capabilities & ACPI_TAD_DC_WAKE)
- sysfs_remove_group(&dev->kobj, &acpi_tad_dc_attr_group);
- 
- sysfs_remove_group(&dev->kobj, &acpi_tad_attr_group);
- 
-- acpi_tad_disable_timer(dev, ACPI_TAD_AC_TIMER);
-- acpi_tad_clear_status(dev, ACPI_TAD_AC_TIMER);
-+ if (dd->capabilities & ACPI_TAD_AC_WAKE) {
-+ acpi_tad_disable_timer(dev, ACPI_TAD_AC_TIMER);
-+ acpi_tad_clear_status(dev, ACPI_TAD_AC_TIMER);
-+ }
- if (dd->capabilities & ACPI_TAD_DC_WAKE) {
- acpi_tad_disable_timer(dev, ACPI_TAD_DC_TIMER);
- acpi_tad_clear_status(dev, ACPI_TAD_DC_TIMER);
-@@ -604,11 +616,6 @@ static int acpi_tad_probe(struct platform_device *pdev)
- return -ENODEV;
- }
- 
-- if (!acpi_has_method(handle, "_PRW")) {
-- dev_info(dev, "Missing _PRW\n");
-- return -ENODEV;
-- }
--
- dd = devm_kzalloc(dev, sizeof(*dd), GFP_KERNEL);
- if (!dd)
- return -ENOMEM;
-@@ -637,6 +644,12 @@ static int acpi_tad_probe(struct platform_device *pdev)
- if (ret)
- goto fail;
- 
-+ if (caps & ACPI_TAD_AC_WAKE) {
-+ ret = sysfs_create_group(&dev->kobj, &acpi_tad_ac_attr_group);
-+ if (ret)
-+ goto fail;
-+ }
-+
- if (caps & ACPI_TAD_DC_WAKE) {
- ret = sysfs_create_group(&dev->kobj, &acpi_tad_dc_attr_group);
- if (ret)
-diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
-index 5a80379253a7..5651b4bfe72c 100644
---- a/drivers/bluetooth/btusb.c
-+++ b/drivers/bluetooth/btusb.c
-@@ -65,6 +65,7 @@ static struct usb_driver btusb_driver;
- #define BTUSB_INTEL_BROKEN_INITIAL_NCMD BIT(25)
- #define BTUSB_INTEL_NO_WBS_SUPPORT BIT(26)
- #define BTUSB_ACTIONS_SEMI BIT(27)
-+#define BTUSB_LOWER_LESCAN_INTERVAL BIT(28)
- 
- static const struct usb_device_id btusb_table[] = {
- /* Generic Bluetooth USB device */
-@@ -468,6 +469,7 @@ static const struct usb_device_id blacklist_table[] = {
- { USB_DEVICE(0x1286, 0x2044), .driver_info = BTUSB_MARVELL },
- { USB_DEVICE(0x1286, 0x2046), .driver_info = BTUSB_MARVELL },
- { USB_DEVICE(0x1286, 0x204e), .driver_info = BTUSB_MARVELL },
-+ { USB_DEVICE(0x1286, 0x204c), .driver_info = BTUSB_LOWER_LESCAN_INTERVAL },
- 
- /* Intel Bluetooth devices */
- { USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_COMBINED },
-@@ -4033,6 +4035,19 @@ static int btusb_probe(struct usb_interface *intf,
- if (id->driver_info & BTUSB_MARVELL)
- hdev->set_bdaddr = btusb_set_bdaddr_marvell;
- 
-+ /* The Marvell 88W8897 combined wifi and bluetooth card is known for
-+ * very bad bt+wifi coexistence performance.
-+ *
-+ * Decrease the passive BT Low Energy scan interval a bit
-+ * (0x0190 * 0.625 msec = 250 msec) and make the scan window shorter
-+ * (0x000a * 0.625 msec = 6.25 msec). This allows for significantly
-+ * higher wifi throughput while passively scanning for BT LE devices.
-+ */
-+ if (id->driver_info & BTUSB_LOWER_LESCAN_INTERVAL) {
-+ hdev->le_scan_interval = 0x0190;
-+ hdev->le_scan_window = 0x000a;
-+ }
-+
- if (IS_ENABLED(CONFIG_BT_HCIBTUSB_MTK) &&
- (id->driver_info & BTUSB_MEDIATEK)) {
- hdev->setup = btusb_mtk_setup;
-diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
-index 4ce012f83253..aebb62488cf1 100644
---- a/drivers/hid/Kconfig
-+++ b/drivers/hid/Kconfig
-@@ -1300,6 +1300,10 @@ config HID_KUNIT_TEST
- 
- 	  If in doubt, say "N".
- -+source "drivers/hid/ipts/Kconfig" -+ -+source "drivers/hid/ithc/Kconfig" -+ - endmenu - - source "drivers/hid/bpf/Kconfig" -diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile -index 5d37cacbde33..a3ff62e922f1 100644 ---- a/drivers/hid/Makefile -+++ b/drivers/hid/Makefile -@@ -167,3 +167,6 @@ obj-$(INTEL_ISH_FIRMWARE_DOWNLOADER) += intel-ish-hid/ - obj-$(CONFIG_AMD_SFH_HID) += amd-sfh-hid/ - - obj-$(CONFIG_SURFACE_HID_CORE) += surface-hid/ -+ -+obj-$(CONFIG_HID_IPTS) += ipts/ -+obj-$(CONFIG_HID_ITHC) += ithc/ -diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c -index e31be0cb8b85..508a250ff4bf 100644 ---- a/drivers/hid/hid-multitouch.c -+++ b/drivers/hid/hid-multitouch.c -@@ -34,7 +34,10 @@ - #include - #include - #include -+#include - #include -+#include -+#include - #include - #include - #include -@@ -47,6 +50,7 @@ MODULE_DESCRIPTION("HID multitouch panels"); - MODULE_LICENSE("GPL"); - - #include "hid-ids.h" -+#include "usbhid/usbhid.h" - - /* quirks to control the device */ - #define MT_QUIRK_NOT_SEEN_MEANS_UP BIT(0) -@@ -72,12 +76,18 @@ MODULE_LICENSE("GPL"); - #define MT_QUIRK_FORCE_MULTI_INPUT BIT(20) - #define MT_QUIRK_DISABLE_WAKEUP BIT(21) - #define MT_QUIRK_ORIENTATION_INVERT BIT(22) -+#define MT_QUIRK_HAS_TYPE_COVER_BACKLIGHT BIT(23) -+#define MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH BIT(24) - - #define MT_INPUTMODE_TOUCHSCREEN 0x02 - #define MT_INPUTMODE_TOUCHPAD 0x03 - - #define MT_BUTTONTYPE_CLICKPAD 0 - -+#define MS_TYPE_COVER_FEATURE_REPORT_USAGE 0xff050086 -+#define MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE 0xff050072 -+#define MS_TYPE_COVER_APPLICATION 0xff050050 -+ - enum latency_mode { - HID_LATENCY_NORMAL = 0, - HID_LATENCY_HIGH = 1, -@@ -169,6 +179,8 @@ struct mt_device { - - struct list_head applications; - struct list_head reports; -+ -+ struct notifier_block pm_notifier; - }; - - static void mt_post_parse_default_settings(struct mt_device *td, -@@ -213,6 +225,7 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app); - #define MT_CLS_GOOGLE 0x0111 - #define MT_CLS_RAZER_BLADE_STEALTH 0x0112 - #define MT_CLS_SMART_TECH 0x0113 -+#define MT_CLS_WIN_8_MS_SURFACE_TYPE_COVER 0x0114 - - #define MT_DEFAULT_MAXCONTACT 10 - #define MT_MAX_MAXCONTACT 250 -@@ -397,6 +410,17 @@ static const struct mt_class mt_classes[] = { - MT_QUIRK_CONTACT_CNT_ACCURATE | - MT_QUIRK_SEPARATE_APP_REPORT, - }, -+ { .name = MT_CLS_WIN_8_MS_SURFACE_TYPE_COVER, -+ .quirks = MT_QUIRK_HAS_TYPE_COVER_BACKLIGHT | -+ MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH | -+ MT_QUIRK_ALWAYS_VALID | -+ MT_QUIRK_IGNORE_DUPLICATES | -+ MT_QUIRK_HOVERING | -+ MT_QUIRK_CONTACT_CNT_ACCURATE | -+ MT_QUIRK_STICKY_FINGERS | -+ MT_QUIRK_WIN8_PTP_BUTTONS, -+ .export_all_inputs = true -+ }, - { } - }; - -@@ -1370,6 +1394,9 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, - field->application != HID_CP_CONSUMER_CONTROL && - field->application != HID_GD_WIRELESS_RADIO_CTLS && - field->application != HID_GD_SYSTEM_MULTIAXIS && -+ !(field->application == MS_TYPE_COVER_APPLICATION && -+ application->quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH && -+ usage->hid == MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE) && - !(field->application == HID_VD_ASUS_CUSTOM_MEDIA_KEYS && - application->quirks & MT_QUIRK_ASUS_CUSTOM_UP)) - return -1; -@@ -1397,6 +1424,21 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, - return 1; - } - -+ /* -+ * The Microsoft Surface Pro Typecover has a non-standard HID -+ * tablet mode switch on a vendor 
specific usage page with vendor -+ * specific usage. -+ */ -+ if (field->application == MS_TYPE_COVER_APPLICATION && -+ application->quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH && -+ usage->hid == MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE) { -+ usage->type = EV_SW; -+ usage->code = SW_TABLET_MODE; -+ *max = SW_MAX; -+ *bit = hi->input->swbit; -+ return 1; -+ } -+ - if (rdata->is_mt_collection) - return mt_touch_input_mapping(hdev, hi, field, usage, bit, max, - application); -@@ -1418,6 +1460,7 @@ static int mt_input_mapped(struct hid_device *hdev, struct hid_input *hi, - { - struct mt_device *td = hid_get_drvdata(hdev); - struct mt_report_data *rdata; -+ struct input_dev *input; - - rdata = mt_find_report_data(td, field->report); - if (rdata && rdata->is_mt_collection) { -@@ -1425,6 +1468,19 @@ static int mt_input_mapped(struct hid_device *hdev, struct hid_input *hi, - return -1; - } - -+ /* -+ * We own an input device which acts as a tablet mode switch for -+ * the Surface Pro Typecover. -+ */ -+ if (field->application == MS_TYPE_COVER_APPLICATION && -+ rdata->application->quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH && -+ usage->hid == MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE) { -+ input = hi->input; -+ input_set_capability(input, EV_SW, SW_TABLET_MODE); -+ input_report_switch(input, SW_TABLET_MODE, 0); -+ return -1; -+ } -+ - /* let hid-core decide for the others */ - return 0; - } -@@ -1434,11 +1490,21 @@ static int mt_event(struct hid_device *hid, struct hid_field *field, - { - struct mt_device *td = hid_get_drvdata(hid); - struct mt_report_data *rdata; -+ struct input_dev *input; - - rdata = mt_find_report_data(td, field->report); - if (rdata && rdata->is_mt_collection) - return mt_touch_event(hid, field, usage, value); - -+ if (field->application == MS_TYPE_COVER_APPLICATION && -+ rdata->application->quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH && -+ usage->hid == MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE) { -+ input = field->hidinput->input; -+ input_report_switch(input, SW_TABLET_MODE, (value & 0xFF) != 0x22); -+ input_sync(input); -+ return 1; -+ } -+ - return 0; - } - -@@ -1591,6 +1657,42 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app) - app->quirks &= ~MT_QUIRK_CONTACT_CNT_ACCURATE; - } - -+static int get_type_cover_field(struct hid_report_enum *rep_enum, -+ struct hid_field **field, int usage) -+{ -+ struct hid_report *rep; -+ struct hid_field *cur_field; -+ int i, j; -+ -+ list_for_each_entry(rep, &rep_enum->report_list, list) { -+ for (i = 0; i < rep->maxfield; i++) { -+ cur_field = rep->field[i]; -+ if (cur_field->application != MS_TYPE_COVER_APPLICATION) -+ continue; -+ for (j = 0; j < cur_field->maxusage; j++) { -+ if (cur_field->usage[j].hid == usage) { -+ *field = cur_field; -+ return true; -+ } -+ } -+ } -+ } -+ return false; -+} -+ -+static void request_type_cover_tablet_mode_switch(struct hid_device *hdev) -+{ -+ struct hid_field *field; -+ -+ if (get_type_cover_field(&hdev->report_enum[HID_INPUT_REPORT], -+ &field, -+ MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE)) { -+ hid_hw_request(hdev, field->report, HID_REQ_GET_REPORT); -+ } else { -+ hid_err(hdev, "couldn't find tablet mode field\n"); -+ } -+} -+ - static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) - { - struct mt_device *td = hid_get_drvdata(hdev); -@@ -1640,6 +1742,13 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) - /* force BTN_STYLUS to allow tablet matching in udev */ - __set_bit(BTN_STYLUS, 
hi->input->keybit); - break; -+ case MS_TYPE_COVER_APPLICATION: -+ if (td->mtclass.quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH) { -+ suffix = "Tablet Mode Switch"; -+ request_type_cover_tablet_mode_switch(hdev); -+ break; -+ } -+ fallthrough; - default: - suffix = "UNKNOWN"; - break; -@@ -1728,6 +1837,46 @@ static void mt_expired_timeout(struct timer_list *t) - clear_bit_unlock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags); - } - -+static void update_keyboard_backlight(struct hid_device *hdev, bool enabled) -+{ -+ struct usb_device *udev = hid_to_usb_dev(hdev); -+ struct hid_field *field = NULL; -+ -+ /* Wake up the device in case it's already suspended */ -+ pm_runtime_get_sync(&udev->dev); -+ -+ if (!get_type_cover_field(&hdev->report_enum[HID_FEATURE_REPORT], -+ &field, -+ MS_TYPE_COVER_FEATURE_REPORT_USAGE)) { -+ hid_err(hdev, "couldn't find backlight field\n"); -+ goto out; -+ } -+ -+ field->value[field->index] = enabled ? 0x01ff00ff : 0x00ff00ff; -+ hid_hw_request(hdev, field->report, HID_REQ_SET_REPORT); -+ -+out: -+ pm_runtime_put_sync(&udev->dev); -+} -+ -+static int mt_pm_notifier(struct notifier_block *notifier, -+ unsigned long pm_event, -+ void *unused) -+{ -+ struct mt_device *td = -+ container_of(notifier, struct mt_device, pm_notifier); -+ struct hid_device *hdev = td->hdev; -+ -+ if (td->mtclass.quirks & MT_QUIRK_HAS_TYPE_COVER_BACKLIGHT) { -+ if (pm_event == PM_SUSPEND_PREPARE) -+ update_keyboard_backlight(hdev, 0); -+ else if (pm_event == PM_POST_SUSPEND) -+ update_keyboard_backlight(hdev, 1); -+ } -+ -+ return NOTIFY_DONE; -+} -+ - static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) - { - int ret, i; -@@ -1751,6 +1900,9 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) - td->inputmode_value = MT_INPUTMODE_TOUCHSCREEN; - hid_set_drvdata(hdev, td); - -+ td->pm_notifier.notifier_call = mt_pm_notifier; -+ register_pm_notifier(&td->pm_notifier); -+ - INIT_LIST_HEAD(&td->applications); - INIT_LIST_HEAD(&td->reports); - -@@ -1789,15 +1941,19 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) - timer_setup(&td->release_timer, mt_expired_timeout, 0); - - ret = hid_parse(hdev); -- if (ret != 0) -+ if (ret != 0) { -+ unregister_pm_notifier(&td->pm_notifier); - return ret; -+ } - - if (mtclass->quirks & MT_QUIRK_FIX_CONST_CONTACT_ID) - mt_fix_const_fields(hdev, HID_DG_CONTACTID); - - ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); -- if (ret) -+ if (ret) { -+ unregister_pm_notifier(&td->pm_notifier); - return ret; -+ } - - ret = sysfs_create_group(&hdev->dev.kobj, &mt_attribute_group); - if (ret) -@@ -1826,13 +1982,24 @@ static int mt_suspend(struct hid_device *hdev, pm_message_t state) - - static int mt_reset_resume(struct hid_device *hdev) - { -+ struct mt_device *td = hid_get_drvdata(hdev); -+ - mt_release_contacts(hdev); - mt_set_modes(hdev, HID_LATENCY_NORMAL, true, true); -+ -+ /* Request an update on the typecover folding state on resume -+ * after reset. -+ */ -+ if (td->mtclass.quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH) -+ request_type_cover_tablet_mode_switch(hdev); -+ - return 0; - } - - static int mt_resume(struct hid_device *hdev) - { -+ struct mt_device *td = hid_get_drvdata(hdev); -+ - /* Some Elan legacy devices require SET_IDLE to be set on resume. - * It should be safe to send it to other devices too. - * Tested on 3M, Stantum, Cypress, Zytronic, eGalax, and Elan panels. 
*/
-@@ -1841,6 +2008,10 @@ static int mt_resume(struct hid_device *hdev)
- 
- mt_set_modes(hdev, HID_LATENCY_NORMAL, true, true);
- 
-+ /* Request an update on the typecover folding state on resume. */
-+ if (td->mtclass.quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH)
-+ request_type_cover_tablet_mode_switch(hdev);
-+
- return 0;
- }
- #endif
-@@ -1848,7 +2019,23 @@ static int mt_resume(struct hid_device *hdev)
- static void mt_remove(struct hid_device *hdev)
- {
- struct mt_device *td = hid_get_drvdata(hdev);
-+ struct hid_field *field;
-+ struct input_dev *input;
- 
-+ /* Reset tablet mode switch on disconnect. */
-+ if (td->mtclass.quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH) {
-+ if (get_type_cover_field(&hdev->report_enum[HID_INPUT_REPORT],
-+ &field,
-+ MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE)) {
-+ input = field->hidinput->input;
-+ input_report_switch(input, SW_TABLET_MODE, 0);
-+ input_sync(input);
-+ } else {
-+ hid_err(hdev, "couldn't find tablet mode field\n");
-+ }
-+ }
-+
-+ unregister_pm_notifier(&td->pm_notifier);
- del_timer_sync(&td->release_timer);
- 
- sysfs_remove_group(&hdev->dev.kobj, &mt_attribute_group);
-@@ -2226,6 +2413,11 @@ static const struct hid_device_id mt_devices[] = {
- MT_USB_DEVICE(USB_VENDOR_ID_XIROKU,
- USB_DEVICE_ID_XIROKU_CSR2) },
- 
-+ /* Microsoft Surface type cover */
-+ { .driver_data = MT_CLS_WIN_8_MS_SURFACE_TYPE_COVER,
-+ HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY,
-+ USB_VENDOR_ID_MICROSOFT, 0x09c0) },
-+
- /* Google MT devices */
- { .driver_data = MT_CLS_GOOGLE,
- HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE,
-diff --git a/drivers/hid/ipts/Kconfig b/drivers/hid/ipts/Kconfig
-new file mode 100644
-index 000000000000..297401bd388d
---- /dev/null
-+++ b/drivers/hid/ipts/Kconfig
-@@ -0,0 +1,14 @@
-+# SPDX-License-Identifier: GPL-2.0-or-later
-+
-+config HID_IPTS
-+	tristate "Intel Precise Touch & Stylus"
-+	depends on INTEL_MEI
-+	depends on HID
-+	help
-+	  Say Y here if your system has a touchscreen using Intel's
-+	  Precise Touch & Stylus (IPTS) technology.
-+
-+	  If unsure, say N.
-+
-+	  To compile this driver as a module, choose M here: the
-+	  module will be called ipts.
-diff --git a/drivers/hid/ipts/Makefile b/drivers/hid/ipts/Makefile
-new file mode 100644
-index 000000000000..0fe655bccdc0
---- /dev/null
-+++ b/drivers/hid/ipts/Makefile
-@@ -0,0 +1,14 @@
-+# SPDX-License-Identifier: GPL-2.0-or-later
-+#
-+# Makefile for the IPTS touchscreen driver
-+#
-+
-+obj-$(CONFIG_HID_IPTS) += ipts.o
-+ipts-objs := cmd.o
-+ipts-objs += control.o
-+ipts-objs += hid.o
-+ipts-objs += main.o
-+ipts-objs += mei.o
-+ipts-objs += receiver.o
-+ipts-objs += resources.o
-+ipts-objs += thread.o
-diff --git a/drivers/hid/ipts/cmd.c b/drivers/hid/ipts/cmd.c
-new file mode 100644
-index 000000000000..7fd69271ccd5
---- /dev/null
-+++ b/drivers/hid/ipts/cmd.c
-@@ -0,0 +1,62 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * Copyright (c) 2016 Intel Corporation
-+ * Copyright (c) 2020-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#include 
-+#include 
-+
-+#include "cmd.h"
-+#include "context.h"
-+#include "mei.h"
-+#include "spec-device.h"
-+
-+int ipts_cmd_recv_timeout(struct ipts_context *ipts, enum ipts_command_code code,
-+ struct ipts_response *rsp, u64 timeout)
-+{
-+ int ret = 0;
-+
-+ if (!ipts)
-+ return -EFAULT;
-+
-+ if (!rsp)
-+ return -EFAULT;
-+
-+ /*
-+ * In a response, the command code will have the most significant bit flipped to 1.
-+ * If code is passed to ipts_mei_recv as is, no messages will be received. -+ */ -+ ret = ipts_mei_recv(&ipts->mei, code | IPTS_RSP_BIT, rsp, timeout); -+ if (ret < 0) -+ return ret; -+ -+ dev_dbg(ipts->dev, "Received 0x%02X with status 0x%02X\n", code, rsp->status); -+ -+ /* -+ * Some devices will always return this error. -+ * It is allowed to ignore it and to try continuing. -+ */ -+ if (rsp->status == IPTS_STATUS_COMPAT_CHECK_FAIL) -+ rsp->status = IPTS_STATUS_SUCCESS; -+ -+ return 0; -+} -+ -+int ipts_cmd_send(struct ipts_context *ipts, enum ipts_command_code code, void *data, size_t size) -+{ -+ struct ipts_command cmd = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ cmd.cmd = code; -+ -+ if (data && size > 0) -+ memcpy(cmd.payload, data, size); -+ -+ dev_dbg(ipts->dev, "Sending 0x%02X with %ld bytes payload\n", code, size); -+ return ipts_mei_send(&ipts->mei, &cmd, sizeof(cmd.cmd) + size); -+} -diff --git a/drivers/hid/ipts/cmd.h b/drivers/hid/ipts/cmd.h -new file mode 100644 -index 000000000000..924758ffee67 ---- /dev/null -+++ b/drivers/hid/ipts/cmd.h -@@ -0,0 +1,61 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2016 Intel Corporation -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_CMD_H -+#define IPTS_CMD_H -+ -+#include -+ -+#include "context.h" -+#include "spec-device.h" -+ -+/* -+ * The default timeout for receiving responses -+ */ -+#define IPTS_CMD_DEFAULT_TIMEOUT 1000 -+ -+/* -+ * ipts_cmd_recv_timeout() - Receives a response to a command. -+ * @ipts: The IPTS driver context. -+ * @code: The type of the command / response. -+ * @rsp: The address that the received response will be copied to. -+ * @timeout: How many milliseconds the function will wait at most. -+ * -+ * A negative timeout means to wait forever. -+ * -+ * Returns: 0 on success, <0 on error, -EAGAIN if no response has been received. -+ */ -+int ipts_cmd_recv_timeout(struct ipts_context *ipts, enum ipts_command_code code, -+ struct ipts_response *rsp, u64 timeout); -+ -+/* -+ * ipts_cmd_recv() - Receives a response to a command. -+ * @ipts: The IPTS driver context. -+ * @code: The type of the command / response. -+ * @rsp: The address that the received response will be copied to. -+ * -+ * Returns: 0 on success, <0 on error, -EAGAIN if no response has been received. -+ */ -+static inline int ipts_cmd_recv(struct ipts_context *ipts, enum ipts_command_code code, -+ struct ipts_response *rsp) -+{ -+ return ipts_cmd_recv_timeout(ipts, code, rsp, IPTS_CMD_DEFAULT_TIMEOUT); -+} -+ -+/* -+ * ipts_cmd_send() - Executes a command on the device. -+ * @ipts: The IPTS driver context. -+ * @code: The type of the command to execute. -+ * @data: The payload containing parameters for the command. -+ * @size: The size of the payload. -+ * -+ * Returns: 0 on success, <0 on error. 
-+ */ -+int ipts_cmd_send(struct ipts_context *ipts, enum ipts_command_code code, void *data, size_t size); -+ -+#endif /* IPTS_CMD_H */ -diff --git a/drivers/hid/ipts/context.h b/drivers/hid/ipts/context.h -new file mode 100644 -index 000000000000..3450a95e66ee ---- /dev/null -+++ b/drivers/hid/ipts/context.h -@@ -0,0 +1,51 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2016 Intel Corporation -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_CONTEXT_H -+#define IPTS_CONTEXT_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "mei.h" -+#include "resources.h" -+#include "spec-device.h" -+#include "thread.h" -+ -+struct ipts_context { -+ struct device *dev; -+ struct ipts_mei mei; -+ -+ enum ipts_mode mode; -+ -+ /* -+ * Prevents concurrent GET_FEATURE reports. -+ */ -+ struct mutex feature_lock; -+ struct completion feature_event; -+ -+ /* -+ * These are not inside of struct ipts_resources -+ * because they don't own the memory they point to. -+ */ -+ struct ipts_buffer feature_report; -+ struct ipts_buffer descriptor; -+ -+ struct hid_device *hid; -+ struct ipts_device_info info; -+ struct ipts_resources resources; -+ -+ struct ipts_thread receiver_loop; -+}; -+ -+#endif /* IPTS_CONTEXT_H */ -diff --git a/drivers/hid/ipts/control.c b/drivers/hid/ipts/control.c -new file mode 100644 -index 000000000000..2f61500b5119 ---- /dev/null -+++ b/drivers/hid/ipts/control.c -@@ -0,0 +1,495 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2016 Intel Corporation -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "cmd.h" -+#include "context.h" -+#include "control.h" -+#include "desc.h" -+#include "hid.h" -+#include "receiver.h" -+#include "resources.h" -+#include "spec-data.h" -+#include "spec-device.h" -+ -+static int ipts_control_get_device_info(struct ipts_context *ipts, struct ipts_device_info *info) -+{ -+ int ret = 0; -+ struct ipts_response rsp = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!info) -+ return -EFAULT; -+ -+ ret = ipts_cmd_send(ipts, IPTS_CMD_GET_DEVICE_INFO, NULL, 0); -+ if (ret) { -+ dev_err(ipts->dev, "GET_DEVICE_INFO: send failed: %d\n", ret); -+ return ret; -+ } -+ -+ ret = ipts_cmd_recv(ipts, IPTS_CMD_GET_DEVICE_INFO, &rsp); -+ if (ret) { -+ dev_err(ipts->dev, "GET_DEVICE_INFO: recv failed: %d\n", ret); -+ return ret; -+ } -+ -+ if (rsp.status != IPTS_STATUS_SUCCESS) { -+ dev_err(ipts->dev, "GET_DEVICE_INFO: cmd failed: %d\n", rsp.status); -+ return -EBADR; -+ } -+ -+ memcpy(info, rsp.payload, sizeof(*info)); -+ return 0; -+} -+ -+static int ipts_control_set_mode(struct ipts_context *ipts, enum ipts_mode mode) -+{ -+ int ret = 0; -+ struct ipts_set_mode cmd = { 0 }; -+ struct ipts_response rsp = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ cmd.mode = mode; -+ -+ ret = ipts_cmd_send(ipts, IPTS_CMD_SET_MODE, &cmd, sizeof(cmd)); -+ if (ret) { -+ dev_err(ipts->dev, "SET_MODE: send failed: %d\n", ret); -+ return ret; -+ } -+ -+ ret = ipts_cmd_recv(ipts, IPTS_CMD_SET_MODE, &rsp); -+ if (ret) { -+ dev_err(ipts->dev, "SET_MODE: recv failed: %d\n", ret); -+ return ret; -+ } -+ -+ if (rsp.status != IPTS_STATUS_SUCCESS) { -+ dev_err(ipts->dev, "SET_MODE: cmd failed: %d\n", rsp.status); -+ return -EBADR; -+ } -+ -+ return 0; -+} -+ -+static int 
ipts_control_set_mem_window(struct ipts_context *ipts, struct ipts_resources *res) -+{ -+ int ret = 0; -+ struct ipts_mem_window cmd = { 0 }; -+ struct ipts_response rsp = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!res) -+ return -EFAULT; -+ -+ for (int i = 0; i < IPTS_BUFFERS; i++) { -+ cmd.data_addr_lower[i] = lower_32_bits(res->data[i].dma_address); -+ cmd.data_addr_upper[i] = upper_32_bits(res->data[i].dma_address); -+ cmd.feedback_addr_lower[i] = lower_32_bits(res->feedback[i].dma_address); -+ cmd.feedback_addr_upper[i] = upper_32_bits(res->feedback[i].dma_address); -+ } -+ -+ cmd.workqueue_addr_lower = lower_32_bits(res->workqueue.dma_address); -+ cmd.workqueue_addr_upper = upper_32_bits(res->workqueue.dma_address); -+ -+ cmd.doorbell_addr_lower = lower_32_bits(res->doorbell.dma_address); -+ cmd.doorbell_addr_upper = upper_32_bits(res->doorbell.dma_address); -+ -+ cmd.hid2me_addr_lower = lower_32_bits(res->hid2me.dma_address); -+ cmd.hid2me_addr_upper = upper_32_bits(res->hid2me.dma_address); -+ -+ cmd.workqueue_size = IPTS_WORKQUEUE_SIZE; -+ cmd.workqueue_item_size = IPTS_WORKQUEUE_ITEM_SIZE; -+ -+ ret = ipts_cmd_send(ipts, IPTS_CMD_SET_MEM_WINDOW, &cmd, sizeof(cmd)); -+ if (ret) { -+ dev_err(ipts->dev, "SET_MEM_WINDOW: send failed: %d\n", ret); -+ return ret; -+ } -+ -+ ret = ipts_cmd_recv(ipts, IPTS_CMD_SET_MEM_WINDOW, &rsp); -+ if (ret) { -+ dev_err(ipts->dev, "SET_MEM_WINDOW: recv failed: %d\n", ret); -+ return ret; -+ } -+ -+ if (rsp.status != IPTS_STATUS_SUCCESS) { -+ dev_err(ipts->dev, "SET_MEM_WINDOW: cmd failed: %d\n", rsp.status); -+ return -EBADR; -+ } -+ -+ return 0; -+} -+ -+static int ipts_control_get_descriptor(struct ipts_context *ipts) -+{ -+ int ret = 0; -+ struct ipts_data_header *header = NULL; -+ struct ipts_get_descriptor cmd = { 0 }; -+ struct ipts_response rsp = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!ipts->resources.descriptor.address) -+ return -EFAULT; -+ -+ memset(ipts->resources.descriptor.address, 0, ipts->resources.descriptor.size); -+ -+ cmd.addr_lower = lower_32_bits(ipts->resources.descriptor.dma_address); -+ cmd.addr_upper = upper_32_bits(ipts->resources.descriptor.dma_address); -+ cmd.magic = 8; -+ -+ ret = ipts_cmd_send(ipts, IPTS_CMD_GET_DESCRIPTOR, &cmd, sizeof(cmd)); -+ if (ret) { -+ dev_err(ipts->dev, "GET_DESCRIPTOR: send failed: %d\n", ret); -+ return ret; -+ } -+ -+ ret = ipts_cmd_recv(ipts, IPTS_CMD_GET_DESCRIPTOR, &rsp); -+ if (ret) { -+ dev_err(ipts->dev, "GET_DESCRIPTOR: recv failed: %d\n", ret); -+ return ret; -+ } -+ -+ if (rsp.status != IPTS_STATUS_SUCCESS) { -+ dev_err(ipts->dev, "GET_DESCRIPTOR: cmd failed: %d\n", rsp.status); -+ return -EBADR; -+ } -+ -+ header = (struct ipts_data_header *)ipts->resources.descriptor.address; -+ -+ if (header->type == IPTS_DATA_TYPE_DESCRIPTOR) { -+ ipts->descriptor.address = &header->data[8]; -+ ipts->descriptor.size = header->size - 8; -+ -+ return 0; -+ } -+ -+ return -ENODATA; -+} -+ -+int ipts_control_request_flush(struct ipts_context *ipts) -+{ -+ int ret = 0; -+ struct ipts_quiesce_io cmd = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ ret = ipts_cmd_send(ipts, IPTS_CMD_QUIESCE_IO, &cmd, sizeof(cmd)); -+ if (ret) -+ dev_err(ipts->dev, "QUIESCE_IO: send failed: %d\n", ret); -+ -+ return ret; -+} -+ -+int ipts_control_wait_flush(struct ipts_context *ipts) -+{ -+ int ret = 0; -+ struct ipts_response rsp = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ ret = ipts_cmd_recv(ipts, IPTS_CMD_QUIESCE_IO, &rsp); -+ if (ret) { -+ dev_err(ipts->dev, "QUIESCE_IO: recv 
failed: %d\n", ret); -+ return ret; -+ } -+ -+ if (rsp.status == IPTS_STATUS_TIMEOUT) -+ return -EAGAIN; -+ -+ if (rsp.status != IPTS_STATUS_SUCCESS) { -+ dev_err(ipts->dev, "QUIESCE_IO: cmd failed: %d\n", rsp.status); -+ return -EBADR; -+ } -+ -+ return 0; -+} -+ -+int ipts_control_request_data(struct ipts_context *ipts) -+{ -+ int ret = 0; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ ret = ipts_cmd_send(ipts, IPTS_CMD_READY_FOR_DATA, NULL, 0); -+ if (ret) -+ dev_err(ipts->dev, "READY_FOR_DATA: send failed: %d\n", ret); -+ -+ return ret; -+} -+ -+int ipts_control_wait_data(struct ipts_context *ipts, bool shutdown) -+{ -+ int ret = 0; -+ struct ipts_response rsp = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!shutdown) -+ ret = ipts_cmd_recv_timeout(ipts, IPTS_CMD_READY_FOR_DATA, &rsp, 0); -+ else -+ ret = ipts_cmd_recv(ipts, IPTS_CMD_READY_FOR_DATA, &rsp); -+ -+ if (ret) { -+ if (ret != -EAGAIN) -+ dev_err(ipts->dev, "READY_FOR_DATA: recv failed: %d\n", ret); -+ -+ return ret; -+ } -+ -+ /* -+ * During shutdown, it is possible that the sensor has already been disabled. -+ */ -+ if (rsp.status == IPTS_STATUS_SENSOR_DISABLED) -+ return 0; -+ -+ if (rsp.status == IPTS_STATUS_TIMEOUT) -+ return -EAGAIN; -+ -+ if (rsp.status != IPTS_STATUS_SUCCESS) { -+ dev_err(ipts->dev, "READY_FOR_DATA: cmd failed: %d\n", rsp.status); -+ return -EBADR; -+ } -+ -+ return 0; -+} -+ -+int ipts_control_send_feedback(struct ipts_context *ipts, u32 buffer) -+{ -+ int ret = 0; -+ struct ipts_feedback cmd = { 0 }; -+ struct ipts_response rsp = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ cmd.buffer = buffer; -+ -+ ret = ipts_cmd_send(ipts, IPTS_CMD_FEEDBACK, &cmd, sizeof(cmd)); -+ if (ret) { -+ dev_err(ipts->dev, "FEEDBACK: send failed: %d\n", ret); -+ return ret; -+ } -+ -+ ret = ipts_cmd_recv(ipts, IPTS_CMD_FEEDBACK, &rsp); -+ if (ret) { -+ dev_err(ipts->dev, "FEEDBACK: recv failed: %d\n", ret); -+ return ret; -+ } -+ -+ /* -+ * We don't know what feedback data looks like so we are sending zeros. -+ * See also ipts_control_refill_buffer. 
-+ */ -+ if (rsp.status == IPTS_STATUS_INVALID_PARAMS) -+ return 0; -+ -+ if (rsp.status != IPTS_STATUS_SUCCESS) { -+ dev_err(ipts->dev, "FEEDBACK: cmd failed: %d\n", rsp.status); -+ return -EBADR; -+ } -+ -+ return 0; -+} -+ -+int ipts_control_hid2me_feedback(struct ipts_context *ipts, enum ipts_feedback_cmd_type cmd, -+ enum ipts_feedback_data_type type, void *data, size_t size) -+{ -+ struct ipts_feedback_header *header = NULL; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!ipts->resources.hid2me.address) -+ return -EFAULT; -+ -+ memset(ipts->resources.hid2me.address, 0, ipts->resources.hid2me.size); -+ header = (struct ipts_feedback_header *)ipts->resources.hid2me.address; -+ -+ header->cmd_type = cmd; -+ header->data_type = type; -+ header->size = size; -+ header->buffer = IPTS_HID2ME_BUFFER; -+ -+ if (size + sizeof(*header) > ipts->resources.hid2me.size) -+ return -EINVAL; -+ -+ if (data && size > 0) -+ memcpy(header->payload, data, size); -+ -+ return ipts_control_send_feedback(ipts, IPTS_HID2ME_BUFFER); -+} -+ -+static inline int ipts_control_reset_sensor(struct ipts_context *ipts) -+{ -+ return ipts_control_hid2me_feedback(ipts, IPTS_FEEDBACK_CMD_TYPE_SOFT_RESET, -+ IPTS_FEEDBACK_DATA_TYPE_VENDOR, NULL, 0); -+} -+ -+int ipts_control_start(struct ipts_context *ipts) -+{ -+ int ret = 0; -+ struct ipts_device_info info = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ dev_info(ipts->dev, "Starting IPTS\n"); -+ -+ ret = ipts_control_get_device_info(ipts, &info); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to get device info: %d\n", ret); -+ return ret; -+ } -+ -+ ipts->info = info; -+ -+ ret = ipts_resources_init(&ipts->resources, ipts->dev, info.data_size, info.feedback_size); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to allocate buffers: %d", ret); -+ return ret; -+ } -+ -+ dev_info(ipts->dev, "IPTS EDS Version: %d\n", info.intf_eds); -+ -+ /* -+ * Handle newer devices -+ */ -+ if (info.intf_eds > 1) { -+ /* -+ * Fetching the descriptor will only work on newer devices. -+ * For older devices, a fallback descriptor will be used. -+ */ -+ ret = ipts_control_get_descriptor(ipts); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to fetch HID descriptor: %d\n", ret); -+ return ret; -+ } -+ -+ /* -+ * Newer devices can be directly initialized in doorbell mode. 
-+ */
-+ ipts->mode = IPTS_MODE_DOORBELL;
-+ }
-+
-+ ret = ipts_control_set_mode(ipts, ipts->mode);
-+ if (ret) {
-+ dev_err(ipts->dev, "Failed to set mode: %d\n", ret);
-+ return ret;
-+ }
-+
-+ ret = ipts_control_set_mem_window(ipts, &ipts->resources);
-+ if (ret) {
-+ dev_err(ipts->dev, "Failed to set memory window: %d\n", ret);
-+ return ret;
-+ }
-+
-+ ret = ipts_receiver_start(ipts);
-+ if (ret) {
-+ dev_err(ipts->dev, "Failed to start receiver: %d\n", ret);
-+ return ret;
-+ }
-+
-+ ret = ipts_control_request_data(ipts);
-+ if (ret) {
-+ dev_err(ipts->dev, "Failed to request data: %d\n", ret);
-+ return ret;
-+ }
-+
-+ ret = ipts_hid_init(ipts, info);
-+ if (ret) {
-+ dev_err(ipts->dev, "Failed to initialize HID device: %d\n", ret);
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+
-+static int _ipts_control_stop(struct ipts_context *ipts)
-+{
-+ int ret = 0;
-+
-+ if (!ipts)
-+ return -EFAULT;
-+
-+ dev_info(ipts->dev, "Stopping IPTS\n");
-+
-+ ret = ipts_receiver_stop(ipts);
-+ if (ret) {
-+ dev_err(ipts->dev, "Failed to stop receiver: %d\n", ret);
-+ return ret;
-+ }
-+
-+ ret = ipts_control_reset_sensor(ipts);
-+ if (ret) {
-+ dev_err(ipts->dev, "Failed to reset sensor: %d\n", ret);
-+ return ret;
-+ }
-+
-+ ret = ipts_resources_free(&ipts->resources);
-+ if (ret) {
-+ dev_err(ipts->dev, "Failed to free resources: %d\n", ret);
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+
-+int ipts_control_stop(struct ipts_context *ipts)
-+{
-+ int ret = 0;
-+
-+ ret = _ipts_control_stop(ipts);
-+ if (ret)
-+ return ret;
-+
-+ ret = ipts_hid_free(ipts);
-+ if (ret) {
-+ dev_err(ipts->dev, "Failed to free HID device: %d\n", ret);
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+
-+int ipts_control_restart(struct ipts_context *ipts)
-+{
-+ int ret = 0;
-+
-+ ret = _ipts_control_stop(ipts);
-+ if (ret)
-+ return ret;
-+
-+ /*
-+ * Give the sensor some time to come back from resetting
-+ */
-+ msleep(1000);
-+
-+ ret = ipts_control_start(ipts);
-+ if (ret)
-+ return ret;
-+
-+ return 0;
-+}
-diff --git a/drivers/hid/ipts/control.h b/drivers/hid/ipts/control.h
-new file mode 100644
-index 000000000000..744bb92d682a
---- /dev/null
-+++ b/drivers/hid/ipts/control.h
-@@ -0,0 +1,127 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Copyright (c) 2016 Intel Corporation
-+ * Copyright (c) 2020-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#ifndef IPTS_CONTROL_H
-+#define IPTS_CONTROL_H
-+
-+#include 
-+
-+#include "context.h"
-+#include "spec-data.h"
-+#include "spec-device.h"
-+
-+/*
-+ * ipts_control_request_flush() - Stop the data flow.
-+ * @ipts: The IPTS driver context.
-+ *
-+ * Runs the command to stop the data flow on the device.
-+ * All outstanding data needs to be acknowledged using feedback before the command will return.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_control_request_flush(struct ipts_context *ipts);
-+
-+/*
-+ * ipts_control_wait_flush() - Wait until data flow has been stopped.
-+ * @ipts: The IPTS driver context.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_control_wait_flush(struct ipts_context *ipts);
-+
-+/*
-+ * ipts_control_request_data() - Notify the device that the driver can receive new data.
-+ * @ipts: The IPTS driver context.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_control_request_data(struct ipts_context *ipts);
-+
-+/*
-+ * ipts_control_wait_data() - Wait until new data is available.
-+ * @ipts: The IPTS driver context.
-+ * @block: Whether to block execution until data is available.
-+ *
-+ * In doorbell mode, this function will never return while the data flow is active. Instead,
-+ * the doorbell will be incremented when new data is available.
-+ *
-+ * Returns: 0 on success, <0 on error, -EAGAIN if no data is available.
-+ */
-+int ipts_control_wait_data(struct ipts_context *ipts, bool block);
-+
-+/*
-+ * ipts_control_send_feedback() - Submits a feedback buffer to the device.
-+ * @ipts: The IPTS driver context.
-+ * @buffer: The ID of the buffer containing feedback data.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_control_send_feedback(struct ipts_context *ipts, u32 buffer);
-+
-+/*
-+ * ipts_control_hid2me_feedback() - Sends HID2ME feedback, a special type of feedback.
-+ * @ipts: The IPTS driver context.
-+ * @cmd: The command that will be run on the device.
-+ * @type: The type of the payload that is sent to the device.
-+ * @data: The payload of the feedback command.
-+ * @size: The size of the payload.
-+ *
-+ * HID2ME feedback is a special type of feedback, because it allows interfacing with
-+ * the HID API of the device at any moment, without requiring a buffer that has to
-+ * be acknowledged.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_control_hid2me_feedback(struct ipts_context *ipts, enum ipts_feedback_cmd_type cmd,
-+ enum ipts_feedback_data_type type, void *data, size_t size);
-+
-+/*
-+ * ipts_control_refill_buffer() - Acknowledges that data in a buffer has been processed.
-+ * @ipts: The IPTS driver context.
-+ * @buffer: The buffer that has been processed and can be refilled.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+static inline int ipts_control_refill_buffer(struct ipts_context *ipts, u32 buffer)
-+{
-+ /*
-+ * IPTS expects structured data in the feedback buffer matching the buffer that will be
-+ * refilled. We don't know what that data looks like, so we just keep the buffer empty.
-+ * This results in an INVALID_PARAMS error, but the buffer gets refilled without an issue.
-+ * Sending a minimal structure with the buffer ID fixes the error, but breaks refilling
-+ * the buffers on some devices.
-+ */
-+
-+ return ipts_control_send_feedback(ipts, buffer);
-+}
-+
-+/*
-+ * ipts_control_start() - Initializes the device and starts the data flow.
-+ * @ipts: The IPTS driver context.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_control_start(struct ipts_context *ipts);
-+
-+/*
-+ * ipts_control_stop() - Stops the data flow and resets the device.
-+ * @ipts: The IPTS driver context.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_control_stop(struct ipts_context *ipts);
-+
-+/*
-+ * ipts_control_restart() - Stops the device and starts it again.
-+ * @ipts: The IPTS driver context.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */ -+int ipts_control_restart(struct ipts_context *ipts); -+ -+#endif /* IPTS_CONTROL_H */ -diff --git a/drivers/hid/ipts/desc.h b/drivers/hid/ipts/desc.h -new file mode 100644 -index 000000000000..c058974a03a1 ---- /dev/null -+++ b/drivers/hid/ipts/desc.h -@@ -0,0 +1,81 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2016 Intel Corporation -+ * Copyright (c) 2022-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_DESC_H -+#define IPTS_DESC_H -+ -+#include -+ -+#define IPTS_HID_REPORT_SINGLETOUCH 64 -+#define IPTS_HID_REPORT_DATA 65 -+#define IPTS_HID_REPORT_SET_MODE 66 -+ -+#define IPTS_HID_REPORT_DATA_SIZE 7485 -+ -+/* -+ * HID descriptor for singletouch data. -+ * This descriptor should be present on all IPTS devices. -+ */ -+static const u8 ipts_singletouch_descriptor[] = { -+ 0x05, 0x0D, /* Usage Page (Digitizer), */ -+ 0x09, 0x04, /* Usage (Touchscreen), */ -+ 0xA1, 0x01, /* Collection (Application), */ -+ 0x85, 0x40, /* Report ID (64), */ -+ 0x09, 0x42, /* Usage (Tip Switch), */ -+ 0x15, 0x00, /* Logical Minimum (0), */ -+ 0x25, 0x01, /* Logical Maximum (1), */ -+ 0x75, 0x01, /* Report Size (1), */ -+ 0x95, 0x01, /* Report Count (1), */ -+ 0x81, 0x02, /* Input (Variable), */ -+ 0x95, 0x07, /* Report Count (7), */ -+ 0x81, 0x03, /* Input (Constant, Variable), */ -+ 0x05, 0x01, /* Usage Page (Desktop), */ -+ 0x09, 0x30, /* Usage (X), */ -+ 0x75, 0x10, /* Report Size (16), */ -+ 0x95, 0x01, /* Report Count (1), */ -+ 0xA4, /* Push, */ -+ 0x55, 0x0E, /* Unit Exponent (14), */ -+ 0x65, 0x11, /* Unit (Centimeter), */ -+ 0x46, 0x76, 0x0B, /* Physical Maximum (2934), */ -+ 0x26, 0xFF, 0x7F, /* Logical Maximum (32767), */ -+ 0x81, 0x02, /* Input (Variable), */ -+ 0x09, 0x31, /* Usage (Y), */ -+ 0x46, 0x74, 0x06, /* Physical Maximum (1652), */ -+ 0x26, 0xFF, 0x7F, /* Logical Maximum (32767), */ -+ 0x81, 0x02, /* Input (Variable), */ -+ 0xB4, /* Pop, */ -+ 0xC0, /* End Collection */ -+}; -+ -+/* -+ * Fallback HID descriptor for older devices that do not have -+ * the ability to query their HID descriptor. 
-+ */ -+static const u8 ipts_fallback_descriptor[] = { -+ 0x05, 0x0D, /* Usage Page (Digitizer), */ -+ 0x09, 0x0F, /* Usage (Capacitive Hm Digitizer), */ -+ 0xA1, 0x01, /* Collection (Application), */ -+ 0x85, 0x41, /* Report ID (65), */ -+ 0x09, 0x56, /* Usage (Scan Time), */ -+ 0x95, 0x01, /* Report Count (1), */ -+ 0x75, 0x10, /* Report Size (16), */ -+ 0x81, 0x02, /* Input (Variable), */ -+ 0x09, 0x61, /* Usage (Gesture Char Quality), */ -+ 0x75, 0x08, /* Report Size (8), */ -+ 0x96, 0x3D, 0x1D, /* Report Count (7485), */ -+ 0x81, 0x03, /* Input (Constant, Variable), */ -+ 0x85, 0x42, /* Report ID (66), */ -+ 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ -+ 0x09, 0xC8, /* Usage (C8h), */ -+ 0x75, 0x08, /* Report Size (8), */ -+ 0x95, 0x01, /* Report Count (1), */ -+ 0xB1, 0x02, /* Feature (Variable), */ -+ 0xC0, /* End Collection, */ -+}; -+ -+#endif /* IPTS_DESC_H */ -diff --git a/drivers/hid/ipts/hid.c b/drivers/hid/ipts/hid.c -new file mode 100644 -index 000000000000..6782394e8dde ---- /dev/null -+++ b/drivers/hid/ipts/hid.c -@@ -0,0 +1,348 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2016 Intel Corporation -+ * Copyright (c) 2022-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "context.h" -+#include "control.h" -+#include "desc.h" -+#include "hid.h" -+#include "spec-data.h" -+#include "spec-device.h" -+#include "spec-hid.h" -+ -+static int ipts_hid_start(struct hid_device *hid) -+{ -+ return 0; -+} -+ -+static void ipts_hid_stop(struct hid_device *hid) -+{ -+} -+ -+static int ipts_hid_switch_mode(struct ipts_context *ipts, enum ipts_mode mode) -+{ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (ipts->mode == mode) -+ return 0; -+ -+ /* -+ * This is only allowed on older devices. 
-+ */ -+ if (ipts->info.intf_eds > 1) -+ return 0; -+ -+ ipts->mode = mode; -+ return ipts_control_restart(ipts); -+} -+ -+static int ipts_hid_parse(struct hid_device *hid) -+{ -+ int ret = 0; -+ struct ipts_context *ipts = NULL; -+ -+ bool has_native_descriptor = false; -+ -+ u8 *buffer = NULL; -+ size_t size = 0; -+ -+ if (!hid) -+ return -ENODEV; -+ -+ ipts = hid->driver_data; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ size = sizeof(ipts_singletouch_descriptor); -+ has_native_descriptor = ipts->descriptor.address && ipts->descriptor.size > 0; -+ -+ if (has_native_descriptor) -+ size += ipts->descriptor.size; -+ else -+ size += sizeof(ipts_fallback_descriptor); -+ -+ buffer = kzalloc(size, GFP_KERNEL); -+ if (!buffer) -+ return -ENOMEM; -+ -+ memcpy(buffer, ipts_singletouch_descriptor, sizeof(ipts_singletouch_descriptor)); -+ -+ if (has_native_descriptor) { -+ memcpy(&buffer[sizeof(ipts_singletouch_descriptor)], ipts->descriptor.address, -+ ipts->descriptor.size); -+ } else { -+ memcpy(&buffer[sizeof(ipts_singletouch_descriptor)], ipts_fallback_descriptor, -+ sizeof(ipts_fallback_descriptor)); -+ } -+ -+ ret = hid_parse_report(hid, buffer, size); -+ kfree(buffer); -+ -+ if (ret) { -+ dev_err(ipts->dev, "Failed to parse HID descriptor: %d\n", ret); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static int ipts_hid_get_feature(struct ipts_context *ipts, unsigned char reportnum, __u8 *buf, -+ size_t size, enum ipts_feedback_data_type type) -+{ -+ int ret = 0; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!buf) -+ return -EFAULT; -+ -+ mutex_lock(&ipts->feature_lock); -+ -+ memset(buf, 0, size); -+ buf[0] = reportnum; -+ -+ memset(&ipts->feature_report, 0, sizeof(ipts->feature_report)); -+ reinit_completion(&ipts->feature_event); -+ -+ ret = ipts_control_hid2me_feedback(ipts, IPTS_FEEDBACK_CMD_TYPE_NONE, type, buf, size); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to send hid2me feedback: %d\n", ret); -+ goto out; -+ } -+ -+ ret = wait_for_completion_timeout(&ipts->feature_event, msecs_to_jiffies(5000)); -+ if (ret == 0) { -+ dev_warn(ipts->dev, "GET_FEATURES timed out!\n"); -+ ret = -EIO; -+ goto out; -+ } -+ -+ if (!ipts->feature_report.address) { -+ ret = -EFAULT; -+ goto out; -+ } -+ -+ if (ipts->feature_report.size > size) { -+ ret = -ETOOSMALL; -+ goto out; -+ } -+ -+ ret = ipts->feature_report.size; -+ memcpy(buf, ipts->feature_report.address, ipts->feature_report.size); -+ -+out: -+ mutex_unlock(&ipts->feature_lock); -+ return ret; -+} -+ -+static int ipts_hid_set_feature(struct ipts_context *ipts, unsigned char reportnum, __u8 *buf, -+ size_t size, enum ipts_feedback_data_type type) -+{ -+ int ret = 0; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!buf) -+ return -EFAULT; -+ -+ buf[0] = reportnum; -+ -+ ret = ipts_control_hid2me_feedback(ipts, IPTS_FEEDBACK_CMD_TYPE_NONE, type, buf, size); -+ if (ret) -+ dev_err(ipts->dev, "Failed to send hid2me feedback: %d\n", ret); -+ -+ return ret; -+} -+ -+static int ipts_hid_raw_request(struct hid_device *hid, unsigned char reportnum, __u8 *buf, -+ size_t size, unsigned char rtype, int reqtype) -+{ -+ int ret = 0; -+ struct ipts_context *ipts = NULL; -+ -+ enum ipts_feedback_data_type type = IPTS_FEEDBACK_DATA_TYPE_VENDOR; -+ -+ if (!hid) -+ return -ENODEV; -+ -+ ipts = hid->driver_data; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!buf) -+ return -EFAULT; -+ -+ if (rtype == HID_OUTPUT_REPORT && reqtype == HID_REQ_SET_REPORT) -+ type = IPTS_FEEDBACK_DATA_TYPE_OUTPUT_REPORT; -+ else if (rtype == HID_FEATURE_REPORT && reqtype == 
HID_REQ_GET_REPORT)
-+		type = IPTS_FEEDBACK_DATA_TYPE_GET_FEATURES;
-+	else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_SET_REPORT)
-+		type = IPTS_FEEDBACK_DATA_TYPE_SET_FEATURES;
-+	else
-+		return -EIO;
-+
-+	// Implement the mode switching report for older devices without native HID support
-+	if (type == IPTS_FEEDBACK_DATA_TYPE_SET_FEATURES && reportnum == IPTS_HID_REPORT_SET_MODE) {
-+		ret = ipts_hid_switch_mode(ipts, buf[1]);
-+		if (ret) {
-+			dev_err(ipts->dev, "Failed to switch modes: %d\n", ret);
-+			return ret;
-+		}
-+	}
-+
-+	if (reqtype == HID_REQ_GET_REPORT)
-+		return ipts_hid_get_feature(ipts, reportnum, buf, size, type);
-+	else
-+		return ipts_hid_set_feature(ipts, reportnum, buf, size, type);
-+}
-+
-+static int ipts_hid_output_report(struct hid_device *hid, __u8 *data, size_t size)
-+{
-+	struct ipts_context *ipts = NULL;
-+
-+	if (!hid)
-+		return -ENODEV;
-+
-+	ipts = hid->driver_data;
-+
-+	return ipts_control_hid2me_feedback(ipts, IPTS_FEEDBACK_CMD_TYPE_NONE,
-+					    IPTS_FEEDBACK_DATA_TYPE_OUTPUT_REPORT, data, size);
-+}
-+
-+static struct hid_ll_driver ipts_hid_driver = {
-+	.start = ipts_hid_start,
-+	.stop = ipts_hid_stop,
-+	.open = ipts_hid_start,
-+	.close = ipts_hid_stop,
-+	.parse = ipts_hid_parse,
-+	.raw_request = ipts_hid_raw_request,
-+	.output_report = ipts_hid_output_report,
-+};
-+
-+int ipts_hid_input_data(struct ipts_context *ipts, u32 buffer)
-+{
-+	int ret = 0;
-+	u8 *temp = NULL;
-+	struct ipts_hid_header *frame = NULL;
-+	struct ipts_data_header *header = NULL;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (!ipts->hid)
-+		return -ENODEV;
-+
-+	header = (struct ipts_data_header *)ipts->resources.data[buffer].address;
-+
-+	if (!header)
-+		return -EFAULT;
-+
-+	if (header->size == 0)
-+		return 0;
-+
-+	if (header->type == IPTS_DATA_TYPE_HID)
-+		return hid_input_report(ipts->hid, HID_INPUT_REPORT, header->data, header->size, 1);
-+
-+	if (header->type == IPTS_DATA_TYPE_GET_FEATURES) {
-+		ipts->feature_report.address = header->data;
-+		ipts->feature_report.size = header->size;
-+
-+		complete_all(&ipts->feature_event);
-+		return 0;
-+	}
-+
-+	if (header->type != IPTS_DATA_TYPE_FRAME)
-+		return 0;
-+
-+	if (header->size + 3 + sizeof(struct ipts_hid_header) > IPTS_HID_REPORT_DATA_SIZE)
-+		return -ERANGE;
-+
-+	temp = kzalloc(IPTS_HID_REPORT_DATA_SIZE, GFP_KERNEL);
-+	if (!temp)
-+		return -ENOMEM;
-+
-+	/*
-+	 * Synthesize a HID report matching the devices that natively send HID reports
-+	 */
-+	temp[0] = IPTS_HID_REPORT_DATA;
-+
-+	frame = (struct ipts_hid_header *)&temp[3];
-+	frame->type = IPTS_HID_FRAME_TYPE_RAW;
-+	frame->size = header->size + sizeof(*frame);
-+
-+	memcpy(frame->data, header->data, header->size);
-+
-+	ret = hid_input_report(ipts->hid, HID_INPUT_REPORT, temp, IPTS_HID_REPORT_DATA_SIZE, 1);
-+	kfree(temp);
-+
-+	return ret;
-+}
-+
-+int ipts_hid_init(struct ipts_context *ipts, struct ipts_device_info info)
-+{
-+	int ret = 0;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (ipts->hid)
-+		return 0;
-+
-+	ipts->hid = hid_allocate_device();
-+	if (IS_ERR(ipts->hid)) {
-+		int err = PTR_ERR(ipts->hid);
-+
-+		dev_err(ipts->dev, "Failed to allocate HID device: %d\n", err);
-+		return err;
-+	}
-+
-+	ipts->hid->driver_data = ipts;
-+	ipts->hid->dev.parent = ipts->dev;
-+	ipts->hid->ll_driver = &ipts_hid_driver;
-+
-+	ipts->hid->vendor = info.vendor;
-+	ipts->hid->product = info.product;
-+	ipts->hid->group = HID_GROUP_MULTITOUCH;
-+
-+	snprintf(ipts->hid->name, sizeof(ipts->hid->name), "IPTS %04X:%04X", info.vendor,
-+		 info.product);
-+
-+	ret = 
hid_add_device(ipts->hid); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to add HID device: %d\n", ret); -+ ipts_hid_free(ipts); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+int ipts_hid_free(struct ipts_context *ipts) -+{ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!ipts->hid) -+ return 0; -+ -+ hid_destroy_device(ipts->hid); -+ ipts->hid = NULL; -+ -+ return 0; -+} -diff --git a/drivers/hid/ipts/hid.h b/drivers/hid/ipts/hid.h -new file mode 100644 -index 000000000000..62bf3cd48608 ---- /dev/null -+++ b/drivers/hid/ipts/hid.h -@@ -0,0 +1,22 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2016 Intel Corporation -+ * Copyright (c) 2022-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_HID_H -+#define IPTS_HID_H -+ -+#include -+ -+#include "context.h" -+#include "spec-device.h" -+ -+int ipts_hid_input_data(struct ipts_context *ipts, u32 buffer); -+ -+int ipts_hid_init(struct ipts_context *ipts, struct ipts_device_info info); -+int ipts_hid_free(struct ipts_context *ipts); -+ -+#endif /* IPTS_HID_H */ -diff --git a/drivers/hid/ipts/main.c b/drivers/hid/ipts/main.c -new file mode 100644 -index 000000000000..0f20c6c08c38 ---- /dev/null -+++ b/drivers/hid/ipts/main.c -@@ -0,0 +1,127 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2016 Intel Corporation -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "context.h" -+#include "control.h" -+#include "mei.h" -+#include "receiver.h" -+#include "spec-device.h" -+ -+/* -+ * The MEI client ID for IPTS functionality. -+ */ -+#define IPTS_ID UUID_LE(0x3e8d0870, 0x271a, 0x4208, 0x8e, 0xb5, 0x9a, 0xcb, 0x94, 0x02, 0xae, 0x04) -+ -+static int ipts_set_dma_mask(struct mei_cl_device *cldev) -+{ -+ if (!cldev) -+ return -EFAULT; -+ -+ if (!dma_coerce_mask_and_coherent(&cldev->dev, DMA_BIT_MASK(64))) -+ return 0; -+ -+ return dma_coerce_mask_and_coherent(&cldev->dev, DMA_BIT_MASK(32)); -+} -+ -+static int ipts_probe(struct mei_cl_device *cldev, const struct mei_cl_device_id *id) -+{ -+ int ret = 0; -+ struct ipts_context *ipts = NULL; -+ -+ if (!cldev) -+ return -EFAULT; -+ -+ ret = ipts_set_dma_mask(cldev); -+ if (ret) { -+ dev_err(&cldev->dev, "Failed to set DMA mask for IPTS: %d\n", ret); -+ return ret; -+ } -+ -+ ret = mei_cldev_enable(cldev); -+ if (ret) { -+ dev_err(&cldev->dev, "Failed to enable MEI device: %d\n", ret); -+ return ret; -+ } -+ -+ ipts = devm_kzalloc(&cldev->dev, sizeof(*ipts), GFP_KERNEL); -+ if (!ipts) { -+ mei_cldev_disable(cldev); -+ return -ENOMEM; -+ } -+ -+ ret = ipts_mei_init(&ipts->mei, cldev); -+ if (ret) { -+ dev_err(&cldev->dev, "Failed to init MEI bus logic: %d\n", ret); -+ return ret; -+ } -+ -+ ipts->dev = &cldev->dev; -+ ipts->mode = IPTS_MODE_EVENT; -+ -+ mutex_init(&ipts->feature_lock); -+ init_completion(&ipts->feature_event); -+ -+ mei_cldev_set_drvdata(cldev, ipts); -+ -+ ret = ipts_control_start(ipts); -+ if (ret) { -+ dev_err(&cldev->dev, "Failed to start IPTS: %d\n", ret); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static void ipts_remove(struct mei_cl_device *cldev) -+{ -+ int ret = 0; -+ struct ipts_context *ipts = NULL; -+ -+ if (!cldev) { -+ pr_err("MEI device is NULL!"); -+ return; -+ } -+ -+ ipts = mei_cldev_get_drvdata(cldev); -+ -+ ret = ipts_control_stop(ipts); -+ if (ret) -+ dev_err(&cldev->dev, "Failed to stop IPTS: 
%d\n", ret); -+ -+ mei_cldev_disable(cldev); -+} -+ -+static struct mei_cl_device_id ipts_device_id_table[] = { -+ { .uuid = IPTS_ID, .version = MEI_CL_VERSION_ANY }, -+ {}, -+}; -+MODULE_DEVICE_TABLE(mei, ipts_device_id_table); -+ -+static struct mei_cl_driver ipts_driver = { -+ .id_table = ipts_device_id_table, -+ .name = "ipts", -+ .probe = ipts_probe, -+ .remove = ipts_remove, -+}; -+module_mei_cl_driver(ipts_driver); -+ -+MODULE_DESCRIPTION("IPTS touchscreen driver"); -+MODULE_AUTHOR("Dorian Stoll "); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/hid/ipts/mei.c b/drivers/hid/ipts/mei.c -new file mode 100644 -index 000000000000..26666fd99b0c ---- /dev/null -+++ b/drivers/hid/ipts/mei.c -@@ -0,0 +1,189 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2016 Intel Corporation -+ * Copyright (c) 2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "context.h" -+#include "mei.h" -+ -+static void locked_list_add(struct list_head *new, struct list_head *head, -+ struct rw_semaphore *lock) -+{ -+ down_write(lock); -+ list_add(new, head); -+ up_write(lock); -+} -+ -+static void locked_list_del(struct list_head *entry, struct rw_semaphore *lock) -+{ -+ down_write(lock); -+ list_del(entry); -+ up_write(lock); -+} -+ -+static void ipts_mei_incoming(struct mei_cl_device *cldev) -+{ -+ ssize_t ret = 0; -+ struct ipts_mei_message *entry = NULL; -+ struct ipts_context *ipts = NULL; -+ -+ if (!cldev) { -+ pr_err("MEI device is NULL!"); -+ return; -+ } -+ -+ ipts = mei_cldev_get_drvdata(cldev); -+ if (!ipts) { -+ pr_err("IPTS driver context is NULL!"); -+ return; -+ } -+ -+ entry = devm_kzalloc(ipts->dev, sizeof(*entry), GFP_KERNEL); -+ if (!entry) -+ return; -+ -+ INIT_LIST_HEAD(&entry->list); -+ -+ do { -+ ret = mei_cldev_recv(cldev, (u8 *)&entry->rsp, sizeof(entry->rsp)); -+ } while (ret == -EINTR); -+ -+ if (ret < 0) { -+ dev_err(ipts->dev, "Error while reading response: %ld\n", ret); -+ return; -+ } -+ -+ if (ret == 0) { -+ dev_err(ipts->dev, "Received empty response\n"); -+ return; -+ } -+ -+ locked_list_add(&entry->list, &ipts->mei.messages, &ipts->mei.message_lock); -+ wake_up_all(&ipts->mei.message_queue); -+} -+ -+static int ipts_mei_search(struct ipts_mei *mei, enum ipts_command_code code, -+ struct ipts_response *rsp) -+{ -+ struct ipts_mei_message *entry = NULL; -+ -+ if (!mei) -+ return -EFAULT; -+ -+ if (!rsp) -+ return -EFAULT; -+ -+ down_read(&mei->message_lock); -+ -+ /* -+ * Iterate over the list of received messages, and check if there is one -+ * matching the requested command code. -+ */ -+ list_for_each_entry(entry, &mei->messages, list) { -+ if (entry->rsp.cmd == code) -+ break; -+ } -+ -+ up_read(&mei->message_lock); -+ -+ /* -+ * If entry is not the list head, this means that the loop above has been stopped early, -+ * and that we found a matching element. We drop the message from the list and return it. -+ */ -+ if (!list_entry_is_head(entry, &mei->messages, list)) { -+ locked_list_del(&entry->list, &mei->message_lock); -+ -+ *rsp = entry->rsp; -+ devm_kfree(&mei->cldev->dev, entry); -+ -+ return 0; -+ } -+ -+ return -EAGAIN; -+} -+ -+int ipts_mei_recv(struct ipts_mei *mei, enum ipts_command_code code, struct ipts_response *rsp, -+ u64 timeout) -+{ -+ int ret = 0; -+ -+ if (!mei) -+ return -EFAULT; -+ -+ /* -+ * A timeout of 0 means check and return immideately. 
-+	 */
-+	if (timeout == 0)
-+		return ipts_mei_search(mei, code, rsp);
-+
-+	/*
-+	 * A timeout of less than 0 means to wait forever.
-+	 */
-+	if (timeout < 0) {
-+		wait_event(mei->message_queue, ipts_mei_search(mei, code, rsp) == 0);
-+		return 0;
-+	}
-+
-+	ret = wait_event_timeout(mei->message_queue, ipts_mei_search(mei, code, rsp) == 0,
-+				 msecs_to_jiffies(timeout));
-+
-+	if (ret > 0)
-+		return 0;
-+
-+	return -EAGAIN;
-+}
-+
-+int ipts_mei_send(struct ipts_mei *mei, void *data, size_t length)
-+{
-+	int ret = 0;
-+
-+	if (!mei)
-+		return -EFAULT;
-+
-+	if (!mei->cldev)
-+		return -EFAULT;
-+
-+	if (!data)
-+		return -EFAULT;
-+
-+	do {
-+		ret = mei_cldev_send(mei->cldev, (u8 *)data, length);
-+	} while (ret == -EINTR);
-+
-+	if (ret < 0)
-+		return ret;
-+
-+	return 0;
-+}
-+
-+int ipts_mei_init(struct ipts_mei *mei, struct mei_cl_device *cldev)
-+{
-+	if (!mei)
-+		return -EFAULT;
-+
-+	if (!cldev)
-+		return -EFAULT;
-+
-+	mei->cldev = cldev;
-+
-+	INIT_LIST_HEAD(&mei->messages);
-+	init_waitqueue_head(&mei->message_queue);
-+	init_rwsem(&mei->message_lock);
-+
-+	mei_cldev_register_rx_cb(cldev, ipts_mei_incoming);
-+
-+	return 0;
-+}
-diff --git a/drivers/hid/ipts/mei.h b/drivers/hid/ipts/mei.h
-new file mode 100644
-index 000000000000..eadacae54c40
---- /dev/null
-+++ b/drivers/hid/ipts/mei.h
-@@ -0,0 +1,67 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Copyright (c) 2016 Intel Corporation
-+ * Copyright (c) 2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#ifndef IPTS_MEI_H
-+#define IPTS_MEI_H
-+
-+#include 
-+#include 
-+#include 
-+#include 
-+#include 
-+
-+#include "spec-device.h"
-+
-+struct ipts_mei_message {
-+	struct list_head list;
-+	struct ipts_response rsp;
-+};
-+
-+struct ipts_mei {
-+	struct mei_cl_device *cldev;
-+
-+	struct list_head messages;
-+
-+	wait_queue_head_t message_queue;
-+	struct rw_semaphore message_lock;
-+};
-+
-+/*
-+ * ipts_mei_recv() - Receive data from a MEI device.
-+ * @mei: The IPTS MEI device context.
-+ * @code: The IPTS command code to look for.
-+ * @rsp: The address that the received data will be copied to.
-+ * @timeout: How many milliseconds the function will wait at most.
-+ *
-+ * A negative timeout means to wait forever.
-+ *
-+ * Returns: 0 on success, <0 on error, -EAGAIN if no response has been received.
-+ */
-+int ipts_mei_recv(struct ipts_mei *mei, enum ipts_command_code code, struct ipts_response *rsp,
-+		  s64 timeout);
-+
-+/*
-+ * ipts_mei_send() - Send data to a MEI device.
-+ * @mei: The IPTS MEI device context.
-+ * @data: The data to send.
-+ * @length: The size of the data.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_mei_send(struct ipts_mei *mei, void *data, size_t length);
-+
-+/*
-+ * ipts_mei_init() - Initialize the MEI device context.
-+ * @mei: The MEI device context to initialize.
-+ * @cldev: The MEI device the context will be bound to.
-+ *
-+ * Returns: 0 on success, <0 on error.
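-+ *
-+ * Note that this also registers ipts_mei_incoming() as the receive callback,
-+ * so responses may start arriving as soon as this function has returned.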
-+ */ -+int ipts_mei_init(struct ipts_mei *mei, struct mei_cl_device *cldev); -+ -+#endif /* IPTS_MEI_H */ -diff --git a/drivers/hid/ipts/receiver.c b/drivers/hid/ipts/receiver.c -new file mode 100644 -index 000000000000..77234f9e0e17 ---- /dev/null -+++ b/drivers/hid/ipts/receiver.c -@@ -0,0 +1,249 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2016 Intel Corporation -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "cmd.h" -+#include "context.h" -+#include "control.h" -+#include "hid.h" -+#include "resources.h" -+#include "spec-device.h" -+#include "thread.h" -+ -+static void ipts_receiver_next_doorbell(struct ipts_context *ipts) -+{ -+ u32 *doorbell = (u32 *)ipts->resources.doorbell.address; -+ *doorbell = *doorbell + 1; -+} -+ -+static u32 ipts_receiver_current_doorbell(struct ipts_context *ipts) -+{ -+ u32 *doorbell = (u32 *)ipts->resources.doorbell.address; -+ return *doorbell; -+} -+ -+static void ipts_receiver_backoff(time64_t last, u32 n) -+{ -+ /* -+ * If the last change was less than n seconds ago, -+ * sleep for a shorter period so that new data can be -+ * processed quickly. If there was no change for more than -+ * n seconds, sleep longer to avoid wasting CPU cycles. -+ */ -+ if (last + n > ktime_get_seconds()) -+ msleep(20); -+ else -+ msleep(200); -+} -+ -+static int ipts_receiver_event_loop(struct ipts_thread *thread) -+{ -+ int ret = 0; -+ u32 buffer = 0; -+ -+ struct ipts_context *ipts = NULL; -+ time64_t last = ktime_get_seconds(); -+ -+ if (!thread) -+ return -EFAULT; -+ -+ ipts = thread->data; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ dev_info(ipts->dev, "IPTS running in event mode\n"); -+ -+ while (!ipts_thread_should_stop(thread)) { -+ for (int i = 0; i < IPTS_BUFFERS; i++) { -+ ret = ipts_control_wait_data(ipts, false); -+ if (ret == -EAGAIN) -+ break; -+ -+ if (ret) { -+ dev_err(ipts->dev, "Failed to wait for data: %d\n", ret); -+ continue; -+ } -+ -+ buffer = ipts_receiver_current_doorbell(ipts) % IPTS_BUFFERS; -+ ipts_receiver_next_doorbell(ipts); -+ -+ ret = ipts_hid_input_data(ipts, buffer); -+ if (ret) -+ dev_err(ipts->dev, "Failed to process buffer: %d\n", ret); -+ -+ ret = ipts_control_refill_buffer(ipts, buffer); -+ if (ret) -+ dev_err(ipts->dev, "Failed to send feedback: %d\n", ret); -+ -+ ret = ipts_control_request_data(ipts); -+ if (ret) -+ dev_err(ipts->dev, "Failed to request data: %d\n", ret); -+ -+ last = ktime_get_seconds(); -+ } -+ -+ ipts_receiver_backoff(last, 5); -+ } -+ -+ ret = ipts_control_request_flush(ipts); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to request flush: %d\n", ret); -+ return ret; -+ } -+ -+ ret = ipts_control_wait_data(ipts, true); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to wait for data: %d\n", ret); -+ -+ if (ret != -EAGAIN) -+ return ret; -+ else -+ return 0; -+ } -+ -+ ret = ipts_control_wait_flush(ipts); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to wait for flush: %d\n", ret); -+ -+ if (ret != -EAGAIN) -+ return ret; -+ else -+ return 0; -+ } -+ -+ return 0; -+} -+ -+static int ipts_receiver_doorbell_loop(struct ipts_thread *thread) -+{ -+ int ret = 0; -+ u32 buffer = 0; -+ -+ u32 doorbell = 0; -+ u32 lastdb = 0; -+ -+ struct ipts_context *ipts = NULL; -+ time64_t last = ktime_get_seconds(); -+ -+ if (!thread) -+ return -EFAULT; -+ -+ ipts = thread->data; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ dev_info(ipts->dev, "IPTS running in doorbell mode\n"); 
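-+
-+	/*
-+	 * When the thread is asked to stop, the loop below first requests a
-+	 * flush, then keeps draining buffers until the doorbell has caught up,
-+	 * and only then exits.
-+	 */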
-+ -+ while (true) { -+ if (ipts_thread_should_stop(thread)) { -+ ret = ipts_control_request_flush(ipts); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to request flush: %d\n", ret); -+ return ret; -+ } -+ } -+ -+ doorbell = ipts_receiver_current_doorbell(ipts); -+ -+ /* -+ * After filling up one of the data buffers, IPTS will increment -+ * the doorbell. The value of the doorbell stands for the *next* -+ * buffer that IPTS is going to fill. -+ */ -+ while (lastdb != doorbell) { -+ buffer = lastdb % IPTS_BUFFERS; -+ -+ ret = ipts_hid_input_data(ipts, buffer); -+ if (ret) -+ dev_err(ipts->dev, "Failed to process buffer: %d\n", ret); -+ -+ ret = ipts_control_refill_buffer(ipts, buffer); -+ if (ret) -+ dev_err(ipts->dev, "Failed to send feedback: %d\n", ret); -+ -+ last = ktime_get_seconds(); -+ lastdb++; -+ } -+ -+ if (ipts_thread_should_stop(thread)) -+ break; -+ -+ ipts_receiver_backoff(last, 5); -+ } -+ -+ ret = ipts_control_wait_data(ipts, true); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to wait for data: %d\n", ret); -+ -+ if (ret != -EAGAIN) -+ return ret; -+ else -+ return 0; -+ } -+ -+ ret = ipts_control_wait_flush(ipts); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to wait for flush: %d\n", ret); -+ -+ if (ret != -EAGAIN) -+ return ret; -+ else -+ return 0; -+ } -+ -+ return 0; -+} -+ -+int ipts_receiver_start(struct ipts_context *ipts) -+{ -+ int ret = 0; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (ipts->mode == IPTS_MODE_EVENT) { -+ ret = ipts_thread_start(&ipts->receiver_loop, ipts_receiver_event_loop, ipts, -+ "ipts_event"); -+ } else if (ipts->mode == IPTS_MODE_DOORBELL) { -+ ret = ipts_thread_start(&ipts->receiver_loop, ipts_receiver_doorbell_loop, ipts, -+ "ipts_doorbell"); -+ } else { -+ ret = -EINVAL; -+ } -+ -+ if (ret) { -+ dev_err(ipts->dev, "Failed to start receiver loop: %d\n", ret); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+int ipts_receiver_stop(struct ipts_context *ipts) -+{ -+ int ret = 0; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ ret = ipts_thread_stop(&ipts->receiver_loop); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to stop receiver loop: %d\n", ret); -+ return ret; -+ } -+ -+ return 0; -+} -diff --git a/drivers/hid/ipts/receiver.h b/drivers/hid/ipts/receiver.h -new file mode 100644 -index 000000000000..96070f34fbca ---- /dev/null -+++ b/drivers/hid/ipts/receiver.h -@@ -0,0 +1,17 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2016 Intel Corporation -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_RECEIVER_H -+#define IPTS_RECEIVER_H -+ -+#include "context.h" -+ -+int ipts_receiver_start(struct ipts_context *ipts); -+int ipts_receiver_stop(struct ipts_context *ipts); -+ -+#endif /* IPTS_RECEIVER_H */ -diff --git a/drivers/hid/ipts/resources.c b/drivers/hid/ipts/resources.c -new file mode 100644 -index 000000000000..80ba5885bb55 ---- /dev/null -+++ b/drivers/hid/ipts/resources.c -@@ -0,0 +1,108 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2016 Intel Corporation -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#include -+#include -+ -+#include "resources.h" -+#include "spec-device.h" -+ -+static int ipts_resources_alloc_buffer(struct ipts_buffer *buffer, struct device *dev, size_t size) -+{ -+ if (!buffer) -+ return -EFAULT; -+ -+ if (buffer->address) -+ return 0; -+ -+ buffer->address = dma_alloc_coherent(dev, size, &buffer->dma_address, GFP_KERNEL); -+ -+ if 
(!buffer->address) -+ return -ENOMEM; -+ -+ buffer->size = size; -+ buffer->device = dev; -+ -+ return 0; -+} -+ -+static void ipts_resources_free_buffer(struct ipts_buffer *buffer) -+{ -+ if (!buffer->address) -+ return; -+ -+ dma_free_coherent(buffer->device, buffer->size, buffer->address, buffer->dma_address); -+ -+ buffer->address = NULL; -+ buffer->size = 0; -+ -+ buffer->dma_address = 0; -+ buffer->device = NULL; -+} -+ -+int ipts_resources_init(struct ipts_resources *res, struct device *dev, size_t ds, size_t fs) -+{ -+ int ret = 0; -+ -+ if (!res) -+ return -EFAULT; -+ -+ for (int i = 0; i < IPTS_BUFFERS; i++) { -+ ret = ipts_resources_alloc_buffer(&res->data[i], dev, ds); -+ if (ret) -+ goto err; -+ } -+ -+ for (int i = 0; i < IPTS_BUFFERS; i++) { -+ ret = ipts_resources_alloc_buffer(&res->feedback[i], dev, fs); -+ if (ret) -+ goto err; -+ } -+ -+ ret = ipts_resources_alloc_buffer(&res->doorbell, dev, sizeof(u32)); -+ if (ret) -+ goto err; -+ -+ ret = ipts_resources_alloc_buffer(&res->workqueue, dev, sizeof(u32)); -+ if (ret) -+ goto err; -+ -+ ret = ipts_resources_alloc_buffer(&res->hid2me, dev, fs); -+ if (ret) -+ goto err; -+ -+ ret = ipts_resources_alloc_buffer(&res->descriptor, dev, ds + 8); -+ if (ret) -+ goto err; -+ -+ return 0; -+ -+err: -+ -+ ipts_resources_free(res); -+ return ret; -+} -+ -+int ipts_resources_free(struct ipts_resources *res) -+{ -+ if (!res) -+ return -EFAULT; -+ -+ for (int i = 0; i < IPTS_BUFFERS; i++) -+ ipts_resources_free_buffer(&res->data[i]); -+ -+ for (int i = 0; i < IPTS_BUFFERS; i++) -+ ipts_resources_free_buffer(&res->feedback[i]); -+ -+ ipts_resources_free_buffer(&res->doorbell); -+ ipts_resources_free_buffer(&res->workqueue); -+ ipts_resources_free_buffer(&res->hid2me); -+ ipts_resources_free_buffer(&res->descriptor); -+ -+ return 0; -+} -diff --git a/drivers/hid/ipts/resources.h b/drivers/hid/ipts/resources.h -new file mode 100644 -index 000000000000..6cbb24a8a054 ---- /dev/null -+++ b/drivers/hid/ipts/resources.h -@@ -0,0 +1,39 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2016 Intel Corporation -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_RESOURCES_H -+#define IPTS_RESOURCES_H -+ -+#include -+#include -+ -+#include "spec-device.h" -+ -+struct ipts_buffer { -+ u8 *address; -+ size_t size; -+ -+ dma_addr_t dma_address; -+ struct device *device; -+}; -+ -+struct ipts_resources { -+ struct ipts_buffer data[IPTS_BUFFERS]; -+ struct ipts_buffer feedback[IPTS_BUFFERS]; -+ -+ struct ipts_buffer doorbell; -+ struct ipts_buffer workqueue; -+ struct ipts_buffer hid2me; -+ -+ struct ipts_buffer descriptor; -+}; -+ -+int ipts_resources_init(struct ipts_resources *res, struct device *dev, size_t ds, size_t fs); -+int ipts_resources_free(struct ipts_resources *res); -+ -+#endif /* IPTS_RESOURCES_H */ -diff --git a/drivers/hid/ipts/spec-data.h b/drivers/hid/ipts/spec-data.h -new file mode 100644 -index 000000000000..e8dd98895a7e ---- /dev/null -+++ b/drivers/hid/ipts/spec-data.h -@@ -0,0 +1,100 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2016 Intel Corporation -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_SPEC_DATA_H -+#define IPTS_SPEC_DATA_H -+ -+#include -+#include -+ -+/** -+ * enum ipts_feedback_cmd_type - Commands that can be executed on the sensor through feedback. 
-+ */ -+enum ipts_feedback_cmd_type { -+ IPTS_FEEDBACK_CMD_TYPE_NONE = 0, -+ IPTS_FEEDBACK_CMD_TYPE_SOFT_RESET = 1, -+ IPTS_FEEDBACK_CMD_TYPE_GOTO_ARMED = 2, -+ IPTS_FEEDBACK_CMD_TYPE_GOTO_SENSING = 3, -+ IPTS_FEEDBACK_CMD_TYPE_GOTO_SLEEP = 4, -+ IPTS_FEEDBACK_CMD_TYPE_GOTO_DOZE = 5, -+ IPTS_FEEDBACK_CMD_TYPE_HARD_RESET = 6, -+}; -+ -+/** -+ * enum ipts_feedback_data_type - Defines what data a feedback buffer contains. -+ * @IPTS_FEEDBACK_DATA_TYPE_VENDOR: The buffer contains vendor specific feedback. -+ * @IPTS_FEEDBACK_DATA_TYPE_SET_FEATURES: The buffer contains a HID set features report. -+ * @IPTS_FEEDBACK_DATA_TYPE_GET_FEATURES: The buffer contains a HID get features report. -+ * @IPTS_FEEDBACK_DATA_TYPE_OUTPUT_REPORT: The buffer contains a HID output report. -+ * @IPTS_FEEDBACK_DATA_TYPE_STORE_DATA: The buffer contains calibration data for the sensor. -+ */ -+enum ipts_feedback_data_type { -+ IPTS_FEEDBACK_DATA_TYPE_VENDOR = 0, -+ IPTS_FEEDBACK_DATA_TYPE_SET_FEATURES = 1, -+ IPTS_FEEDBACK_DATA_TYPE_GET_FEATURES = 2, -+ IPTS_FEEDBACK_DATA_TYPE_OUTPUT_REPORT = 3, -+ IPTS_FEEDBACK_DATA_TYPE_STORE_DATA = 4, -+}; -+ -+/** -+ * struct ipts_feedback_header - Header that is prefixed to the data in a feedback buffer. -+ * @cmd_type: A command that should be executed on the sensor. -+ * @size: The size of the payload to be written. -+ * @buffer: The ID of the buffer that contains this feedback data. -+ * @protocol: The protocol version of the EDS. -+ * @data_type: The type of data that the buffer contains. -+ * @spi_offset: The offset at which to write the payload data to the sensor. -+ * @payload: Payload for the feedback command, or 0 if no payload is sent. -+ */ -+struct ipts_feedback_header { -+ enum ipts_feedback_cmd_type cmd_type; -+ u32 size; -+ u32 buffer; -+ u32 protocol; -+ enum ipts_feedback_data_type data_type; -+ u32 spi_offset; -+ u8 reserved[40]; -+ u8 payload[]; -+} __packed; -+ -+static_assert(sizeof(struct ipts_feedback_header) == 64); -+ -+/** -+ * enum ipts_data_type - Defines what type of data a buffer contains. -+ * @IPTS_DATA_TYPE_FRAME: Raw data frame. -+ * @IPTS_DATA_TYPE_ERROR: Error data. -+ * @IPTS_DATA_TYPE_VENDOR: Vendor specific data. -+ * @IPTS_DATA_TYPE_HID: A HID report. -+ * @IPTS_DATA_TYPE_GET_FEATURES: The response to a GET_FEATURES HID2ME command. -+ */ -+enum ipts_data_type { -+ IPTS_DATA_TYPE_FRAME = 0x00, -+ IPTS_DATA_TYPE_ERROR = 0x01, -+ IPTS_DATA_TYPE_VENDOR = 0x02, -+ IPTS_DATA_TYPE_HID = 0x03, -+ IPTS_DATA_TYPE_GET_FEATURES = 0x04, -+ IPTS_DATA_TYPE_DESCRIPTOR = 0x05, -+}; -+ -+/** -+ * struct ipts_data_header - Header that is prefixed to the data in a data buffer. -+ * @type: What data the buffer contains. -+ * @size: How much data the buffer contains. -+ * @buffer: Which buffer the data is in. 
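-+ * @data: The data itself, laid out directly after the reserved bytes.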
-+ */
-+struct ipts_data_header {
-+	enum ipts_data_type type;
-+	u32 size;
-+	u32 buffer;
-+	u8 reserved[52];
-+	u8 data[];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_data_header) == 64);
-+
-+#endif /* IPTS_SPEC_DATA_H */
-diff --git a/drivers/hid/ipts/spec-device.h b/drivers/hid/ipts/spec-device.h
-new file mode 100644
-index 000000000000..93f673d981f7
---- /dev/null
-+++ b/drivers/hid/ipts/spec-device.h
-@@ -0,0 +1,285 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Copyright (c) 2016 Intel Corporation
-+ * Copyright (c) 2020-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#ifndef IPTS_SPEC_DEVICE_H
-+#define IPTS_SPEC_DEVICE_H
-+
-+#include 
-+#include 
-+
-+/*
-+ * The number of buffers that IPTS can use for data transfer.
-+ */
-+#define IPTS_BUFFERS 16
-+
-+/*
-+ * The buffer ID that is used for HID2ME feedback.
-+ */
-+#define IPTS_HID2ME_BUFFER IPTS_BUFFERS
-+
-+/**
-+ * enum ipts_command_code - Commands that can be sent to the IPTS hardware.
-+ * @IPTS_CMD_GET_DEVICE_INFO: Retrieves vendor information from the device.
-+ * @IPTS_CMD_SET_MODE: Changes the mode that the device will operate in.
-+ * @IPTS_CMD_SET_MEM_WINDOW: Configures memory buffers for passing data between device and driver.
-+ * @IPTS_CMD_QUIESCE_IO: Stops the data flow from the device to the driver.
-+ * @IPTS_CMD_READY_FOR_DATA: Informs the device that the driver is ready to receive data.
-+ * @IPTS_CMD_FEEDBACK: Informs the device that a buffer was processed and can be refilled.
-+ * @IPTS_CMD_CLEAR_MEM_WINDOW: Stops the data flow and clears the buffer addresses on the device.
-+ * @IPTS_CMD_RESET_SENSOR: Resets the sensor to its default state.
-+ * @IPTS_CMD_GET_DESCRIPTOR: Retrieves the HID descriptor of the device.
-+ */
-+enum ipts_command_code {
-+	IPTS_CMD_GET_DEVICE_INFO = 0x01,
-+	IPTS_CMD_SET_MODE = 0x02,
-+	IPTS_CMD_SET_MEM_WINDOW = 0x03,
-+	IPTS_CMD_QUIESCE_IO = 0x04,
-+	IPTS_CMD_READY_FOR_DATA = 0x05,
-+	IPTS_CMD_FEEDBACK = 0x06,
-+	IPTS_CMD_CLEAR_MEM_WINDOW = 0x07,
-+	IPTS_CMD_RESET_SENSOR = 0x0B,
-+	IPTS_CMD_GET_DESCRIPTOR = 0x0F,
-+};
-+
-+/**
-+ * enum ipts_status - Possible status codes returned by the IPTS device.
-+ * @IPTS_STATUS_SUCCESS: Operation completed successfully.
-+ * @IPTS_STATUS_INVALID_PARAMS: Command contained an invalid payload.
-+ * @IPTS_STATUS_ACCESS_DENIED: ME could not validate a buffer address.
-+ * @IPTS_STATUS_CMD_SIZE_ERROR: Command contains an invalid payload.
-+ * @IPTS_STATUS_NOT_READY: Buffer addresses have not been set.
-+ * @IPTS_STATUS_REQUEST_OUTSTANDING: There is an outstanding command of the same type.
-+ * @IPTS_STATUS_NO_SENSOR_FOUND: No sensor could be found.
-+ * @IPTS_STATUS_OUT_OF_MEMORY: Not enough free memory for requested operation.
-+ * @IPTS_STATUS_INTERNAL_ERROR: An unexpected error occurred.
-+ * @IPTS_STATUS_SENSOR_DISABLED: The sensor has been disabled and must be reinitialized.
-+ * @IPTS_STATUS_COMPAT_CHECK_FAIL: Compatibility revision check between sensor and ME failed.
-+ * The host can ignore this error and attempt to continue.
-+ * @IPTS_STATUS_SENSOR_EXPECTED_RESET: The sensor went through a reset initiated by the driver.
-+ * @IPTS_STATUS_SENSOR_UNEXPECTED_RESET: The sensor went through an unexpected reset.
-+ * @IPTS_STATUS_RESET_FAILED: Requested sensor reset failed to complete.
-+ * @IPTS_STATUS_TIMEOUT: The operation timed out.
-+ * @IPTS_STATUS_TEST_MODE_FAIL: Test mode pattern did not match expected values.
-+ * @IPTS_STATUS_SENSOR_FAIL_FATAL: The sensor reported an error during the reset sequence.
-+ * Further progress is not possible.
-+ * @IPTS_STATUS_SENSOR_FAIL_NONFATAL: The sensor reported an error during the reset sequence.
-+ * The driver can attempt to continue.
-+ * @IPTS_STATUS_INVALID_DEVICE_CAPS: The device reported invalid capabilities.
-+ * @IPTS_STATUS_QUIESCE_IO_IN_PROGRESS: Command cannot be completed until Quiesce IO is done.
-+ */
-+enum ipts_status {
-+	IPTS_STATUS_SUCCESS = 0x00,
-+	IPTS_STATUS_INVALID_PARAMS = 0x01,
-+	IPTS_STATUS_ACCESS_DENIED = 0x02,
-+	IPTS_STATUS_CMD_SIZE_ERROR = 0x03,
-+	IPTS_STATUS_NOT_READY = 0x04,
-+	IPTS_STATUS_REQUEST_OUTSTANDING = 0x05,
-+	IPTS_STATUS_NO_SENSOR_FOUND = 0x06,
-+	IPTS_STATUS_OUT_OF_MEMORY = 0x07,
-+	IPTS_STATUS_INTERNAL_ERROR = 0x08,
-+	IPTS_STATUS_SENSOR_DISABLED = 0x09,
-+	IPTS_STATUS_COMPAT_CHECK_FAIL = 0x0A,
-+	IPTS_STATUS_SENSOR_EXPECTED_RESET = 0x0B,
-+	IPTS_STATUS_SENSOR_UNEXPECTED_RESET = 0x0C,
-+	IPTS_STATUS_RESET_FAILED = 0x0D,
-+	IPTS_STATUS_TIMEOUT = 0x0E,
-+	IPTS_STATUS_TEST_MODE_FAIL = 0x0F,
-+	IPTS_STATUS_SENSOR_FAIL_FATAL = 0x10,
-+	IPTS_STATUS_SENSOR_FAIL_NONFATAL = 0x11,
-+	IPTS_STATUS_INVALID_DEVICE_CAPS = 0x12,
-+	IPTS_STATUS_QUIESCE_IO_IN_PROGRESS = 0x13,
-+};
-+
-+/**
-+ * struct ipts_command - Message that is sent to the device for calling a command.
-+ * @cmd: The command that will be called.
-+ * @payload: Payload containing parameters for the called command.
-+ */
-+struct ipts_command {
-+	enum ipts_command_code cmd;
-+	u8 payload[320];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_command) == 324);
-+
-+/**
-+ * enum ipts_mode - Configures what data the device produces and how it is sent.
-+ * @IPTS_MODE_EVENT: The device will send an event once a buffer was filled.
-+ * Older devices will return singletouch data in this mode.
-+ * @IPTS_MODE_DOORBELL: The device will notify the driver by incrementing the doorbell value.
-+ * Older devices will return multitouch data in this mode.
-+ */
-+enum ipts_mode {
-+	IPTS_MODE_EVENT = 0x00,
-+	IPTS_MODE_DOORBELL = 0x01,
-+};
-+
-+/**
-+ * struct ipts_set_mode - Payload for the SET_MODE command.
-+ * @mode: Changes the mode that IPTS will operate in.
-+ */
-+struct ipts_set_mode {
-+	enum ipts_mode mode;
-+	u8 reserved[12];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_set_mode) == 16);
-+
-+#define IPTS_WORKQUEUE_SIZE 8192
-+#define IPTS_WORKQUEUE_ITEM_SIZE 16
-+
-+/**
-+ * struct ipts_mem_window - Payload for the SET_MEM_WINDOW command.
-+ * @data_addr_lower: Lower 32 bits of the data buffer addresses.
-+ * @data_addr_upper: Upper 32 bits of the data buffer addresses.
-+ * @workqueue_addr_lower: Lower 32 bits of the workqueue buffer address.
-+ * @workqueue_addr_upper: Upper 32 bits of the workqueue buffer address.
-+ * @doorbell_addr_lower: Lower 32 bits of the doorbell buffer address.
-+ * @doorbell_addr_upper: Upper 32 bits of the doorbell buffer address.
-+ * @feedback_addr_lower: Lower 32 bits of the feedback buffer addresses.
-+ * @feedback_addr_upper: Upper 32 bits of the feedback buffer addresses.
-+ * @hid2me_addr_lower: Lower 32 bits of the hid2me buffer address.
-+ * @hid2me_addr_upper: Upper 32 bits of the hid2me buffer address.
-+ * @hid2me_size: Size of the hid2me feedback buffer.
-+ * @workqueue_item_size: Magic value. Must be 16.
-+ * @workqueue_size: Magic value. Must be 8192.
-+ *
-+ * The workqueue related items in this struct are required for using
-+ * GuC submission with binary processing firmware. 
Since this driver does
-+ * not use GuC submission and instead exports raw data to userspace, these
-+ * items are not actually used, but they need to be allocated and passed
-+ * to the device, otherwise initialization will fail.
-+ */
-+struct ipts_mem_window {
-+	u32 data_addr_lower[IPTS_BUFFERS];
-+	u32 data_addr_upper[IPTS_BUFFERS];
-+	u32 workqueue_addr_lower;
-+	u32 workqueue_addr_upper;
-+	u32 doorbell_addr_lower;
-+	u32 doorbell_addr_upper;
-+	u32 feedback_addr_lower[IPTS_BUFFERS];
-+	u32 feedback_addr_upper[IPTS_BUFFERS];
-+	u32 hid2me_addr_lower;
-+	u32 hid2me_addr_upper;
-+	u32 hid2me_size;
-+	u8 reserved1;
-+	u8 workqueue_item_size;
-+	u16 workqueue_size;
-+	u8 reserved[32];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_mem_window) == 320);
-+
-+/**
-+ * struct ipts_quiesce_io - Payload for the QUIESCE_IO command.
-+ */
-+struct ipts_quiesce_io {
-+	u8 reserved[12];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_quiesce_io) == 12);
-+
-+/**
-+ * struct ipts_feedback - Payload for the FEEDBACK command.
-+ * @buffer: The buffer that the device should refill.
-+ */
-+struct ipts_feedback {
-+	u32 buffer;
-+	u8 reserved[12];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_feedback) == 16);
-+
-+/**
-+ * enum ipts_reset_type - Possible ways of resetting the device.
-+ * @IPTS_RESET_TYPE_HARD: Perform hardware reset using GPIO pin.
-+ * @IPTS_RESET_TYPE_SOFT: Perform software reset using SPI command.
-+ */
-+enum ipts_reset_type {
-+	IPTS_RESET_TYPE_HARD = 0x00,
-+	IPTS_RESET_TYPE_SOFT = 0x01,
-+};
-+
-+/**
-+ * struct ipts_reset_sensor - Payload for the RESET_SENSOR command.
-+ * @type: How the device should be reset.
-+ */
-+struct ipts_reset_sensor {
-+	enum ipts_reset_type type;
-+	u8 reserved[4];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_reset_sensor) == 8);
-+
-+/**
-+ * struct ipts_get_descriptor - Payload for the GET_DESCRIPTOR command.
-+ * @addr_lower: The lower 32 bits of the descriptor buffer address.
-+ * @addr_upper: The upper 32 bits of the descriptor buffer address.
-+ * @magic: A magic value. Must be 8.
-+ */
-+struct ipts_get_descriptor {
-+	u32 addr_lower;
-+	u32 addr_upper;
-+	u32 magic;
-+	u8 reserved[12];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_get_descriptor) == 24);
-+
-+/*
-+ * The type of a response is indicated by a
-+ * command code, with the most significant bit flipped to 1.
-+ */
-+#define IPTS_RSP_BIT BIT(31)
-+
-+/**
-+ * struct ipts_response - Data returned from the device in response to a command.
-+ * @cmd: The command that this response answers (IPTS_RSP_BIT will be 1).
-+ * @status: The return code of the command.
-+ * @payload: The data that was produced by the command.
-+ */
-+struct ipts_response {
-+	enum ipts_command_code cmd;
-+	enum ipts_status status;
-+	u8 payload[80];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_response) == 88);
-+
-+/**
-+ * struct ipts_device_info - Vendor information of the IPTS device.
-+ * @vendor: Vendor ID of this device.
-+ * @product: Product ID of this device.
-+ * @hw_version: Hardware revision of this device.
-+ * @fw_version: Firmware revision of this device.
-+ * @data_size: Requested size for a data buffer.
-+ * @feedback_size: Requested size for a feedback buffer.
-+ * @mode: Mode that the device currently operates in.
-+ * @max_contacts: Maximum number of concurrent touches the sensor can process. 
-+ */
-+struct ipts_device_info {
-+	u16 vendor;
-+	u16 product;
-+	u32 hw_version;
-+	u32 fw_version;
-+	u32 data_size;
-+	u32 feedback_size;
-+	enum ipts_mode mode;
-+	u8 max_contacts;
-+	u8 reserved1[3];
-+	u8 sensor_min_eds;
-+	u8 sensor_maj_eds;
-+	u8 me_min_eds;
-+	u8 me_maj_eds;
-+	u8 intf_eds;
-+	u8 reserved2[11];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_device_info) == 44);
-+
-+#endif /* IPTS_SPEC_DEVICE_H */
-diff --git a/drivers/hid/ipts/spec-hid.h b/drivers/hid/ipts/spec-hid.h
-new file mode 100644
-index 000000000000..ea70f29ff00c
---- /dev/null
-+++ b/drivers/hid/ipts/spec-hid.h
-@@ -0,0 +1,35 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Copyright (c) 2016 Intel Corporation
-+ * Copyright (c) 2020-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#ifndef IPTS_SPEC_HID_H
-+#define IPTS_SPEC_HID_H
-+
-+#include 
-+#include 
-+
-+/*
-+ * Made-up type for passing raw IPTS data in a HID report.
-+ */
-+#define IPTS_HID_FRAME_TYPE_RAW 0xEE
-+
-+/**
-+ * struct ipts_hid_header - Header that is prefixed to raw IPTS data wrapped in a HID report.
-+ * @size: Size of the data inside the report, including this header.
-+ * @type: The type of data that this report contains.
-+ */
-+struct ipts_hid_header {
-+	u32 size;
-+	u8 reserved1;
-+	u8 type;
-+	u8 reserved2;
-+	u8 data[];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_hid_header) == 7);
-+
-+#endif /* IPTS_SPEC_HID_H */
-diff --git a/drivers/hid/ipts/thread.c b/drivers/hid/ipts/thread.c
-new file mode 100644
-index 000000000000..8b46f775c107
---- /dev/null
-+++ b/drivers/hid/ipts/thread.c
-@@ -0,0 +1,85 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * Copyright (c) 2016 Intel Corporation
-+ * Copyright (c) 2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#include 
-+#include 
-+#include 
-+#include 
-+
-+#include "thread.h"
-+
-+bool ipts_thread_should_stop(struct ipts_thread *thread)
-+{
-+	if (!thread)
-+		return false;
-+
-+	return READ_ONCE(thread->should_stop);
-+}
-+
-+static int ipts_thread_runner(void *data)
-+{
-+	int ret = 0;
-+	struct ipts_thread *thread = data;
-+
-+	if (!thread)
-+		return -EFAULT;
-+
-+	if (!thread->threadfn)
-+		return -EFAULT;
-+
-+	ret = thread->threadfn(thread);
-+	complete_all(&thread->done);
-+
-+	return ret;
-+}
-+
-+int ipts_thread_start(struct ipts_thread *thread, int (*threadfn)(struct ipts_thread *thread),
-+		      void *data, const char *name)
-+{
-+	if (!thread)
-+		return -EFAULT;
-+
-+	if (!threadfn)
-+		return -EFAULT;
-+
-+	init_completion(&thread->done);
-+
-+	thread->data = data;
-+	thread->should_stop = false;
-+	thread->threadfn = threadfn;
-+
-+	thread->thread = kthread_run(ipts_thread_runner, thread, name);
-+	return PTR_ERR_OR_ZERO(thread->thread);
-+}
-+
-+int ipts_thread_stop(struct ipts_thread *thread)
-+{
-+	int ret = 0;
-+
-+	if (!thread)
-+		return -EFAULT;
-+
-+	if (!thread->thread)
-+		return 0;
-+
-+	WRITE_ONCE(thread->should_stop, true);
-+
-+	/*
-+	 * Make sure that the write has gone through before waiting.
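-+	 * The thread itself reads the flag using READ_ONCE() in
-+	 * ipts_thread_should_stop().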
-+	 */
-+	wmb();
-+
-+	wait_for_completion(&thread->done);
-+	ret = kthread_stop(thread->thread);
-+
-+	thread->thread = NULL;
-+	thread->data = NULL;
-+	thread->threadfn = NULL;
-+
-+	return ret;
-+}
-diff --git a/drivers/hid/ipts/thread.h b/drivers/hid/ipts/thread.h
-new file mode 100644
-index 000000000000..a314843599fc
---- /dev/null
-+++ b/drivers/hid/ipts/thread.h
-@@ -0,0 +1,60 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Copyright (c) 2016 Intel Corporation
-+ * Copyright (c) 2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#ifndef IPTS_THREAD_H
-+#define IPTS_THREAD_H
-+
-+#include 
-+#include 
-+#include 
-+
-+/*
-+ * This wrapper over kthread is necessary, because calling kthread_stop makes it impossible
-+ * to issue MEI commands from that thread while it shuts itself down. By using a custom
-+ * boolean variable and a completion object, we can call kthread_stop only when the thread
-+ * already finished all of its work and has returned.
-+ */
-+struct ipts_thread {
-+	struct task_struct *thread;
-+
-+	bool should_stop;
-+	struct completion done;
-+
-+	void *data;
-+	int (*threadfn)(struct ipts_thread *thread);
-+};
-+
-+/*
-+ * ipts_thread_should_stop() - Returns true if the thread is asked to terminate.
-+ * @thread: The current thread.
-+ *
-+ * Returns: true if the thread should stop, false if not.
-+ */
-+bool ipts_thread_should_stop(struct ipts_thread *thread);
-+
-+/*
-+ * ipts_thread_start() - Starts an IPTS thread.
-+ * @thread: The thread to initialize and start.
-+ * @threadfn: The function to execute.
-+ * @data: An argument that will be passed to threadfn.
-+ * @name: The name of the new thread.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_thread_start(struct ipts_thread *thread, int (*threadfn)(struct ipts_thread *thread),
-+		      void *data, const char name[]);
-+
-+/*
-+ * ipts_thread_stop() - Asks the thread to terminate and waits until it has finished.
-+ * @thread: The thread that should stop.
-+ *
-+ * Returns: The return value of the thread function.
-+ */
-+int ipts_thread_stop(struct ipts_thread *thread);
-+
-+#endif /* IPTS_THREAD_H */
-diff --git a/drivers/hid/ithc/Kbuild b/drivers/hid/ithc/Kbuild
-new file mode 100644
-index 000000000000..aea83f2ac07b
---- /dev/null
-+++ b/drivers/hid/ithc/Kbuild
-@@ -0,0 +1,6 @@
-+obj-$(CONFIG_HID_ITHC) := ithc.o
-+
-+ithc-objs := ithc-main.o ithc-regs.o ithc-dma.o ithc-debug.o
-+
-+ccflags-y := -std=gnu11 -Wno-declaration-after-statement
-+
-diff --git a/drivers/hid/ithc/Kconfig b/drivers/hid/ithc/Kconfig
-new file mode 100644
-index 000000000000..ede713023609
---- /dev/null
-+++ b/drivers/hid/ithc/Kconfig
-@@ -0,0 +1,12 @@
-+config HID_ITHC
-+	tristate "Intel Touch Host Controller"
-+	depends on PCI
-+	depends on HID
-+	help
-+	  Say Y here if your system has a touchscreen using Intel's
-+	  Touch Host Controller (ITHC / IPTS) technology.
-+
-+	  If unsure, say N.
-+
-+	  To compile this driver as a module, choose M here: the
-+	  module will be called ithc.
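For reference, a minimal sketch of a kernel .config fragment that enables this driver as a module; it uses only the CONFIG_HID_ITHC symbol and the two "depends on" clauses from the Kconfig entry above:

CONFIG_PCI=y
CONFIG_HID=y
CONFIG_HID_ITHC=m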
-diff --git a/drivers/hid/ithc/ithc-debug.c b/drivers/hid/ithc/ithc-debug.c -new file mode 100644 -index 000000000000..57bf125c45bd ---- /dev/null -+++ b/drivers/hid/ithc/ithc-debug.c -@@ -0,0 +1,96 @@ -+#include "ithc.h" -+ -+void ithc_log_regs(struct ithc *ithc) { -+ if (!ithc->prev_regs) return; -+ u32 __iomem *cur = (__iomem void*)ithc->regs; -+ u32 *prev = (void*)ithc->prev_regs; -+ for (int i = 1024; i < sizeof *ithc->regs / 4; i++) { -+ u32 x = readl(cur + i); -+ if (x != prev[i]) { -+ pci_info(ithc->pci, "reg %04x: %08x -> %08x\n", i * 4, prev[i], x); -+ prev[i] = x; -+ } -+ } -+} -+ -+static ssize_t ithc_debugfs_cmd_write(struct file *f, const char __user *buf, size_t len, loff_t *offset) { -+ struct ithc *ithc = file_inode(f)->i_private; -+ char cmd[256]; -+ if (!ithc || !ithc->pci) return -ENODEV; -+ if (!len) return -EINVAL; -+ if (len >= sizeof cmd) return -EINVAL; -+ if (copy_from_user(cmd, buf, len)) return -EFAULT; -+ cmd[len] = 0; -+ if (cmd[len-1] == '\n') cmd[len-1] = 0; -+ pci_info(ithc->pci, "debug command: %s\n", cmd); -+ u32 n = 0; -+ const char *s = cmd + 1; -+ u32 a[32]; -+ while (*s && *s != '\n') { -+ if (n >= ARRAY_SIZE(a)) return -EINVAL; -+ if (*s++ != ' ') return -EINVAL; -+ char *e; -+ a[n++] = simple_strtoul(s, &e, 0); -+ if (e == s) return -EINVAL; -+ s = e; -+ } -+ ithc_log_regs(ithc); -+ switch(cmd[0]) { -+ case 'x': // reset -+ ithc_reset(ithc); -+ break; -+ case 'w': // write register: offset mask value -+ if (n != 3 || (a[0] & 3)) return -EINVAL; -+ pci_info(ithc->pci, "debug write 0x%04x = 0x%08x (mask 0x%08x)\n", a[0], a[2], a[1]); -+ bitsl(((__iomem u32 *)ithc->regs) + a[0] / 4, a[1], a[2]); -+ break; -+ case 'r': // read register: offset -+ if (n != 1 || (a[0] & 3)) return -EINVAL; -+ pci_info(ithc->pci, "debug read 0x%04x = 0x%08x\n", a[0], readl(((__iomem u32 *)ithc->regs) + a[0] / 4)); -+ break; -+ case 's': // spi command: cmd offset len data... -+ // read config: s 4 0 64 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -+ // set touch cfg: s 6 12 4 XX -+ if (n < 3 || a[2] > (n - 3) * 4) return -EINVAL; -+ pci_info(ithc->pci, "debug spi command %u with %u bytes of data\n", a[0], a[2]); -+ if (!CHECK(ithc_spi_command, ithc, a[0], a[1], a[2], a + 3)) -+ for (u32 i = 0; i < (a[2] + 3) / 4; i++) pci_info(ithc->pci, "resp %u = 0x%08x\n", i, a[3+i]); -+ break; -+ case 'd': // dma command: cmd len data... 
-+ // get report descriptor: d 7 8 0 0 -+ // enable multitouch: d 3 2 0x0105 -+ if (n < 2 || a[1] > (n - 2) * 4) return -EINVAL; -+ pci_info(ithc->pci, "debug dma command %u with %u bytes of data\n", a[0], a[1]); -+ if (ithc_dma_tx(ithc, a[0], a[1], a + 2)) pci_err(ithc->pci, "dma tx failed\n"); -+ break; -+ default: -+ return -EINVAL; -+ } -+ ithc_log_regs(ithc); -+ return len; -+} -+ -+static const struct file_operations ithc_debugfops_cmd = { -+ .owner = THIS_MODULE, -+ .write = ithc_debugfs_cmd_write, -+}; -+ -+static void ithc_debugfs_devres_release(struct device *dev, void *res) { -+ struct dentry **dbgm = res; -+ if (*dbgm) debugfs_remove_recursive(*dbgm); -+} -+ -+int ithc_debug_init(struct ithc *ithc) { -+ struct dentry **dbgm = devres_alloc(ithc_debugfs_devres_release, sizeof *dbgm, GFP_KERNEL); -+ if (!dbgm) return -ENOMEM; -+ devres_add(&ithc->pci->dev, dbgm); -+ struct dentry *dbg = debugfs_create_dir(DEVNAME, NULL); -+ if (IS_ERR(dbg)) return PTR_ERR(dbg); -+ *dbgm = dbg; -+ -+ struct dentry *cmd = debugfs_create_file("cmd", 0220, dbg, ithc, &ithc_debugfops_cmd); -+ if (IS_ERR(cmd)) return PTR_ERR(cmd); -+ -+ return 0; -+} -+ -diff --git a/drivers/hid/ithc/ithc-dma.c b/drivers/hid/ithc/ithc-dma.c -new file mode 100644 -index 000000000000..7e89b3496918 ---- /dev/null -+++ b/drivers/hid/ithc/ithc-dma.c -@@ -0,0 +1,258 @@ -+#include "ithc.h" -+ -+static int ithc_dma_prd_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *p, unsigned num_buffers, unsigned num_pages, enum dma_data_direction dir) { -+ p->num_pages = num_pages; -+ p->dir = dir; -+ p->size = round_up(num_buffers * num_pages * sizeof(struct ithc_phys_region_desc), PAGE_SIZE); -+ p->addr = dmam_alloc_coherent(&ithc->pci->dev, p->size, &p->dma_addr, GFP_KERNEL); -+ if (!p->addr) return -ENOMEM; -+ if (p->dma_addr & (PAGE_SIZE - 1)) return -EFAULT; -+ return 0; -+} -+ -+struct ithc_sg_table { -+ void *addr; -+ struct sg_table sgt; -+ enum dma_data_direction dir; -+}; -+static void ithc_dma_sgtable_free(struct sg_table *sgt) { -+ struct scatterlist *sg; -+ int i; -+ for_each_sgtable_sg(sgt, sg, i) { -+ struct page *p = sg_page(sg); -+ if (p) __free_page(p); -+ } -+ sg_free_table(sgt); -+} -+static void ithc_dma_data_devres_release(struct device *dev, void *res) { -+ struct ithc_sg_table *sgt = res; -+ if (sgt->addr) vunmap(sgt->addr); -+ dma_unmap_sgtable(dev, &sgt->sgt, sgt->dir, 0); -+ ithc_dma_sgtable_free(&sgt->sgt); -+} -+ -+static int ithc_dma_data_alloc(struct ithc* ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b) { -+ // We don't use dma_alloc_coherent for data buffers, because they don't have to be contiguous (we can use one PRD per page) or coherent (they are unidirectional). -+ // Instead we use an sg_table of individually allocated pages (5.13 has dma_alloc_noncontiguous for this, but we'd like to support 5.10 for now). 
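-+	// Illustration (assuming 4 KiB pages): a 16 KiB buffer is backed by four
-+	// individually allocated pages and described to the device by four PRDs,
-+	// with PRD_FLAG_END set on the last one (see ithc_dma_data_buffer_put).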
-+ struct page *pages[16]; -+ if (prds->num_pages == 0 || prds->num_pages > ARRAY_SIZE(pages)) return -EINVAL; -+ b->active_idx = -1; -+ struct ithc_sg_table *sgt = devres_alloc(ithc_dma_data_devres_release, sizeof *sgt, GFP_KERNEL); -+ if (!sgt) return -ENOMEM; -+ sgt->dir = prds->dir; -+ if (!sg_alloc_table(&sgt->sgt, prds->num_pages, GFP_KERNEL)) { -+ struct scatterlist *sg; -+ int i; -+ bool ok = true; -+ for_each_sgtable_sg(&sgt->sgt, sg, i) { -+ struct page *p = pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); // don't need __GFP_DMA for PCI DMA -+ if (!p) { ok = false; break; } -+ sg_set_page(sg, p, PAGE_SIZE, 0); -+ } -+ if (ok && !dma_map_sgtable(&ithc->pci->dev, &sgt->sgt, prds->dir, 0)) { -+ devres_add(&ithc->pci->dev, sgt); -+ b->sgt = &sgt->sgt; -+ b->addr = sgt->addr = vmap(pages, prds->num_pages, 0, PAGE_KERNEL); -+ if (!b->addr) return -ENOMEM; -+ return 0; -+ } -+ ithc_dma_sgtable_free(&sgt->sgt); -+ } -+ devres_free(sgt); -+ return -ENOMEM; -+} -+ -+static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b, unsigned idx) { -+ struct ithc_phys_region_desc *prd = prds->addr; -+ prd += idx * prds->num_pages; -+ if (b->active_idx >= 0) { pci_err(ithc->pci, "buffer already active\n"); return -EINVAL; } -+ b->active_idx = idx; -+ if (prds->dir == DMA_TO_DEVICE) { -+ if (b->data_size > PAGE_SIZE) return -EINVAL; -+ prd->addr = sg_dma_address(b->sgt->sgl) >> 10; -+ prd->size = b->data_size | PRD_FLAG_END; -+ flush_kernel_vmap_range(b->addr, b->data_size); -+ } else if (prds->dir == DMA_FROM_DEVICE) { -+ struct scatterlist *sg; -+ int i; -+ for_each_sgtable_dma_sg(b->sgt, sg, i) { -+ prd->addr = sg_dma_address(sg) >> 10; -+ prd->size = sg_dma_len(sg); -+ prd++; -+ } -+ prd[-1].size |= PRD_FLAG_END; -+ } -+ dma_wmb(); // for the prds -+ dma_sync_sgtable_for_device(&ithc->pci->dev, b->sgt, prds->dir); -+ return 0; -+} -+ -+static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b, unsigned idx) { -+ struct ithc_phys_region_desc *prd = prds->addr; -+ prd += idx * prds->num_pages; -+ if (b->active_idx != idx) { pci_err(ithc->pci, "wrong buffer index\n"); return -EINVAL; } -+ b->active_idx = -1; -+ if (prds->dir == DMA_FROM_DEVICE) { -+ dma_rmb(); // for the prds -+ b->data_size = 0; -+ struct scatterlist *sg; -+ int i; -+ for_each_sgtable_dma_sg(b->sgt, sg, i) { -+ unsigned size = prd->size; -+ b->data_size += size & PRD_SIZE_MASK; -+ if (size & PRD_FLAG_END) break; -+ if ((size & PRD_SIZE_MASK) != sg_dma_len(sg)) { pci_err(ithc->pci, "truncated prd\n"); break; } -+ prd++; -+ } -+ invalidate_kernel_vmap_range(b->addr, b->data_size); -+ } -+ dma_sync_sgtable_for_cpu(&ithc->pci->dev, b->sgt, prds->dir); -+ return 0; -+} -+ -+int ithc_dma_rx_init(struct ithc *ithc, u8 channel, const char *devname) { -+ struct ithc_dma_rx *rx = &ithc->dma_rx[channel]; -+ mutex_init(&rx->mutex); -+ u32 buf_size = DEVCFG_DMA_RX_SIZE(ithc->config.dma_buf_sizes); -+ unsigned num_pages = (buf_size + PAGE_SIZE - 1) / PAGE_SIZE; -+ pci_dbg(ithc->pci, "allocating rx buffers: num = %u, size = %u, pages = %u\n", NUM_RX_BUF, buf_size, num_pages); -+ CHECK_RET(ithc_dma_prd_alloc, ithc, &rx->prds, NUM_RX_BUF, num_pages, DMA_FROM_DEVICE); -+ for (unsigned i = 0; i < NUM_RX_BUF; i++) -+ CHECK_RET(ithc_dma_data_alloc, ithc, &rx->prds, &rx->bufs[i]); -+ writeb(DMA_RX_CONTROL2_RESET, &ithc->regs->dma_rx[channel].control2); -+ lo_hi_writeq(rx->prds.dma_addr, &ithc->regs->dma_rx[channel].addr); -+ 
writeb(NUM_RX_BUF - 1, &ithc->regs->dma_rx[channel].num_bufs); -+ writeb(num_pages - 1, &ithc->regs->dma_rx[channel].num_prds); -+ u8 head = readb(&ithc->regs->dma_rx[channel].head); -+ if (head) { pci_err(ithc->pci, "head is nonzero (%u)\n", head); return -EIO; } -+ for (unsigned i = 0; i < NUM_RX_BUF; i++) -+ CHECK_RET(ithc_dma_data_buffer_put, ithc, &rx->prds, &rx->bufs[i], i); -+ writeb(head ^ DMA_RX_WRAP_FLAG, &ithc->regs->dma_rx[channel].tail); -+ return 0; -+} -+void ithc_dma_rx_enable(struct ithc *ithc, u8 channel) { -+ bitsb_set(&ithc->regs->dma_rx[channel].control, DMA_RX_CONTROL_ENABLE | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_DATA); -+ CHECK(waitl, ithc, &ithc->regs->dma_rx[1].status, DMA_RX_STATUS_ENABLED, DMA_RX_STATUS_ENABLED); -+} -+ -+int ithc_dma_tx_init(struct ithc *ithc) { -+ struct ithc_dma_tx *tx = &ithc->dma_tx; -+ mutex_init(&tx->mutex); -+ tx->max_size = DEVCFG_DMA_TX_SIZE(ithc->config.dma_buf_sizes); -+ unsigned num_pages = (tx->max_size + PAGE_SIZE - 1) / PAGE_SIZE; -+ pci_dbg(ithc->pci, "allocating tx buffers: size = %u, pages = %u\n", tx->max_size, num_pages); -+ CHECK_RET(ithc_dma_prd_alloc, ithc, &tx->prds, 1, num_pages, DMA_TO_DEVICE); -+ CHECK_RET(ithc_dma_data_alloc, ithc, &tx->prds, &tx->buf); -+ lo_hi_writeq(tx->prds.dma_addr, &ithc->regs->dma_tx.addr); -+ writeb(num_pages - 1, &ithc->regs->dma_tx.num_prds); -+ CHECK_RET(ithc_dma_data_buffer_put, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0); -+ return 0; -+} -+ -+static int ithc_dma_rx_process_buf(struct ithc *ithc, struct ithc_dma_data_buffer *data, u8 channel, u8 buf) { -+ if (buf >= NUM_RX_BUF) { -+ pci_err(ithc->pci, "invalid dma ringbuffer index\n"); -+ return -EINVAL; -+ } -+ ithc_set_active(ithc); -+ u32 len = data->data_size; -+ struct ithc_dma_rx_header *hdr = data->addr; -+ u8 *hiddata = (void *)(hdr + 1); -+ if (len >= sizeof *hdr && hdr->code == DMA_RX_CODE_RESET) { -+ CHECK(ithc_reset, ithc); -+ } else if (len < sizeof *hdr || len != sizeof *hdr + hdr->data_size) { -+ if (hdr->code == DMA_RX_CODE_INPUT_REPORT) { -+ // When the CPU enters a low power state during DMA, we can get truncated messages. -+ // Typically this will be a single touch HID report that is only 1 byte, or a multitouch report that is 257 bytes. -+ // See also ithc_set_active(). -+ } else { -+ pci_err(ithc->pci, "invalid dma rx data! 
channel %u, buffer %u, size %u, code %u, data size %u\n", channel, buf, len, hdr->code, hdr->data_size); -+ print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, hdr, min(len, 0x400u), 0); -+ } -+ } else if (hdr->code == DMA_RX_CODE_REPORT_DESCRIPTOR && hdr->data_size > 8) { -+ CHECK(hid_parse_report, ithc->hid, hiddata + 8, hdr->data_size - 8); -+ WRITE_ONCE(ithc->hid_parse_done, true); -+ wake_up(&ithc->wait_hid_parse); -+ } else if (hdr->code == DMA_RX_CODE_INPUT_REPORT) { -+ CHECK(hid_input_report, ithc->hid, HID_INPUT_REPORT, hiddata, hdr->data_size, 1); -+ } else if (hdr->code == DMA_RX_CODE_FEATURE_REPORT) { -+ bool done = false; -+ mutex_lock(&ithc->hid_get_feature_mutex); -+ if (ithc->hid_get_feature_buf) { -+ if (hdr->data_size < ithc->hid_get_feature_size) ithc->hid_get_feature_size = hdr->data_size; -+ memcpy(ithc->hid_get_feature_buf, hiddata, ithc->hid_get_feature_size); -+ ithc->hid_get_feature_buf = NULL; -+ done = true; -+ } -+ mutex_unlock(&ithc->hid_get_feature_mutex); -+ if (done) wake_up(&ithc->wait_hid_get_feature); -+ else CHECK(hid_input_report, ithc->hid, HID_FEATURE_REPORT, hiddata, hdr->data_size, 1); -+ } else { -+ pci_dbg(ithc->pci, "unhandled dma rx data! channel %u, buffer %u, size %u, code %u\n", channel, buf, len, hdr->code); -+ print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, hdr, min(len, 0x400u), 0); -+ } -+ return 0; -+} -+ -+static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) { -+ struct ithc_dma_rx *rx = &ithc->dma_rx[channel]; -+ unsigned n = rx->num_received; -+ u8 head_wrap = readb(&ithc->regs->dma_rx[channel].head); -+ while (1) { -+ u8 tail = n % NUM_RX_BUF; -+ u8 tail_wrap = tail | ((n / NUM_RX_BUF) & 1 ? 0 : DMA_RX_WRAP_FLAG); -+ writeb(tail_wrap, &ithc->regs->dma_rx[channel].tail); -+ // ringbuffer is full if tail_wrap == head_wrap -+ // ringbuffer is empty if tail_wrap == head_wrap ^ WRAP_FLAG -+ if (tail_wrap == (head_wrap ^ DMA_RX_WRAP_FLAG)) return 0; -+ -+ // take the buffer that the device just filled -+ struct ithc_dma_data_buffer *b = &rx->bufs[n % NUM_RX_BUF]; -+ CHECK_RET(ithc_dma_data_buffer_get, ithc, &rx->prds, b, tail); -+ rx->num_received = ++n; -+ -+ // process data -+ CHECK(ithc_dma_rx_process_buf, ithc, b, channel, tail); -+ -+ // give the buffer back to the device -+ CHECK_RET(ithc_dma_data_buffer_put, ithc, &rx->prds, b, tail); -+ } -+} -+int ithc_dma_rx(struct ithc *ithc, u8 channel) { -+ struct ithc_dma_rx *rx = &ithc->dma_rx[channel]; -+ mutex_lock(&rx->mutex); -+ int ret = ithc_dma_rx_unlocked(ithc, channel); -+ mutex_unlock(&rx->mutex); -+ return ret; -+} -+ -+static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) { -+ pci_dbg(ithc->pci, "dma tx command %u, size %u\n", cmdcode, datasize); -+ struct ithc_dma_tx_header *hdr; -+ u8 padding = datasize & 3 ? 
4 - (datasize & 3) : 0; -+ unsigned fullsize = sizeof *hdr + datasize + padding; -+ if (fullsize > ithc->dma_tx.max_size || fullsize > PAGE_SIZE) return -EINVAL; -+ CHECK_RET(ithc_dma_data_buffer_get, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0); -+ -+ ithc->dma_tx.buf.data_size = fullsize; -+ hdr = ithc->dma_tx.buf.addr; -+ hdr->code = cmdcode; -+ hdr->data_size = datasize; -+ u8 *dest = (void *)(hdr + 1); -+ memcpy(dest, data, datasize); -+ dest += datasize; -+ for (u8 p = 0; p < padding; p++) *dest++ = 0; -+ CHECK_RET(ithc_dma_data_buffer_put, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0); -+ -+ bitsb_set(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND); -+ CHECK_RET(waitb, ithc, &ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND, 0); -+ writel(DMA_TX_STATUS_DONE, &ithc->regs->dma_tx.status); -+ return 0; -+} -+int ithc_dma_tx(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) { -+ mutex_lock(&ithc->dma_tx.mutex); -+ int ret = ithc_dma_tx_unlocked(ithc, cmdcode, datasize, data); -+ mutex_unlock(&ithc->dma_tx.mutex); -+ return ret; -+} -+ -diff --git a/drivers/hid/ithc/ithc-dma.h b/drivers/hid/ithc/ithc-dma.h -new file mode 100644 -index 000000000000..d9f2c19a13f3 ---- /dev/null -+++ b/drivers/hid/ithc/ithc-dma.h -@@ -0,0 +1,67 @@ -+#define PRD_SIZE_MASK 0xffffff -+#define PRD_FLAG_END 0x1000000 -+#define PRD_FLAG_SUCCESS 0x2000000 -+#define PRD_FLAG_ERROR 0x4000000 -+ -+struct ithc_phys_region_desc { -+ u64 addr; // physical addr/1024 -+ u32 size; // num bytes, PRD_FLAG_END marks last prd for data split over multiple prds -+ u32 unused; -+}; -+ -+#define DMA_RX_CODE_INPUT_REPORT 3 -+#define DMA_RX_CODE_FEATURE_REPORT 4 -+#define DMA_RX_CODE_REPORT_DESCRIPTOR 5 -+#define DMA_RX_CODE_RESET 7 -+ -+struct ithc_dma_rx_header { -+ u32 code; -+ u32 data_size; -+ u32 _unknown[14]; -+}; -+ -+#define DMA_TX_CODE_SET_FEATURE 3 -+#define DMA_TX_CODE_GET_FEATURE 4 -+#define DMA_TX_CODE_OUTPUT_REPORT 5 -+#define DMA_TX_CODE_GET_REPORT_DESCRIPTOR 7 -+ -+struct ithc_dma_tx_header { -+ u32 code; -+ u32 data_size; -+}; -+ -+struct ithc_dma_prd_buffer { -+ void *addr; -+ dma_addr_t dma_addr; -+ u32 size; -+ u32 num_pages; // per data buffer -+ enum dma_data_direction dir; -+}; -+ -+struct ithc_dma_data_buffer { -+ void *addr; -+ struct sg_table *sgt; -+ int active_idx; -+ u32 data_size; -+}; -+ -+struct ithc_dma_tx { -+ struct mutex mutex; -+ u32 max_size; -+ struct ithc_dma_prd_buffer prds; -+ struct ithc_dma_data_buffer buf; -+}; -+ -+struct ithc_dma_rx { -+ struct mutex mutex; -+ u32 num_received; -+ struct ithc_dma_prd_buffer prds; -+ struct ithc_dma_data_buffer bufs[NUM_RX_BUF]; -+}; -+ -+int ithc_dma_rx_init(struct ithc *ithc, u8 channel, const char *devname); -+void ithc_dma_rx_enable(struct ithc *ithc, u8 channel); -+int ithc_dma_tx_init(struct ithc *ithc); -+int ithc_dma_rx(struct ithc *ithc, u8 channel); -+int ithc_dma_tx(struct ithc *ithc, u32 cmdcode, u32 datasize, void *cmddata); -+ -diff --git a/drivers/hid/ithc/ithc-main.c b/drivers/hid/ithc/ithc-main.c -new file mode 100644 -index 000000000000..09512b9cb4d3 ---- /dev/null -+++ b/drivers/hid/ithc/ithc-main.c -@@ -0,0 +1,534 @@ -+#include "ithc.h" -+ -+MODULE_DESCRIPTION("Intel Touch Host Controller driver"); -+MODULE_LICENSE("Dual BSD/GPL"); -+ -+// Lakefield -+#define PCI_DEVICE_ID_INTEL_THC_LKF_PORT1 0x98d0 -+#define PCI_DEVICE_ID_INTEL_THC_LKF_PORT2 0x98d1 -+// Tiger Lake -+#define PCI_DEVICE_ID_INTEL_THC_TGL_LP_PORT1 0xa0d0 -+#define PCI_DEVICE_ID_INTEL_THC_TGL_LP_PORT2 0xa0d1 -+#define PCI_DEVICE_ID_INTEL_THC_TGL_H_PORT1 
0x43d0 -+#define PCI_DEVICE_ID_INTEL_THC_TGL_H_PORT2 0x43d1 -+// Alder Lake -+#define PCI_DEVICE_ID_INTEL_THC_ADL_S_PORT1 0x7ad8 -+#define PCI_DEVICE_ID_INTEL_THC_ADL_S_PORT2 0x7ad9 -+#define PCI_DEVICE_ID_INTEL_THC_ADL_P_PORT1 0x51d0 -+#define PCI_DEVICE_ID_INTEL_THC_ADL_P_PORT2 0x51d1 -+#define PCI_DEVICE_ID_INTEL_THC_ADL_M_PORT1 0x54d0 -+#define PCI_DEVICE_ID_INTEL_THC_ADL_M_PORT2 0x54d1 -+// Raptor Lake -+#define PCI_DEVICE_ID_INTEL_THC_RPL_S_PORT1 0x7a58 -+#define PCI_DEVICE_ID_INTEL_THC_RPL_S_PORT2 0x7a59 -+// Meteor Lake -+#define PCI_DEVICE_ID_INTEL_THC_MTL_PORT1 0x7e48 -+#define PCI_DEVICE_ID_INTEL_THC_MTL_PORT2 0x7e4a -+ -+static const struct pci_device_id ithc_pci_tbl[] = { -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_LKF_PORT1) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_LKF_PORT2) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_TGL_LP_PORT1) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_TGL_LP_PORT2) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_TGL_H_PORT1) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_TGL_H_PORT2) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_ADL_S_PORT1) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_ADL_S_PORT2) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_ADL_P_PORT1) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_ADL_P_PORT2) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_ADL_M_PORT1) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_ADL_M_PORT2) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_RPL_S_PORT1) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_RPL_S_PORT2) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_MTL_PORT1) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_MTL_PORT2) }, -+ {} -+}; -+MODULE_DEVICE_TABLE(pci, ithc_pci_tbl); -+ -+// Module parameters -+ -+static bool ithc_use_polling = false; -+module_param_named(poll, ithc_use_polling, bool, 0); -+MODULE_PARM_DESC(poll, "Use polling instead of interrupts"); -+ -+static bool ithc_use_rx0 = false; -+module_param_named(rx0, ithc_use_rx0, bool, 0); -+MODULE_PARM_DESC(rx0, "Use DMA RX channel 0"); -+ -+static bool ithc_use_rx1 = true; -+module_param_named(rx1, ithc_use_rx1, bool, 0); -+MODULE_PARM_DESC(rx1, "Use DMA RX channel 1"); -+ -+static bool ithc_log_regs_enabled = false; -+module_param_named(logregs, ithc_log_regs_enabled, bool, 0); -+MODULE_PARM_DESC(logregs, "Log changes in register values (for debugging)"); -+ -+// Sysfs attributes -+ -+static bool ithc_is_config_valid(struct ithc *ithc) { -+ return ithc->config.device_id == DEVCFG_DEVICE_ID_TIC; -+} -+ -+static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, char *buf) { -+ struct ithc *ithc = dev_get_drvdata(dev); -+ if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; -+ return sprintf(buf, "0x%04x", ithc->config.vendor_id); -+} -+static DEVICE_ATTR_RO(vendor); -+static ssize_t product_show(struct device *dev, struct device_attribute *attr, char *buf) { -+ struct ithc *ithc = dev_get_drvdata(dev); -+ if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; -+ return sprintf(buf, "0x%04x", ithc->config.product_id); -+} -+static DEVICE_ATTR_RO(product); -+static ssize_t revision_show(struct device *dev, struct device_attribute *attr, char *buf) { -+ struct ithc *ithc = dev_get_drvdata(dev); -+ if (!ithc || !ithc_is_config_valid(ithc)) 
return -ENODEV; -+ return sprintf(buf, "%u", ithc->config.revision); -+} -+static DEVICE_ATTR_RO(revision); -+static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf) { -+ struct ithc *ithc = dev_get_drvdata(dev); -+ if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; -+ u32 v = ithc->config.fw_version; -+ return sprintf(buf, "%i.%i.%i.%i", v >> 24, v >> 16 & 0xff, v >> 8 & 0xff, v & 0xff); -+} -+static DEVICE_ATTR_RO(fw_version); -+ -+static const struct attribute_group *ithc_attribute_groups[] = { -+ &(const struct attribute_group){ -+ .name = DEVNAME, -+ .attrs = (struct attribute *[]){ -+ &dev_attr_vendor.attr, -+ &dev_attr_product.attr, -+ &dev_attr_revision.attr, -+ &dev_attr_fw_version.attr, -+ NULL -+ }, -+ }, -+ NULL -+}; -+ -+// HID setup -+ -+static int ithc_hid_start(struct hid_device *hdev) { return 0; } -+static void ithc_hid_stop(struct hid_device *hdev) { } -+static int ithc_hid_open(struct hid_device *hdev) { return 0; } -+static void ithc_hid_close(struct hid_device *hdev) { } -+ -+static int ithc_hid_parse(struct hid_device *hdev) { -+ struct ithc *ithc = hdev->driver_data; -+ u64 val = 0; -+ WRITE_ONCE(ithc->hid_parse_done, false); -+ CHECK_RET(ithc_dma_tx, ithc, DMA_TX_CODE_GET_REPORT_DESCRIPTOR, sizeof val, &val); -+ if (!wait_event_timeout(ithc->wait_hid_parse, READ_ONCE(ithc->hid_parse_done), msecs_to_jiffies(1000))) return -ETIMEDOUT; -+ return 0; -+} -+ -+static int ithc_hid_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, size_t len, unsigned char rtype, int reqtype) { -+ struct ithc *ithc = hdev->driver_data; -+ if (!buf || !len) return -EINVAL; -+ u32 code; -+ if (rtype == HID_OUTPUT_REPORT && reqtype == HID_REQ_SET_REPORT) code = DMA_TX_CODE_OUTPUT_REPORT; -+ else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_SET_REPORT) code = DMA_TX_CODE_SET_FEATURE; -+ else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_GET_REPORT) code = DMA_TX_CODE_GET_FEATURE; -+ else { -+ pci_err(ithc->pci, "unhandled hid request %i %i for report id %i\n", rtype, reqtype, reportnum); -+ return -EINVAL; -+ } -+ buf[0] = reportnum; -+ if (reqtype == HID_REQ_GET_REPORT) { -+ mutex_lock(&ithc->hid_get_feature_mutex); -+ ithc->hid_get_feature_buf = buf; -+ ithc->hid_get_feature_size = len; -+ mutex_unlock(&ithc->hid_get_feature_mutex); -+ int r = CHECK(ithc_dma_tx, ithc, code, 1, buf); -+ if (!r) { -+ r = wait_event_interruptible_timeout(ithc->wait_hid_get_feature, !ithc->hid_get_feature_buf, msecs_to_jiffies(1000)); -+ if (!r) r = -ETIMEDOUT; -+ else if (r < 0) r = -EINTR; -+ else r = 0; -+ } -+ mutex_lock(&ithc->hid_get_feature_mutex); -+ ithc->hid_get_feature_buf = NULL; -+ if (!r) r = ithc->hid_get_feature_size; -+ mutex_unlock(&ithc->hid_get_feature_mutex); -+ return r; -+ } -+ CHECK_RET(ithc_dma_tx, ithc, code, len, buf); -+ return 0; -+} -+ -+static struct hid_ll_driver ithc_ll_driver = { -+ .start = ithc_hid_start, -+ .stop = ithc_hid_stop, -+ .open = ithc_hid_open, -+ .close = ithc_hid_close, -+ .parse = ithc_hid_parse, -+ .raw_request = ithc_hid_raw_request, -+}; -+ -+static void ithc_hid_devres_release(struct device *dev, void *res) { -+ struct hid_device **hidm = res; -+ if (*hidm) hid_destroy_device(*hidm); -+} -+ -+static int ithc_hid_init(struct ithc *ithc) { -+ struct hid_device **hidm = devres_alloc(ithc_hid_devres_release, sizeof *hidm, GFP_KERNEL); -+ if (!hidm) return -ENOMEM; -+ devres_add(&ithc->pci->dev, hidm); -+ struct hid_device *hid = hid_allocate_device(); -+ if (IS_ERR(hid)) return 
PTR_ERR(hid); -+ *hidm = hid; -+ -+ strscpy(hid->name, DEVFULLNAME, sizeof(hid->name)); -+ strscpy(hid->phys, ithc->phys, sizeof(hid->phys)); -+ hid->ll_driver = &ithc_ll_driver; -+ hid->bus = BUS_PCI; -+ hid->vendor = ithc->config.vendor_id; -+ hid->product = ithc->config.product_id; -+ hid->version = 0x100; -+ hid->dev.parent = &ithc->pci->dev; -+ hid->driver_data = ithc; -+ -+ ithc->hid = hid; -+ return 0; -+} -+ -+// Interrupts/polling -+ -+static void ithc_activity_timer_callback(struct timer_list *t) { -+ struct ithc *ithc = container_of(t, struct ithc, activity_timer); -+ cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE); -+} -+ -+void ithc_set_active(struct ithc *ithc) { -+ // When CPU usage is very low, the CPU can enter various low power states (C2-C10). -+ // This disrupts DMA, causing truncated DMA messages. ERROR_FLAG_DMA_UNKNOWN_12 will be set when this happens. -+ // The amount of truncated messages can become very high, resulting in user-visible effects (laggy/stuttering cursor). -+ // To avoid this, we use a CPU latency QoS request to prevent the CPU from entering low power states during touch interactions. -+ cpu_latency_qos_update_request(&ithc->activity_qos, 0); -+ mod_timer(&ithc->activity_timer, jiffies + msecs_to_jiffies(1000)); -+} -+ -+static int ithc_set_device_enabled(struct ithc *ithc, bool enable) { -+ u32 x = ithc->config.touch_cfg = (ithc->config.touch_cfg & ~(u32)DEVCFG_TOUCH_MASK) | DEVCFG_TOUCH_UNKNOWN_2 -+ | (enable ? DEVCFG_TOUCH_ENABLE | DEVCFG_TOUCH_UNKNOWN_3 | DEVCFG_TOUCH_UNKNOWN_4 : 0); -+ return ithc_spi_command(ithc, SPI_CMD_CODE_WRITE, offsetof(struct ithc_device_config, touch_cfg), sizeof x, &x); -+} -+ -+static void ithc_disable_interrupts(struct ithc *ithc) { -+ writel(0, &ithc->regs->error_control); -+ bitsb(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_IRQ, 0); -+ bitsb(&ithc->regs->dma_rx[0].control, DMA_RX_CONTROL_IRQ_UNKNOWN_1 | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_UNKNOWN_4 | DMA_RX_CONTROL_IRQ_DATA, 0); -+ bitsb(&ithc->regs->dma_rx[1].control, DMA_RX_CONTROL_IRQ_UNKNOWN_1 | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_UNKNOWN_4 | DMA_RX_CONTROL_IRQ_DATA, 0); -+ bitsb(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_IRQ, 0); -+} -+ -+static void ithc_clear_dma_rx_interrupts(struct ithc *ithc, unsigned channel) { -+ writel(DMA_RX_STATUS_ERROR | DMA_RX_STATUS_UNKNOWN_4 | DMA_RX_STATUS_HAVE_DATA, &ithc->regs->dma_rx[channel].status); -+} -+ -+static void ithc_clear_interrupts(struct ithc *ithc) { -+ writel(0xffffffff, &ithc->regs->error_flags); -+ writel(ERROR_STATUS_DMA | ERROR_STATUS_SPI, &ithc->regs->error_status); -+ writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status); -+ ithc_clear_dma_rx_interrupts(ithc, 0); -+ ithc_clear_dma_rx_interrupts(ithc, 1); -+ writel(DMA_TX_STATUS_DONE | DMA_TX_STATUS_ERROR | DMA_TX_STATUS_UNKNOWN_2, &ithc->regs->dma_tx.status); -+} -+ -+static void ithc_process(struct ithc *ithc) { -+ ithc_log_regs(ithc); -+ -+ // read and clear error bits -+ u32 err = readl(&ithc->regs->error_flags); -+ if (err) { -+ if (err & ~ERROR_FLAG_DMA_UNKNOWN_12) pci_err(ithc->pci, "error flags: 0x%08x\n", err); -+ writel(err, &ithc->regs->error_flags); -+ } -+ -+ // process DMA rx -+ if (ithc_use_rx0) { -+ ithc_clear_dma_rx_interrupts(ithc, 0); -+ ithc_dma_rx(ithc, 0); -+ } -+ if (ithc_use_rx1) { -+ ithc_clear_dma_rx_interrupts(ithc, 1); -+ ithc_dma_rx(ithc, 1); -+ } -+ -+ ithc_log_regs(ithc); -+} -+ -+static irqreturn_t ithc_interrupt_thread(int irq, void *arg) { -+ struct 
ithc *ithc = arg; -+ pci_dbg(ithc->pci, "IRQ! err=%08x/%08x/%08x, cmd=%02x/%08x, rx0=%02x/%08x, rx1=%02x/%08x, tx=%02x/%08x\n", -+ readl(&ithc->regs->error_control), readl(&ithc->regs->error_status), readl(&ithc->regs->error_flags), -+ readb(&ithc->regs->spi_cmd.control), readl(&ithc->regs->spi_cmd.status), -+ readb(&ithc->regs->dma_rx[0].control), readl(&ithc->regs->dma_rx[0].status), -+ readb(&ithc->regs->dma_rx[1].control), readl(&ithc->regs->dma_rx[1].status), -+ readb(&ithc->regs->dma_tx.control), readl(&ithc->regs->dma_tx.status)); -+ ithc_process(ithc); -+ return IRQ_HANDLED; -+} -+ -+static int ithc_poll_thread(void *arg) { -+ struct ithc *ithc = arg; -+ unsigned sleep = 100; -+ while (!kthread_should_stop()) { -+ u32 n = ithc->dma_rx[1].num_received; -+ ithc_process(ithc); -+ if (n != ithc->dma_rx[1].num_received) sleep = 20; -+ else sleep = min(200u, sleep + (sleep >> 4) + 1); -+ msleep_interruptible(sleep); -+ } -+ return 0; -+} -+ -+// Device initialization and shutdown -+ -+static void ithc_disable(struct ithc *ithc) { -+ bitsl_set(&ithc->regs->control_bits, CONTROL_QUIESCE); -+ CHECK(waitl, ithc, &ithc->regs->control_bits, CONTROL_IS_QUIESCED, CONTROL_IS_QUIESCED); -+ bitsl(&ithc->regs->control_bits, CONTROL_NRESET, 0); -+ bitsb(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_SEND, 0); -+ bitsb(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND, 0); -+ bitsb(&ithc->regs->dma_rx[0].control, DMA_RX_CONTROL_ENABLE, 0); -+ bitsb(&ithc->regs->dma_rx[1].control, DMA_RX_CONTROL_ENABLE, 0); -+ ithc_disable_interrupts(ithc); -+ ithc_clear_interrupts(ithc); -+} -+ -+static int ithc_init_device(struct ithc *ithc) { -+ ithc_log_regs(ithc); -+ bool was_enabled = (readl(&ithc->regs->control_bits) & CONTROL_NRESET) != 0; -+ ithc_disable(ithc); -+ CHECK_RET(waitl, ithc, &ithc->regs->control_bits, CONTROL_READY, CONTROL_READY); -+ ithc_set_spi_config(ithc, 10, 0); -+ bitsl_set(&ithc->regs->dma_rx[0].unknown_init_bits, 0x80000000); // seems to help with reading config -+ -+ if (was_enabled) if (msleep_interruptible(100)) return -EINTR; -+ bitsl(&ithc->regs->control_bits, CONTROL_QUIESCE, 0); -+ CHECK_RET(waitl, ithc, &ithc->regs->control_bits, CONTROL_IS_QUIESCED, 0); -+ for (int retries = 0; ; retries++) { -+ ithc_log_regs(ithc); -+ bitsl_set(&ithc->regs->control_bits, CONTROL_NRESET); -+ if (!waitl(ithc, &ithc->regs->state, 0xf, 2)) break; -+ if (retries > 5) { -+ pci_err(ithc->pci, "too many retries, failed to reset device\n"); -+ return -ETIMEDOUT; -+ } -+ pci_err(ithc->pci, "invalid state, retrying reset\n"); -+ bitsl(&ithc->regs->control_bits, CONTROL_NRESET, 0); -+ if (msleep_interruptible(1000)) return -EINTR; -+ } -+ ithc_log_regs(ithc); -+ -+ CHECK(waitl, ithc, &ithc->regs->dma_rx[0].status, DMA_RX_STATUS_UNKNOWN_4, DMA_RX_STATUS_UNKNOWN_4); -+ -+ // read config -+ for (int retries = 0; ; retries++) { -+ ithc_log_regs(ithc); -+ memset(&ithc->config, 0, sizeof ithc->config); -+ CHECK_RET(ithc_spi_command, ithc, SPI_CMD_CODE_READ, 0, sizeof ithc->config, &ithc->config); -+ u32 *p = (void *)&ithc->config; -+ pci_info(ithc->pci, "config: %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", -+ p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); -+ if (ithc_is_config_valid(ithc)) break; -+ if (retries > 10) { -+ pci_err(ithc->pci, "failed to read config, unknown device ID 0x%08x\n", ithc->config.device_id); -+ return -EIO; -+ } -+ pci_err(ithc->pci, "failed to read config, retrying\n"); -+ if 
(msleep_interruptible(100)) return -EINTR; -+ } -+ ithc_log_regs(ithc); -+ -+ CHECK_RET(ithc_set_spi_config, ithc, DEVCFG_SPI_MAX_FREQ(ithc->config.spi_config), DEVCFG_SPI_MODE(ithc->config.spi_config)); -+ CHECK_RET(ithc_set_device_enabled, ithc, true); -+ ithc_log_regs(ithc); -+ return 0; -+} -+ -+int ithc_reset(struct ithc *ithc) { -+ // FIXME This should probably do devres_release_group()+ithc_start(). But because this is called during DMA -+ // processing, that would have to be done asynchronously (schedule_work()?). And with extra locking? -+ pci_err(ithc->pci, "reset\n"); -+ CHECK(ithc_init_device, ithc); -+ if (ithc_use_rx0) ithc_dma_rx_enable(ithc, 0); -+ if (ithc_use_rx1) ithc_dma_rx_enable(ithc, 1); -+ ithc_log_regs(ithc); -+ pci_dbg(ithc->pci, "reset completed\n"); -+ return 0; -+} -+ -+static void ithc_stop(void *res) { -+ struct ithc *ithc = res; -+ pci_dbg(ithc->pci, "stopping\n"); -+ ithc_log_regs(ithc); -+ if (ithc->poll_thread) CHECK(kthread_stop, ithc->poll_thread); -+ if (ithc->irq >= 0) disable_irq(ithc->irq); -+ CHECK(ithc_set_device_enabled, ithc, false); -+ ithc_disable(ithc); -+ del_timer_sync(&ithc->activity_timer); -+ cpu_latency_qos_remove_request(&ithc->activity_qos); -+ // clear dma config -+ for(unsigned i = 0; i < 2; i++) { -+ CHECK(waitl, ithc, &ithc->regs->dma_rx[i].status, DMA_RX_STATUS_ENABLED, 0); -+ lo_hi_writeq(0, &ithc->regs->dma_rx[i].addr); -+ writeb(0, &ithc->regs->dma_rx[i].num_bufs); -+ writeb(0, &ithc->regs->dma_rx[i].num_prds); -+ } -+ lo_hi_writeq(0, &ithc->regs->dma_tx.addr); -+ writeb(0, &ithc->regs->dma_tx.num_prds); -+ ithc_log_regs(ithc); -+ pci_dbg(ithc->pci, "stopped\n"); -+} -+ -+static void ithc_clear_drvdata(void *res) { -+ struct pci_dev *pci = res; -+ pci_set_drvdata(pci, NULL); -+} -+ -+static int ithc_start(struct pci_dev *pci) { -+ pci_dbg(pci, "starting\n"); -+ if (pci_get_drvdata(pci)) { -+ pci_err(pci, "device already initialized\n"); -+ return -EINVAL; -+ } -+ if (!devres_open_group(&pci->dev, ithc_start, GFP_KERNEL)) return -ENOMEM; -+ -+ struct ithc *ithc = devm_kzalloc(&pci->dev, sizeof *ithc, GFP_KERNEL); -+ if (!ithc) return -ENOMEM; -+ ithc->irq = -1; -+ ithc->pci = pci; -+ snprintf(ithc->phys, sizeof ithc->phys, "pci-%s/" DEVNAME, pci_name(pci)); -+ init_waitqueue_head(&ithc->wait_hid_parse); -+ init_waitqueue_head(&ithc->wait_hid_get_feature); -+ mutex_init(&ithc->hid_get_feature_mutex); -+ pci_set_drvdata(pci, ithc); -+ CHECK_RET(devm_add_action_or_reset, &pci->dev, ithc_clear_drvdata, pci); -+ if (ithc_log_regs_enabled) ithc->prev_regs = devm_kzalloc(&pci->dev, sizeof *ithc->prev_regs, GFP_KERNEL); -+ -+ CHECK_RET(pcim_enable_device, pci); -+ pci_set_master(pci); -+ CHECK_RET(pcim_iomap_regions, pci, BIT(0), DEVNAME " regs"); -+ CHECK_RET(dma_set_mask_and_coherent, &pci->dev, DMA_BIT_MASK(64)); -+ CHECK_RET(pci_set_power_state, pci, PCI_D0); -+ ithc->regs = pcim_iomap_table(pci)[0]; -+ -+ if (!ithc_use_polling) { -+ CHECK_RET(pci_alloc_irq_vectors, pci, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX); -+ ithc->irq = CHECK(pci_irq_vector, pci, 0); -+ if (ithc->irq < 0) return ithc->irq; -+ } -+ -+ CHECK_RET(ithc_init_device, ithc); -+ CHECK(devm_device_add_groups, &pci->dev, ithc_attribute_groups); -+ if (ithc_use_rx0) CHECK_RET(ithc_dma_rx_init, ithc, 0, ithc_use_rx1 ? DEVNAME "0" : DEVNAME); -+ if (ithc_use_rx1) CHECK_RET(ithc_dma_rx_init, ithc, 1, ithc_use_rx0 ? 
DEVNAME "1" : DEVNAME); -+ CHECK_RET(ithc_dma_tx_init, ithc); -+ -+ CHECK_RET(ithc_hid_init, ithc); -+ -+ cpu_latency_qos_add_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE); -+ timer_setup(&ithc->activity_timer, ithc_activity_timer_callback, 0); -+ -+ // add ithc_stop callback AFTER setting up DMA buffers, so that polling/irqs/DMA are disabled BEFORE the buffers are freed -+ CHECK_RET(devm_add_action_or_reset, &pci->dev, ithc_stop, ithc); -+ -+ if (ithc_use_polling) { -+ pci_info(pci, "using polling instead of irq\n"); -+ // use a thread instead of simple timer because we want to be able to sleep -+ ithc->poll_thread = kthread_run(ithc_poll_thread, ithc, DEVNAME "poll"); -+ if (IS_ERR(ithc->poll_thread)) { -+ int err = PTR_ERR(ithc->poll_thread); -+ ithc->poll_thread = NULL; -+ return err; -+ } -+ } else { -+ CHECK_RET(devm_request_threaded_irq, &pci->dev, ithc->irq, NULL, ithc_interrupt_thread, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, DEVNAME, ithc); -+ } -+ -+ if (ithc_use_rx0) ithc_dma_rx_enable(ithc, 0); -+ if (ithc_use_rx1) ithc_dma_rx_enable(ithc, 1); -+ -+ // hid_add_device can only be called after irq/polling is started and DMA is enabled, because it calls ithc_hid_parse which reads the report descriptor via DMA -+ CHECK_RET(hid_add_device, ithc->hid); -+ -+ CHECK(ithc_debug_init, ithc); -+ -+ pci_dbg(pci, "started\n"); -+ return 0; -+} -+ -+static int ithc_probe(struct pci_dev *pci, const struct pci_device_id *id) { -+ pci_dbg(pci, "device probe\n"); -+ return ithc_start(pci); -+} -+ -+static void ithc_remove(struct pci_dev *pci) { -+ pci_dbg(pci, "device remove\n"); -+ // all cleanup is handled by devres -+} -+ -+static int ithc_suspend(struct device *dev) { -+ struct pci_dev *pci = to_pci_dev(dev); -+ pci_dbg(pci, "pm suspend\n"); -+ devres_release_group(dev, ithc_start); -+ return 0; -+} -+ -+static int ithc_resume(struct device *dev) { -+ struct pci_dev *pci = to_pci_dev(dev); -+ pci_dbg(pci, "pm resume\n"); -+ return ithc_start(pci); -+} -+ -+static int ithc_freeze(struct device *dev) { -+ struct pci_dev *pci = to_pci_dev(dev); -+ pci_dbg(pci, "pm freeze\n"); -+ devres_release_group(dev, ithc_start); -+ return 0; -+} -+ -+static int ithc_thaw(struct device *dev) { -+ struct pci_dev *pci = to_pci_dev(dev); -+ pci_dbg(pci, "pm thaw\n"); -+ return ithc_start(pci); -+} -+ -+static int ithc_restore(struct device *dev) { -+ struct pci_dev *pci = to_pci_dev(dev); -+ pci_dbg(pci, "pm restore\n"); -+ return ithc_start(pci); -+} -+ -+static struct pci_driver ithc_driver = { -+ .name = DEVNAME, -+ .id_table = ithc_pci_tbl, -+ .probe = ithc_probe, -+ .remove = ithc_remove, -+ .driver.pm = &(const struct dev_pm_ops) { -+ .suspend = ithc_suspend, -+ .resume = ithc_resume, -+ .freeze = ithc_freeze, -+ .thaw = ithc_thaw, -+ .restore = ithc_restore, -+ }, -+ //.dev_groups = ithc_attribute_groups, // could use this (since 5.14), however the attributes won't have valid values until config has been read anyway -+}; -+ -+static int __init ithc_init(void) { -+ return pci_register_driver(&ithc_driver); -+} -+ -+static void __exit ithc_exit(void) { -+ pci_unregister_driver(&ithc_driver); -+} -+ -+module_init(ithc_init); -+module_exit(ithc_exit); -+ -diff --git a/drivers/hid/ithc/ithc-regs.c b/drivers/hid/ithc/ithc-regs.c -new file mode 100644 -index 000000000000..85d567b05761 ---- /dev/null -+++ b/drivers/hid/ithc/ithc-regs.c -@@ -0,0 +1,64 @@ -+#include "ithc.h" -+ -+#define reg_num(r) (0x1fff & (u16)(__force u64)(r)) -+ -+void bitsl(__iomem u32 *reg, u32 mask, u32 val) { -+ if (val & ~mask) 
pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", reg_num(reg), val, mask); -+ writel((readl(reg) & ~mask) | (val & mask), reg); -+} -+ -+void bitsb(__iomem u8 *reg, u8 mask, u8 val) { -+ if (val & ~mask) pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", reg_num(reg), val, mask); -+ writeb((readb(reg) & ~mask) | (val & mask), reg); -+} -+ -+int waitl(struct ithc *ithc, __iomem u32 *reg, u32 mask, u32 val) { -+ pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", reg_num(reg), mask, val); -+ u32 x; -+ if (readl_poll_timeout(reg, x, (x & mask) == val, 200, 1000*1000)) { -+ pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", reg_num(reg), mask, val); -+ return -ETIMEDOUT; -+ } -+ pci_dbg(ithc->pci, "done waiting\n"); -+ return 0; -+} -+ -+int waitb(struct ithc *ithc, __iomem u8 *reg, u8 mask, u8 val) { -+ pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", reg_num(reg), mask, val); -+ u8 x; -+ if (readb_poll_timeout(reg, x, (x & mask) == val, 200, 1000*1000)) { -+ pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", reg_num(reg), mask, val); -+ return -ETIMEDOUT; -+ } -+ pci_dbg(ithc->pci, "done waiting\n"); -+ return 0; -+} -+ -+int ithc_set_spi_config(struct ithc *ithc, u8 speed, u8 mode) { -+ pci_dbg(ithc->pci, "setting SPI speed to %i, mode %i\n", speed, mode); -+ if (mode == 3) mode = 2; -+ bitsl(&ithc->regs->spi_config, -+ SPI_CONFIG_MODE(0xff) | SPI_CONFIG_SPEED(0xff) | SPI_CONFIG_UNKNOWN_18(0xff) | SPI_CONFIG_SPEED2(0xff), -+ SPI_CONFIG_MODE(mode) | SPI_CONFIG_SPEED(speed) | SPI_CONFIG_UNKNOWN_18(0) | SPI_CONFIG_SPEED2(speed)); -+ return 0; -+} -+ -+int ithc_spi_command(struct ithc *ithc, u8 command, u32 offset, u32 size, void *data) { -+ pci_dbg(ithc->pci, "SPI command %u, size %u, offset %u\n", command, size, offset); -+ if (size > sizeof ithc->regs->spi_cmd.data) return -EINVAL; -+ CHECK_RET(waitl, ithc, &ithc->regs->spi_cmd.status, SPI_CMD_STATUS_BUSY, 0); -+ writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status); -+ writeb(command, &ithc->regs->spi_cmd.code); -+ writew(size, &ithc->regs->spi_cmd.size); -+ writel(offset, &ithc->regs->spi_cmd.offset); -+ u32 *p = data, n = (size + 3) / 4; -+ for (u32 i = 0; i < n; i++) writel(p[i], &ithc->regs->spi_cmd.data[i]); -+ bitsb_set(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_SEND); -+ CHECK_RET(waitl, ithc, &ithc->regs->spi_cmd.status, SPI_CMD_STATUS_BUSY, 0); -+ if ((readl(&ithc->regs->spi_cmd.status) & (SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR)) != SPI_CMD_STATUS_DONE) return -EIO; -+ if (readw(&ithc->regs->spi_cmd.size) != size) return -EMSGSIZE; -+ for (u32 i = 0; i < n; i++) p[i] = readl(&ithc->regs->spi_cmd.data[i]); -+ writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status); -+ return 0; -+} -+ -diff --git a/drivers/hid/ithc/ithc-regs.h b/drivers/hid/ithc/ithc-regs.h -new file mode 100644 -index 000000000000..1a96092ed7ee ---- /dev/null -+++ b/drivers/hid/ithc/ithc-regs.h -@@ -0,0 +1,186 @@ -+#define CONTROL_QUIESCE BIT(1) -+#define CONTROL_IS_QUIESCED BIT(2) -+#define CONTROL_NRESET BIT(3) -+#define CONTROL_READY BIT(29) -+ -+#define SPI_CONFIG_MODE(x) (((x) & 3) << 2) -+#define SPI_CONFIG_SPEED(x) (((x) & 7) << 4) -+#define SPI_CONFIG_UNKNOWN_18(x) (((x) & 3) << 18) -+#define SPI_CONFIG_SPEED2(x) (((x) & 0xf) << 20) // high bit = high speed mode? 
-+ -+#define ERROR_CONTROL_UNKNOWN_0 BIT(0) -+#define ERROR_CONTROL_DISABLE_DMA BIT(1) // clears DMA_RX_CONTROL_ENABLE when a DMA error occurs -+#define ERROR_CONTROL_UNKNOWN_2 BIT(2) -+#define ERROR_CONTROL_UNKNOWN_3 BIT(3) -+#define ERROR_CONTROL_IRQ_DMA_UNKNOWN_9 BIT(9) -+#define ERROR_CONTROL_IRQ_DMA_UNKNOWN_10 BIT(10) -+#define ERROR_CONTROL_IRQ_DMA_UNKNOWN_12 BIT(12) -+#define ERROR_CONTROL_IRQ_DMA_UNKNOWN_13 BIT(13) -+#define ERROR_CONTROL_UNKNOWN_16(x) (((x) & 0xff) << 16) // spi error code irq? -+#define ERROR_CONTROL_SET_DMA_STATUS BIT(29) // sets DMA_RX_STATUS_ERROR when a DMA error occurs -+ -+#define ERROR_STATUS_DMA BIT(28) -+#define ERROR_STATUS_SPI BIT(30) -+ -+#define ERROR_FLAG_DMA_UNKNOWN_9 BIT(9) -+#define ERROR_FLAG_DMA_UNKNOWN_10 BIT(10) -+#define ERROR_FLAG_DMA_UNKNOWN_12 BIT(12) // set when we receive a truncated DMA message -+#define ERROR_FLAG_DMA_UNKNOWN_13 BIT(13) -+#define ERROR_FLAG_SPI_BUS_TURNAROUND BIT(16) -+#define ERROR_FLAG_SPI_RESPONSE_TIMEOUT BIT(17) -+#define ERROR_FLAG_SPI_INTRA_PACKET_TIMEOUT BIT(18) -+#define ERROR_FLAG_SPI_INVALID_RESPONSE BIT(19) -+#define ERROR_FLAG_SPI_HS_RX_TIMEOUT BIT(20) -+#define ERROR_FLAG_SPI_TOUCH_IC_INIT BIT(21) -+ -+#define SPI_CMD_CONTROL_SEND BIT(0) // cleared by device when sending is complete -+#define SPI_CMD_CONTROL_IRQ BIT(1) -+ -+#define SPI_CMD_CODE_READ 4 -+#define SPI_CMD_CODE_WRITE 6 -+ -+#define SPI_CMD_STATUS_DONE BIT(0) -+#define SPI_CMD_STATUS_ERROR BIT(1) -+#define SPI_CMD_STATUS_BUSY BIT(3) -+ -+#define DMA_TX_CONTROL_SEND BIT(0) // cleared by device when sending is complete -+#define DMA_TX_CONTROL_IRQ BIT(3) -+ -+#define DMA_TX_STATUS_DONE BIT(0) -+#define DMA_TX_STATUS_ERROR BIT(1) -+#define DMA_TX_STATUS_UNKNOWN_2 BIT(2) -+#define DMA_TX_STATUS_UNKNOWN_3 BIT(3) // busy? -+ -+#define DMA_RX_CONTROL_ENABLE BIT(0) -+#define DMA_RX_CONTROL_IRQ_UNKNOWN_1 BIT(1) // rx1 only? -+#define DMA_RX_CONTROL_IRQ_ERROR BIT(3) // rx1 only? -+#define DMA_RX_CONTROL_IRQ_UNKNOWN_4 BIT(4) // rx0 only? -+#define DMA_RX_CONTROL_IRQ_DATA BIT(5) -+ -+#define DMA_RX_CONTROL2_UNKNOWN_5 BIT(5) // rx0 only? 
-+#define DMA_RX_CONTROL2_RESET BIT(7) // resets ringbuffer indices -+ -+#define DMA_RX_WRAP_FLAG BIT(7) -+ -+#define DMA_RX_STATUS_ERROR BIT(3) -+#define DMA_RX_STATUS_UNKNOWN_4 BIT(4) // set in rx0 after using CONTROL_NRESET when it becomes possible to read config (can take >100ms) -+#define DMA_RX_STATUS_HAVE_DATA BIT(5) -+#define DMA_RX_STATUS_ENABLED BIT(8) -+ -+#define COUNTER_RESET BIT(31) -+ -+struct ithc_registers { -+ /* 0000 */ u32 _unknown_0000[1024]; -+ /* 1000 */ u32 _unknown_1000; -+ /* 1004 */ u32 _unknown_1004; -+ /* 1008 */ u32 control_bits; -+ /* 100c */ u32 _unknown_100c; -+ /* 1010 */ u32 spi_config; -+ /* 1014 */ u32 _unknown_1014[3]; -+ /* 1020 */ u32 error_control; -+ /* 1024 */ u32 error_status; // write to clear -+ /* 1028 */ u32 error_flags; // write to clear -+ /* 102c */ u32 _unknown_102c[5]; -+ struct { -+ /* 1040 */ u8 control; -+ /* 1041 */ u8 code; -+ /* 1042 */ u16 size; -+ /* 1044 */ u32 status; // write to clear -+ /* 1048 */ u32 offset; -+ /* 104c */ u32 data[16]; -+ /* 108c */ u32 _unknown_108c; -+ } spi_cmd; -+ struct { -+ /* 1090 */ u64 addr; // cannot be written with writeq(), must use lo_hi_writeq() -+ /* 1098 */ u8 control; -+ /* 1099 */ u8 _unknown_1099; -+ /* 109a */ u8 _unknown_109a; -+ /* 109b */ u8 num_prds; -+ /* 109c */ u32 status; // write to clear -+ } dma_tx; -+ /* 10a0 */ u32 _unknown_10a0[7]; -+ /* 10bc */ u32 state; // is 0xe0000402 (dev config val 0) after CONTROL_NRESET, 0xe0000461 after first touch, 0xe0000401 after DMA_RX_CODE_RESET -+ /* 10c0 */ u32 _unknown_10c0[8]; -+ /* 10e0 */ u32 _unknown_10e0_counters[3]; -+ /* 10ec */ u32 _unknown_10ec[5]; -+ struct { -+ /* 1100/1200 */ u64 addr; // cannot be written with writeq(), must use lo_hi_writeq() -+ /* 1108/1208 */ u8 num_bufs; -+ /* 1109/1209 */ u8 num_prds; -+ /* 110a/120a */ u16 _unknown_110a; -+ /* 110c/120c */ u8 control; -+ /* 110d/120d */ u8 head; -+ /* 110e/120e */ u8 tail; -+ /* 110f/120f */ u8 control2; -+ /* 1110/1210 */ u32 status; // write to clear -+ /* 1114/1214 */ u32 _unknown_1114; -+ /* 1118/1218 */ u64 _unknown_1118_guc_addr; -+ /* 1120/1220 */ u32 _unknown_1120_guc; -+ /* 1124/1224 */ u32 _unknown_1124_guc; -+ /* 1128/1228 */ u32 unknown_init_bits; // bit 2 = guc related, bit 3 = rx1 related, bit 4 = guc related -+ /* 112c/122c */ u32 _unknown_112c; -+ /* 1130/1230 */ u64 _unknown_1130_guc_addr; -+ /* 1138/1238 */ u32 _unknown_1138_guc; -+ /* 113c/123c */ u32 _unknown_113c; -+ /* 1140/1240 */ u32 _unknown_1140_guc; -+ /* 1144/1244 */ u32 _unknown_1144[23]; -+ /* 11a0/12a0 */ u32 _unknown_11a0_counters[6]; -+ /* 11b8/12b8 */ u32 _unknown_11b8[18]; -+ } dma_rx[2]; -+}; -+static_assert(sizeof(struct ithc_registers) == 0x1300); -+ -+#define DEVCFG_DMA_RX_SIZE(x) ((((x) & 0x3fff) + 1) << 6) -+#define DEVCFG_DMA_TX_SIZE(x) (((((x) >> 14) & 0x3ff) + 1) << 6) -+ -+#define DEVCFG_TOUCH_MASK 0x3f -+#define DEVCFG_TOUCH_ENABLE BIT(0) -+#define DEVCFG_TOUCH_UNKNOWN_1 BIT(1) -+#define DEVCFG_TOUCH_UNKNOWN_2 BIT(2) -+#define DEVCFG_TOUCH_UNKNOWN_3 BIT(3) -+#define DEVCFG_TOUCH_UNKNOWN_4 BIT(4) -+#define DEVCFG_TOUCH_UNKNOWN_5 BIT(5) -+#define DEVCFG_TOUCH_UNKNOWN_6 BIT(6) -+ -+#define DEVCFG_DEVICE_ID_TIC 0x43495424 // "$TIC" -+ -+#define DEVCFG_SPI_MAX_FREQ(x) (((x) >> 1) & 0xf) // high bit = use high speed mode? 
-+#define DEVCFG_SPI_MODE(x) (((x) >> 6) & 3)
-+#define DEVCFG_SPI_UNKNOWN_8(x) (((x) >> 8) & 0x3f)
-+#define DEVCFG_SPI_NEEDS_HEARTBEAT BIT(20)
-+#define DEVCFG_SPI_HEARTBEAT_INTERVAL(x) (((x) >> 21) & 7)
-+#define DEVCFG_SPI_UNKNOWN_25 BIT(25)
-+#define DEVCFG_SPI_UNKNOWN_26 BIT(26)
-+#define DEVCFG_SPI_UNKNOWN_27 BIT(27)
-+#define DEVCFG_SPI_DELAY(x) (((x) >> 28) & 7)
-+#define DEVCFG_SPI_USE_EXT_READ_CFG BIT(31)
-+
-+struct ithc_device_config {
-+ u32 _unknown_00; // 00 = 0xe0000402 (0xe0000401 after DMA_RX_CODE_RESET)
-+ u32 _unknown_04; // 04 = 0x00000000
-+ u32 dma_buf_sizes; // 08 = 0x000a00ff
-+ u32 touch_cfg; // 0c = 0x0000001c
-+ u32 _unknown_10; // 10 = 0x0000001c
-+ u32 device_id; // 14 = 0x43495424 = "$TIC"
-+ u32 spi_config; // 18 = 0xfda00a2e
-+ u16 vendor_id; // 1c = 0x045e = Microsoft Corp.
-+ u16 product_id; // 1e = 0x0c1a
-+ u32 revision; // 20 = 0x00000001
-+ u32 fw_version; // 24 = 0x05008a8b = 5.0.138.139
-+ u32 _unknown_28; // 28 = 0x00000000
-+ u32 fw_mode; // 2c = 0x00000000
-+ u32 _unknown_30; // 30 = 0x00000000
-+ u32 _unknown_34; // 34 = 0x0404035e (u8,u8,u8,u8 = version?)
-+ u32 _unknown_38; // 38 = 0x000001c0 (0x000001c1 after DMA_RX_CODE_RESET)
-+ u32 _unknown_3c; // 3c = 0x00000002
-+};
-+
-+void bitsl(__iomem u32 *reg, u32 mask, u32 val);
-+void bitsb(__iomem u8 *reg, u8 mask, u8 val);
-+#define bitsl_set(reg, x) bitsl(reg, x, x)
-+#define bitsb_set(reg, x) bitsb(reg, x, x)
-+int waitl(struct ithc *ithc, __iomem u32 *reg, u32 mask, u32 val);
-+int waitb(struct ithc *ithc, __iomem u8 *reg, u8 mask, u8 val);
-+int ithc_set_spi_config(struct ithc *ithc, u8 speed, u8 mode);
-+int ithc_spi_command(struct ithc *ithc, u8 command, u32 offset, u32 size, void *data);
-+
-diff --git a/drivers/hid/ithc/ithc.h b/drivers/hid/ithc/ithc.h
-new file mode 100644
-index 000000000000..6a9b0d480bc1
---- /dev/null
-+++ b/drivers/hid/ithc/ithc.h
-@@ -0,0 +1,60 @@
-+#include <linux/module.h>
-+#include <linux/pci.h>
-+#include <linux/hid.h>
-+#include <linux/dma-mapping.h>
-+#include <linux/scatterlist.h>
-+#include <linux/highmem.h>
-+#include <linux/io-64-nonatomic-lo-hi.h>
-+#include <linux/iopoll.h>
-+#include <linux/delay.h>
-+#include <linux/kthread.h>
-+#include <linux/timer.h>
-+#include <linux/pm_qos.h>
-+#include <linux/miscdevice.h>
-+#include <linux/poll.h>
-+#include <linux/debugfs.h>
-+
-+#define DEVNAME "ithc"
-+#define DEVFULLNAME "Intel Touch Host Controller"
-+
-+#undef pr_fmt
-+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-+
-+#define CHECK(fn, ...) ({ int r = fn(__VA_ARGS__); if (r < 0) pci_err(ithc->pci, "%s: %s failed with %i\n", __func__, #fn, r); r; })
-+#define CHECK_RET(...) 
do { int r = CHECK(__VA_ARGS__); if (r < 0) return r; } while(0) -+ -+#define NUM_RX_BUF 16 -+ -+struct ithc; -+ -+#include "ithc-regs.h" -+#include "ithc-dma.h" -+ -+struct ithc { -+ char phys[32]; -+ struct pci_dev *pci; -+ int irq; -+ struct task_struct *poll_thread; -+ struct pm_qos_request activity_qos; -+ struct timer_list activity_timer; -+ -+ struct hid_device *hid; -+ bool hid_parse_done; -+ wait_queue_head_t wait_hid_parse; -+ wait_queue_head_t wait_hid_get_feature; -+ struct mutex hid_get_feature_mutex; -+ void *hid_get_feature_buf; -+ size_t hid_get_feature_size; -+ -+ struct ithc_registers __iomem *regs; -+ struct ithc_registers *prev_regs; // for debugging -+ struct ithc_device_config config; -+ struct ithc_dma_rx dma_rx[2]; -+ struct ithc_dma_tx dma_tx; -+}; -+ -+int ithc_reset(struct ithc *ithc); -+void ithc_set_active(struct ithc *ithc); -+int ithc_debug_init(struct ithc *ithc); -+void ithc_log_regs(struct ithc *ithc); -+ -diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c -index d6037a328669..a290ebc77aea 100644 ---- a/drivers/i2c/i2c-core-acpi.c -+++ b/drivers/i2c/i2c-core-acpi.c -@@ -628,6 +628,28 @@ static int acpi_gsb_i2c_write_bytes(struct i2c_client *client, - return (ret == 1) ? 0 : -EIO; - } - -+static int acpi_gsb_i2c_write_raw_bytes(struct i2c_client *client, -+ u8 *data, u8 data_len) -+{ -+ struct i2c_msg msgs[1]; -+ int ret = AE_OK; -+ -+ msgs[0].addr = client->addr; -+ msgs[0].flags = client->flags; -+ msgs[0].len = data_len + 1; -+ msgs[0].buf = data; -+ -+ ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); -+ -+ if (ret < 0) { -+ dev_err(&client->adapter->dev, "i2c write failed: %d\n", ret); -+ return ret; -+ } -+ -+ /* 1 transfer must have completed successfully */ -+ return (ret == 1) ? 0 : -EIO; -+} -+ - static acpi_status - i2c_acpi_space_handler(u32 function, acpi_physical_address command, - u32 bits, u64 *value64, -@@ -729,6 +751,19 @@ i2c_acpi_space_handler(u32 function, acpi_physical_address command, - } - break; - -+ case ACPI_GSB_ACCESS_ATTRIB_RAW_BYTES: -+ if (action == ACPI_READ) { -+ dev_warn(&adapter->dev, -+ "protocol 0x%02x not supported for client 0x%02x\n", -+ accessor_type, client->addr); -+ ret = AE_BAD_PARAMETER; -+ goto err; -+ } else { -+ status = acpi_gsb_i2c_write_raw_bytes(client, -+ gsb->data, info->access_length); -+ } -+ break; -+ - default: - dev_warn(&adapter->dev, "protocol 0x%02x not supported for client 0x%02x\n", - accessor_type, client->addr); -diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c -index 09489380afda..0f02411a60f1 100644 ---- a/drivers/input/misc/soc_button_array.c -+++ b/drivers/input/misc/soc_button_array.c -@@ -507,8 +507,8 @@ static const struct soc_device_data soc_device_MSHW0028 = { - * Both, the Surface Pro 4 (surfacepro3_button.c) and the above mentioned - * devices use MSHW0040 for power and volume buttons, however the way they - * have to be addressed differs. Make sure that we only load this drivers -- * for the correct devices by checking the OEM Platform Revision provided by -- * the _DSM method. -+ * for the correct devices by checking if the OEM Platform Revision DSM call -+ * exists. 
- */ - #define MSHW0040_DSM_REVISION 0x01 - #define MSHW0040_DSM_GET_OMPR 0x02 // get OEM Platform Revision -@@ -519,31 +519,14 @@ static const guid_t MSHW0040_DSM_UUID = - static int soc_device_check_MSHW0040(struct device *dev) - { - acpi_handle handle = ACPI_HANDLE(dev); -- union acpi_object *result; -- u64 oem_platform_rev = 0; // valid revisions are nonzero -- -- // get OEM platform revision -- result = acpi_evaluate_dsm_typed(handle, &MSHW0040_DSM_UUID, -- MSHW0040_DSM_REVISION, -- MSHW0040_DSM_GET_OMPR, NULL, -- ACPI_TYPE_INTEGER); -- -- if (result) { -- oem_platform_rev = result->integer.value; -- ACPI_FREE(result); -- } -- -- /* -- * If the revision is zero here, the _DSM evaluation has failed. This -- * indicates that we have a Pro 4 or Book 1 and this driver should not -- * be used. -- */ -- if (oem_platform_rev == 0) -- return -ENODEV; -+ bool exists; - -- dev_dbg(dev, "OEM Platform Revision %llu\n", oem_platform_rev); -+ // check if OEM platform revision DSM call exists -+ exists = acpi_check_dsm(handle, &MSHW0040_DSM_UUID, -+ MSHW0040_DSM_REVISION, -+ BIT(MSHW0040_DSM_GET_OMPR)); - -- return 0; -+ return exists ? 0 : -ENODEV; - } - - /* -diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c -index 7c2f4bd33582..3ebd2260cdab 100644 ---- a/drivers/iommu/intel/iommu.c -+++ b/drivers/iommu/intel/iommu.c -@@ -37,6 +37,8 @@ - #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) - #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB) - #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) -+#define IS_IPTS(pdev) ((pdev)->vendor == PCI_VENDOR_ID_INTEL && \ -+ ((pdev)->device == 0x9d3e)) - #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) - - #define IOAPIC_RANGE_START (0xfee00000) -@@ -287,12 +289,14 @@ int intel_iommu_enabled = 0; - EXPORT_SYMBOL_GPL(intel_iommu_enabled); - - static int dmar_map_gfx = 1; -+static int dmar_map_ipts = 1; - static int intel_iommu_superpage = 1; - static int iommu_identity_mapping; - static int iommu_skip_te_disable; - - #define IDENTMAP_GFX 2 - #define IDENTMAP_AZALIA 4 -+#define IDENTMAP_IPTS 16 - - const struct iommu_ops intel_iommu_ops; - -@@ -2588,6 +2592,9 @@ static int device_def_domain_type(struct device *dev) - - if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) - return IOMMU_DOMAIN_IDENTITY; -+ -+ if ((iommu_identity_mapping & IDENTMAP_IPTS) && IS_IPTS(pdev)) -+ return IOMMU_DOMAIN_IDENTITY; - } - - return 0; -@@ -2977,6 +2984,9 @@ static int __init init_dmars(void) - if (!dmar_map_gfx) - iommu_identity_mapping |= IDENTMAP_GFX; - -+ if (!dmar_map_ipts) -+ iommu_identity_mapping |= IDENTMAP_IPTS; -+ - check_tylersburg_isoch(); - - ret = si_domain_init(hw_pass_through); -@@ -4819,6 +4829,17 @@ static void quirk_iommu_igfx(struct pci_dev *dev) - dmar_map_gfx = 0; - } - -+static void quirk_iommu_ipts(struct pci_dev *dev) -+{ -+ if (!IS_IPTS(dev)) -+ return; -+ -+ if (risky_device(dev)) -+ return; -+ -+ pci_info(dev, "Passthrough IOMMU for IPTS\n"); -+ dmar_map_ipts = 0; -+} - /* G4x/GM45 integrated gfx dmar support is totally busted. 
*/ - DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx); - DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx); -@@ -4854,6 +4875,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx); - DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx); - DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx); - -+/* disable IPTS dmar support */ -+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9D3E, quirk_iommu_ipts); -+ - static void quirk_iommu_rwbf(struct pci_dev *dev) - { - if (risky_device(dev)) -diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c -index df9e261af0b5..bc2a0aefedf2 100644 ---- a/drivers/iommu/intel/irq_remapping.c -+++ b/drivers/iommu/intel/irq_remapping.c -@@ -390,6 +390,22 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev) - data.busmatch_count = 0; - pci_for_each_dma_alias(dev, set_msi_sid_cb, &data); - -+ /* -+ * The Intel Touch Host Controller is at 00:10.6, but for some reason -+ * the MSI interrupts have request id 01:05.0. -+ * Disable id verification to work around this. -+ * FIXME Find proper fix or turn this into a quirk. -+ */ -+ if (dev->vendor == PCI_VENDOR_ID_INTEL && (dev->class >> 8) == PCI_CLASS_INPUT_PEN) { -+ switch(dev->device) { -+ case 0x98d0: case 0x98d1: // LKF -+ case 0xa0d0: case 0xa0d1: // TGL LP -+ case 0x43d0: case 0x43d1: // TGL H -+ set_irte_sid(irte, SVT_NO_VERIFY, SQ_ALL_16, 0); -+ return 0; -+ } -+ } -+ - /* - * DMA alias provides us with a PCI device and alias. The only case - * where the it will return an alias on a different bus than the -diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h -index bdc65d50b945..08723c01d727 100644 ---- a/drivers/misc/mei/hw-me-regs.h -+++ b/drivers/misc/mei/hw-me-regs.h -@@ -92,6 +92,7 @@ - #define MEI_DEV_ID_CDF 0x18D3 /* Cedar Fork */ - - #define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */ -+#define MEI_DEV_ID_ICP_LP_3 0x34E4 /* Ice Lake Point LP 3 (iTouch) */ - #define MEI_DEV_ID_ICP_N 0x38E0 /* Ice Lake Point N */ - - #define MEI_DEV_ID_JSP_N 0x4DE0 /* Jasper Lake Point N */ -diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c -index 5bf0d50d55a0..c13864512229 100644 ---- a/drivers/misc/mei/pci-me.c -+++ b/drivers/misc/mei/pci-me.c -@@ -97,6 +97,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { - {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H_3, MEI_ME_PCH8_ITOUCH_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)}, -+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP_3, MEI_ME_PCH12_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_N, MEI_ME_PCH12_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH15_CFG)}, -diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c -index 5eb131ab916f..67f074a126d1 100644 ---- a/drivers/net/wireless/ath/ath10k/core.c -+++ b/drivers/net/wireless/ath/ath10k/core.c -@@ -38,6 +38,9 @@ static bool fw_diag_log; - /* frame mode values are mapped as per enum ath10k_hw_txrx_mode */ - unsigned int ath10k_frame_mode = ATH10K_HW_TXRX_NATIVE_WIFI; - -+static char *override_board = ""; -+static char *override_board2 = ""; -+ - unsigned long ath10k_coredump_mask = BIT(ATH10K_FW_CRASH_DUMP_REGISTERS) | - BIT(ATH10K_FW_CRASH_DUMP_CE_DATA); - -@@ -50,6 +53,9 @@ module_param(fw_diag_log, bool, 0644); - module_param_named(frame_mode, ath10k_frame_mode, uint, 0644); - module_param_named(coredump_mask, ath10k_coredump_mask, ulong, 0444); - -+module_param(override_board, charp, 
0644); -+module_param(override_board2, charp, 0644); -+ - MODULE_PARM_DESC(debug_mask, "Debugging mask"); - MODULE_PARM_DESC(uart_print, "Uart target debugging"); - MODULE_PARM_DESC(skip_otp, "Skip otp failure for calibration in testmode"); -@@ -59,6 +65,9 @@ MODULE_PARM_DESC(frame_mode, - MODULE_PARM_DESC(coredump_mask, "Bitfield of what to include in firmware crash file"); - MODULE_PARM_DESC(fw_diag_log, "Diag based fw log debugging"); - -+MODULE_PARM_DESC(override_board, "Override for board.bin file"); -+MODULE_PARM_DESC(override_board2, "Override for board-2.bin file"); -+ - static const struct ath10k_hw_params ath10k_hw_params_list[] = { - { - .id = QCA988X_HW_2_0_VERSION, -@@ -911,6 +920,42 @@ static int ath10k_init_configure_target(struct ath10k *ar) - return 0; - } - -+static const char *ath10k_override_board_fw_file(struct ath10k *ar, -+ const char *file) -+{ -+ if (strcmp(file, "board.bin") == 0) { -+ if (strcmp(override_board, "") == 0) -+ return file; -+ -+ if (strcmp(override_board, "none") == 0) { -+ dev_info(ar->dev, "firmware override: pretending 'board.bin' does not exist\n"); -+ return NULL; -+ } -+ -+ dev_info(ar->dev, "firmware override: replacing 'board.bin' with '%s'\n", -+ override_board); -+ -+ return override_board; -+ } -+ -+ if (strcmp(file, "board-2.bin") == 0) { -+ if (strcmp(override_board2, "") == 0) -+ return file; -+ -+ if (strcmp(override_board2, "none") == 0) { -+ dev_info(ar->dev, "firmware override: pretending 'board-2.bin' does not exist\n"); -+ return NULL; -+ } -+ -+ dev_info(ar->dev, "firmware override: replacing 'board-2.bin' with '%s'\n", -+ override_board2); -+ -+ return override_board2; -+ } -+ -+ return file; -+} -+ - static const struct firmware *ath10k_fetch_fw_file(struct ath10k *ar, - const char *dir, - const char *file) -@@ -925,6 +970,19 @@ static const struct firmware *ath10k_fetch_fw_file(struct ath10k *ar, - if (dir == NULL) - dir = "."; - -+ /* HACK: Override board.bin and board-2.bin files if specified. -+ * -+ * Some Surface devices perform better with a different board -+ * configuration. To this end, one would need to replace the board.bin -+ * file with the modified config and remove the board-2.bin file. -+ * Unfortunately, that's not a solution that we can easily package. So -+ * we add module options to perform these overrides here. 
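-+ *
-+ * Example usage (the file name here is illustrative, not a shipped
-+ * default): loading the module with
-+ *   modprobe ath10k_core override_board=board-surface.bin override_board2=none
-+ * makes the driver request 'board-surface.bin' wherever it would have
-+ * requested 'board.bin', and pretend that 'board-2.bin' does not exist.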
-+ */ -+ -+ file = ath10k_override_board_fw_file(ar, file); -+ if (!file) -+ return ERR_PTR(-ENOENT); -+ - snprintf(filename, sizeof(filename), "%s/%s", dir, file); - ret = firmware_request_nowarn(&fw, filename, ar->dev); - ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot fw request '%s': %d\n", -diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c -index 9a698a16a8f3..5e1a341f63df 100644 ---- a/drivers/net/wireless/marvell/mwifiex/pcie.c -+++ b/drivers/net/wireless/marvell/mwifiex/pcie.c -@@ -368,6 +368,7 @@ static int mwifiex_pcie_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) - { - struct pcie_service_card *card; -+ struct pci_dev *parent_pdev = pci_upstream_bridge(pdev); - int ret; - - pr_debug("info: vendor=0x%4.04X device=0x%4.04X rev=%d\n", -@@ -409,6 +410,12 @@ static int mwifiex_pcie_probe(struct pci_dev *pdev, - return -1; - } - -+ /* disable bridge_d3 for Surface gen4+ devices to fix fw crashing -+ * after suspend -+ */ -+ if (card->quirks & QUIRK_NO_BRIDGE_D3) -+ parent_pdev->bridge_d3 = false; -+ - return 0; - } - -@@ -1762,9 +1769,21 @@ mwifiex_pcie_send_boot_cmd(struct mwifiex_adapter *adapter, struct sk_buff *skb) - static int mwifiex_pcie_init_fw_port(struct mwifiex_adapter *adapter) - { - struct pcie_service_card *card = adapter->card; -+ struct pci_dev *pdev = card->dev; -+ struct pci_dev *parent_pdev = pci_upstream_bridge(pdev); - const struct mwifiex_pcie_card_reg *reg = card->pcie.reg; - int tx_wrap = card->txbd_wrptr & reg->tx_wrap_mask; - -+ /* Trigger a function level reset of the PCI bridge device, this makes -+ * the firmware of PCIe 88W8897 cards stop reporting a fixed LTR value -+ * that prevents the system from entering package C10 and S0ix powersaving -+ * states. -+ * We need to do it here because it must happen after firmware -+ * initialization and this function is called after that is done. 
-+ */ -+ if (card->quirks & QUIRK_DO_FLR_ON_BRIDGE) -+ pci_reset_function(parent_pdev); -+ - /* Write the RX ring read pointer in to reg->rx_rdptr */ - if (mwifiex_write_reg(adapter, reg->rx_rdptr, card->rxbd_rdptr | - tx_wrap)) { -diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c -index dd6d21f1dbfd..99b024ecbade 100644 ---- a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c -+++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c -@@ -13,7 +13,9 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 4"), - }, -- .driver_data = (void *)QUIRK_FW_RST_D3COLD, -+ .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -+ QUIRK_DO_FLR_ON_BRIDGE | -+ QUIRK_NO_BRIDGE_D3), - }, - { - .ident = "Surface Pro 5", -@@ -22,7 +24,9 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "Surface_Pro_1796"), - }, -- .driver_data = (void *)QUIRK_FW_RST_D3COLD, -+ .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -+ QUIRK_DO_FLR_ON_BRIDGE | -+ QUIRK_NO_BRIDGE_D3), - }, - { - .ident = "Surface Pro 5 (LTE)", -@@ -31,7 +35,9 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "Surface_Pro_1807"), - }, -- .driver_data = (void *)QUIRK_FW_RST_D3COLD, -+ .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -+ QUIRK_DO_FLR_ON_BRIDGE | -+ QUIRK_NO_BRIDGE_D3), - }, - { - .ident = "Surface Pro 6", -@@ -39,7 +45,9 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 6"), - }, -- .driver_data = (void *)QUIRK_FW_RST_D3COLD, -+ .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -+ QUIRK_DO_FLR_ON_BRIDGE | -+ QUIRK_NO_BRIDGE_D3), - }, - { - .ident = "Surface Book 1", -@@ -47,7 +55,9 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Book"), - }, -- .driver_data = (void *)QUIRK_FW_RST_D3COLD, -+ .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -+ QUIRK_DO_FLR_ON_BRIDGE | -+ QUIRK_NO_BRIDGE_D3), - }, - { - .ident = "Surface Book 2", -@@ -55,7 +65,9 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Book 2"), - }, -- .driver_data = (void *)QUIRK_FW_RST_D3COLD, -+ .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -+ QUIRK_DO_FLR_ON_BRIDGE | -+ QUIRK_NO_BRIDGE_D3), - }, - { - .ident = "Surface Laptop 1", -@@ -63,7 +75,9 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Laptop"), - }, -- .driver_data = (void *)QUIRK_FW_RST_D3COLD, -+ .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -+ QUIRK_DO_FLR_ON_BRIDGE | -+ QUIRK_NO_BRIDGE_D3), - }, - { - .ident = "Surface Laptop 2", -@@ -71,7 +85,9 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Laptop 2"), - }, -- .driver_data = (void *)QUIRK_FW_RST_D3COLD, -+ .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -+ QUIRK_DO_FLR_ON_BRIDGE | -+ QUIRK_NO_BRIDGE_D3), 
- }, - {} - }; -@@ -89,6 +105,11 @@ void mwifiex_initialize_quirks(struct pcie_service_card *card) - dev_info(&pdev->dev, "no quirks enabled\n"); - if (card->quirks & QUIRK_FW_RST_D3COLD) - dev_info(&pdev->dev, "quirk reset_d3cold enabled\n"); -+ if (card->quirks & QUIRK_DO_FLR_ON_BRIDGE) -+ dev_info(&pdev->dev, "quirk do_flr_on_bridge enabled\n"); -+ if (card->quirks & QUIRK_NO_BRIDGE_D3) -+ dev_info(&pdev->dev, -+ "quirk no_brigde_d3 enabled\n"); - } - - static void mwifiex_pcie_set_power_d3cold(struct pci_dev *pdev) -diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h -index d6ff964aec5b..c14eb56eb911 100644 ---- a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h -+++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h -@@ -4,6 +4,8 @@ - #include "pcie.h" - - #define QUIRK_FW_RST_D3COLD BIT(0) -+#define QUIRK_DO_FLR_ON_BRIDGE BIT(1) -+#define QUIRK_NO_BRIDGE_D3 BIT(2) - - void mwifiex_initialize_quirks(struct pcie_service_card *card); - int mwifiex_pcie_reset_d3cold_quirk(struct pci_dev *pdev); -diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c -index 57ddcc59af30..497cbadd2c6c 100644 ---- a/drivers/pci/pci-driver.c -+++ b/drivers/pci/pci-driver.c -@@ -507,6 +507,9 @@ static void pci_device_shutdown(struct device *dev) - struct pci_dev *pci_dev = to_pci_dev(dev); - struct pci_driver *drv = pci_dev->driver; - -+ if (pci_dev->no_shutdown) -+ return; -+ - pm_runtime_resume(dev); - - if (drv && drv->shutdown) -diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index 25edf55de985..6ab563cc58f6 100644 ---- a/drivers/pci/quirks.c -+++ b/drivers/pci/quirks.c -@@ -6124,3 +6124,39 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2d, dpc_log_size); - DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2f, dpc_log_size); - DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a31, dpc_log_size); - #endif -+ -+static const struct dmi_system_id no_shutdown_dmi_table[] = { -+ /* -+ * Systems on which some devices should not be touched during shutdown. 
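-+ * (Reportedly, running the normal shutdown path for the devices quirked
-+ * below prevents these machines from powering off cleanly, so
-+ * pci_device_shutdown() skips them via the no_shutdown flag.)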
-+ */ -+ { -+ .ident = "Microsoft Surface Pro 9", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "Surface Pro 9"), -+ }, -+ }, -+ { -+ .ident = "Microsoft Surface Laptop 5", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "Surface Laptop 5"), -+ }, -+ }, -+ {} -+}; -+ -+static void quirk_no_shutdown(struct pci_dev *dev) -+{ -+ if (!dmi_check_system(no_shutdown_dmi_table)) -+ return; -+ -+ dev->no_shutdown = 1; -+ pci_info(dev, "disabling shutdown ops for [%04x:%04x]\n", -+ dev->vendor, dev->device); -+} -+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x461e, quirk_no_shutdown); // Thunderbolt 4 USB Controller -+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x461f, quirk_no_shutdown); // Thunderbolt 4 PCI Express Root Port -+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x462f, quirk_no_shutdown); // Thunderbolt 4 PCI Express Root Port -+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x466d, quirk_no_shutdown); // Thunderbolt 4 NHI -+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x46a8, quirk_no_shutdown); // GPU -diff --git a/drivers/platform/surface/Kconfig b/drivers/platform/surface/Kconfig -index b629e82af97c..68656e8f309e 100644 ---- a/drivers/platform/surface/Kconfig -+++ b/drivers/platform/surface/Kconfig -@@ -149,6 +149,13 @@ config SURFACE_AGGREGATOR_TABLET_SWITCH - Select M or Y here, if you want to provide tablet-mode switch input - events on the Surface Pro 8, Surface Pro X, and Surface Laptop Studio. - -+config SURFACE_BOOK1_DGPU_SWITCH -+ tristate "Surface Book 1 dGPU Switch Driver" -+ depends on SYSFS -+ help -+ This driver provides a sysfs switch to set the power-state of the -+ discrete GPU found on the Microsoft Surface Book 1. -+ - config SURFACE_DTX - tristate "Surface DTX (Detachment System) Driver" - depends on SURFACE_AGGREGATOR -diff --git a/drivers/platform/surface/Makefile b/drivers/platform/surface/Makefile -index 53344330939b..7efcd0cdb532 100644 ---- a/drivers/platform/surface/Makefile -+++ b/drivers/platform/surface/Makefile -@@ -12,6 +12,7 @@ obj-$(CONFIG_SURFACE_AGGREGATOR_CDEV) += surface_aggregator_cdev.o - obj-$(CONFIG_SURFACE_AGGREGATOR_HUB) += surface_aggregator_hub.o - obj-$(CONFIG_SURFACE_AGGREGATOR_REGISTRY) += surface_aggregator_registry.o - obj-$(CONFIG_SURFACE_AGGREGATOR_TABLET_SWITCH) += surface_aggregator_tabletsw.o -+obj-$(CONFIG_SURFACE_BOOK1_DGPU_SWITCH) += surfacebook1_dgpu_switch.o - obj-$(CONFIG_SURFACE_DTX) += surface_dtx.o - obj-$(CONFIG_SURFACE_GPE) += surface_gpe.o - obj-$(CONFIG_SURFACE_HOTPLUG) += surface_hotplug.o -diff --git a/drivers/platform/surface/surface3-wmi.c b/drivers/platform/surface/surface3-wmi.c -index ca4602bcc7de..490b9731068a 100644 ---- a/drivers/platform/surface/surface3-wmi.c -+++ b/drivers/platform/surface/surface3-wmi.c -@@ -37,6 +37,13 @@ static const struct dmi_system_id surface3_dmi_table[] = { - DMI_MATCH(DMI_PRODUCT_NAME, "Surface 3"), - }, - }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), -+ DMI_MATCH(DMI_SYS_VENDOR, "OEMB"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "OEMB"), -+ }, -+ }, - #endif - { } - }; -diff --git a/drivers/platform/surface/surface_gpe.c b/drivers/platform/surface/surface_gpe.c -index c219b840d491..69c4352e8406 100644 ---- a/drivers/platform/surface/surface_gpe.c -+++ b/drivers/platform/surface/surface_gpe.c -@@ -41,6 +41,11 @@ static const struct property_entry lid_device_props_l4F[] = { - {}, - }; - -+static const struct property_entry lid_device_props_l52[] 
= {
-+	PROPERTY_ENTRY_U32("gpe", 0x52),
-+	{},
-+};
-+
- static const struct property_entry lid_device_props_l57[] = {
- 	PROPERTY_ENTRY_U32("gpe", 0x57),
- 	{},
-@@ -107,6 +112,18 @@ static const struct dmi_system_id dmi_lid_device_table[] = {
- 		},
- 		.driver_data = (void *)lid_device_props_l4B,
- 	},
-+	{
-+		/*
-+		 * We match for SKU here due to product name clash with the ARM
-+		 * version.
-+		 */
-+		.ident = "Surface Pro 9",
-+		.matches = {
-+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
-+			DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "Surface_Pro_9_2038"),
-+		},
-+		.driver_data = (void *)lid_device_props_l52,
-+	},
- 	{
- 		.ident = "Surface Book 1",
- 		.matches = {
-diff --git a/drivers/platform/surface/surfacebook1_dgpu_switch.c b/drivers/platform/surface/surfacebook1_dgpu_switch.c
-new file mode 100644
-index 000000000000..8b816ed8f35c
---- /dev/null
-+++ b/drivers/platform/surface/surfacebook1_dgpu_switch.c
-@@ -0,0 +1,162 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+
-+#include <linux/acpi.h>
-+#include <linux/kernel.h>
-+#include <linux/module.h>
-+#include <linux/platform_device.h>
-+
-+
-+#ifdef pr_fmt
-+#undef pr_fmt
-+#endif
-+#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
-+
-+
-+static const guid_t dgpu_sw_guid = GUID_INIT(0x6fd05c69, 0xcde3, 0x49f4,
-+	0x95, 0xed, 0xab, 0x16, 0x65, 0x49, 0x80, 0x35);
-+
-+#define DGPUSW_ACPI_PATH_DSM	"\\_SB_.PCI0.LPCB.EC0_.VGBI"
-+#define DGPUSW_ACPI_PATH_HGON	"\\_SB_.PCI0.RP05.HGON"
-+#define DGPUSW_ACPI_PATH_HGOF	"\\_SB_.PCI0.RP05.HGOF"
-+
-+
-+static int sb1_dgpu_sw_dsmcall(void)
-+{
-+	union acpi_object *ret;
-+	acpi_handle handle;
-+	acpi_status status;
-+
-+	status = acpi_get_handle(NULL, DGPUSW_ACPI_PATH_DSM, &handle);
-+	if (status)
-+		return -EINVAL;
-+
-+	ret = acpi_evaluate_dsm_typed(handle, &dgpu_sw_guid, 1, 1, NULL, ACPI_TYPE_BUFFER);
-+	if (!ret)
-+		return -EINVAL;
-+
-+	ACPI_FREE(ret);
-+	return 0;
-+}
-+
-+static int sb1_dgpu_sw_hgon(void)
-+{
-+	struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL};
-+	acpi_status status;
-+
-+	status = acpi_evaluate_object(NULL, DGPUSW_ACPI_PATH_HGON, NULL, &buf);
-+	if (status) {
-+		pr_err("failed to run HGON: %d\n", status);
-+		return -EINVAL;
-+	}
-+
-+	if (buf.pointer)
-+		ACPI_FREE(buf.pointer);
-+
-+	pr_info("turned-on dGPU via HGON\n");
-+	return 0;
-+}
-+
-+static int sb1_dgpu_sw_hgof(void)
-+{
-+	struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL};
-+	acpi_status status;
-+
-+	status = acpi_evaluate_object(NULL, DGPUSW_ACPI_PATH_HGOF, NULL, &buf);
-+	if (status) {
-+		pr_err("failed to run HGOF: %d\n", status);
-+		return -EINVAL;
-+	}
-+
-+	if (buf.pointer)
-+		ACPI_FREE(buf.pointer);
-+
-+	pr_info("turned-off dGPU via HGOF\n");
-+	return 0;
-+}
-+
-+
-+static ssize_t dgpu_dsmcall_store(struct device *dev, struct device_attribute *attr,
-+				  const char *buf, size_t len)
-+{
-+	int status, value;
-+
-+	status = kstrtoint(buf, 0, &value);
-+	if (status < 0)
-+		return status;
-+
-+	if (value != 1)
-+		return -EINVAL;
-+
-+	status = sb1_dgpu_sw_dsmcall();
-+
-+	return status < 0 ? status : len;
-+}
-+
-+static ssize_t dgpu_power_store(struct device *dev, struct device_attribute *attr,
-+				const char *buf, size_t len)
-+{
-+	bool power;
-+	int status;
-+
-+	status = kstrtobool(buf, &power);
-+	if (status < 0)
-+		return status;
-+
-+	if (power)
-+		status = sb1_dgpu_sw_hgon();
-+	else
-+		status = sb1_dgpu_sw_hgof();
-+
-+	return status < 0 ? 
status : len;
-+}
-+
-+static DEVICE_ATTR_WO(dgpu_dsmcall);
-+static DEVICE_ATTR_WO(dgpu_power);
-+
-+static struct attribute *sb1_dgpu_sw_attrs[] = {
-+	&dev_attr_dgpu_dsmcall.attr,
-+	&dev_attr_dgpu_power.attr,
-+	NULL,
-+};
-+
-+static const struct attribute_group sb1_dgpu_sw_attr_group = {
-+	.attrs = sb1_dgpu_sw_attrs,
-+};
-+
-+
-+static int sb1_dgpu_sw_probe(struct platform_device *pdev)
-+{
-+	return sysfs_create_group(&pdev->dev.kobj, &sb1_dgpu_sw_attr_group);
-+}
-+
-+static int sb1_dgpu_sw_remove(struct platform_device *pdev)
-+{
-+	sysfs_remove_group(&pdev->dev.kobj, &sb1_dgpu_sw_attr_group);
-+	return 0;
-+}
-+
-+/*
-+ * The dGPU power seems to be actually handled by MSHW0040. However, that is
-+ * also the power-/volume-button device with a mainline driver. So let's use
-+ * MSHW0041 instead for now, which seems to be the LTCH (latch/DTX) device.
-+ */
-+static const struct acpi_device_id sb1_dgpu_sw_match[] = {
-+	{ "MSHW0041", },
-+	{ },
-+};
-+MODULE_DEVICE_TABLE(acpi, sb1_dgpu_sw_match);
-+
-+static struct platform_driver sb1_dgpu_sw = {
-+	.probe = sb1_dgpu_sw_probe,
-+	.remove = sb1_dgpu_sw_remove,
-+	.driver = {
-+		.name = "surfacebook1_dgpu_switch",
-+		.acpi_match_table = sb1_dgpu_sw_match,
-+		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
-+	},
-+};
-+module_platform_driver(sb1_dgpu_sw);
-+
-+MODULE_AUTHOR("Maximilian Luz <luzmaximilian@gmail.com>");
-+MODULE_DESCRIPTION("Discrete GPU Power-Switch for Surface Book 1");
-+MODULE_LICENSE("GPL");
-diff --git a/drivers/platform/surface/surfacepro3_button.c b/drivers/platform/surface/surfacepro3_button.c
-index 2755601f979c..4240c98ca226 100644
---- a/drivers/platform/surface/surfacepro3_button.c
-+++ b/drivers/platform/surface/surfacepro3_button.c
-@@ -149,7 +149,8 @@ static int surface_button_resume(struct device *dev)
- /*
-  * Surface Pro 4 and Surface Book 2 / Surface Pro 2017 use the same device
-  * ID (MSHW0040) for the power/volume buttons. Make sure this is the right
-- * device by checking for the _DSM method and OEM Platform Revision.
-+ * device by checking for the _DSM method and OEM Platform Revision DSM
-+ * function.
-  *
-  * Returns true if the driver should bind to this device, i.e. the device is
-  * either MSWH0028 (Pro 3) or MSHW0040 on a Pro 4 or Book 1.
-@@ -157,30 +158,11 @@ static int surface_button_resume(struct device *dev)
- static bool surface_button_check_MSHW0040(struct acpi_device *dev)
- {
- 	acpi_handle handle = dev->handle;
--	union acpi_object *result;
--	u64 oem_platform_rev = 0;	// valid revisions are nonzero
--
--	// get OEM platform revision
--	result = acpi_evaluate_dsm_typed(handle, &MSHW0040_DSM_UUID,
--					 MSHW0040_DSM_REVISION,
--					 MSHW0040_DSM_GET_OMPR,
--					 NULL, ACPI_TYPE_INTEGER);
--
--	/*
--	 * If evaluating the _DSM fails, the method is not present. This means
--	 * that we have either MSHW0028 or MSHW0040 on Pro 4 or Book 1, so we
--	 * should use this driver. We use revision 0 indicating it is
--	 * unavailable. 
-- */ -- -- if (result) { -- oem_platform_rev = result->integer.value; -- ACPI_FREE(result); -- } -- -- dev_dbg(&dev->dev, "OEM Platform Revision %llu\n", oem_platform_rev); - -- return oem_platform_rev == 0; -+ // make sure that OEM platform revision DSM call does not exist -+ return !acpi_check_dsm(handle, &MSHW0040_DSM_UUID, -+ MSHW0040_DSM_REVISION, -+ BIT(MSHW0040_DSM_GET_OMPR)); - } - - -diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c -index 934b3d997702..2c6604c6e8e1 100644 ---- a/drivers/usb/core/quirks.c -+++ b/drivers/usb/core/quirks.c -@@ -220,6 +220,9 @@ static const struct usb_device_id usb_quirk_list[] = { - /* Microsoft Surface Dock Ethernet (RTL8153 GigE) */ - { USB_DEVICE(0x045e, 0x07c6), .driver_info = USB_QUIRK_NO_LPM }, - -+ /* Microsoft Surface Go 3 Type-Cover */ -+ { USB_DEVICE(0x045e, 0x09b5), .driver_info = USB_QUIRK_DELAY_INIT }, -+ - /* Cherry Stream G230 2.0 (G85-231) and 3.0 (G85-232) */ - { USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME }, - -diff --git a/include/linux/pci.h b/include/linux/pci.h -index a5dda515fcd1..69f6fc707ae5 100644 ---- a/include/linux/pci.h -+++ b/include/linux/pci.h -@@ -464,6 +464,7 @@ struct pci_dev { - unsigned int no_vf_scan:1; /* Don't scan for VFs after IOV enablement */ - unsigned int no_command_memory:1; /* No PCI_COMMAND_MEMORY */ - unsigned int rom_bar_overlap:1; /* ROM BAR disable broken */ -+ unsigned int no_shutdown:1; /* Do not touch device on shutdown */ - pci_dev_flags_t dev_flags; - atomic_t enable_cnt; /* pci_enable_device has been called */ - -diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c -index 7c7cbb6362ea..81a8ff40e86e 100644 ---- a/sound/soc/codecs/rt5645.c -+++ b/sound/soc/codecs/rt5645.c -@@ -3717,6 +3717,15 @@ static const struct dmi_system_id dmi_platform_data[] = { - }, - .driver_data = (void *)&intel_braswell_platform_data, - }, -+ { -+ .ident = "Microsoft Surface 3", -+ .matches = { -+ DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), -+ DMI_MATCH(DMI_SYS_VENDOR, "OEMB"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "OEMB"), -+ }, -+ .driver_data = (void *)&intel_braswell_platform_data, -+ }, - { - /* - * Match for the GPDwin which unfortunately uses somewhat -diff --git a/sound/soc/intel/common/soc-acpi-intel-cht-match.c b/sound/soc/intel/common/soc-acpi-intel-cht-match.c -index 6beb00858c33..d82d77387a0a 100644 ---- a/sound/soc/intel/common/soc-acpi-intel-cht-match.c -+++ b/sound/soc/intel/common/soc-acpi-intel-cht-match.c -@@ -27,6 +27,14 @@ static const struct dmi_system_id cht_table[] = { - DMI_MATCH(DMI_PRODUCT_NAME, "Surface 3"), - }, - }, -+ { -+ .callback = cht_surface_quirk_cb, -+ .matches = { -+ DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), -+ DMI_MATCH(DMI_SYS_VENDOR, "OEMB"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "OEMB"), -+ }, -+ }, - { } - }; - --- -2.40.0 - -From e21ef74a910e423fa4ecdfa291a5022e8002fbaf Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Sat, 22 Apr 2023 11:46:32 +0200 -Subject: [PATCH 11/12] zram - -Signed-off-by: Peter Jung ---- - drivers/block/zram/zram_drv.c | 381 ++++++++++++---------------------- - drivers/block/zram/zram_drv.h | 1 - - 2 files changed, 136 insertions(+), 246 deletions(-) - -diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c -index aa490da3cef2..f1e51eb2aba1 100644 ---- a/drivers/block/zram/zram_drv.c -+++ b/drivers/block/zram/zram_drv.c -@@ -54,9 +54,8 @@ static size_t huge_class_size; - static const struct block_device_operations zram_devops; - - static void 
zram_free_page(struct zram *zram, size_t index); --static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, -- u32 index, int offset, struct bio *bio); -- -+static int zram_read_page(struct zram *zram, struct page *page, u32 index, -+ struct bio *parent); - - static int zram_slot_trylock(struct zram *zram, u32 index) - { -@@ -174,36 +173,6 @@ static inline u32 zram_get_priority(struct zram *zram, u32 index) - return prio & ZRAM_COMP_PRIORITY_MASK; - } - --/* -- * Check if request is within bounds and aligned on zram logical blocks. -- */ --static inline bool valid_io_request(struct zram *zram, -- sector_t start, unsigned int size) --{ -- u64 end, bound; -- -- /* unaligned request */ -- if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) -- return false; -- if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) -- return false; -- -- end = start + (size >> SECTOR_SHIFT); -- bound = zram->disksize >> SECTOR_SHIFT; -- /* out of range */ -- if (unlikely(start >= bound || end > bound || start > end)) -- return false; -- -- /* I/O request is valid */ -- return true; --} -- --static void update_position(u32 *index, int *offset, struct bio_vec *bvec) --{ -- *index += (*offset + bvec->bv_len) / PAGE_SIZE; -- *offset = (*offset + bvec->bv_len) % PAGE_SIZE; --} -- - static inline void update_used_max(struct zram *zram, - const unsigned long pages) - { -@@ -606,41 +575,16 @@ static void free_block_bdev(struct zram *zram, unsigned long blk_idx) - atomic64_dec(&zram->stats.bd_count); - } - --static void zram_page_end_io(struct bio *bio) --{ -- struct page *page = bio_first_page_all(bio); -- -- page_endio(page, op_is_write(bio_op(bio)), -- blk_status_to_errno(bio->bi_status)); -- bio_put(bio); --} -- --/* -- * Returns 1 if the submission is successful. -- */ --static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, -+static void read_from_bdev_async(struct zram *zram, struct page *page, - unsigned long entry, struct bio *parent) - { - struct bio *bio; - -- bio = bio_alloc(zram->bdev, 1, parent ? 
parent->bi_opf : REQ_OP_READ, -- GFP_NOIO); -- if (!bio) -- return -ENOMEM; -- -+ bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO); - bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); -- if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) { -- bio_put(bio); -- return -EIO; -- } -- -- if (!parent) -- bio->bi_end_io = zram_page_end_io; -- else -- bio_chain(bio, parent); -- -+ __bio_add_page(bio, page, PAGE_SIZE, 0); -+ bio_chain(bio, parent); - submit_bio(bio); -- return 1; - } - - #define PAGE_WB_SIG "page_index=" -@@ -701,10 +645,6 @@ static ssize_t writeback_store(struct device *dev, - } - - for (; nr_pages != 0; index++, nr_pages--) { -- struct bio_vec bvec; -- -- bvec_set_page(&bvec, page, PAGE_SIZE, 0); -- - spin_lock(&zram->wb_limit_lock); - if (zram->wb_limit_enable && !zram->bd_wb_limit) { - spin_unlock(&zram->wb_limit_lock); -@@ -748,7 +688,7 @@ static ssize_t writeback_store(struct device *dev, - /* Need for hugepage writeback racing */ - zram_set_flag(zram, index, ZRAM_IDLE); - zram_slot_unlock(zram, index); -- if (zram_bvec_read(zram, &bvec, index, 0, NULL)) { -+ if (zram_read_page(zram, page, index, NULL)) { - zram_slot_lock(zram, index); - zram_clear_flag(zram, index, ZRAM_UNDER_WB); - zram_clear_flag(zram, index, ZRAM_IDLE); -@@ -759,9 +699,8 @@ static ssize_t writeback_store(struct device *dev, - bio_init(&bio, zram->bdev, &bio_vec, 1, - REQ_OP_WRITE | REQ_SYNC); - bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); -+ bio_add_page(&bio, page, PAGE_SIZE, 0); - -- bio_add_page(&bio, bvec.bv_page, bvec.bv_len, -- bvec.bv_offset); - /* - * XXX: A single page IO would be inefficient for write - * but it would be not bad as starter. -@@ -829,19 +768,20 @@ struct zram_work { - struct work_struct work; - struct zram *zram; - unsigned long entry; -- struct bio *bio; -- struct bio_vec bvec; -+ struct page *page; -+ int error; - }; - --#if PAGE_SIZE != 4096 - static void zram_sync_read(struct work_struct *work) - { - struct zram_work *zw = container_of(work, struct zram_work, work); -- struct zram *zram = zw->zram; -- unsigned long entry = zw->entry; -- struct bio *bio = zw->bio; -+ struct bio_vec bv; -+ struct bio bio; - -- read_from_bdev_async(zram, &zw->bvec, entry, bio); -+ bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ); -+ bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9); -+ __bio_add_page(&bio, zw->page, PAGE_SIZE, 0); -+ zw->error = submit_bio_wait(&bio); - } - - /* -@@ -849,45 +789,39 @@ static void zram_sync_read(struct work_struct *work) - * chained IO with parent IO in same context, it's a deadlock. To avoid that, - * use a worker thread context. 
- */ --static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, -- unsigned long entry, struct bio *bio) -+static int read_from_bdev_sync(struct zram *zram, struct page *page, -+ unsigned long entry) - { - struct zram_work work; - -- work.bvec = *bvec; -+ if (WARN_ON_ONCE(PAGE_SIZE != 4096)) -+ return -EIO; -+ -+ work.page = page; - work.zram = zram; - work.entry = entry; -- work.bio = bio; - - INIT_WORK_ONSTACK(&work.work, zram_sync_read); - queue_work(system_unbound_wq, &work.work); - flush_work(&work.work); - destroy_work_on_stack(&work.work); - -- return 1; --} --#else --static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, -- unsigned long entry, struct bio *bio) --{ -- WARN_ON(1); -- return -EIO; -+ return work.error; - } --#endif - --static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, -- unsigned long entry, struct bio *parent, bool sync) -+static int read_from_bdev(struct zram *zram, struct page *page, -+ unsigned long entry, struct bio *parent) - { - atomic64_inc(&zram->stats.bd_reads); -- if (sync) -- return read_from_bdev_sync(zram, bvec, entry, parent); -- else -- return read_from_bdev_async(zram, bvec, entry, parent); -+ if (!parent) -+ return read_from_bdev_sync(zram, page, entry); -+ read_from_bdev_async(zram, page, entry, parent); -+ return 0; - } - #else - static inline void reset_bdev(struct zram *zram) {}; --static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, -- unsigned long entry, struct bio *parent, bool sync) -+static int read_from_bdev(struct zram *zram, struct page *page, -+ unsigned long entry, struct bio *parent) - { - return -EIO; - } -@@ -1190,10 +1124,9 @@ static ssize_t io_stat_show(struct device *dev, - - down_read(&zram->init_lock); - ret = scnprintf(buf, PAGE_SIZE, -- "%8llu %8llu %8llu %8llu\n", -+ "%8llu %8llu 0 %8llu\n", - (u64)atomic64_read(&zram->stats.failed_reads), - (u64)atomic64_read(&zram->stats.failed_writes), -- (u64)atomic64_read(&zram->stats.invalid_io), - (u64)atomic64_read(&zram->stats.notify_free)); - up_read(&zram->init_lock); - -@@ -1371,20 +1304,6 @@ static void zram_free_page(struct zram *zram, size_t index) - ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB)); - } - --/* -- * Reads a page from the writeback devices. Corresponding ZRAM slot -- * should be unlocked. -- */ --static int zram_bvec_read_from_bdev(struct zram *zram, struct page *page, -- u32 index, struct bio *bio, bool partial_io) --{ -- struct bio_vec bvec; -- -- bvec_set_page(&bvec, page, PAGE_SIZE, 0); -- return read_from_bdev(zram, &bvec, zram_get_element(zram, index), bio, -- partial_io); --} -- - /* - * Reads (decompresses if needed) a page from zspool (zsmalloc). - * Corresponding ZRAM slot should be locked. -@@ -1434,8 +1353,8 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page, - return ret; - } - --static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, -- struct bio *bio, bool partial_io) -+static int zram_read_page(struct zram *zram, struct page *page, u32 index, -+ struct bio *parent) - { - int ret; - -@@ -1445,11 +1364,14 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, - ret = zram_read_from_zspool(zram, page, index); - zram_slot_unlock(zram, index); - } else { -- /* Slot should be unlocked before the function call */ -+ /* -+ * The slot should be unlocked before reading from the backing -+ * device. 
-+ */ - zram_slot_unlock(zram, index); - -- ret = zram_bvec_read_from_bdev(zram, page, index, bio, -- partial_io); -+ ret = read_from_bdev(zram, page, zram_get_element(zram, index), -+ parent); - } - - /* Should NEVER happen. Return bio error if it does. */ -@@ -1459,39 +1381,34 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, - return ret; - } - --static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, -- u32 index, int offset, struct bio *bio) -+/* -+ * Use a temporary buffer to decompress the page, as the decompressor -+ * always expects a full page for the output. -+ */ -+static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec, -+ u32 index, int offset) - { -+ struct page *page = alloc_page(GFP_NOIO); - int ret; -- struct page *page; - -- page = bvec->bv_page; -- if (is_partial_io(bvec)) { -- /* Use a temporary buffer to decompress the page */ -- page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); -- if (!page) -- return -ENOMEM; -- } -- -- ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec)); -- if (unlikely(ret)) -- goto out; -- -- if (is_partial_io(bvec)) { -- void *src = kmap_atomic(page); -+ if (!page) -+ return -ENOMEM; -+ ret = zram_read_page(zram, page, index, NULL); -+ if (likely(!ret)) -+ memcpy_to_bvec(bvec, page_address(page) + offset); -+ __free_page(page); -+ return ret; -+} - -- memcpy_to_bvec(bvec, src + offset); -- kunmap_atomic(src); -- } --out: -+static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, -+ u32 index, int offset, struct bio *bio) -+{ - if (is_partial_io(bvec)) -- __free_page(page); -- -- return ret; -+ return zram_bvec_read_partial(zram, bvec, index, offset); -+ return zram_read_page(zram, bvec->bv_page, index, bio); - } - --static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, -- u32 index, struct bio *bio) -+static int zram_write_page(struct zram *zram, struct page *page, u32 index) - { - int ret = 0; - unsigned long alloced_pages; -@@ -1499,7 +1416,6 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, - unsigned int comp_len = 0; - void *src, *dst, *mem; - struct zcomp_strm *zstrm; -- struct page *page = bvec->bv_page; - unsigned long element = 0; - enum zram_pageflags flags = 0; - -@@ -1617,40 +1533,33 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, - return ret; - } - --static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, -- u32 index, int offset, struct bio *bio) -+/* -+ * This is a partial IO. Read the full page before writing the changes. -+ */ -+static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec, -+ u32 index, int offset, struct bio *bio) - { -+ struct page *page = alloc_page(GFP_NOIO); - int ret; -- struct page *page = NULL; -- struct bio_vec vec; -- -- vec = *bvec; -- if (is_partial_io(bvec)) { -- void *dst; -- /* -- * This is a partial IO. We need to read the full page -- * before to write the changes. 
-- */ -- page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); -- if (!page) -- return -ENOMEM; -- -- ret = __zram_bvec_read(zram, page, index, bio, true); -- if (ret) -- goto out; - -- dst = kmap_atomic(page); -- memcpy_from_bvec(dst + offset, bvec); -- kunmap_atomic(dst); -+ if (!page) -+ return -ENOMEM; - -- bvec_set_page(&vec, page, PAGE_SIZE, 0); -+ ret = zram_read_page(zram, page, index, bio); -+ if (!ret) { -+ memcpy_from_bvec(page_address(page) + offset, bvec); -+ ret = zram_write_page(zram, page, index); - } -+ __free_page(page); -+ return ret; -+} - -- ret = __zram_bvec_write(zram, &vec, index, bio); --out: -+static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, -+ u32 index, int offset, struct bio *bio) -+{ - if (is_partial_io(bvec)) -- __free_page(page); -- return ret; -+ return zram_bvec_write_partial(zram, bvec, index, offset, bio); -+ return zram_write_page(zram, bvec->bv_page, index); - } - - #ifdef CONFIG_ZRAM_MULTI_COMP -@@ -1761,7 +1670,7 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page, - - /* - * No direct reclaim (slow path) for handle allocation and no -- * re-compression attempt (unlike in __zram_bvec_write()) since -+ * re-compression attempt (unlike in zram_write_bvec()) since - * we already have stored that object in zsmalloc. If we cannot - * alloc memory for recompressed object then we bail out and - * simply keep the old (existing) object in zsmalloc. -@@ -1921,15 +1830,12 @@ static ssize_t recompress_store(struct device *dev, - } - #endif - --/* -- * zram_bio_discard - handler on discard request -- * @index: physical block index in PAGE_SIZE units -- * @offset: byte offset within physical block -- */ --static void zram_bio_discard(struct zram *zram, u32 index, -- int offset, struct bio *bio) -+static void zram_bio_discard(struct zram *zram, struct bio *bio) - { - size_t n = bio->bi_iter.bi_size; -+ u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; -+ u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) << -+ SECTOR_SHIFT; - - /* - * zram manages data in physical block size units. Because logical block -@@ -1957,80 +1863,58 @@ static void zram_bio_discard(struct zram *zram, u32 index, - index++; - n -= PAGE_SIZE; - } -+ -+ bio_endio(bio); - } - --/* -- * Returns errno if it has some problem. Otherwise return 0 or 1. -- * Returns 0 if IO request was done synchronously -- * Returns 1 if IO request was successfully submitted. 
-- */ --static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, -- int offset, enum req_op op, struct bio *bio) -+static void zram_bio_read(struct zram *zram, struct bio *bio) - { -- int ret; -- -- if (!op_is_write(op)) { -- ret = zram_bvec_read(zram, bvec, index, offset, bio); -- flush_dcache_page(bvec->bv_page); -- } else { -- ret = zram_bvec_write(zram, bvec, index, offset, bio); -- } -+ struct bvec_iter iter; -+ struct bio_vec bv; -+ unsigned long start_time; - -- zram_slot_lock(zram, index); -- zram_accessed(zram, index); -- zram_slot_unlock(zram, index); -+ start_time = bio_start_io_acct(bio); -+ bio_for_each_segment(bv, bio, iter) { -+ u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; -+ u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << -+ SECTOR_SHIFT; - -- if (unlikely(ret < 0)) { -- if (!op_is_write(op)) -+ if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) { - atomic64_inc(&zram->stats.failed_reads); -- else -- atomic64_inc(&zram->stats.failed_writes); -- } -+ bio->bi_status = BLK_STS_IOERR; -+ break; -+ } -+ flush_dcache_page(bv.bv_page); - -- return ret; -+ zram_slot_lock(zram, index); -+ zram_accessed(zram, index); -+ zram_slot_unlock(zram, index); -+ } -+ bio_end_io_acct(bio, start_time); -+ bio_endio(bio); - } - --static void __zram_make_request(struct zram *zram, struct bio *bio) -+static void zram_bio_write(struct zram *zram, struct bio *bio) - { -- int offset; -- u32 index; -- struct bio_vec bvec; - struct bvec_iter iter; -+ struct bio_vec bv; - unsigned long start_time; - -- index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; -- offset = (bio->bi_iter.bi_sector & -- (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; -- -- switch (bio_op(bio)) { -- case REQ_OP_DISCARD: -- case REQ_OP_WRITE_ZEROES: -- zram_bio_discard(zram, index, offset, bio); -- bio_endio(bio); -- return; -- default: -- break; -- } -- - start_time = bio_start_io_acct(bio); -- bio_for_each_segment(bvec, bio, iter) { -- struct bio_vec bv = bvec; -- unsigned int unwritten = bvec.bv_len; -- -- do { -- bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, -- unwritten); -- if (zram_bvec_rw(zram, &bv, index, offset, -- bio_op(bio), bio) < 0) { -- bio->bi_status = BLK_STS_IOERR; -- break; -- } -+ bio_for_each_segment(bv, bio, iter) { -+ u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; -+ u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << -+ SECTOR_SHIFT; - -- bv.bv_offset += bv.bv_len; -- unwritten -= bv.bv_len; -+ if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) { -+ atomic64_inc(&zram->stats.failed_writes); -+ bio->bi_status = BLK_STS_IOERR; -+ break; -+ } - -- update_position(&index, &offset, &bv); -- } while (unwritten); -+ zram_slot_lock(zram, index); -+ zram_accessed(zram, index); -+ zram_slot_unlock(zram, index); - } - bio_end_io_acct(bio, start_time); - bio_endio(bio); -@@ -2043,14 +1927,21 @@ static void zram_submit_bio(struct bio *bio) - { - struct zram *zram = bio->bi_bdev->bd_disk->private_data; - -- if (!valid_io_request(zram, bio->bi_iter.bi_sector, -- bio->bi_iter.bi_size)) { -- atomic64_inc(&zram->stats.invalid_io); -- bio_io_error(bio); -- return; -+ switch (bio_op(bio)) { -+ case REQ_OP_READ: -+ zram_bio_read(zram, bio); -+ break; -+ case REQ_OP_WRITE: -+ zram_bio_write(zram, bio); -+ break; -+ case REQ_OP_DISCARD: -+ case REQ_OP_WRITE_ZEROES: -+ zram_bio_discard(zram, bio); -+ break; -+ default: -+ WARN_ON_ONCE(1); -+ bio_endio(bio); - } -- -- __zram_make_request(zram, bio); - } - - static void zram_slot_free_notify(struct block_device *bdev, 
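
The removed zram hunks above delete update_position() and the old bvec-splitting loop; the reworked zram_bio_read()/zram_bio_write() instead derive the target page index and the byte offset inside that page directly from each bio segment's start sector. A minimal userspace sketch of that arithmetic follows — the constants and main() are illustrative assumptions for 4 KiB pages and 512-byte sectors, not code from the patch:

#include <stdio.h>

/* Stand-ins for the kernel's SECTOR_SHIFT / SECTORS_PER_PAGE_* constants,
 * assuming a 4 KiB page and 512-byte sectors. */
#define SECTOR_SHIFT		9
#define PAGE_SHIFT		12
#define SECTORS_PER_PAGE_SHIFT	(PAGE_SHIFT - SECTOR_SHIFT)
#define SECTORS_PER_PAGE	(1u << SECTORS_PER_PAGE_SHIFT)

int main(void)
{
	unsigned long long sectors[] = { 0, 7, 8, 9, 4096 };
	unsigned i;

	for (i = 0; i < sizeof(sectors) / sizeof(sectors[0]); i++) {
		/* Same mapping as zram_bio_read()/zram_bio_write(): a segment
		 * starting at this sector lands in page 'index' at 'offset'. */
		unsigned index  = sectors[i] >> SECTORS_PER_PAGE_SHIFT;
		unsigned offset = (sectors[i] & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

		printf("sector %5llu -> page index %4u, byte offset %4u\n",
		       sectors[i], index, offset);
	}
	return 0;
}

Segments that do not cover a whole page (possible on configurations where PAGE_SIZE exceeds zram's 4 KiB logical block size) are bounced through a temporary page instead; see the sketch at the end of the file.
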
-diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h -index c5254626f051..ca7a15bd4845 100644 ---- a/drivers/block/zram/zram_drv.h -+++ b/drivers/block/zram/zram_drv.h -@@ -78,7 +78,6 @@ struct zram_stats { - atomic64_t compr_data_size; /* compressed size of pages stored */ - atomic64_t failed_reads; /* can happen when memory is too low */ - atomic64_t failed_writes; /* can happen when memory is too low */ -- atomic64_t invalid_io; /* non-page-aligned I/O requests */ - atomic64_t notify_free; /* no. of swap slot free notifications */ - atomic64_t same_pages; /* no. of same element filled pages */ - atomic64_t huge_pages; /* no. of huge pages */ --- -2.40.0 - -From 41a5cc0ad4fbe3706a6829e152b63303684f55fd Mon Sep 17 00:00:00 2001 +From 27d4dbfc6971caf5627a8248adef49f8d15340b4 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Sat, 22 Apr 2023 11:46:46 +0200 -Subject: [PATCH 12/12] zstd: import 1.5.5 +Subject: [PATCH 8/8] zstd: import 1.5.5 Signed-off-by: Peter Jung --- @@ -38359,4 +24921,4 @@ index f4ed952ed485..7d31518e9d5a 100644 EXPORT_SYMBOL(zstd_reset_dstream); -- -2.40.0 +2.40.1
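
The partial-I/O helpers in the zram rework above (zram_bvec_read_partial()/zram_bvec_write_partial()) exist because both the decompressor and the backing device operate on whole pages only. Below is a self-contained userspace sketch of the same read-modify-write shape; read_page(), write_page(), and the flat store[] array are hypothetical stand-ins for zram_read_page(), zram_write_page(), and zsmalloc, not the driver's actual code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096

/* Hypothetical one-page backend; the real driver would (de)compress here. */
static unsigned char store[PAGE_SIZE];

static int read_page(unsigned char *page)
{
	memcpy(page, store, PAGE_SIZE);
	return 0;
}

static int write_page(const unsigned char *page)
{
	memcpy(store, page, PAGE_SIZE);
	return 0;
}

/* Sub-page write via a bounce page, shaped like zram_bvec_write_partial():
 * read the whole page, patch the requested span, write the page back. */
static int write_partial(const void *buf, size_t offset, size_t len)
{
	unsigned char *page;
	int ret;

	if (offset + len > PAGE_SIZE)
		return -1;
	page = malloc(PAGE_SIZE);
	if (!page)
		return -1;	/* mirrors the driver's -ENOMEM case */

	ret = read_page(page);
	if (!ret) {
		memcpy(page + offset, buf, len);
		ret = write_page(page);
	}
	free(page);
	return ret;
}

int main(void)
{
	if (write_partial("hello", 512, 5) == 0)
		printf("%.5s\n", (const char *)store + 512);	/* hello */
	return 0;
}

One design point the sketch cannot capture: the driver allocates the bounce page with GFP_NOIO so the allocation itself cannot recurse into block I/O; plain malloc() carries no such guarantee.
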