From 727b5761549f342c6ccfc0685319aa29a9a460f6 Mon Sep 17 00:00:00 2001 From: Ward from fusion-voyager-3 Date: Wed, 24 Jul 2024 05:09:14 +0300 Subject: [PATCH] initial commit --- .github/ build-canary-v3 | 1 + .github/build-nest-v3 | 1 + .github/release-canary-v3 | 1 + .github/release-nest-v3 | 1 + .github/workflows/build-canaryv3.yml | 34 + .github/workflows/build-nestv3.yml | 34 + .github/workflows/build.yml | 24 - .github/workflows/release-canaryv3.yml | 37 + .github/workflows/release-nestv3.yml | 37 + .github/workflows/release.yml | 37 - VERSION | 2 +- main.sh | 7 - mainv3.sh | 12 + output/key.gpg | 30 - ...add-support-for-2024-ROG-Mini-.patch.patch | 151 - ...s-wmi-add-support-for-Vivobook-GPU-M.patch | 100 - ...s-wmi-add-support-variant-of-TUF-RGB.patch | 74 - ...asus-wmi-support-toggling-POST-sound.patch | 139 - ...s-wmi-store-a-min-default-for-ppt-op.patch | 342 - patches/cachyos/0001-bore-cachy.patch | 929 - patches/cachyos/0001-cachyos-base-all.patch | 53760 ---------------- patches/cachyos/0003-nvidia.patch | 761 - ...tom-USB-pollrate-for-specific-device.patch | 258 - ...-REBAR-size-quirk-for-Sapphire-RX-56.patch | 34 - ...drop-redundant-pci_enable_pcie_error.patch | 108 - ....ppfeaturemask-0xffffffff-as-default.patch | 25 - .../0001-acpi-proc-idle-skip-dummy-wait.patch | 125 - patches/nobara/0001-add-acpi_call.patch | 506 - patches/nobara/0001-amd-hdr.patch | 2042 - ...disable-async-flipping-on-specific-d.patch | 48 - .../0001-hid-asus-nero-patches-rogue.patch | 972 - ...nel-parameter-to-disable-async-page-.patch | 54 - patches/nobara/OpenRGB.patch | 703 - patches/nobara/amdgpu-si-cik-default.patch | 70 - patches/nobara/lenovo-legion-laptop.patch | 6143 -- patches/nobara/linux-surface.patch | 9117 --- ...isable-powersave-features-by-default.patch | 42 - .../nobara/set-ps4-bt-poll-rate-1000hz.patch | 27 - patches/nobara/steam-deck.patch | 2497 - patches/nobara/uinput.patch | 133 - patches/series | 15 - release.sh | 2 +- {scripts => scripts-v3}/build.sh | 
0 config => scripts-v3/config | 0 scripts-v3/config.sh | 49 + {scripts => scripts-v3}/output.sh | 0 scripts-v3/patch.sh | 8 + {scripts => scripts-v3}/source.sh | 0 scripts/config.sh | 51 - scripts/patch.sh | 5 - 50 files changed, 217 insertions(+), 79331 deletions(-) create mode 100644 .github/ build-canary-v3 create mode 100644 .github/build-nest-v3 create mode 100644 .github/release-canary-v3 create mode 100644 .github/release-nest-v3 create mode 100644 .github/workflows/build-canaryv3.yml create mode 100644 .github/workflows/build-nestv3.yml delete mode 100644 .github/workflows/build.yml create mode 100644 .github/workflows/release-canaryv3.yml create mode 100644 .github/workflows/release-nestv3.yml delete mode 100644 .github/workflows/release.yml delete mode 100755 main.sh create mode 100755 mainv3.sh delete mode 100644 output/key.gpg delete mode 100644 patches/asuslinux/0001-platform-x86-asus-wmi-add-support-for-2024-ROG-Mini-.patch.patch delete mode 100644 patches/asuslinux/0002-platform-x86-asus-wmi-add-support-for-Vivobook-GPU-M.patch delete mode 100644 patches/asuslinux/0003-platform-x86-asus-wmi-add-support-variant-of-TUF-RGB.patch delete mode 100644 patches/asuslinux/0004-platform-x86-asus-wmi-support-toggling-POST-sound.patch delete mode 100644 patches/asuslinux/0005-platform-x86-asus-wmi-store-a-min-default-for-ppt-op.patch delete mode 100644 patches/cachyos/0001-bore-cachy.patch delete mode 100644 patches/cachyos/0001-cachyos-base-all.patch delete mode 100644 patches/cachyos/0003-nvidia.patch delete mode 100644 patches/nobara/0001-Allow-to-set-custom-USB-pollrate-for-specific-device.patch delete mode 100644 patches/nobara/0001-Revert-PCI-Add-a-REBAR-size-quirk-for-Sapphire-RX-56.patch delete mode 100644 patches/nobara/0001-Revert-nvme-pci-drop-redundant-pci_enable_pcie_error.patch delete mode 100644 patches/nobara/0001-Set-amdgpu.ppfeaturemask-0xffffffff-as-default.patch delete mode 100644 patches/nobara/0001-acpi-proc-idle-skip-dummy-wait.patch 
delete mode 100644 patches/nobara/0001-add-acpi_call.patch delete mode 100644 patches/nobara/0001-amd-hdr.patch delete mode 100644 patches/nobara/0001-drm-i915-quirks-disable-async-flipping-on-specific-d.patch delete mode 100644 patches/nobara/0001-hid-asus-nero-patches-rogue.patch delete mode 100644 patches/nobara/0002-drm-i915-add-kernel-parameter-to-disable-async-page-.patch delete mode 100644 patches/nobara/OpenRGB.patch delete mode 100644 patches/nobara/amdgpu-si-cik-default.patch delete mode 100644 patches/nobara/lenovo-legion-laptop.patch delete mode 100644 patches/nobara/linux-surface.patch delete mode 100644 patches/nobara/mt76:-mt7921:-Disable-powersave-features-by-default.patch delete mode 100644 patches/nobara/set-ps4-bt-poll-rate-1000hz.patch delete mode 100644 patches/nobara/steam-deck.patch delete mode 100644 patches/nobara/uinput.patch delete mode 100644 patches/series rename {scripts => scripts-v3}/build.sh (100%) rename config => scripts-v3/config (100%) create mode 100755 scripts-v3/config.sh rename {scripts => scripts-v3}/output.sh (100%) create mode 100755 scripts-v3/patch.sh rename {scripts => scripts-v3}/source.sh (100%) delete mode 100755 scripts/config.sh delete mode 100755 scripts/patch.sh diff --git a/.github/ build-canary-v3 b/.github/ build-canary-v3 new file mode 100644 index 0000000..56a6051 --- /dev/null +++ b/.github/ build-canary-v3 @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/.github/build-nest-v3 b/.github/build-nest-v3 new file mode 100644 index 0000000..56a6051 --- /dev/null +++ b/.github/build-nest-v3 @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/.github/release-canary-v3 b/.github/release-canary-v3 new file mode 100644 index 0000000..b8626c4 --- /dev/null +++ b/.github/release-canary-v3 @@ -0,0 +1 @@ +4 diff --git a/.github/release-nest-v3 b/.github/release-nest-v3 new file mode 100644 index 0000000..56a6051 --- /dev/null +++ b/.github/release-nest-v3 @@ -0,0 +1 @@ +1 \ No newline at end of file diff 
--git a/.github/workflows/build-canaryv3.yml b/.github/workflows/build-canaryv3.yml new file mode 100644 index 0000000..72adde7 --- /dev/null +++ b/.github/workflows/build-canaryv3.yml @@ -0,0 +1,34 @@ +name: PikaOS Package Build Only (Canary) (amd64-v3) + +on: + push: + branches: + - main + paths: + - '.github/build-canary-v3' + +jobs: + build: + runs-on: ubuntu-latest + container: + image: ghcr.io/pikaos-linux/pikaos-builder:canaryv3 + volumes: + - /proc:/proc + options: --privileged -it + + steps: + - uses: actions/checkout@v3 + + - name: Install SSH key + uses: shimataro/ssh-key-action@v2 + with: + key: ${{ vars.SSH_KEY }} + name: id_rsa + known_hosts: ${{ vars.KNOWN_HOSTS }} + if_key_exists: replace + + - name: Update APT Cache + run: apt-get update -y + + - name: Build Package + run: ./mainv3.sh diff --git a/.github/workflows/build-nestv3.yml b/.github/workflows/build-nestv3.yml new file mode 100644 index 0000000..3098818 --- /dev/null +++ b/.github/workflows/build-nestv3.yml @@ -0,0 +1,34 @@ +name: PikaOS Package Build Only (amd64-v3) + +on: + push: + branches: + - main + paths: + - '.github/build-nest-v3' + +jobs: + build: + runs-on: ubuntu-latest + container: + image: ghcr.io/pikaos-linux/pikaos-builder:nestv3 + volumes: + - /proc:/proc + options: --privileged -it + + steps: + - uses: actions/checkout@v3 + + - name: Install SSH key + uses: shimataro/ssh-key-action@v2 + with: + key: ${{ vars.SSH_KEY }} + name: id_rsa + known_hosts: ${{ vars.KNOWN_HOSTS }} + if_key_exists: replace + + - name: Update APT Cache + run: apt-get update -y + + - name: Build Package + run: ./mainv3.sh diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index cbe6947..0000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: PikaOS Kernel Build Only - -on: - workflow_dispatch - -jobs: - build: - runs-on: self-hosted - container: - image: ghcr.io/pikaos-linux/pikaos-builder:canary - volumes: - - /proc:/proc - options: 
--privileged -it - - steps: - - uses: actions/checkout@v3 - - - name: Build Kernel - run: ./main.sh - - - uses: actions/upload-artifact@v3 - with: - name: PikaOS Kernel - path: output/ diff --git a/.github/workflows/release-canaryv3.yml b/.github/workflows/release-canaryv3.yml new file mode 100644 index 0000000..3e837ff --- /dev/null +++ b/.github/workflows/release-canaryv3.yml @@ -0,0 +1,37 @@ +name: PikaOS Package Build & Release (Canary) (amd64-v3) + +on: + push: + branches: + - main + paths: + - '.github/release-canary-v3' + +jobs: + build: + runs-on: ubuntu-latest + container: + image: ghcr.io/pikaos-linux/pikaos-builder:canaryv3 + volumes: + - /proc:/proc + options: --privileged -it + + steps: + - uses: actions/checkout@v3 + + - name: Install SSH key + uses: shimataro/ssh-key-action@v2 + with: + key: ${{ vars.SSH_KEY }} + name: id_rsa + known_hosts: ${{ vars.KNOWN_HOSTS }} + if_key_exists: replace + + - name: Update APT Cache + run: apt-get update -y + + - name: Build Package + run: ./mainv3.sh + + - name: Release Package + run: ./release.sh diff --git a/.github/workflows/release-nestv3.yml b/.github/workflows/release-nestv3.yml new file mode 100644 index 0000000..bb9261e --- /dev/null +++ b/.github/workflows/release-nestv3.yml @@ -0,0 +1,37 @@ +name: PikaOS Package Build & Release (amd64-v3) + +on: + push: + branches: + - main + paths: + - '.github/release-nest-v3' + +jobs: + build: + runs-on: ubuntu-latest + container: + image: ghcr.io/pikaos-linux/pikaos-builder:nestv3 + volumes: + - /proc:/proc + options: --privileged -it + + steps: + - uses: actions/checkout@v3 + + - name: Install SSH key + uses: shimataro/ssh-key-action@v2 + with: + key: ${{ vars.SSH_KEY }} + name: id_rsa + known_hosts: ${{ vars.KNOWN_HOSTS }} + if_key_exists: replace + + - name: Update APT Cache + run: apt-get update -y + + - name: Build Package + run: ./mainv3.sh + + - name: Release Package + run: ./release.sh diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml 
deleted file mode 100644 index 9b21726..0000000 --- a/.github/workflows/release.yml +++ /dev/null @@ -1,37 +0,0 @@ -name: PikaOS Kernel Build And Release - -on: - workflow_dispatch - -jobs: - build: - runs-on: self-hosted - container: - image: ghcr.io/pikaos-linux/pikaos-builder:canary - volumes: - - /proc:/proc - options: --privileged -it - - steps: - - uses: actions/checkout@v3 - - - name: Import GPG key - id: import_gpg - uses: crazy-max/ghaction-import-gpg@v5 - with: - gpg_private_key: ${{ secrets.GPG_PRIVATE_KEY }} - passphrase: ${{ secrets.PASSPHRASE }} - - - name: Install SSH key - uses: shimataro/ssh-key-action@v2 - with: - key: ${{ secrets.SSH_KEY }} - name: id_rsa - known_hosts: ${{ secrets.KNOWN_HOSTS }} - if_key_exists: replace - - - name: Build Kernel - run: ./main.sh - - - name: Release Kernel - run: ./release.sh diff --git a/VERSION b/VERSION index 5a33ecb..ad3cafd 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -6.10 +#PUT LINUX UPSTREAM VERSION HERE# diff --git a/main.sh b/main.sh deleted file mode 100755 index baba420..0000000 --- a/main.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -. ./scripts/source.sh -. ../scripts/patch.sh -. ../scripts/config.sh -. ../scripts/build.sh -. ../scripts/output.sh diff --git a/mainv3.sh b/mainv3.sh new file mode 100755 index 0000000..9f1654e --- /dev/null +++ b/mainv3.sh @@ -0,0 +1,12 @@ +#! /bin/bash + +set -e + +# Move the debs to output +mkdir -p ./output + +. ./scripts-v3/source.sh +. ../scripts-v3/patch.sh +. ../scripts-v3/config.sh +. ../scripts-v3/build.sh +. 
../scripts-v3/output.sh diff --git a/output/key.gpg b/output/key.gpg deleted file mode 100644 index 9b5a79f..0000000 --- a/output/key.gpg +++ /dev/null @@ -1,30 +0,0 @@ ------BEGIN PGP PUBLIC KEY BLOCK----- - -mQENBGPJoigBCADZ8tDzkO2LlWIzXZLLyRLIaRnaNHG6P9xx0ABSFsqU+X+p9qDS -eQW6SmeCN+PauqAHlzrJ7p3XZi07E+h69PEk5R5n7qhVECW35Y1sB9EfC2nqVRxd -RcWtwQsipEHQmjvWIsD4hR5uhq62p7grSkQxv13SGLqyJkKIpkic2vZEgqubfZd4 -KLPFvaQZar6QWa3urfYnUZzc1TNkEYxghr/dQuCFSfYPM+yHT70MXrlPOgfslGgL -YtoN1YauF04wzAg1RFfrWX2AdHE792fVHrkHRsvQg1Pvw4KjPnM6jX2V8W8n7C++ -yxpiMUU2h9FqBWfHrqNLWtKdn6+lgHUq2Oj3ABEBAAG0IWZlcnJlbyA8aGFyZGVy -dGhhbmZpcmVAZ21haWwuY29tPokBTgQTAQoAOBYhBIvETfAmQkhf8fPMBKt4xg37 -WBYDBQJjyaIoAhsDBQsJCAcCBhUKCQgLAgQWAgMBAh4BAheAAAoJEKt4xg37WBYD -4/oH/2LRW4FwLHCsWeJfRx5Z7BwKrGqWIF2VujkvEjlFOGYO7aN5HxeX/QKeN+Wy -901hv4CO7T7aSye0qjaYz0I6ZUmr9CaINdXTH7fok3CXQYBfluaLiyxMPSm+Fe5o -vfiUiSMZ488uaUkFSww/TEP8wi5H02yqGJcx3yB54OTsVb8eUHLPXno0T4tooWvX -EOMUKkpj3tEylJoqL5d2iz2ZrkMdX9tVXOkKY3iJD2El0TPITrTIuRuurqzc4CWU -laV7bmZ1Mq5r21S7ISOhhzvEMwsiWylIFXmXNPvbU7DC43uT3+nKhBca8VESzvmu -r7zC6CcQAR5IVHMjd8weFfrnGXm5AQ0EY8miKAEIALnnC+U4gx0m0yLEVOHBoccb -T7CvhmBYer2shxe5o7zUZ5V4y1iJdzSSJksbQkZH4+JDwi7Hp3/lqI2EsxQ9TR+A -OdRvETfz88aK/e2vJ0j7Bt3Dr0u0mgoo9kSx6rLq1oH9Nha9ReOljmEfDtuINR86 -QGEd8PyvNDcUap+6QQa6/RBEDiH1zYBYtxv4rbuciKsh+e6r6C8TJb43nKr3YBGu -/GE1aDlGaKvFgUOZmaapgoQVdpXcg7ZtTpI8sNKdnLVEChIKk35n52XfQDZPVvAt -bsUIr77B4hi+GsjGli7ihr+JJEiHwOyCMZvV95ZWq2ThrXxRWA8mHqCLhz7oTV8A -EQEAAYkBNgQYAQoAIBYhBIvETfAmQkhf8fPMBKt4xg37WBYDBQJjyaIoAhsMAAoJ -EKt4xg37WBYDdwAIAI3yJwOa6P6wz3ddLt/4FTlCSnlJ8C904RDwtJEO/C/y9qZv -yE0qitUi7mntzYE6G7SES3Zn6b9HhdTS9kQv6VUg75TjD/WGPVju5cB11mte95Z9 -6iW5u65kxpawxiTUhaO+O4RO6fZ29rZyCQDfa7ESudkVE/yktAA5umnAbGpgxGa6 -8egCGiZ0LKUqcHxMAsoUUhlOTk3LR4yS6nKE1Q8Dr6E7NYlrWcoGDSQzKvXLqf8e -9eJLGckePwHDzhgO9LKGW3meTV6ldLehTsxm/ycHqXL7/wYjYy6ZXj/5Px3CGLPg -DH9mVj8ERsz096eQA+53gmcTsNtq/FLWS2MhtCc= -=+26V ------END PGP PUBLIC KEY BLOCK----- \ No newline at end of file diff --git 
a/patches/asuslinux/0001-platform-x86-asus-wmi-add-support-for-2024-ROG-Mini-.patch.patch b/patches/asuslinux/0001-platform-x86-asus-wmi-add-support-for-2024-ROG-Mini-.patch.patch deleted file mode 100644 index fff1b38..0000000 --- a/patches/asuslinux/0001-platform-x86-asus-wmi-add-support-for-2024-ROG-Mini-.patch.patch +++ /dev/null @@ -1,151 +0,0 @@ -From 55426abb60d99efed912d8309498c0c365e8dcec Mon Sep 17 00:00:00 2001 -From: "Luke D. Jones" -Date: Sun, 10 Mar 2024 15:14:37 +1300 -Subject: [PATCH 1/5] platform/x86: asus-wmi: add support for 2024 ROG Mini-LED - -Support the 2024 mini-led backlight and adjust the related functions -to select the relevant dev-id. Also add `available_mini_led_mode` to the -platform sysfs since the available mini-led levels can be different. - -Signed-off-by: Luke D. Jones ---- - .../ABI/testing/sysfs-platform-asus-wmi | 8 ++++ - drivers/platform/x86/asus-wmi.c | 48 ++++++++++++++++--- - include/linux/platform_data/x86/asus-wmi.h | 1 + - 3 files changed, 51 insertions(+), 6 deletions(-) - -diff --git a/Documentation/ABI/testing/sysfs-platform-asus-wmi b/Documentation/ABI/testing/sysfs-platform-asus-wmi -index 8a7e25bde085..e32b4f0ae15f 100644 ---- a/Documentation/ABI/testing/sysfs-platform-asus-wmi -+++ b/Documentation/ABI/testing/sysfs-platform-asus-wmi -@@ -126,6 +126,14 @@ Description: - Change the mini-LED mode: - * 0 - Single-zone, - * 1 - Multi-zone -+ * 2 - Multi-zone strong (available on newer generation mini-led) -+ -+What: /sys/devices/platform//avilable_mini_led_mode -+Date: Jun 2023 -+KernelVersion: 6.9 -+Contact: "Luke Jones" -+Description: -+ List the available mini-led modes. 
- - What: /sys/devices/platform//ppt_pl1_spl - Date: Jun 2023 -diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c -index 18be35fdb381..a56152ccfbe7 100644 ---- a/drivers/platform/x86/asus-wmi.c -+++ b/drivers/platform/x86/asus-wmi.c -@@ -297,6 +297,7 @@ struct asus_wmi { - - bool panel_overdrive_available; - bool mini_led_mode_available; -+ u32 mini_led_dev_id; - - struct hotplug_slot hotplug_slot; - struct mutex hotplug_lock; -@@ -2109,10 +2110,17 @@ static ssize_t mini_led_mode_show(struct device *dev, - struct asus_wmi *asus = dev_get_drvdata(dev); - int result; - -- result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_MINI_LED_MODE); -- if (result < 0) -- return result; -+ result = asus_wmi_get_devstate_simple(asus, asus->mini_led_dev_id); - -+ // Remap the mode values to match previous generation mini-led including -+ // if errored -19 since some of these bios return a bad result if set to "2" -+ // which is mini-led off -+ if (asus->mini_led_dev_id == ASUS_WMI_DEVID_MINI_LED_MODE2) { -+ if (result >= 0 || result == -19) -+ result = result == 1 ? 2 : result == 0 ? 1 : 0; -+ } else if (result < 0) { -+ return result; -+ } - return sysfs_emit(buf, "%d\n", result); - } - -@@ -2129,10 +2137,15 @@ static ssize_t mini_led_mode_store(struct device *dev, - if (result) - return result; - -- if (mode > 1) -+ if (mode > 1 && asus->mini_led_dev_id == ASUS_WMI_DEVID_MINI_LED_MODE) - return -EINVAL; -+ if (mode > 2 && asus->mini_led_dev_id == ASUS_WMI_DEVID_MINI_LED_MODE2) -+ return -EINVAL; -+ // Remap the mode values to match previous generation mini-led -+ if (asus->mini_led_dev_id == ASUS_WMI_DEVID_MINI_LED_MODE2) -+ mode = mode == 2 ? 1 : mode == 0 ? 
2 : 0; - -- err = asus_wmi_set_devstate(ASUS_WMI_DEVID_MINI_LED_MODE, mode, &result); -+ err = asus_wmi_set_devstate(asus->mini_led_dev_id, mode, &result); - - if (err) { - pr_warn("Failed to set mini-LED: %d\n", err); -@@ -2150,6 +2163,21 @@ static ssize_t mini_led_mode_store(struct device *dev, - } - static DEVICE_ATTR_RW(mini_led_mode); - -+static ssize_t available_mini_led_mode_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ struct asus_wmi *asus = dev_get_drvdata(dev); -+ -+ if (asus->mini_led_dev_id == ASUS_WMI_DEVID_MINI_LED_MODE) -+ return sysfs_emit(buf, "0 1\n"); -+ if (asus->mini_led_dev_id == ASUS_WMI_DEVID_MINI_LED_MODE2) -+ return sysfs_emit(buf, "0 1 2\n"); -+ -+ return sysfs_emit(buf, "0\n"); -+} -+ -+static DEVICE_ATTR_RO(available_mini_led_mode); -+ - /* Quirks *********************************************************************/ - - static void asus_wmi_set_xusb2pr(struct asus_wmi *asus) -@@ -4174,6 +4202,7 @@ static struct attribute *platform_attributes[] = { - &dev_attr_nv_temp_target.attr, - &dev_attr_panel_od.attr, - &dev_attr_mini_led_mode.attr, -+ &dev_attr_available_mini_led_mode.attr, - NULL - }; - -@@ -4496,10 +4525,17 @@ static int asus_wmi_add(struct platform_device *pdev) - asus->nv_dyn_boost_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_NV_DYN_BOOST); - asus->nv_temp_tgt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_NV_THERM_TARGET); - asus->panel_overdrive_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PANEL_OD); -- asus->mini_led_mode_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_MINI_LED_MODE); - asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE) - && dmi_match(DMI_BOARD_NAME, "RC71L"); - -+ if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_MINI_LED_MODE)) { -+ asus->mini_led_mode_available = true; -+ asus->mini_led_dev_id = ASUS_WMI_DEVID_MINI_LED_MODE; -+ } else if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_MINI_LED_MODE2)) { -+ 
asus->mini_led_mode_available = true; -+ asus->mini_led_dev_id = ASUS_WMI_DEVID_MINI_LED_MODE2; -+ } -+ - err = fan_boost_mode_check_present(asus); - if (err) - goto fail_fan_boost_mode; -diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h -index ab1c7deff118..9cadce10ad9a 100644 ---- a/include/linux/platform_data/x86/asus-wmi.h -+++ b/include/linux/platform_data/x86/asus-wmi.h -@@ -71,6 +71,7 @@ - #define ASUS_WMI_DEVID_LID_FLIP 0x00060062 - #define ASUS_WMI_DEVID_LID_FLIP_ROG 0x00060077 - #define ASUS_WMI_DEVID_MINI_LED_MODE 0x0005001E -+#define ASUS_WMI_DEVID_MINI_LED_MODE2 0x0005002E - - /* Storage */ - #define ASUS_WMI_DEVID_CARDREADER 0x00080013 --- -2.44.0 - - diff --git a/patches/asuslinux/0002-platform-x86-asus-wmi-add-support-for-Vivobook-GPU-M.patch b/patches/asuslinux/0002-platform-x86-asus-wmi-add-support-for-Vivobook-GPU-M.patch deleted file mode 100644 index dbd8ee9..0000000 --- a/patches/asuslinux/0002-platform-x86-asus-wmi-add-support-for-Vivobook-GPU-M.patch +++ /dev/null @@ -1,100 +0,0 @@ -From 06d5a9b83548d99b70764166d723489cc8336b1d Mon Sep 17 00:00:00 2001 -From: "Luke D. Jones" -Date: Sun, 10 Mar 2024 17:10:05 +1300 -Subject: [PATCH 2/5] platform/x86: asus-wmi: add support for Vivobook GPU MUX - -Adjust existing MUX support to select whichever MUX support is available -so that ASUS Vivobook MUX can also be used if detected. - -Signed-off-by: Luke D. 
Jones ---- - drivers/platform/x86/asus-wmi.c | 18 +++++++++++++----- - include/linux/platform_data/x86/asus-wmi.h | 1 + - 2 files changed, 14 insertions(+), 5 deletions(-) - -diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c -index a56152ccfbe7..b9a2fb8007c0 100644 ---- a/drivers/platform/x86/asus-wmi.c -+++ b/drivers/platform/x86/asus-wmi.c -@@ -268,6 +268,7 @@ struct asus_wmi { - bool egpu_connect_available; - bool dgpu_disable_available; - bool gpu_mux_mode_available; -+ u32 gpu_mux_dev; - - /* Tunables provided by ASUS for gaming laptops */ - bool ppt_pl2_sppt_available; -@@ -682,7 +683,7 @@ static ssize_t dgpu_disable_store(struct device *dev, - return -EINVAL; - - if (asus->gpu_mux_mode_available) { -- result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_GPU_MUX); -+ result = asus_wmi_get_devstate_simple(asus, asus->gpu_mux_dev); - if (result < 0) - /* An error here may signal greater failure of GPU handling */ - return result; -@@ -748,7 +749,7 @@ static ssize_t egpu_enable_store(struct device *dev, - } - - if (asus->gpu_mux_mode_available) { -- result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_GPU_MUX); -+ result = asus_wmi_get_devstate_simple(asus, asus->gpu_mux_dev); - if (result < 0) { - /* An error here may signal greater failure of GPU handling */ - pr_warn("Failed to get gpu mux status: %d\n", result); -@@ -801,7 +802,7 @@ static ssize_t gpu_mux_mode_show(struct device *dev, - struct asus_wmi *asus = dev_get_drvdata(dev); - int result; - -- result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_GPU_MUX); -+ result = asus_wmi_get_devstate_simple(asus, asus->gpu_mux_dev); - if (result < 0) - return result; - -@@ -847,7 +848,7 @@ static ssize_t gpu_mux_mode_store(struct device *dev, - } - } - -- err = asus_wmi_set_devstate(ASUS_WMI_DEVID_GPU_MUX, optimus, &result); -+ err = asus_wmi_set_devstate(asus->gpu_mux_dev, optimus, &result); - if (err) { - dev_err(dev, "Failed to set GPU MUX mode: %d\n", err); - 
return err; -@@ -4514,7 +4515,6 @@ static int asus_wmi_add(struct platform_device *pdev) - asus->egpu_enable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_EGPU); - asus->egpu_connect_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_EGPU_CONNECTED); - asus->dgpu_disable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_DGPU); -- asus->gpu_mux_mode_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_GPU_MUX); - asus->kbd_rgb_mode_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_MODE); - asus->kbd_rgb_state_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_STATE); - asus->ppt_pl2_sppt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PPT_PL2_SPPT); -@@ -4536,6 +4536,14 @@ static int asus_wmi_add(struct platform_device *pdev) - asus->mini_led_dev_id = ASUS_WMI_DEVID_MINI_LED_MODE2; - } - -+ if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_GPU_MUX)) { -+ asus->gpu_mux_mode_available = true; -+ asus->gpu_mux_dev = ASUS_WMI_DEVID_GPU_MUX; -+ } else if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_GPU_MUX_VIVO)) { -+ asus->gpu_mux_mode_available = true; -+ asus->gpu_mux_dev = ASUS_WMI_DEVID_GPU_MUX_VIVO; -+ } -+ - err = fan_boost_mode_check_present(asus); - if (err) - goto fail_fan_boost_mode; -diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h -index 9cadce10ad9a..b48b024dd844 100644 ---- a/include/linux/platform_data/x86/asus-wmi.h -+++ b/include/linux/platform_data/x86/asus-wmi.h -@@ -128,6 +128,7 @@ - - /* gpu mux switch, 0 = dGPU, 1 = Optimus */ - #define ASUS_WMI_DEVID_GPU_MUX 0x00090016 -+#define ASUS_WMI_DEVID_GPU_MUX_VIVO 0x00090026 - - /* TUF laptop RGB modes/colours */ - #define ASUS_WMI_DEVID_TUF_RGB_MODE 0x00100056 --- -2.44.0 - diff --git a/patches/asuslinux/0003-platform-x86-asus-wmi-add-support-variant-of-TUF-RGB.patch b/patches/asuslinux/0003-platform-x86-asus-wmi-add-support-variant-of-TUF-RGB.patch deleted file mode 100644 index 
1fd2ce7..0000000 --- a/patches/asuslinux/0003-platform-x86-asus-wmi-add-support-variant-of-TUF-RGB.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 9b038d6db81b457738cf65e43f401ccb8bf505e6 Mon Sep 17 00:00:00 2001 -From: "Luke D. Jones" -Date: Sun, 10 Mar 2024 17:20:02 +1300 -Subject: [PATCH 3/5] platform/x86: asus-wmi: add support variant of TUF RGB - -Adds support for a second TUF RGB wmi call that some versions of the TUF -laptop come with. Also adjusts existing support to select whichever is -available. - -Signed-off-by: Luke D. Jones ---- - drivers/platform/x86/asus-wmi.c | 12 +++++++++++- - include/linux/platform_data/x86/asus-wmi.h | 1 + - 2 files changed, 12 insertions(+), 1 deletion(-) - -diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c -index b9a2fb8007c0..e1100726de53 100644 ---- a/drivers/platform/x86/asus-wmi.c -+++ b/drivers/platform/x86/asus-wmi.c -@@ -280,6 +280,7 @@ struct asus_wmi { - bool nv_temp_tgt_available; - - bool kbd_rgb_mode_available; -+ u32 kbd_rgb_dev; - bool kbd_rgb_state_available; - - bool throttle_thermal_policy_available; -@@ -870,6 +871,7 @@ static ssize_t kbd_rgb_mode_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) - { -+ struct asus_wmi *asus = dev_get_drvdata(dev); - u32 cmd, mode, r, g, b, speed; - int err; - -@@ -906,7 +908,7 @@ static ssize_t kbd_rgb_mode_store(struct device *dev, - speed = 0xeb; - } - -- err = asus_wmi_evaluate_method3(ASUS_WMI_METHODID_DEVS, ASUS_WMI_DEVID_TUF_RGB_MODE, -+ err = asus_wmi_evaluate_method3(ASUS_WMI_METHODID_DEVS, asus->kbd_rgb_dev, - cmd | (mode << 8) | (r << 16) | (g << 24), b | (speed << 8), NULL); - if (err) - return err; -@@ -4544,6 +4546,14 @@ static int asus_wmi_add(struct platform_device *pdev) - asus->gpu_mux_dev = ASUS_WMI_DEVID_GPU_MUX_VIVO; - } - -+ if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_MODE)) { -+ asus->kbd_rgb_mode_available = true; -+ asus->kbd_rgb_dev = ASUS_WMI_DEVID_TUF_RGB_MODE; -+ } 
else if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_MODE2)) { -+ asus->kbd_rgb_mode_available = true; -+ asus->kbd_rgb_dev = ASUS_WMI_DEVID_TUF_RGB_MODE2; -+ } -+ - err = fan_boost_mode_check_present(asus); - if (err) - goto fail_fan_boost_mode; -diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h -index b48b024dd844..3e9a01467c67 100644 ---- a/include/linux/platform_data/x86/asus-wmi.h -+++ b/include/linux/platform_data/x86/asus-wmi.h -@@ -132,6 +132,7 @@ - - /* TUF laptop RGB modes/colours */ - #define ASUS_WMI_DEVID_TUF_RGB_MODE 0x00100056 -+#define ASUS_WMI_DEVID_TUF_RGB_MODE2 0x0010005A - - /* TUF laptop RGB power/state */ - #define ASUS_WMI_DEVID_TUF_RGB_STATE 0x00100057 --- -2.44.0 - diff --git a/patches/asuslinux/0004-platform-x86-asus-wmi-support-toggling-POST-sound.patch b/patches/asuslinux/0004-platform-x86-asus-wmi-support-toggling-POST-sound.patch deleted file mode 100644 index 2b0f7cf..0000000 --- a/patches/asuslinux/0004-platform-x86-asus-wmi-support-toggling-POST-sound.patch +++ /dev/null @@ -1,139 +0,0 @@ -From 1c0f375634b3ddbcf479c4ddb81639e397795802 Mon Sep 17 00:00:00 2001 -From: "Luke D. Jones" -Date: Sun, 10 Mar 2024 19:03:11 +1300 -Subject: [PATCH 4/5] platform/x86: asus-wmi: support toggling POST sound - -Add support for toggling the BIOS POST sound on some ASUS laptops. - -Signed-off-by: Luke D. 
Jones ---- - .../ABI/testing/sysfs-platform-asus-wmi | 7 +++ - drivers/platform/x86/asus-wmi.c | 54 +++++++++++++++++++ - include/linux/platform_data/x86/asus-wmi.h | 3 ++ - 3 files changed, 64 insertions(+) - -diff --git a/Documentation/ABI/testing/sysfs-platform-asus-wmi b/Documentation/ABI/testing/sysfs-platform-asus-wmi -index e32b4f0ae15f..f3c53b7453f0 100644 ---- a/Documentation/ABI/testing/sysfs-platform-asus-wmi -+++ b/Documentation/ABI/testing/sysfs-platform-asus-wmi -@@ -194,3 +194,10 @@ Contact: "Luke Jones" - Description: - Set the target temperature limit of the Nvidia dGPU: - * min=75, max=87 -+ -+What: /sys/devices/platform//boot_sound -+Date: Jun 2023 -+KernelVersion: 6.9 -+Contact: "Luke Jones" -+Description: -+ Set if the BIOS POST sound is played on boot. -diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c -index e1100726de53..e4341abb71e0 100644 ---- a/drivers/platform/x86/asus-wmi.c -+++ b/drivers/platform/x86/asus-wmi.c -@@ -297,6 +297,7 @@ struct asus_wmi { - // The RSOC controls the maximum charging percentage. 
- bool battery_rsoc_available; - -+ bool boot_sound_available; - bool panel_overdrive_available; - bool mini_led_mode_available; - u32 mini_led_dev_id; -@@ -2106,6 +2107,55 @@ static ssize_t panel_od_store(struct device *dev, - } - static DEVICE_ATTR_RW(panel_od); - -+/* Bootup sound ***************************************************************/ -+ -+static ssize_t boot_sound_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ struct asus_wmi *asus = dev_get_drvdata(dev); -+ int result; -+ -+ result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_BOOT_SOUND); -+ if (result < 0) -+ return result; -+ -+ return sysfs_emit(buf, "%d\n", result); -+} -+ -+static ssize_t boot_sound_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ int result, err; -+ u32 snd; -+ -+ struct asus_wmi *asus = dev_get_drvdata(dev); -+ -+ result = kstrtou32(buf, 10, &snd); -+ if (result) -+ return result; -+ -+ if (snd > 1) -+ return -EINVAL; -+ -+ err = asus_wmi_set_devstate(ASUS_WMI_DEVID_BOOT_SOUND, snd, &result); -+ -+ if (err) { -+ pr_warn("Failed to set boot sound: %d\n", err); -+ return err; -+ } -+ -+ if (result > 1) { -+ pr_warn("Failed to set panel boot sound (result): 0x%x\n", result); -+ return -EIO; -+ } -+ -+ sysfs_notify(&asus->platform_device->dev.kobj, NULL, "boot_sound"); -+ -+ return count; -+} -+static DEVICE_ATTR_RW(boot_sound); -+ - /* Mini-LED mode **************************************************************/ - static ssize_t mini_led_mode_show(struct device *dev, - struct device_attribute *attr, char *buf) -@@ -4203,6 +4253,7 @@ static struct attribute *platform_attributes[] = { - &dev_attr_ppt_platform_sppt.attr, - &dev_attr_nv_dynamic_boost.attr, - &dev_attr_nv_temp_target.attr, -+ &dev_attr_boot_sound.attr, - &dev_attr_panel_od.attr, - &dev_attr_mini_led_mode.attr, - &dev_attr_available_mini_led_mode.attr, -@@ -4255,6 +4306,8 @@ static umode_t asus_sysfs_is_visible(struct kobject 
*kobj, - ok = asus->nv_dyn_boost_available; - else if (attr == &dev_attr_nv_temp_target.attr) - ok = asus->nv_temp_tgt_available; -+ else if (attr == &dev_attr_boot_sound.attr) -+ ok = asus->boot_sound_available; - else if (attr == &dev_attr_panel_od.attr) - ok = asus->panel_overdrive_available; - else if (attr == &dev_attr_mini_led_mode.attr) -@@ -4526,6 +4579,7 @@ static int asus_wmi_add(struct platform_device *pdev) - asus->ppt_plat_sppt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PPT_PLAT_SPPT); - asus->nv_dyn_boost_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_NV_DYN_BOOST); - asus->nv_temp_tgt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_NV_THERM_TARGET); -+ asus->boot_sound_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_BOOT_SOUND); - asus->panel_overdrive_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PANEL_OD); - asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE) - && dmi_match(DMI_BOARD_NAME, "RC71L"); -diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h -index 3e9a01467c67..3eb5cd6773ad 100644 ---- a/include/linux/platform_data/x86/asus-wmi.h -+++ b/include/linux/platform_data/x86/asus-wmi.h -@@ -137,6 +137,9 @@ - /* TUF laptop RGB power/state */ - #define ASUS_WMI_DEVID_TUF_RGB_STATE 0x00100057 - -+/* Bootup sound control */ -+#define ASUS_WMI_DEVID_BOOT_SOUND 0x00130022 -+ - /* DSTS masks */ - #define ASUS_WMI_DSTS_STATUS_BIT 0x00000001 - #define ASUS_WMI_DSTS_UNKNOWN_BIT 0x00000002 --- -2.44.0 - diff --git a/patches/asuslinux/0005-platform-x86-asus-wmi-store-a-min-default-for-ppt-op.patch b/patches/asuslinux/0005-platform-x86-asus-wmi-store-a-min-default-for-ppt-op.patch deleted file mode 100644 index 54402f0..0000000 --- a/patches/asuslinux/0005-platform-x86-asus-wmi-store-a-min-default-for-ppt-op.patch +++ /dev/null @@ -1,342 +0,0 @@ -From 6045f385154a2c0a4aaa692d13bb0fa14bbe1d12 Mon Sep 17 00:00:00 2001 -From: "Luke D. 
Jones" -Date: Mon, 11 Mar 2024 12:15:46 +1300 -Subject: [PATCH 5/5] platform/x86: asus-wmi: store a min default for ppt - options - -Laptops with any of the ppt or nv tunables default to the minimum setting -on boot so we can safely assume a stored value is correct. - -This patch adds storing of those values in the local struct, and enables -reading of those values back. - -Secondary to the above it renames some internal variables to be more -consistent (which makes code grepping show all related parts) - -Signed-off-by: Luke D. Jones ---- - drivers/platform/x86/asus-wmi.c | 141 +++++++++++++++++++++++++------- - 1 file changed, 111 insertions(+), 30 deletions(-) - -diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c -index e4341abb71e0..482e23b55e1e 100644 ---- a/drivers/platform/x86/asus-wmi.c -+++ b/drivers/platform/x86/asus-wmi.c -@@ -272,12 +272,19 @@ struct asus_wmi { - - /* Tunables provided by ASUS for gaming laptops */ - bool ppt_pl2_sppt_available; -+ u32 ppt_pl2_sppt; - bool ppt_pl1_spl_available; -+ u32 ppt_pl1_spl; - bool ppt_apu_sppt_available; -- bool ppt_plat_sppt_available; -+ u32 ppt_apu_sppt; -+ bool ppt_platform_sppt_available; -+ u32 ppt_platform_sppt; - bool ppt_fppt_available; -- bool nv_dyn_boost_available; -- bool nv_temp_tgt_available; -+ u32 ppt_fppt; -+ bool nv_dynamic_boost_available; -+ u32 nv_dynamic_boost; -+ bool nv_temp_target_available; -+ u32 nv_temp_target; - - bool kbd_rgb_mode_available; - u32 kbd_rgb_dev; -@@ -999,11 +1006,10 @@ static ssize_t ppt_pl2_sppt_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) - { -+ struct asus_wmi *asus = dev_get_drvdata(dev); - int result, err; - u32 value; - -- struct asus_wmi *asus = dev_get_drvdata(dev); -- - result = kstrtou32(buf, 10, &value); - if (result) - return result; -@@ -1022,22 +1028,31 @@ static ssize_t ppt_pl2_sppt_store(struct device *dev, - return -EIO; - } - -+ asus->ppt_pl2_sppt = value; - 
sysfs_notify(&asus->platform_device->dev.kobj, NULL, "ppt_pl2_sppt"); - - return count; - } --static DEVICE_ATTR_WO(ppt_pl2_sppt); -+ -+static ssize_t ppt_pl2_sppt_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ struct asus_wmi *asus = dev_get_drvdata(dev); -+ -+ return sysfs_emit(buf, "%d\n", asus->ppt_pl2_sppt); -+} -+static DEVICE_ATTR_RW(ppt_pl2_sppt); - - /* Tunable: PPT, Intel=PL1, AMD=SPL ******************************************/ - static ssize_t ppt_pl1_spl_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) - { -+ struct asus_wmi *asus = dev_get_drvdata(dev); - int result, err; - u32 value; - -- struct asus_wmi *asus = dev_get_drvdata(dev); -- - result = kstrtou32(buf, 10, &value); - if (result) - return result; -@@ -1056,22 +1071,30 @@ static ssize_t ppt_pl1_spl_store(struct device *dev, - return -EIO; - } - -+ asus->ppt_pl1_spl = value; - sysfs_notify(&asus->platform_device->dev.kobj, NULL, "ppt_pl1_spl"); - - return count; - } --static DEVICE_ATTR_WO(ppt_pl1_spl); -+static ssize_t ppt_pl1_spl_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ struct asus_wmi *asus = dev_get_drvdata(dev); -+ -+ return sysfs_emit(buf, "%d\n", asus->ppt_pl1_spl); -+} -+static DEVICE_ATTR_RW(ppt_pl1_spl); - - /* Tunable: PPT APU FPPT ******************************************************/ - static ssize_t ppt_fppt_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) - { -+ struct asus_wmi *asus = dev_get_drvdata(dev); - int result, err; - u32 value; - -- struct asus_wmi *asus = dev_get_drvdata(dev); -- - result = kstrtou32(buf, 10, &value); - if (result) - return result; -@@ -1090,22 +1113,31 @@ static ssize_t ppt_fppt_store(struct device *dev, - return -EIO; - } - -+ asus->ppt_fppt = value; - sysfs_notify(&asus->platform_device->dev.kobj, NULL, "ppt_fpu_sppt"); - - return count; - } --static DEVICE_ATTR_WO(ppt_fppt); -+ -+static 
ssize_t ppt_fppt_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ struct asus_wmi *asus = dev_get_drvdata(dev); -+ -+ return sysfs_emit(buf, "%d\n", asus->ppt_fppt); -+} -+static DEVICE_ATTR_RW(ppt_fppt); - - /* Tunable: PPT APU SPPT *****************************************************/ - static ssize_t ppt_apu_sppt_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) - { -+ struct asus_wmi *asus = dev_get_drvdata(dev); - int result, err; - u32 value; - -- struct asus_wmi *asus = dev_get_drvdata(dev); -- - result = kstrtou32(buf, 10, &value); - if (result) - return result; -@@ -1124,22 +1156,31 @@ static ssize_t ppt_apu_sppt_store(struct device *dev, - return -EIO; - } - -+ asus->ppt_apu_sppt = value; - sysfs_notify(&asus->platform_device->dev.kobj, NULL, "ppt_apu_sppt"); - - return count; - } --static DEVICE_ATTR_WO(ppt_apu_sppt); -+ -+static ssize_t ppt_apu_sppt_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ struct asus_wmi *asus = dev_get_drvdata(dev); -+ -+ return sysfs_emit(buf, "%d\n", asus->ppt_apu_sppt); -+} -+static DEVICE_ATTR_RW(ppt_apu_sppt); - - /* Tunable: PPT platform SPPT ************************************************/ - static ssize_t ppt_platform_sppt_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) - { -+ struct asus_wmi *asus = dev_get_drvdata(dev); - int result, err; - u32 value; - -- struct asus_wmi *asus = dev_get_drvdata(dev); -- - result = kstrtou32(buf, 10, &value); - if (result) - return result; -@@ -1158,22 +1199,31 @@ static ssize_t ppt_platform_sppt_store(struct device *dev, - return -EIO; - } - -+ asus->ppt_platform_sppt = value; - sysfs_notify(&asus->platform_device->dev.kobj, NULL, "ppt_platform_sppt"); - - return count; - } --static DEVICE_ATTR_WO(ppt_platform_sppt); -+ -+static ssize_t ppt_platform_sppt_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ 
struct asus_wmi *asus = dev_get_drvdata(dev); -+ -+ return sysfs_emit(buf, "%d\n", asus->ppt_platform_sppt); -+} -+static DEVICE_ATTR_RW(ppt_platform_sppt); - - /* Tunable: NVIDIA dynamic boost *********************************************/ - static ssize_t nv_dynamic_boost_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) - { -+ struct asus_wmi *asus = dev_get_drvdata(dev); - int result, err; - u32 value; - -- struct asus_wmi *asus = dev_get_drvdata(dev); -- - result = kstrtou32(buf, 10, &value); - if (result) - return result; -@@ -1192,22 +1242,31 @@ static ssize_t nv_dynamic_boost_store(struct device *dev, - return -EIO; - } - -+ asus->nv_dynamic_boost = value; - sysfs_notify(&asus->platform_device->dev.kobj, NULL, "nv_dynamic_boost"); - - return count; - } --static DEVICE_ATTR_WO(nv_dynamic_boost); -+ -+static ssize_t nv_dynamic_boost_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ struct asus_wmi *asus = dev_get_drvdata(dev); -+ -+ return sysfs_emit(buf, "%d\n", asus->nv_dynamic_boost); -+} -+static DEVICE_ATTR_RW(nv_dynamic_boost); - - /* Tunable: NVIDIA temperature target ****************************************/ - static ssize_t nv_temp_target_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) - { -+ struct asus_wmi *asus = dev_get_drvdata(dev); - int result, err; - u32 value; - -- struct asus_wmi *asus = dev_get_drvdata(dev); -- - result = kstrtou32(buf, 10, &value); - if (result) - return result; -@@ -1226,11 +1285,21 @@ static ssize_t nv_temp_target_store(struct device *dev, - return -EIO; - } - -+ asus->nv_temp_target = value; - sysfs_notify(&asus->platform_device->dev.kobj, NULL, "nv_temp_target"); - - return count; - } --static DEVICE_ATTR_WO(nv_temp_target); -+ -+static ssize_t nv_temp_target_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ struct asus_wmi *asus = dev_get_drvdata(dev); -+ -+ return 
sysfs_emit(buf, "%d\n", asus->nv_temp_target); -+} -+static DEVICE_ATTR_RW(nv_temp_target); - - /* Battery ********************************************************************/ - -@@ -4301,11 +4370,11 @@ static umode_t asus_sysfs_is_visible(struct kobject *kobj, - else if (attr == &dev_attr_ppt_apu_sppt.attr) - ok = asus->ppt_apu_sppt_available; - else if (attr == &dev_attr_ppt_platform_sppt.attr) -- ok = asus->ppt_plat_sppt_available; -+ ok = asus->ppt_platform_sppt_available; - else if (attr == &dev_attr_nv_dynamic_boost.attr) -- ok = asus->nv_dyn_boost_available; -+ ok = asus->nv_dynamic_boost_available; - else if (attr == &dev_attr_nv_temp_target.attr) -- ok = asus->nv_temp_tgt_available; -+ ok = asus->nv_temp_target_available; - else if (attr == &dev_attr_boot_sound.attr) - ok = asus->boot_sound_available; - else if (attr == &dev_attr_panel_od.attr) -@@ -4566,6 +4635,15 @@ static int asus_wmi_add(struct platform_device *pdev) - if (err) - goto fail_platform; - -+ /* ensure defaults for tunables */ -+ asus->ppt_pl2_sppt = 5; -+ asus->ppt_pl1_spl = 5; -+ asus->ppt_apu_sppt = 5; -+ asus->ppt_platform_sppt = 5; -+ asus->ppt_fppt = 5; -+ asus->nv_dynamic_boost = 5; -+ asus->nv_temp_target = 75; -+ - asus->charge_mode_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_CHARGE_MODE); - asus->egpu_enable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_EGPU); - asus->egpu_connect_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_EGPU_CONNECTED); -@@ -4576,9 +4654,12 @@ static int asus_wmi_add(struct platform_device *pdev) - asus->ppt_pl1_spl_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PPT_PL1_SPL); - asus->ppt_fppt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PPT_FPPT); - asus->ppt_apu_sppt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PPT_APU_SPPT); -- asus->ppt_plat_sppt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PPT_PLAT_SPPT); -- asus->nv_dyn_boost_available = asus_wmi_dev_is_present(asus, 
ASUS_WMI_DEVID_NV_DYN_BOOST); -- asus->nv_temp_tgt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_NV_THERM_TARGET); -+ asus->ppt_platform_sppt_available = asus_wmi_dev_is_present(asus, -+ ASUS_WMI_DEVID_PPT_PLAT_SPPT); -+ asus->nv_dynamic_boost_available = asus_wmi_dev_is_present(asus, -+ ASUS_WMI_DEVID_NV_DYN_BOOST); -+ asus->nv_temp_target_available = asus_wmi_dev_is_present(asus, -+ ASUS_WMI_DEVID_NV_THERM_TARGET); - asus->boot_sound_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_BOOT_SOUND); - asus->panel_overdrive_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PANEL_OD); - asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE) --- -2.44.0 - diff --git a/patches/cachyos/0001-bore-cachy.patch b/patches/cachyos/0001-bore-cachy.patch deleted file mode 100644 index a49989b..0000000 --- a/patches/cachyos/0001-bore-cachy.patch +++ /dev/null @@ -1,929 +0,0 @@ -From fea4a499d6783faff756fe852c645f90aa73ccf7 Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Mon, 15 Jul 2024 13:57:19 +0200 -Subject: [PATCH] bore-cachy - -Signed-off-by: Peter Jung ---- - include/linux/sched.h | 10 ++ - init/Kconfig | 17 +++ - kernel/Kconfig.hz | 16 +++ - kernel/sched/core.c | 143 ++++++++++++++++++ - kernel/sched/debug.c | 60 +++++++- - kernel/sched/fair.c | 310 ++++++++++++++++++++++++++++++++++++---- - kernel/sched/features.h | 22 ++- - kernel/sched/sched.h | 7 + - 8 files changed, 555 insertions(+), 30 deletions(-) - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index a5f4b48fca18..df62c56b13ae 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -547,6 +547,16 @@ struct sched_entity { - u64 sum_exec_runtime; - u64 prev_sum_exec_runtime; - u64 vruntime; -+#ifdef CONFIG_SCHED_BORE -+ u64 burst_time; -+ u8 prev_burst_penalty; -+ u8 curr_burst_penalty; -+ u8 burst_penalty; -+ u8 burst_score; -+ u8 child_burst; -+ u32 child_burst_cnt; -+ u64 child_burst_last_cached; -+#endif // CONFIG_SCHED_BORE - s64 vlag; - 
u64 slice; - -diff --git a/init/Kconfig b/init/Kconfig -index 3ba6142f2f42..2966dec64df7 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1303,6 +1303,23 @@ config CHECKPOINT_RESTORE - - If unsure, say N here. - -+config SCHED_BORE -+ bool "Burst-Oriented Response Enhancer" -+ default y -+ help -+ In Desktop and Mobile computing, one might prefer interactive -+ tasks to keep responsive no matter what they run in the background. -+ -+ Enabling this kernel feature modifies the scheduler to discriminate -+ tasks by their burst time (runtime since it last went sleeping or -+ yielding state) and prioritize those that run less bursty. -+ Such tasks usually include window compositor, widgets backend, -+ terminal emulator, video playback, games and so on. -+ With a little impact to scheduling fairness, it may improve -+ responsiveness especially under heavy background workload. -+ -+ If unsure, say Y here. -+ - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" - select CGROUPS -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 0f78364efd4f..b50189ee5b93 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -79,5 +79,21 @@ config HZ - default 750 if HZ_750 - default 1000 if HZ_1000 - -+config MIN_BASE_SLICE_NS -+ int "Default value for min_base_slice_ns" -+ default 2000000 -+ help -+ The BORE Scheduler automatically calculates the optimal base -+ slice for the configured HZ using the following equation: -+ -+ base_slice_ns = max(min_base_slice_ns, 1000000000/HZ) -+ -+ This option sets the default lower bound limit of the base slice -+ to prevent the loss of task throughput due to overscheduling. -+ -+ Setting this value too high can cause the system to boot with -+ an unnecessarily large base slice, resulting in high scheduling -+ latency and poor system responsiveness. 
-+ - config SCHED_HRTICK - def_bool HIGH_RES_TIMERS -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 59ce0841eb1f..c5d10b464779 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -4515,6 +4515,138 @@ int wake_up_state(struct task_struct *p, unsigned int state) - return try_to_wake_up(p, state, 0); - } - -+#ifdef CONFIG_SCHED_BORE -+extern u8 sched_burst_fork_atavistic; -+extern uint sched_burst_cache_lifetime; -+ -+static void __init sched_init_bore(void) { -+ init_task.se.burst_time = 0; -+ init_task.se.prev_burst_penalty = 0; -+ init_task.se.curr_burst_penalty = 0; -+ init_task.se.burst_penalty = 0; -+ init_task.se.burst_score = 0; -+ init_task.se.child_burst_last_cached = 0; -+} -+ -+inline void sched_fork_bore(struct task_struct *p) { -+ p->se.burst_time = 0; -+ p->se.curr_burst_penalty = 0; -+ p->se.burst_score = 0; -+ p->se.child_burst_last_cached = 0; -+} -+ -+static u32 count_child_tasks(struct task_struct *p) { -+ struct task_struct *child; -+ u32 cnt = 0; -+ list_for_each_entry(child, &p->children, sibling) {cnt++;} -+ return cnt; -+} -+ -+static inline bool task_is_inheritable(struct task_struct *p) { -+ return (p->sched_class == &fair_sched_class); -+} -+ -+static inline bool child_burst_cache_expired(struct task_struct *p, u64 now) { -+ u64 expiration_time = -+ p->se.child_burst_last_cached + sched_burst_cache_lifetime; -+ return ((s64)(expiration_time - now) < 0); -+} -+ -+static void __update_child_burst_cache( -+ struct task_struct *p, u32 cnt, u32 sum, u64 now) { -+ u8 avg = 0; -+ if (cnt) avg = sum / cnt; -+ p->se.child_burst = max(avg, p->se.burst_penalty); -+ p->se.child_burst_cnt = cnt; -+ p->se.child_burst_last_cached = now; -+} -+ -+static inline void update_child_burst_direct(struct task_struct *p, u64 now) { -+ struct task_struct *child; -+ u32 cnt = 0; -+ u32 sum = 0; -+ -+ list_for_each_entry(child, &p->children, sibling) { -+ if (!task_is_inheritable(child)) continue; -+ cnt++; -+ sum += 
child->se.burst_penalty; -+ } -+ -+ __update_child_burst_cache(p, cnt, sum, now); -+} -+ -+static inline u8 __inherit_burst_direct(struct task_struct *p, u64 now) { -+ struct task_struct *parent = p->real_parent; -+ if (child_burst_cache_expired(parent, now)) -+ update_child_burst_direct(parent, now); -+ -+ return parent->se.child_burst; -+} -+ -+static void update_child_burst_topological( -+ struct task_struct *p, u64 now, u32 depth, u32 *acnt, u32 *asum) { -+ struct task_struct *child, *dec; -+ u32 cnt = 0, dcnt = 0; -+ u32 sum = 0; -+ -+ list_for_each_entry(child, &p->children, sibling) { -+ dec = child; -+ while ((dcnt = count_child_tasks(dec)) == 1) -+ dec = list_first_entry(&dec->children, struct task_struct, sibling); -+ -+ if (!dcnt || !depth) { -+ if (!task_is_inheritable(dec)) continue; -+ cnt++; -+ sum += dec->se.burst_penalty; -+ continue; -+ } -+ if (!child_burst_cache_expired(dec, now)) { -+ cnt += dec->se.child_burst_cnt; -+ sum += (u32)dec->se.child_burst * dec->se.child_burst_cnt; -+ continue; -+ } -+ update_child_burst_topological(dec, now, depth - 1, &cnt, &sum); -+ } -+ -+ __update_child_burst_cache(p, cnt, sum, now); -+ *acnt += cnt; -+ *asum += sum; -+} -+ -+static inline u8 __inherit_burst_topological(struct task_struct *p, u64 now) { -+ struct task_struct *anc = p->real_parent; -+ u32 cnt = 0, sum = 0; -+ -+ while (anc->real_parent != anc && count_child_tasks(anc) == 1) -+ anc = anc->real_parent; -+ -+ if (child_burst_cache_expired(anc, now)) -+ update_child_burst_topological( -+ anc, now, sched_burst_fork_atavistic - 1, &cnt, &sum); -+ -+ return anc->se.child_burst; -+} -+ -+static inline void inherit_burst(struct task_struct *p) { -+ u8 burst_cache; -+ u64 now = ktime_get_ns(); -+ -+ read_lock(&tasklist_lock); -+ burst_cache = likely(sched_burst_fork_atavistic)? 
-+ __inherit_burst_topological(p, now): -+ __inherit_burst_direct(p, now); -+ read_unlock(&tasklist_lock); -+ -+ p->se.prev_burst_penalty = max(p->se.prev_burst_penalty, burst_cache); -+} -+ -+static void sched_post_fork_bore(struct task_struct *p) { -+ if (p->sched_class == &fair_sched_class) -+ inherit_burst(p); -+ p->se.burst_penalty = p->se.prev_burst_penalty; -+} -+#endif // CONFIG_SCHED_BORE -+ - /* - * Perform scheduler related setup for a newly forked process p. - * p is forked by current. -@@ -4531,6 +4663,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) - p->se.prev_sum_exec_runtime = 0; - p->se.nr_migrations = 0; - p->se.vruntime = 0; -+#ifdef CONFIG_SCHED_BORE -+ sched_fork_bore(p); -+#endif // CONFIG_SCHED_BORE - p->se.vlag = 0; - p->se.slice = sysctl_sched_base_slice; - INIT_LIST_HEAD(&p->se.group_node); -@@ -4846,6 +4981,9 @@ void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs) - - void sched_post_fork(struct task_struct *p) - { -+#ifdef CONFIG_SCHED_BORE -+ sched_post_fork_bore(p); -+#endif // CONFIG_SCHED_BORE - uclamp_post_fork(p); - } - -@@ -9933,6 +10071,11 @@ void __init sched_init(void) - BUG_ON(&dl_sched_class != &stop_sched_class + 1); - #endif - -+#ifdef CONFIG_SCHED_BORE -+ sched_init_bore(); -+ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 5.2.5 by Masahito Suzuki"); -+#endif // CONFIG_SCHED_BORE -+ - wait_bit_init(); - - #ifdef CONFIG_FAIR_GROUP_SCHED -diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c -index c1eb9a1afd13..e2da8d773877 100644 ---- a/kernel/sched/debug.c -+++ b/kernel/sched/debug.c -@@ -167,7 +167,52 @@ static const struct file_operations sched_feat_fops = { - }; - - #ifdef CONFIG_SMP -+#ifdef CONFIG_SCHED_BORE -+static ssize_t sched_min_base_slice_write(struct file *filp, const char __user *ubuf, -+ size_t cnt, loff_t *ppos) -+{ -+ char buf[16]; -+ unsigned int value; -+ -+ if (cnt > 15) -+ cnt = 15; -+ -+ if 
(copy_from_user(&buf, ubuf, cnt)) -+ return -EFAULT; -+ buf[cnt] = '\0'; -+ -+ if (kstrtouint(buf, 10, &value)) -+ return -EINVAL; - -+ if (!value) -+ return -EINVAL; -+ -+ sysctl_sched_min_base_slice = value; -+ sched_update_min_base_slice(); -+ -+ *ppos += cnt; -+ return cnt; -+} -+ -+static int sched_min_base_slice_show(struct seq_file *m, void *v) -+{ -+ seq_printf(m, "%d\n", sysctl_sched_min_base_slice); -+ return 0; -+} -+ -+static int sched_min_base_slice_open(struct inode *inode, struct file *filp) -+{ -+ return single_open(filp, sched_min_base_slice_show, NULL); -+} -+ -+static const struct file_operations sched_min_base_slice_fops = { -+ .open = sched_min_base_slice_open, -+ .write = sched_min_base_slice_write, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; -+#else // !CONFIG_SCHED_BORE - static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) - { -@@ -213,7 +258,7 @@ static const struct file_operations sched_scaling_fops = { - .llseek = seq_lseek, - .release = single_release, - }; -- -+#endif // CONFIG_SCHED_BORE - #endif /* SMP */ - - #ifdef CONFIG_PREEMPT_DYNAMIC -@@ -347,13 +392,20 @@ static __init int sched_init_debug(void) - debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops); - #endif - -+#ifdef CONFIG_SCHED_BORE -+ debugfs_create_file("min_base_slice_ns", 0644, debugfs_sched, NULL, &sched_min_base_slice_fops); -+ debugfs_create_u32("base_slice_ns", 0400, debugfs_sched, &sysctl_sched_base_slice); -+#else // !CONFIG_SCHED_BORE - debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice); -+#endif // CONFIG_SCHED_BORE - - debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms); - debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once); - - #ifdef CONFIG_SMP -+#if !defined(CONFIG_SCHED_BORE) - debugfs_create_file("tunable_scaling", 0644, 
debugfs_sched, NULL, &sched_scaling_fops); -+#endif // CONFIG_SCHED_BORE - debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost); - debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate); - -@@ -596,6 +648,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) - SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)), - SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime))); - -+#ifdef CONFIG_SCHED_BORE -+ SEQ_printf(m, " %2d", p->se.burst_score); -+#endif // CONFIG_SCHED_BORE - #ifdef CONFIG_NUMA_BALANCING - SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); - #endif -@@ -1069,6 +1124,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, - - P(se.load.weight); - #ifdef CONFIG_SMP -+#ifdef CONFIG_SCHED_BORE -+ P(se.burst_score); -+#endif // CONFIG_SCHED_BORE - P(se.avg.load_sum); - P(se.avg.runnable_sum); - P(se.avg.util_sum); -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index c2bb8eb1d6ba..9e8b220f27e6 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -19,6 +19,9 @@ - * - * Adaptive scheduling granularity, math enhancements by Peter Zijlstra - * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra -+ * -+ * Burst-Oriented Response Enhancer (BORE) CPU Scheduler -+ * Copyright (C) 2021-2024 Masahito Suzuki - */ - #include - #include -@@ -64,28 +67,126 @@ - * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus) - * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus - * -- * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) -+ * (BORE default SCHED_TUNABLESCALING_NONE = *1 constant) -+ * (EEVDF default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) - */ -+#ifdef CONFIG_SCHED_BORE -+unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; -+#else // !CONFIG_SCHED_BORE - unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; -+#endif // CONFIG_SCHED_BORE - - /* - * Minimal 
preemption granularity for CPU-bound tasks: - * -- * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) -+ * (BORE default: max(1 sec / HZ, min_base_slice) constant, units: nanoseconds) -+ * (EEVDF default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) - */ --#ifdef CONFIG_CACHY --unsigned int sysctl_sched_base_slice = 350000ULL; --static unsigned int normalized_sysctl_sched_base_slice = 350000ULL; --#else -+#ifdef CONFIG_SCHED_BORE -+unsigned int sysctl_sched_base_slice = 1000000000ULL / HZ; -+static unsigned int configured_sched_base_slice = 1000000000ULL / HZ; -+unsigned int sysctl_sched_min_base_slice = CONFIG_MIN_BASE_SLICE_NS; -+#else // !CONFIG_SCHED_BORE - unsigned int sysctl_sched_base_slice = 750000ULL; - static unsigned int normalized_sysctl_sched_base_slice = 750000ULL; --#endif -+#endif // CONFIG_SCHED_BORE - --#ifdef CONFIG_CACHY --const_debug unsigned int sysctl_sched_migration_cost = 300000UL; --#else - const_debug unsigned int sysctl_sched_migration_cost = 500000UL; --#endif -+ -+#ifdef CONFIG_SCHED_BORE -+u8 __read_mostly sched_bore = 1; -+u8 __read_mostly sched_burst_smoothness_long = 1; -+u8 __read_mostly sched_burst_smoothness_short = 0; -+u8 __read_mostly sched_burst_fork_atavistic = 2; -+u8 __read_mostly sched_burst_penalty_offset = 22; -+uint __read_mostly sched_burst_penalty_scale = 1280; -+uint __read_mostly sched_burst_cache_lifetime = 60000000; -+uint __read_mostly sched_deadline_boost_mask = 0x81; // ENQUEUE_INITIAL | ENQUEUE_WAKEUP -+uint __read_mostly sched_deadline_preserve_mask = 0x42; // ENQUEUE_RESTORE | ENQUEUE_MIGRATED -+static int __maybe_unused sixty_four = 64; -+static int __maybe_unused maxval_12_bits = 4095; -+ -+#define MAX_BURST_PENALTY (39U <<2) -+ -+static inline u32 log2plus1_u64_u32f8(u64 v) { -+ u32 msb = fls64(v); -+ s32 excess_bits = msb - 9; -+ u8 fractional = (0 <= excess_bits)? 
v >> excess_bits: v << -excess_bits; -+ return msb << 8 | fractional; -+} -+ -+static inline u32 calc_burst_penalty(u64 burst_time) { -+ u32 greed, tolerance, penalty, scaled_penalty; -+ -+ greed = log2plus1_u64_u32f8(burst_time); -+ tolerance = sched_burst_penalty_offset << 8; -+ penalty = max(0, (s32)greed - (s32)tolerance); -+ scaled_penalty = penalty * sched_burst_penalty_scale >> 16; -+ -+ return min(MAX_BURST_PENALTY, scaled_penalty); -+} -+ -+static inline u64 scale_slice(u64 delta, struct sched_entity *se) { -+ return mul_u64_u32_shr(delta, sched_prio_to_wmult[se->burst_score], 22); -+} -+ -+static inline u64 __unscale_slice(u64 delta, u8 score) { -+ return mul_u64_u32_shr(delta, sched_prio_to_weight[score], 10); -+} -+ -+static inline u64 unscale_slice(u64 delta, struct sched_entity *se) { -+ return __unscale_slice(delta, se->burst_score); -+} -+ -+void reweight_task(struct task_struct *p, int prio); -+ -+static void update_burst_score(struct sched_entity *se) { -+ if (!entity_is_task(se)) return; -+ struct task_struct *p = task_of(se); -+ u8 prio = p->static_prio - MAX_RT_PRIO; -+ u8 prev_prio = min(39, prio + se->burst_score); -+ -+ se->burst_score = se->burst_penalty >> 2; -+ -+ u8 new_prio = min(39, prio + se->burst_score); -+ if (new_prio != prev_prio) -+ reweight_task(p, new_prio); -+} -+ -+static void update_burst_penalty(struct sched_entity *se) { -+ se->curr_burst_penalty = calc_burst_penalty(se->burst_time); -+ se->burst_penalty = max(se->prev_burst_penalty, se->curr_burst_penalty); -+ update_burst_score(se); -+} -+ -+static inline u32 binary_smooth(u32 new, u32 old) { -+ int increment = new - old; -+ return (0 <= increment)? 
-+ old + ( increment >> (int)sched_burst_smoothness_long): -+ old - (-increment >> (int)sched_burst_smoothness_short); -+} -+ -+static void restart_burst(struct sched_entity *se) { -+ se->burst_penalty = se->prev_burst_penalty = -+ binary_smooth(se->curr_burst_penalty, se->prev_burst_penalty); -+ se->curr_burst_penalty = 0; -+ se->burst_time = 0; -+ update_burst_score(se); -+} -+ -+static void restart_burst_rescale_deadline(struct sched_entity *se) { -+ s64 vscaled, wremain, vremain = se->deadline - se->vruntime; -+ u8 prev_score = se->burst_score; -+ restart_burst(se); -+ if (prev_score > se->burst_score) { -+ wremain = __unscale_slice(abs(vremain), prev_score); -+ vscaled = scale_slice(wremain, se); -+ if (unlikely(vremain < 0)) -+ vscaled = -vscaled; -+ se->deadline = se->vruntime + vscaled; -+ } -+} -+#endif // CONFIG_SCHED_BORE - - static int __init setup_sched_thermal_decay_shift(char *str) - { -@@ -130,12 +231,8 @@ int __weak arch_asym_cpu_priority(int cpu) - * - * (default: 5 msec, units: microseconds) - */ --#ifdef CONFIG_CACHY --static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; --#else - static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; - #endif --#endif - - #ifdef CONFIG_NUMA_BALANCING - /* Restrict the NUMA promotion throughput (MB/s) for each target node. 
*/ -@@ -144,6 +241,83 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536; - - #ifdef CONFIG_SYSCTL - static struct ctl_table sched_fair_sysctls[] = { -+#ifdef CONFIG_SCHED_BORE -+ { -+ .procname = "sched_bore", -+ .data = &sched_bore, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ONE, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_smoothness_long", -+ .data = &sched_burst_smoothness_long, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_smoothness_short", -+ .data = &sched_burst_smoothness_short, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ { -+ .procname = "sched_burst_fork_atavistic", -+ .data = &sched_burst_fork_atavistic, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_THREE, -+ }, -+ { -+ .procname = "sched_burst_penalty_offset", -+ .data = &sched_burst_penalty_offset, -+ .maxlen = sizeof(u8), -+ .mode = 0644, -+ .proc_handler = proc_dou8vec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &sixty_four, -+ }, -+ { -+ .procname = "sched_burst_penalty_scale", -+ .data = &sched_burst_penalty_scale, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &maxval_12_bits, -+ }, -+ { -+ .procname = "sched_burst_cache_lifetime", -+ .data = &sched_burst_cache_lifetime, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec, -+ }, -+ { -+ .procname = "sched_deadline_boost_mask", -+ .data = &sched_deadline_boost_mask, -+ .maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec, -+ }, -+ { -+ .procname = "sched_deadline_preserve_mask", -+ .data = &sched_deadline_preserve_mask, -+ 
.maxlen = sizeof(uint), -+ .mode = 0644, -+ .proc_handler = proc_douintvec, -+ }, -+#endif // CONFIG_SCHED_BORE - #ifdef CONFIG_CFS_BANDWIDTH - { - .procname = "sched_cfs_bandwidth_slice_us", -@@ -201,6 +375,13 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w) - * - * This idea comes from the SD scheduler of Con Kolivas: - */ -+#ifdef CONFIG_SCHED_BORE -+static void update_sysctl(void) { -+ sysctl_sched_base_slice = -+ max(sysctl_sched_min_base_slice, configured_sched_base_slice); -+} -+void sched_update_min_base_slice(void) { update_sysctl(); } -+#else // !CONFIG_SCHED_BORE - static unsigned int get_update_sysctl_factor(void) - { - unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8); -@@ -231,6 +412,7 @@ static void update_sysctl(void) - SET_SYSCTL(sched_base_slice); - #undef SET_SYSCTL - } -+#endif // CONFIG_SCHED_BORE - - void __init sched_init_granularity(void) - { -@@ -708,6 +890,9 @@ static s64 entity_lag(u64 avruntime, struct sched_entity *se) - - vlag = avruntime - se->vruntime; - limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); -+#ifdef CONFIG_SCHED_BORE -+ limit >>= 1; -+#endif // CONFIG_SCHED_BORE - - return clamp(vlag, -limit, limit); - } -@@ -868,6 +1053,39 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) - return __node_2_se(left); - } - -+static inline bool pick_curr(struct cfs_rq *cfs_rq, -+ struct sched_entity *curr, struct sched_entity *wakee) -+{ -+ /* -+ * Nothing to preserve... -+ */ -+ if (!curr || !sched_feat(RESPECT_SLICE)) -+ return false; -+ -+ /* -+ * Allow preemption at the 0-lag point -- even if not all of the slice -+ * is consumed. Note: placement of positive lag can push V left and render -+ * @curr instantly ineligible irrespective the time on-cpu. -+ */ -+ if (sched_feat(RUN_TO_PARITY) && !entity_eligible(cfs_rq, curr)) -+ return false; -+ -+ /* -+ * Don't preserve @curr when the @wakee has a shorter slice and earlier -+ * deadline. 
IOW, explicitly allow preemption. -+ */ -+ if (sched_feat(PREEMPT_SHORT) && wakee && -+ wakee->slice < curr->slice && -+ (s64)(wakee->deadline - curr->deadline) < 0) -+ return false; -+ -+ /* -+ * Preserve @curr to allow it to finish its first slice. -+ * See the HACK in set_next_entity(). -+ */ -+ return curr->vlag == curr->deadline; -+} -+ - /* - * Earliest Eligible Virtual Deadline First - * -@@ -887,28 +1105,27 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) - * - * Which allows tree pruning through eligibility. - */ --static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) -+static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *wakee) - { - struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node; - struct sched_entity *se = __pick_first_entity(cfs_rq); - struct sched_entity *curr = cfs_rq->curr; - struct sched_entity *best = NULL; - -+ if (curr && !curr->on_rq) -+ curr = NULL; -+ - /* - * We can safely skip eligibility check if there is only one entity - * in this cfs_rq, saving some cycles. - */ - if (cfs_rq->nr_running == 1) -- return curr && curr->on_rq ? curr : se; -- -- if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr))) -- curr = NULL; -+ return curr ?: se; - - /* -- * Once selected, run a task until it either becomes non-eligible or -- * until it gets a new slice. See the HACK in set_next_entity(). -+ * Preserve @curr to let it finish its slice. 
- */ -- if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline) -+ if (pick_curr(cfs_rq, curr, wakee)) - return curr; - - /* Pick the leftmost entity if it's eligible */ -@@ -967,6 +1184,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) - * Scheduling class statistics methods: - */ - #ifdef CONFIG_SMP -+#if !defined(CONFIG_SCHED_BORE) - int sched_update_scaling(void) - { - unsigned int factor = get_update_sysctl_factor(); -@@ -978,6 +1196,7 @@ int sched_update_scaling(void) - - return 0; - } -+#endif // CONFIG_SCHED_BORE - #endif - #endif - -@@ -1178,7 +1397,13 @@ static void update_curr(struct cfs_rq *cfs_rq) - if (unlikely(delta_exec <= 0)) - return; - -+#ifdef CONFIG_SCHED_BORE -+ curr->burst_time += delta_exec; -+ update_burst_penalty(curr); -+ curr->vruntime += max(1ULL, calc_delta_fair(delta_exec, curr)); -+#else // !CONFIG_SCHED_BORE - curr->vruntime += calc_delta_fair(delta_exec, curr); -+#endif // CONFIG_SCHED_BORE - update_deadline(cfs_rq, curr); - update_min_vruntime(cfs_rq); - -@@ -5193,6 +5418,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - s64 lag = 0; - - se->slice = sysctl_sched_base_slice; -+#ifdef CONFIG_SCHED_BORE -+ if (flags & ~sched_deadline_boost_mask & sched_deadline_preserve_mask) -+ vslice = se->deadline - se->vruntime; -+ else -+#endif // CONFIG_SCHED_BORE - vslice = calc_delta_fair(se->slice, se); - - /* -@@ -5203,6 +5433,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - * - * EEVDF: placement strategy #1 / #2 - */ -+#ifdef CONFIG_SCHED_BORE -+ if (se->vlag) -+#endif // CONFIG_SCHED_BORE - if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) { - struct sched_entity *curr = cfs_rq->curr; - unsigned long load; -@@ -5278,7 +5511,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - * on average, halfway through their slice, as such start tasks - * off with half a slice to ease into the competition. 
- */ -+#if !defined(CONFIG_SCHED_BORE) - if (sched_feat(PLACE_DEADLINE_INITIAL) && (flags & ENQUEUE_INITIAL)) -+#else // CONFIG_SCHED_BORE -+ if (flags & sched_deadline_boost_mask) -+#endif // CONFIG_SCHED_BORE - vslice /= 2; - - /* -@@ -5492,7 +5729,7 @@ pick_next_entity(struct cfs_rq *cfs_rq) - cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next)) - return cfs_rq->next; - -- return pick_eevdf(cfs_rq); -+ return pick_eevdf(cfs_rq, NULL); - } - - static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq); -@@ -6860,6 +7097,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) - bool was_sched_idle = sched_idle_rq(rq); - - util_est_dequeue(&rq->cfs, p); -+#ifdef CONFIG_SCHED_BORE -+ if (task_sleep) { -+ cfs_rq = cfs_rq_of(se); -+ if (cfs_rq->curr == se) -+ update_curr(cfs_rq); -+ restart_burst(se); -+ } -+#endif // CONFIG_SCHED_BORE - - for_each_sched_entity(se) { - cfs_rq = cfs_rq_of(se); -@@ -8425,10 +8670,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int - cfs_rq = cfs_rq_of(se); - update_curr(cfs_rq); - -- /* -- * XXX pick_eevdf(cfs_rq) != se ? -- */ -- if (pick_eevdf(cfs_rq) == pse) -+ if (pick_eevdf(cfs_rq, pse) == pse) - goto preempt; - - return; -@@ -8646,16 +8888,25 @@ static void yield_task_fair(struct rq *rq) - /* - * Are we the only task in the tree? - */ -+#if !defined(CONFIG_SCHED_BORE) - if (unlikely(rq->nr_running == 1)) - return; - - clear_buddies(cfs_rq, se); -+#endif // CONFIG_SCHED_BORE - - update_rq_clock(rq); - /* - * Update run-time statistics of the 'current'. 
- */ - update_curr(cfs_rq); -+#ifdef CONFIG_SCHED_BORE -+ restart_burst_rescale_deadline(se); -+ if (unlikely(rq->nr_running == 1)) -+ return; -+ -+ clear_buddies(cfs_rq, se); -+#endif // CONFIG_SCHED_BORE - /* - * Tell update_rq_clock() that we've just updated, - * so we don't do microscopic update in schedule() -@@ -12723,6 +12974,9 @@ static void task_fork_fair(struct task_struct *p) - curr = cfs_rq->curr; - if (curr) - update_curr(cfs_rq); -+#ifdef CONFIG_SCHED_BORE -+ update_burst_score(se); -+#endif // CONFIG_SCHED_BORE - place_entity(cfs_rq, se, ENQUEUE_INITIAL); - rq_unlock(rq, &rf); - } -diff --git a/kernel/sched/features.h b/kernel/sched/features.h -index 143f55df890b..3aad8900c35e 100644 ---- a/kernel/sched/features.h -+++ b/kernel/sched/features.h -@@ -5,8 +5,28 @@ - * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled. - */ - SCHED_FEAT(PLACE_LAG, true) -+/* -+ * Give new tasks half a slice to ease into the competition. -+ */ -+#if !defined(CONFIG_SCHED_BORE) - SCHED_FEAT(PLACE_DEADLINE_INITIAL, true) --SCHED_FEAT(RUN_TO_PARITY, true) -+#endif // CONFIG_SCHED_BORE -+/* -+ * Inhibit (wakeup) preemption until the current task has exhausted its slice. -+ */ -+#ifdef CONFIG_SCHED_BORE -+SCHED_FEAT(RESPECT_SLICE, false) -+#else // !CONFIG_SCHED_BORE -+SCHED_FEAT(RESPECT_SLICE, true) -+#endif // CONFIG_SCHED_BORE -+/* -+ * Relax RESPECT_SLICE to allow preemption once current has reached 0-lag. 
-+ */ -+SCHED_FEAT(RUN_TO_PARITY, false) -+/* -+ * Allow tasks with a shorter slice to disregard RESPECT_SLICE -+ */ -+SCHED_FEAT(PREEMPT_SHORT, true) - - /* - * Prefer to schedule the task we woke last (assuming it failed -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index 10c1caff5e06..5d845dbd0cf9 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -1969,7 +1969,11 @@ static inline void dirty_sched_domain_sysctl(int cpu) - } - #endif - -+#ifdef CONFIG_SCHED_BORE -+extern void sched_update_min_base_slice(void); -+#else // !CONFIG_SCHED_BORE - extern int sched_update_scaling(void); -+#endif // CONFIG_SCHED_BORE - - static inline const struct cpumask *task_user_cpus(struct task_struct *p) - { -@@ -2554,6 +2558,9 @@ extern const_debug unsigned int sysctl_sched_nr_migrate; - extern const_debug unsigned int sysctl_sched_migration_cost; - - extern unsigned int sysctl_sched_base_slice; -+#ifdef CONFIG_SCHED_BORE -+extern unsigned int sysctl_sched_min_base_slice; -+#endif // CONFIG_SCHED_BORE - - #ifdef CONFIG_SCHED_DEBUG - extern int sysctl_resched_latency_warn_ms; --- -2.46.0.rc0 diff --git a/patches/cachyos/0001-cachyos-base-all.patch b/patches/cachyos/0001-cachyos-base-all.patch deleted file mode 100644 index b5b57c9..0000000 --- a/patches/cachyos/0001-cachyos-base-all.patch +++ /dev/null @@ -1,53760 +0,0 @@ -From 35b09dfe053ff6308ab58d44175727d0d20f4ce0 Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Mon, 15 Jul 2024 13:23:07 +0200 -Subject: [PATCH 01/11] amd-pstate - -Signed-off-by: Peter Jung ---- - Documentation/admin-guide/pm/amd-pstate.rst | 18 +- - arch/x86/include/asm/cpufeatures.h | 1 + - arch/x86/include/asm/msr-index.h | 2 + - arch/x86/kernel/cpu/scattered.c | 1 + - drivers/cpufreq/Kconfig.x86 | 1 + - drivers/cpufreq/acpi-cpufreq.c | 3 +- - drivers/cpufreq/amd-pstate-ut.c | 12 +- - drivers/cpufreq/amd-pstate.c | 350 ++++++++++++++------ - drivers/cpufreq/amd-pstate.h | 2 + - drivers/cpufreq/cpufreq.c | 11 +- - 10 files 
changed, 281 insertions(+), 120 deletions(-) - -diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst -index 1e0d101b020a..d0324d44f548 100644 ---- a/Documentation/admin-guide/pm/amd-pstate.rst -+++ b/Documentation/admin-guide/pm/amd-pstate.rst -@@ -281,6 +281,22 @@ integer values defined between 0 to 255 when EPP feature is enabled by platform - firmware, if EPP feature is disabled, driver will ignore the written value - This attribute is read-write. - -+``boost`` -+The `boost` sysfs attribute provides control over the CPU core -+performance boost, allowing users to manage the maximum frequency limitation -+of the CPU. This attribute can be used to enable or disable the boost feature -+on individual CPUs. -+ -+When the boost feature is enabled, the CPU can dynamically increase its frequency -+beyond the base frequency, providing enhanced performance for demanding workloads. -+On the other hand, disabling the boost feature restricts the CPU to operate at the -+base frequency, which may be desirable in certain scenarios to prioritize power -+efficiency or manage temperature. -+ -+To manipulate the `boost` attribute, users can write a value of `0` to disable the -+boost or `1` to enable it, for the respective CPU using the sysfs path -+`/sys/devices/system/cpu/cpuX/cpufreq/boost`, where `X` represents the CPU number. -+ - Other performance and frequency values can be read back from - ``/sys/devices/system/cpu/cpuX/acpi_cppc/``, see :ref:`cppc_sysfs`. - -@@ -406,7 +422,7 @@ control its functionality at the system level. They are located in the - ``/sys/devices/system/cpu/amd_pstate/`` directory and affect all CPUs. - - ``status`` -- Operation mode of the driver: "active", "passive" or "disable". -+ Operation mode of the driver: "active", "passive", "guided" or "disable". 
- - "active" - The driver is functional and in the ``active mode`` -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 3c7434329661..6c128d463a14 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -470,6 +470,7 @@ - #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ - #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ - #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ -+#define X86_FEATURE_FAST_CPPC (21*32 + 5) /* "" AMD Fast CPPC */ - - /* - * BUG word(s) -diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h -index e022e6eb766c..384739d592af 100644 ---- a/arch/x86/include/asm/msr-index.h -+++ b/arch/x86/include/asm/msr-index.h -@@ -781,6 +781,8 @@ - #define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) - #define MSR_K7_FID_VID_CTL 0xc0010041 - #define MSR_K7_FID_VID_STATUS 0xc0010042 -+#define MSR_K7_HWCR_CPB_DIS_BIT 25 -+#define MSR_K7_HWCR_CPB_DIS BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT) - - /* K6 MSRs */ - #define MSR_K6_WHCR 0xc0000082 -diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c -index af5aa2c754c2..c84c30188fdf 100644 ---- a/arch/x86/kernel/cpu/scattered.c -+++ b/arch/x86/kernel/cpu/scattered.c -@@ -45,6 +45,7 @@ static const struct cpuid_bit cpuid_bits[] = { - { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, - { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, - { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, -+ { X86_FEATURE_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 }, - { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, - { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, - { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, -diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 -index 438c9e75a04d..97c2d4f15d76 100644 ---- a/drivers/cpufreq/Kconfig.x86 
-+++ b/drivers/cpufreq/Kconfig.x86 -@@ -71,6 +71,7 @@ config X86_AMD_PSTATE_DEFAULT_MODE - config X86_AMD_PSTATE_UT - tristate "selftest for AMD Processor P-State driver" - depends on X86 && ACPI_PROCESSOR -+ depends on X86_AMD_PSTATE - default n - help - This kernel module is used for testing. It's safe to say M here. -diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c -index 4ac3a35dcd98..f4f8587c4ea0 100644 ---- a/drivers/cpufreq/acpi-cpufreq.c -+++ b/drivers/cpufreq/acpi-cpufreq.c -@@ -50,8 +50,6 @@ enum { - #define AMD_MSR_RANGE (0x7) - #define HYGON_MSR_RANGE (0x7) - --#define MSR_K7_HWCR_CPB_DIS (1ULL << 25) -- - struct acpi_cpufreq_data { - unsigned int resume; - unsigned int cpu_feature; -@@ -139,6 +137,7 @@ static int set_boost(struct cpufreq_policy *policy, int val) - (void *)(long)val, 1); - pr_debug("CPU %*pbl: Core Boosting %s.\n", - cpumask_pr_args(policy->cpus), str_enabled_disabled(val)); -+ policy->boost_enabled = val; - - return 0; - } -diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c -index fc275d41d51e..66b73c308ce6 100644 ---- a/drivers/cpufreq/amd-pstate-ut.c -+++ b/drivers/cpufreq/amd-pstate-ut.c -@@ -202,6 +202,7 @@ static void amd_pstate_ut_check_freq(u32 index) - int cpu = 0; - struct cpufreq_policy *policy = NULL; - struct amd_cpudata *cpudata = NULL; -+ u32 nominal_freq_khz; - - for_each_possible_cpu(cpu) { - policy = cpufreq_cpu_get(cpu); -@@ -209,13 +210,14 @@ static void amd_pstate_ut_check_freq(u32 index) - break; - cpudata = policy->driver_data; - -- if (!((cpudata->max_freq >= cpudata->nominal_freq) && -- (cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) && -+ nominal_freq_khz = cpudata->nominal_freq*1000; -+ if (!((cpudata->max_freq >= nominal_freq_khz) && -+ (nominal_freq_khz > cpudata->lowest_nonlinear_freq) && - (cpudata->lowest_nonlinear_freq > cpudata->min_freq) && - (cpudata->min_freq > 0))) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - 
pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n", -- __func__, cpu, cpudata->max_freq, cpudata->nominal_freq, -+ __func__, cpu, cpudata->max_freq, nominal_freq_khz, - cpudata->lowest_nonlinear_freq, cpudata->min_freq); - goto skip_test; - } -@@ -229,13 +231,13 @@ static void amd_pstate_ut_check_freq(u32 index) - - if (cpudata->boost_supported) { - if ((policy->max == cpudata->max_freq) || -- (policy->max == cpudata->nominal_freq)) -+ (policy->max == nominal_freq_khz)) - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; - else { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n", - __func__, cpu, policy->max, cpudata->max_freq, -- cpudata->nominal_freq); -+ nominal_freq_khz); - goto skip_test; - } - } else { -diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c -index 9ad62dbe8bfb..804fab4ebb26 100644 ---- a/drivers/cpufreq/amd-pstate.c -+++ b/drivers/cpufreq/amd-pstate.c -@@ -51,6 +51,7 @@ - - #define AMD_PSTATE_TRANSITION_LATENCY 20000 - #define AMD_PSTATE_TRANSITION_DELAY 1000 -+#define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY 600 - #define CPPC_HIGHEST_PERF_PERFORMANCE 196 - #define CPPC_HIGHEST_PERF_DEFAULT 166 - -@@ -85,15 +86,6 @@ struct quirk_entry { - u32 lowest_freq; - }; - --/* -- * TODO: We need more time to fine tune processors with shared memory solution -- * with community together. -- * -- * There are some performance drops on the CPU benchmarks which reports from -- * Suse. We are co-working with them to fine tune the shared memory solution. So -- * we disable it by default to go acpi-cpufreq on these processors and add a -- * module parameter to be able to enable it manually for debugging. 
-- */ - static struct cpufreq_driver *current_pstate_driver; - static struct cpufreq_driver amd_pstate_driver; - static struct cpufreq_driver amd_pstate_epp_driver; -@@ -157,7 +149,7 @@ static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi) - * broken BIOS lack of nominal_freq and lowest_freq capabilities - * definition in ACPI tables - */ -- if (boot_cpu_has(X86_FEATURE_ZEN2)) { -+ if (cpu_feature_enabled(X86_FEATURE_ZEN2)) { - quirks = dmi->driver_data; - pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident); - return 1; -@@ -199,7 +191,7 @@ static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) - u64 epp; - int ret; - -- if (boot_cpu_has(X86_FEATURE_CPPC)) { -+ if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - if (!cppc_req_cached) { - epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, - &cppc_req_cached); -@@ -247,12 +239,32 @@ static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata) - return index; - } - -+static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, -+ u32 des_perf, u32 max_perf, bool fast_switch) -+{ -+ if (fast_switch) -+ wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached)); -+ else -+ wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, -+ READ_ONCE(cpudata->cppc_req_cached)); -+} -+ -+DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf); -+ -+static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, -+ u32 min_perf, u32 des_perf, -+ u32 max_perf, bool fast_switch) -+{ -+ static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, -+ max_perf, fast_switch); -+} -+ - static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) - { - int ret; - struct cppc_perf_ctrls perf_ctrls; - -- if (boot_cpu_has(X86_FEATURE_CPPC)) { -+ if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - u64 value = READ_ONCE(cpudata->cppc_req_cached); - - value &= ~GENMASK_ULL(31, 24); -@@ -263,6 +275,9 @@ static int amd_pstate_set_epp(struct 
amd_cpudata *cpudata, u32 epp) - if (!ret) - cpudata->epp_cached = epp; - } else { -+ amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, -+ cpudata->max_limit_perf, false); -+ - perf_ctrls.energy_perf = epp; - ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); - if (ret) { -@@ -281,10 +296,8 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, - int epp = -EINVAL; - int ret; - -- if (!pref_index) { -- pr_debug("EPP pref_index is invalid\n"); -- return -EINVAL; -- } -+ if (!pref_index) -+ epp = cpudata->epp_default; - - if (epp == -EINVAL) - epp = epp_values[pref_index]; -@@ -452,16 +465,6 @@ static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) - return static_call(amd_pstate_init_perf)(cpudata); - } - --static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, -- u32 des_perf, u32 max_perf, bool fast_switch) --{ -- if (fast_switch) -- wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached)); -- else -- wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, -- READ_ONCE(cpudata->cppc_req_cached)); --} -- - static void cppc_update_perf(struct amd_cpudata *cpudata, - u32 min_perf, u32 des_perf, - u32 max_perf, bool fast_switch) -@@ -475,16 +478,6 @@ static void cppc_update_perf(struct amd_cpudata *cpudata, - cppc_set_perf(cpudata->cpu, &perf_ctrls); - } - --DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf); -- --static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, -- u32 min_perf, u32 des_perf, -- u32 max_perf, bool fast_switch) --{ -- static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, -- max_perf, fast_switch); --} -- - static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) - { - u64 aperf, mperf, tsc; -@@ -521,7 +514,10 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) - static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, - u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags) - { -+ unsigned long 
max_freq; -+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu); - u64 prev = READ_ONCE(cpudata->cppc_req_cached); -+ u32 nominal_perf = READ_ONCE(cpudata->nominal_perf); - u64 value = prev; - - min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, -@@ -530,6 +526,9 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, - cpudata->max_limit_perf); - des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); - -+ max_freq = READ_ONCE(cpudata->max_limit_freq); -+ policy->cur = div_u64(des_perf * max_freq, max_perf); -+ - if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { - min_perf = des_perf; - des_perf = 0; -@@ -541,6 +540,10 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, - value &= ~AMD_CPPC_DES_PERF(~0L); - value |= AMD_CPPC_DES_PERF(des_perf); - -+ /* limit the max perf when core performance boost feature is disabled */ -+ if (!cpudata->boost_supported) -+ max_perf = min_t(unsigned long, nominal_perf, max_perf); -+ - value &= ~AMD_CPPC_MAX_PERF(~0L); - value |= AMD_CPPC_MAX_PERF(max_perf); - -@@ -651,10 +654,9 @@ static void amd_pstate_adjust_perf(unsigned int cpu, - unsigned long capacity) - { - unsigned long max_perf, min_perf, des_perf, -- cap_perf, lowest_nonlinear_perf, max_freq; -+ cap_perf, lowest_nonlinear_perf; - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - struct amd_cpudata *cpudata = policy->driver_data; -- unsigned int target_freq; - - if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) - amd_pstate_update_min_max_limit(policy); -@@ -662,7 +664,6 @@ static void amd_pstate_adjust_perf(unsigned int cpu, - - cap_perf = READ_ONCE(cpudata->highest_perf); - lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); -- max_freq = READ_ONCE(cpudata->max_freq); - - des_perf = cap_perf; - if (target_perf < capacity) -@@ -680,51 +681,111 @@ static void amd_pstate_adjust_perf(unsigned int 
cpu, - max_perf = min_perf; - - des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); -- target_freq = div_u64(des_perf * max_freq, max_perf); -- policy->cur = target_freq; - - amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true, - policy->governor->flags); - cpufreq_cpu_put(policy); - } - --static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) -+static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) - { - struct amd_cpudata *cpudata = policy->driver_data; -+ struct cppc_perf_ctrls perf_ctrls; -+ u32 highest_perf, nominal_perf, nominal_freq, max_freq; - int ret; - -- if (!cpudata->boost_supported) { -- pr_err("Boost mode is not supported by this processor or SBIOS\n"); -- return -EINVAL; -+ highest_perf = READ_ONCE(cpudata->highest_perf); -+ nominal_perf = READ_ONCE(cpudata->nominal_perf); -+ nominal_freq = READ_ONCE(cpudata->nominal_freq); -+ max_freq = READ_ONCE(cpudata->max_freq); -+ -+ if (boot_cpu_has(X86_FEATURE_CPPC)) { -+ u64 value = READ_ONCE(cpudata->cppc_req_cached); -+ -+ value &= ~GENMASK_ULL(7, 0); -+ value |= on ? highest_perf : nominal_perf; -+ WRITE_ONCE(cpudata->cppc_req_cached, value); -+ -+ wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); -+ } else { -+ perf_ctrls.max_perf = on ? highest_perf : nominal_perf; -+ ret = cppc_set_perf(cpudata->cpu, &perf_ctrls); -+ if (ret) { -+ cpufreq_cpu_release(policy); -+ pr_debug("Failed to set max perf on CPU:%d. 
ret:%d\n", -+ cpudata->cpu, ret); -+ return ret; -+ } - } - -- if (state) -- policy->cpuinfo.max_freq = cpudata->max_freq; -- else -- policy->cpuinfo.max_freq = cpudata->nominal_freq * 1000; -+ if (on) -+ policy->cpuinfo.max_freq = max_freq; -+ else if (policy->cpuinfo.max_freq > nominal_freq * 1000) -+ policy->cpuinfo.max_freq = nominal_freq * 1000; - - policy->max = policy->cpuinfo.max_freq; - -- ret = freq_qos_update_request(&cpudata->req[1], -- policy->cpuinfo.max_freq); -- if (ret < 0) -- return ret; -+ if (cppc_state == AMD_PSTATE_PASSIVE) { -+ ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq); -+ if (ret < 0) -+ pr_debug("Failed to update freq constraint: CPU%d\n", cpudata->cpu); -+ } - -- return 0; -+ return ret < 0 ? ret : 0; - } - --static void amd_pstate_boost_init(struct amd_cpudata *cpudata) -+static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) - { -- u32 highest_perf, nominal_perf; -+ struct amd_cpudata *cpudata = policy->driver_data; -+ int ret; - -- highest_perf = READ_ONCE(cpudata->highest_perf); -- nominal_perf = READ_ONCE(cpudata->nominal_perf); -+ if (!cpudata->boost_supported) { -+ pr_err("Boost mode is not supported by this processor or SBIOS\n"); -+ return -EOPNOTSUPP; -+ } -+ mutex_lock(&amd_pstate_driver_lock); -+ ret = amd_pstate_cpu_boost_update(policy, state); -+ WRITE_ONCE(cpudata->boost_state, !ret ? state : false); -+ policy->boost_enabled = !ret ? state : false; -+ refresh_frequency_limits(policy); -+ mutex_unlock(&amd_pstate_driver_lock); - -- if (highest_perf <= nominal_perf) -- return; -+ return ret; -+} -+ -+static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata) -+{ -+ u64 boost_val; -+ int ret = -1; -+ -+ /* -+ * If platform has no CPB support or disable it, initialize current driver -+ * boost_enabled state to be false, it is not an error for cpufreq core to handle. 
-+ */ -+ if (!cpu_feature_enabled(X86_FEATURE_CPB)) { -+ pr_debug_once("Boost CPB capabilities not present in the processor\n"); -+ ret = 0; -+ goto exit_err; -+ } - -- cpudata->boost_supported = true; -+ /* at least one CPU supports CPB, even if others fail later on to set up */ - current_pstate_driver->boost_enabled = true; -+ -+ ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val); -+ if (ret) { -+ pr_err_once("failed to read initial CPU boost state!\n"); -+ ret = -EIO; -+ goto exit_err; -+ } -+ -+ if (!(boost_val & MSR_K7_HWCR_CPB_DIS)) -+ cpudata->boost_supported = true; -+ -+ return 0; -+ -+exit_err: -+ cpudata->boost_supported = false; -+ return ret; - } - - static void amd_perf_ctl_reset(unsigned int cpu) -@@ -753,7 +814,7 @@ static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf) - { - int ret; - -- if (boot_cpu_has(X86_FEATURE_CPPC)) { -+ if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - u64 cap1; - - ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1); -@@ -849,8 +910,12 @@ static u32 amd_pstate_get_transition_delay_us(unsigned int cpu) - u32 transition_delay_ns; - - transition_delay_ns = cppc_get_transition_latency(cpu); -- if (transition_delay_ns == CPUFREQ_ETERNAL) -- return AMD_PSTATE_TRANSITION_DELAY; -+ if (transition_delay_ns == CPUFREQ_ETERNAL) { -+ if (cpu_feature_enabled(X86_FEATURE_FAST_CPPC)) -+ return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY; -+ else -+ return AMD_PSTATE_TRANSITION_DELAY; -+ } - - return transition_delay_ns / NSEC_PER_USEC; - } -@@ -921,12 +986,30 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) - WRITE_ONCE(cpudata->nominal_freq, nominal_freq); - WRITE_ONCE(cpudata->max_freq, max_freq); - -+ /** -+ * Below values need to be initialized correctly, otherwise driver will fail to load -+ * max_freq is calculated according to (nominal_freq * highest_perf)/nominal_perf -+ * lowest_nonlinear_freq is a value between [min_freq, nominal_freq] -+ * Check _CPC in ACPI table objects if any values are 
incorrect -+ */ -+ if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) { -+ pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n", -+ min_freq, max_freq, nominal_freq * 1000); -+ return -EINVAL; -+ } -+ -+ if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq * 1000) { -+ pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n", -+ lowest_nonlinear_freq, min_freq, nominal_freq * 1000); -+ return -EINVAL; -+ } -+ - return 0; - } - - static int amd_pstate_cpu_init(struct cpufreq_policy *policy) - { -- int min_freq, max_freq, nominal_freq, ret; -+ int min_freq, max_freq, ret; - struct device *dev; - struct amd_cpudata *cpudata; - -@@ -955,18 +1038,12 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) - if (ret) - goto free_cpudata1; - -+ ret = amd_pstate_init_boost_support(cpudata); -+ if (ret) -+ goto free_cpudata1; -+ - min_freq = READ_ONCE(cpudata->min_freq); - max_freq = READ_ONCE(cpudata->max_freq); -- nominal_freq = READ_ONCE(cpudata->nominal_freq); -- -- if (min_freq <= 0 || max_freq <= 0 || -- nominal_freq <= 0 || min_freq > max_freq) { -- dev_err(dev, -- "min_freq(%d) or max_freq(%d) or nominal_freq (%d) value is incorrect, check _CPC in ACPI tables\n", -- min_freq, max_freq, nominal_freq); -- ret = -EINVAL; -- goto free_cpudata1; -- } - - policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu); - policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu); -@@ -977,10 +1054,12 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) - policy->cpuinfo.min_freq = min_freq; - policy->cpuinfo.max_freq = max_freq; - -+ policy->boost_enabled = READ_ONCE(cpudata->boost_supported); -+ - /* It will be updated by governor */ - policy->cur = policy->cpuinfo.min_freq; - -- if (boot_cpu_has(X86_FEATURE_CPPC)) -+ if (cpu_feature_enabled(X86_FEATURE_CPPC)) - policy->fast_switch_possible = true; - 
- ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0], -@@ -1002,7 +1081,6 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) - - policy->driver_data = cpudata; - -- amd_pstate_boost_init(cpudata); - if (!current_pstate_driver->adjust_perf) - current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; - -@@ -1213,7 +1291,7 @@ static int amd_pstate_change_mode_without_dvr_change(int mode) - - cppc_state = mode; - -- if (boot_cpu_has(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE) -+ if (cpu_feature_enabled(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE) - return 0; - - for_each_present_cpu(cpu) { -@@ -1386,7 +1464,7 @@ static bool amd_pstate_acpi_pm_profile_undefined(void) - - static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) - { -- int min_freq, max_freq, nominal_freq, ret; -+ int min_freq, max_freq, ret; - struct amd_cpudata *cpudata; - struct device *dev; - u64 value; -@@ -1417,17 +1495,12 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) - if (ret) - goto free_cpudata1; - -+ ret = amd_pstate_init_boost_support(cpudata); -+ if (ret) -+ goto free_cpudata1; -+ - min_freq = READ_ONCE(cpudata->min_freq); - max_freq = READ_ONCE(cpudata->max_freq); -- nominal_freq = READ_ONCE(cpudata->nominal_freq); -- if (min_freq <= 0 || max_freq <= 0 || -- nominal_freq <= 0 || min_freq > max_freq) { -- dev_err(dev, -- "min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect, check _CPC in ACPI tables\n", -- min_freq, max_freq, nominal_freq); -- ret = -EINVAL; -- goto free_cpudata1; -- } - - policy->cpuinfo.min_freq = min_freq; - policy->cpuinfo.max_freq = max_freq; -@@ -1436,11 +1509,13 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) - - policy->driver_data = cpudata; - -- cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0); -+ cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata, 0); - - policy->min = policy->cpuinfo.min_freq; - policy->max = 
policy->cpuinfo.max_freq; - -+ policy->boost_enabled = READ_ONCE(cpudata->boost_supported); -+ - /* - * Set the policy to provide a valid fallback value in case - * the default cpufreq governor is neither powersave nor performance. -@@ -1451,7 +1526,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) - else - policy->policy = CPUFREQ_POLICY_POWERSAVE; - -- if (boot_cpu_has(X86_FEATURE_CPPC)) { -+ if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); - if (ret) - return ret; -@@ -1462,7 +1537,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) - return ret; - WRITE_ONCE(cpudata->cppc_cap1_cached, value); - } -- amd_pstate_boost_init(cpudata); - - return 0; - -@@ -1541,7 +1615,7 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) - epp = 0; - - /* Set initial EPP value */ -- if (boot_cpu_has(X86_FEATURE_CPPC)) { -+ if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - value &= ~GENMASK_ULL(31, 24); - value |= (u64)epp << 24; - } -@@ -1564,6 +1638,12 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) - - amd_pstate_epp_update_limit(policy); - -+ /* -+ * policy->cur is never updated with the amd_pstate_epp driver, but it -+ * is used as a stale frequency value. So, keep it within limits. 
-+ */ -+ policy->cur = policy->min; -+ - return 0; - } - -@@ -1580,7 +1660,7 @@ static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) - value = READ_ONCE(cpudata->cppc_req_cached); - max_perf = READ_ONCE(cpudata->highest_perf); - -- if (boot_cpu_has(X86_FEATURE_CPPC)) { -+ if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - } else { - perf_ctrls.max_perf = max_perf; -@@ -1614,7 +1694,7 @@ static void amd_pstate_epp_offline(struct cpufreq_policy *policy) - value = READ_ONCE(cpudata->cppc_req_cached); - - mutex_lock(&amd_pstate_limits_lock); -- if (boot_cpu_has(X86_FEATURE_CPPC)) { -+ if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN; - - /* Set max perf same as min perf */ -@@ -1718,6 +1798,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = { - .suspend = amd_pstate_epp_suspend, - .resume = amd_pstate_epp_resume, - .update_limits = amd_pstate_update_limits, -+ .set_boost = amd_pstate_set_boost, - .name = "amd-pstate-epp", - .attr = amd_pstate_epp_attr, - }; -@@ -1741,6 +1822,46 @@ static int __init amd_pstate_set_driver(int mode_idx) - return -EINVAL; - } - -+/** -+ * CPPC function is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F. -+ * show the debug message that helps to check if the CPU has CPPC support for loading issue. -+ */ -+static bool amd_cppc_supported(void) -+{ -+ struct cpuinfo_x86 *c = &cpu_data(0); -+ bool warn = false; -+ -+ if ((boot_cpu_data.x86 == 0x17) && (boot_cpu_data.x86_model < 0x30)) { -+ pr_debug_once("CPPC feature is not supported by the processor\n"); -+ return false; -+ } -+ -+ /* -+ * If the CPPC feature is disabled in the BIOS for processors that support MSR-based CPPC, -+ * the AMD Pstate driver may not function correctly. -+ * Check the CPPC flag and display a warning message if the platform supports CPPC. 
-+ * Note: below checking code will not abort the driver registeration process because of -+ * the code is added for debugging purposes. -+ */ -+ if (!cpu_feature_enabled(X86_FEATURE_CPPC)) { -+ if (cpu_feature_enabled(X86_FEATURE_ZEN1) || cpu_feature_enabled(X86_FEATURE_ZEN2)) { -+ if (c->x86_model > 0x60 && c->x86_model < 0xaf) -+ warn = true; -+ } else if (cpu_feature_enabled(X86_FEATURE_ZEN3) || cpu_feature_enabled(X86_FEATURE_ZEN4)) { -+ if ((c->x86_model > 0x10 && c->x86_model < 0x1F) || -+ (c->x86_model > 0x40 && c->x86_model < 0xaf)) -+ warn = true; -+ } else if (cpu_feature_enabled(X86_FEATURE_ZEN5)) { -+ warn = true; -+ } -+ } -+ -+ if (warn) -+ pr_warn_once("The CPPC feature is supported but currently disabled by the BIOS.\n" -+ "Please enable it if your BIOS has the CPPC option.\n"); -+ return true; -+} -+ - static int __init amd_pstate_init(void) - { - struct device *dev_root; -@@ -1749,6 +1870,11 @@ static int __init amd_pstate_init(void) - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) - return -ENODEV; - -+ /* show debug message only if CPPC is not supported */ -+ if (!amd_cppc_supported()) -+ return -EOPNOTSUPP; -+ -+ /* show warning message when BIOS broken or ACPI disabled */ - if (!acpi_cpc_valid()) { - pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n"); - return -ENODEV; -@@ -1763,35 +1889,43 @@ static int __init amd_pstate_init(void) - /* check if this machine need CPPC quirks */ - dmi_check_system(amd_pstate_quirks_table); - -- switch (cppc_state) { -- case AMD_PSTATE_UNDEFINED: -+ /* -+ * determine the driver mode from the command line or kernel config. -+ * If no command line input is provided, cppc_state will be AMD_PSTATE_UNDEFINED. -+ * command line options will override the kernel config settings. -+ */ -+ -+ if (cppc_state == AMD_PSTATE_UNDEFINED) { - /* Disable on the following configs by default: - * 1. Undefined platforms - * 2. Server platforms -- * 3. 
Shared memory designs - */ - if (amd_pstate_acpi_pm_profile_undefined() || -- amd_pstate_acpi_pm_profile_server() || -- !boot_cpu_has(X86_FEATURE_CPPC)) { -+ amd_pstate_acpi_pm_profile_server()) { - pr_info("driver load is disabled, boot with specific mode to enable this\n"); - return -ENODEV; - } -- ret = amd_pstate_set_driver(CONFIG_X86_AMD_PSTATE_DEFAULT_MODE); -- if (ret) -- return ret; -- break; -+ /* get driver mode from kernel config option [1:4] */ -+ cppc_state = CONFIG_X86_AMD_PSTATE_DEFAULT_MODE; -+ } -+ -+ switch (cppc_state) { - case AMD_PSTATE_DISABLE: -+ pr_info("driver load is disabled, boot with specific mode to enable this\n"); - return -ENODEV; - case AMD_PSTATE_PASSIVE: - case AMD_PSTATE_ACTIVE: - case AMD_PSTATE_GUIDED: -+ ret = amd_pstate_set_driver(cppc_state); -+ if (ret) -+ return ret; - break; - default: - return -EINVAL; - } - - /* capability check */ -- if (boot_cpu_has(X86_FEATURE_CPPC)) { -+ if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - pr_debug("AMD CPPC MSR based functionality is supported\n"); - if (cppc_state != AMD_PSTATE_ACTIVE) - current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; -@@ -1805,13 +1939,15 @@ static int __init amd_pstate_init(void) - /* enable amd pstate feature */ - ret = amd_pstate_enable(true); - if (ret) { -- pr_err("failed to enable with return %d\n", ret); -+ pr_err("failed to enable driver mode(%d)\n", cppc_state); - return ret; - } - - ret = cpufreq_register_driver(current_pstate_driver); -- if (ret) -+ if (ret) { - pr_err("failed to register with return %d\n", ret); -+ goto disable_driver; -+ } - - dev_root = bus_get_dev_root(&cpu_subsys); - if (dev_root) { -@@ -1827,6 +1963,8 @@ static int __init amd_pstate_init(void) - - global_attr_free: - cpufreq_unregister_driver(current_pstate_driver); -+disable_driver: -+ amd_pstate_enable(false); - return ret; - } - device_initcall(amd_pstate_init); -diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h -index e6a28e7f4dbf..cc8bb2bc325a 
100644 ---- a/drivers/cpufreq/amd-pstate.h -+++ b/drivers/cpufreq/amd-pstate.h -@@ -99,6 +99,8 @@ struct amd_cpudata { - u32 policy; - u64 cppc_cap1_cached; - bool suspended; -+ s16 epp_default; -+ bool boost_state; - }; - - #endif /* _LINUX_AMD_PSTATE_H */ -diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c -index 9e5060b27864..270ea04fb616 100644 ---- a/drivers/cpufreq/cpufreq.c -+++ b/drivers/cpufreq/cpufreq.c -@@ -614,10 +614,9 @@ static ssize_t show_boost(struct kobject *kobj, - static ssize_t store_boost(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t count) - { -- int ret, enable; -+ bool enable; - -- ret = sscanf(buf, "%d", &enable); -- if (ret != 1 || enable < 0 || enable > 1) -+ if (kstrtobool(buf, &enable)) - return -EINVAL; - - if (cpufreq_boost_trigger_state(enable)) { -@@ -641,10 +640,10 @@ static ssize_t show_local_boost(struct cpufreq_policy *policy, char *buf) - static ssize_t store_local_boost(struct cpufreq_policy *policy, - const char *buf, size_t count) - { -- int ret, enable; -+ int ret; -+ bool enable; - -- ret = kstrtoint(buf, 10, &enable); -- if (ret || enable < 0 || enable > 1) -+ if (kstrtobool(buf, &enable)) - return -EINVAL; - - if (!cpufreq_driver->boost_enabled) --- -2.46.0.rc1 - -From fdecce0ee8a06092cd381604a8f4f26ef0c9561a Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Mon, 15 Jul 2024 13:23:19 +0200 -Subject: [PATCH 02/11] bbr3 - -Signed-off-by: Peter Jung ---- - include/linux/tcp.h | 4 +- - include/net/inet_connection_sock.h | 4 +- - include/net/tcp.h | 72 +- - include/uapi/linux/inet_diag.h | 23 + - include/uapi/linux/rtnetlink.h | 4 +- - include/uapi/linux/tcp.h | 1 + - net/ipv4/Kconfig | 21 +- - net/ipv4/bpf_tcp_ca.c | 9 +- - net/ipv4/tcp.c | 3 + - net/ipv4/tcp_bbr.c | 2230 +++++++++++++++++++++------- - net/ipv4/tcp_cong.c | 1 + - net/ipv4/tcp_input.c | 40 +- - net/ipv4/tcp_minisocks.c | 2 + - net/ipv4/tcp_output.c | 48 +- - net/ipv4/tcp_rate.c | 30 +- - net/ipv4/tcp_timer.c | 1 
+ - 16 files changed, 1940 insertions(+), 553 deletions(-) - -diff --git a/include/linux/tcp.h b/include/linux/tcp.h -index 6a5e08b937b3..27aab715490e 100644 ---- a/include/linux/tcp.h -+++ b/include/linux/tcp.h -@@ -369,7 +369,9 @@ struct tcp_sock { - u8 compressed_ack; - u8 dup_ack_counter:2, - tlp_retrans:1, /* TLP is a retransmission */ -- unused:5; -+ fast_ack_mode:2, /* which fast ack mode ? */ -+ tlp_orig_data_app_limited:1, /* app-limited before TLP rtx? */ -+ unused:2; - u8 thin_lto : 1,/* Use linear timeouts for thin streams */ - fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ - fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ -diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h -index c0deaafebfdc..d53f042d936e 100644 ---- a/include/net/inet_connection_sock.h -+++ b/include/net/inet_connection_sock.h -@@ -137,8 +137,8 @@ struct inet_connection_sock { - u32 icsk_probes_tstamp; - u32 icsk_user_timeout; - -- u64 icsk_ca_priv[104 / sizeof(u64)]; --#define ICSK_CA_PRIV_SIZE sizeof_field(struct inet_connection_sock, icsk_ca_priv) -+#define ICSK_CA_PRIV_SIZE (144) -+ u64 icsk_ca_priv[ICSK_CA_PRIV_SIZE / sizeof(u64)]; - }; - - #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ -diff --git a/include/net/tcp.h b/include/net/tcp.h -index 060e95b331a2..953244eefe7d 100644 ---- a/include/net/tcp.h -+++ b/include/net/tcp.h -@@ -375,6 +375,8 @@ static inline void tcp_dec_quickack_mode(struct sock *sk) - #define TCP_ECN_QUEUE_CWR 2 - #define TCP_ECN_DEMAND_CWR 4 - #define TCP_ECN_SEEN 8 -+#define TCP_ECN_LOW 16 -+#define TCP_ECN_ECT_PERMANENT 32 - - enum tcp_tw_status { - TCP_TW_SUCCESS = 0, -@@ -778,6 +780,15 @@ static inline void tcp_fast_path_check(struct sock *sk) - - u32 tcp_delack_max(const struct sock *sk); - -+static inline void tcp_set_ecn_low_from_dst(struct sock *sk, -+ const struct dst_entry *dst) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ -+ if (dst_feature(dst, RTAX_FEATURE_ECN_LOW)) -+ 
tp->ecn_flags |= TCP_ECN_LOW; -+} -+ - /* Compute the actual rto_min value */ - static inline u32 tcp_rto_min(const struct sock *sk) - { -@@ -883,6 +894,11 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) - return max_t(s64, t1 - t0, 0); - } - -+static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0) -+{ -+ return max_t(s32, t1 - t0, 0); -+} -+ - /* provide the departure time in us unit */ - static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb) - { -@@ -972,9 +988,14 @@ struct tcp_skb_cb { - /* pkts S/ACKed so far upon tx of skb, incl retrans: */ - __u32 delivered; - /* start of send pipeline phase */ -- u64 first_tx_mstamp; -+ u32 first_tx_mstamp; - /* when we reached the "delivered" count */ -- u64 delivered_mstamp; -+ u32 delivered_mstamp; -+#define TCPCB_IN_FLIGHT_BITS 20 -+#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1) -+ u32 in_flight:20, /* packets in flight at transmit */ -+ unused2:12; -+ u32 lost; /* packets lost so far upon tx of skb */ - } tx; /* only used for outgoing skbs */ - union { - struct inet_skb_parm h4; -@@ -1078,6 +1099,7 @@ enum tcp_ca_event { - CA_EVENT_LOSS, /* loss timeout */ - CA_EVENT_ECN_NO_CE, /* ECT set, but not CE marked */ - CA_EVENT_ECN_IS_CE, /* received CE marked IP packet */ -+ CA_EVENT_TLP_RECOVERY, /* a lost segment was repaired by TLP probe */ - }; - - /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ -@@ -1100,7 +1122,11 @@ enum tcp_ca_ack_event_flags { - #define TCP_CONG_NON_RESTRICTED 0x1 - /* Requires ECN/ECT set on all packets */ - #define TCP_CONG_NEEDS_ECN 0x2 --#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN) -+/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). 
*/ -+#define TCP_CONG_WANTS_CE_EVENTS 0x4 -+#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \ -+ TCP_CONG_NEEDS_ECN | \ -+ TCP_CONG_WANTS_CE_EVENTS) - - union tcp_cc_info; - -@@ -1120,10 +1146,13 @@ struct ack_sample { - */ - struct rate_sample { - u64 prior_mstamp; /* starting timestamp for interval */ -+ u32 prior_lost; /* tp->lost at "prior_mstamp" */ - u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ - u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */ -+ u32 tx_in_flight; /* packets in flight at starting timestamp */ -+ s32 lost; /* number of packets lost over interval */ - s32 delivered; /* number of packets delivered over interval */ -- s32 delivered_ce; /* number of packets delivered w/ CE marks*/ -+ s32 delivered_ce; /* packets delivered w/ CE mark over interval */ - long interval_us; /* time for tp->delivered to incr "delivered" */ - u32 snd_interval_us; /* snd interval for delivered packets */ - u32 rcv_interval_us; /* rcv interval for delivered packets */ -@@ -1134,7 +1163,9 @@ struct rate_sample { - u32 last_end_seq; /* end_seq of most recently ACKed packet */ - bool is_app_limited; /* is sample from packet with bubble in pipe? */ - bool is_retrans; /* is sample from retransmission? */ -+ bool is_acking_tlp_retrans_seq; /* ACKed a TLP retransmit sequence? */ - bool is_ack_delayed; /* is this (likely) a delayed ACK? */ -+ bool is_ece; /* did this ACK have ECN marked? 
*/ - }; - - struct tcp_congestion_ops { -@@ -1158,8 +1189,11 @@ struct tcp_congestion_ops { - /* hook for packet ack accounting (optional) */ - void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); - -- /* override sysctl_tcp_min_tso_segs */ -- u32 (*min_tso_segs)(struct sock *sk); -+ /* pick target number of segments per TSO/GSO skb (optional): */ -+ u32 (*tso_segs)(struct sock *sk, unsigned int mss_now); -+ -+ /* react to a specific lost skb (optional) */ -+ void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb); - - /* call when packets are delivered to update cwnd and pacing rate, - * after all the ca_state processing. (optional) -@@ -1225,6 +1259,14 @@ static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer) - } - #endif - -+static inline bool tcp_ca_wants_ce_events(const struct sock *sk) -+{ -+ const struct inet_connection_sock *icsk = inet_csk(sk); -+ -+ return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN | -+ TCP_CONG_WANTS_CE_EVENTS); -+} -+ - static inline bool tcp_ca_needs_ecn(const struct sock *sk) - { - const struct inet_connection_sock *icsk = inet_csk(sk); -@@ -1244,6 +1286,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) - void tcp_set_ca_state(struct sock *sk, const u8 ca_state); - - /* From tcp_rate.c */ -+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb); - void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); - void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, - struct rate_sample *rs); -@@ -1256,6 +1299,21 @@ static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) - return t1 > t2 || (t1 == t2 && after(seq1, seq2)); - } - -+/* If a retransmit failed due to local qdisc congestion or other local issues, -+ * then we may have called tcp_set_skb_tso_segs() to increase the number of -+ * segments in the skb without increasing the tx.in_flight. 
In all other cases, -+ * the tx.in_flight should be at least as big as the pcount of the sk_buff. We -+ * do not have the state to know whether a retransmit failed due to local qdisc -+ * congestion or other local issues, so to avoid spurious warnings we consider -+ * that any skb marked lost may have suffered that fate. -+ */ -+static inline bool tcp_skb_tx_in_flight_is_suspicious(u32 skb_pcount, -+ u32 skb_sacked_flags, -+ u32 tx_in_flight) -+{ -+ return (skb_pcount > tx_in_flight) && !(skb_sacked_flags & TCPCB_LOST); -+} -+ - /* These functions determine how the current flow behaves in respect of SACK - * handling. SACK is negotiated with the peer, and therefore it can vary - * between different flows. -@@ -2418,7 +2476,7 @@ struct tcp_plb_state { - u8 consec_cong_rounds:5, /* consecutive congested rounds */ - unused:3; - u32 pause_until; /* jiffies32 when PLB can resume rerouting */ --}; -+} __attribute__ ((__packed__)); - - static inline void tcp_plb_init(const struct sock *sk, - struct tcp_plb_state *plb) -diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h -index 50655de04c9b..82f8bd8f0d16 100644 ---- a/include/uapi/linux/inet_diag.h -+++ b/include/uapi/linux/inet_diag.h -@@ -229,6 +229,29 @@ struct tcp_bbr_info { - __u32 bbr_min_rtt; /* min-filtered RTT in uSec */ - __u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */ - __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */ -+ __u32 bbr_bw_hi_lsb; /* lower 32 bits of bw_hi */ -+ __u32 bbr_bw_hi_msb; /* upper 32 bits of bw_hi */ -+ __u32 bbr_bw_lo_lsb; /* lower 32 bits of bw_lo */ -+ __u32 bbr_bw_lo_msb; /* upper 32 bits of bw_lo */ -+ __u8 bbr_mode; /* current bbr_mode in state machine */ -+ __u8 bbr_phase; /* current state machine phase */ -+ __u8 unused1; /* alignment padding; not used yet */ -+ __u8 bbr_version; /* BBR algorithm version */ -+ __u32 bbr_inflight_lo; /* lower short-term data volume bound */ -+ __u32 bbr_inflight_hi; /* higher long-term data volume bound 
*/ -+ __u32 bbr_extra_acked; /* max excess packets ACKed in epoch */ -+}; -+ -+/* TCP BBR congestion control bbr_phase as reported in netlink/ss stats. */ -+enum tcp_bbr_phase { -+ BBR_PHASE_INVALID = 0, -+ BBR_PHASE_STARTUP = 1, -+ BBR_PHASE_DRAIN = 2, -+ BBR_PHASE_PROBE_RTT = 3, -+ BBR_PHASE_PROBE_BW_UP = 4, -+ BBR_PHASE_PROBE_BW_DOWN = 5, -+ BBR_PHASE_PROBE_BW_CRUISE = 6, -+ BBR_PHASE_PROBE_BW_REFILL = 7, - }; - - union tcp_cc_info { -diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h -index 3b687d20c9ed..a7c30c243b54 100644 ---- a/include/uapi/linux/rtnetlink.h -+++ b/include/uapi/linux/rtnetlink.h -@@ -507,12 +507,14 @@ enum { - #define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */ - #define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */ - #define RTAX_FEATURE_TCP_USEC_TS (1 << 4) -+#define RTAX_FEATURE_ECN_LOW (1 << 5) - - #define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \ - RTAX_FEATURE_SACK | \ - RTAX_FEATURE_TIMESTAMP | \ - RTAX_FEATURE_ALLFRAG | \ -- RTAX_FEATURE_TCP_USEC_TS) -+ RTAX_FEATURE_TCP_USEC_TS | \ -+ RTAX_FEATURE_ECN_LOW) - - struct rta_session { - __u8 proto; -diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h -index dbf896f3146c..4702cd2f1ffc 100644 ---- a/include/uapi/linux/tcp.h -+++ b/include/uapi/linux/tcp.h -@@ -178,6 +178,7 @@ enum tcp_fastopen_client_fail { - #define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */ - #define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ - #define TCPI_OPT_USEC_TS 64 /* usec timestamps */ -+#define TCPI_OPT_ECN_LOW 128 /* Low-latency ECN configured at init */ - - /* - * Sender's congestion state indicating normal or abnormal situations -diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig -index 8e94ed7c56a0..50dc9970cad2 100644 ---- a/net/ipv4/Kconfig -+++ b/net/ipv4/Kconfig -@@ -668,15 +668,18 @@ config TCP_CONG_BBR - default n - help - -- BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to -- maximize network utilization 
and minimize queues. It builds an explicit -- model of the bottleneck delivery rate and path round-trip propagation -- delay. It tolerates packet loss and delay unrelated to congestion. It -- can operate over LAN, WAN, cellular, wifi, or cable modem links. It can -- coexist with flows that use loss-based congestion control, and can -- operate with shallow buffers, deep buffers, bufferbloat, policers, or -- AQM schemes that do not provide a delay signal. It requires the fq -- ("Fair Queue") pacing packet scheduler. -+ BBR (Bottleneck Bandwidth and RTT) TCP congestion control is a -+ model-based congestion control algorithm that aims to maximize -+ network utilization, keep queues and retransmit rates low, and to be -+ able to coexist with Reno/CUBIC in common scenarios. It builds an -+ explicit model of the network path. It tolerates a targeted degree -+ of random packet loss and delay. It can operate over LAN, WAN, -+ cellular, wifi, or cable modem links, and can use shallow-threshold -+ ECN signals. It can coexist to some degree with flows that use -+ loss-based congestion control, and can operate with shallow buffers, -+ deep buffers, bufferbloat, policers, or AQM schemes that do not -+ provide a delay signal. It requires pacing, using either TCP internal -+ pacing or the fq ("Fair Queue") pacing packet scheduler. 
- - choice - prompt "Default TCP congestion control" -diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c -index 18227757ec0c..f180befc28bd 100644 ---- a/net/ipv4/bpf_tcp_ca.c -+++ b/net/ipv4/bpf_tcp_ca.c -@@ -305,11 +305,15 @@ static void bpf_tcp_ca_pkts_acked(struct sock *sk, const struct ack_sample *samp - { - } - --static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk) -+static u32 bpf_tcp_ca_tso_segs(struct sock *sk, unsigned int mss_now) - { - return 0; - } - -+static void bpf_tcp_ca_skb_marked_lost(struct sock *sk, const struct sk_buff *skb) -+{ -+} -+ - static void bpf_tcp_ca_cong_control(struct sock *sk, u32 ack, int flag, - const struct rate_sample *rs) - { -@@ -340,7 +344,8 @@ static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = { - .cwnd_event = bpf_tcp_ca_cwnd_event, - .in_ack_event = bpf_tcp_ca_in_ack_event, - .pkts_acked = bpf_tcp_ca_pkts_acked, -- .min_tso_segs = bpf_tcp_ca_min_tso_segs, -+ .tso_segs = bpf_tcp_ca_tso_segs, -+ .skb_marked_lost = bpf_tcp_ca_skb_marked_lost, - .cong_control = bpf_tcp_ca_cong_control, - .undo_cwnd = bpf_tcp_ca_undo_cwnd, - .sndbuf_expand = bpf_tcp_ca_sndbuf_expand, -diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index e6790ea74877..b63e27eba536 100644 ---- a/net/ipv4/tcp.c -+++ b/net/ipv4/tcp.c -@@ -3120,6 +3120,7 @@ int tcp_disconnect(struct sock *sk, int flags) - tp->rx_opt.dsack = 0; - tp->rx_opt.num_sacks = 0; - tp->rcv_ooopack = 0; -+ tp->fast_ack_mode = 0; - - - /* Clean up fastopen related fields */ -@@ -3846,6 +3847,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) - info->tcpi_options |= TCPI_OPT_ECN; - if (tp->ecn_flags & TCP_ECN_SEEN) - info->tcpi_options |= TCPI_OPT_ECN_SEEN; -+ if (tp->ecn_flags & TCP_ECN_LOW) -+ info->tcpi_options |= TCPI_OPT_ECN_LOW; - if (tp->syn_data_acked) - info->tcpi_options |= TCPI_OPT_SYN_DATA; - if (tp->tcp_usec_ts) -diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c -index 760941e55153..a180fa648d5e 100644 ---- a/net/ipv4/tcp_bbr.c -+++ 
b/net/ipv4/tcp_bbr.c -@@ -1,18 +1,19 @@ --/* Bottleneck Bandwidth and RTT (BBR) congestion control -+/* BBR (Bottleneck Bandwidth and RTT) congestion control - * -- * BBR congestion control computes the sending rate based on the delivery -- * rate (throughput) estimated from ACKs. In a nutshell: -+ * BBR is a model-based congestion control algorithm that aims for low queues, -+ * low loss, and (bounded) Reno/CUBIC coexistence. To maintain a model of the -+ * network path, it uses measurements of bandwidth and RTT, as well as (if they -+ * occur) packet loss and/or shallow-threshold ECN signals. Note that although -+ * it can use ECN or loss signals explicitly, it does not require either; it -+ * can bound its in-flight data based on its estimate of the BDP. - * -- * On each ACK, update our model of the network path: -- * bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips) -- * min_rtt = windowed_min(rtt, 10 seconds) -- * pacing_rate = pacing_gain * bottleneck_bandwidth -- * cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4) -- * -- * The core algorithm does not react directly to packet losses or delays, -- * although BBR may adjust the size of next send per ACK when loss is -- * observed, or adjust the sending rate if it estimates there is a -- * traffic policer, in order to keep the drop rate reasonable. -+ * The model has both higher and lower bounds for the operating range: -+ * lo: bw_lo, inflight_lo: conservative short-term lower bound -+ * hi: bw_hi, inflight_hi: robust long-term upper bound -+ * The bandwidth-probing time scale is (a) extended dynamically based on -+ * estimated BDP to improve coexistence with Reno/CUBIC; (b) bounded by -+ * an interactive wall-clock time-scale to be more scalable and responsive -+ * than Reno and CUBIC. 
- * - * Here is a state transition diagram for BBR: - * -@@ -65,6 +66,13 @@ - #include - #include - -+#include -+#include "tcp_dctcp.h" -+ -+#define BBR_VERSION 3 -+ -+#define bbr_param(sk,name) (bbr_ ## name) -+ - /* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth - * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps. - * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32. -@@ -85,36 +93,41 @@ enum bbr_mode { - BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */ - }; - -+/* How does the incoming ACK stream relate to our bandwidth probing? */ -+enum bbr_ack_phase { -+ BBR_ACKS_INIT, /* not probing; not getting probe feedback */ -+ BBR_ACKS_REFILLING, /* sending at est. bw to fill pipe */ -+ BBR_ACKS_PROBE_STARTING, /* inflight rising to probe bw */ -+ BBR_ACKS_PROBE_FEEDBACK, /* getting feedback from bw probing */ -+ BBR_ACKS_PROBE_STOPPING, /* stopped probing; still getting feedback */ -+}; -+ - /* BBR congestion control block */ - struct bbr { - u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */ - u32 min_rtt_stamp; /* timestamp of min_rtt_us */ - u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */ -- struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */ -- u32 rtt_cnt; /* count of packet-timed rounds elapsed */ -+ u32 probe_rtt_min_us; /* min RTT in probe_rtt_win_ms win */ -+ u32 probe_rtt_min_stamp; /* timestamp of probe_rtt_min_us*/ - u32 next_rtt_delivered; /* scb->tx.delivered at end of round */ - u64 cycle_mstamp; /* time of this cycle phase start */ -- u32 mode:3, /* current bbr_mode in state machine */ -+ u32 mode:2, /* current bbr_mode in state machine */ - prev_ca_state:3, /* CA state on previous ACK */ -- packet_conservation:1, /* use packet conservation? */ - round_start:1, /* start of packet-timed tx->ack round? 
*/ -+ ce_state:1, /* If most recent data has CE bit set */ -+ bw_probe_up_rounds:5, /* cwnd-limited rounds in PROBE_UP */ -+ try_fast_path:1, /* can we take fast path? */ - idle_restart:1, /* restarting after idle? */ - probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ -- unused:13, -- lt_is_sampling:1, /* taking long-term ("LT") samples now? */ -- lt_rtt_cnt:7, /* round trips in long-term interval */ -- lt_use_bw:1; /* use lt_bw as our bw estimate? */ -- u32 lt_bw; /* LT est delivery rate in pkts/uS << 24 */ -- u32 lt_last_delivered; /* LT intvl start: tp->delivered */ -- u32 lt_last_stamp; /* LT intvl start: tp->delivered_mstamp */ -- u32 lt_last_lost; /* LT intvl start: tp->lost */ -+ init_cwnd:7, /* initial cwnd */ -+ unused_1:10; - u32 pacing_gain:10, /* current gain for setting pacing rate */ - cwnd_gain:10, /* current gain for setting cwnd */ - full_bw_reached:1, /* reached full bw in Startup? */ - full_bw_cnt:2, /* number of rounds without large bw gains */ -- cycle_idx:3, /* current index in pacing_gain cycle array */ -+ cycle_idx:2, /* current index in pacing_gain cycle array */ - has_seen_rtt:1, /* have we seen an RTT sample yet? */ -- unused_b:5; -+ unused_2:6; - u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ - u32 full_bw; /* recent bw, to estimate if pipe is full */ - -@@ -124,19 +137,67 @@ struct bbr { - u32 ack_epoch_acked:20, /* packets (S)ACKed in sampling epoch */ - extra_acked_win_rtts:5, /* age of extra_acked, in round trips */ - extra_acked_win_idx:1, /* current index in extra_acked array */ -- unused_c:6; -+ /* BBR v3 state: */ -+ full_bw_now:1, /* recently reached full bw plateau? */ -+ startup_ecn_rounds:2, /* consecutive hi ECN STARTUP rounds */ -+ loss_in_cycle:1, /* packet loss in this cycle? */ -+ ecn_in_cycle:1, /* ECN in this cycle? 
*/ -+ unused_3:1; -+ u32 loss_round_delivered; /* scb->tx.delivered ending loss round */ -+ u32 undo_bw_lo; /* bw_lo before latest losses */ -+ u32 undo_inflight_lo; /* inflight_lo before latest losses */ -+ u32 undo_inflight_hi; /* inflight_hi before latest losses */ -+ u32 bw_latest; /* max delivered bw in last round trip */ -+ u32 bw_lo; /* lower bound on sending bandwidth */ -+ u32 bw_hi[2]; /* max recent measured bw sample */ -+ u32 inflight_latest; /* max delivered data in last round trip */ -+ u32 inflight_lo; /* lower bound of inflight data range */ -+ u32 inflight_hi; /* upper bound of inflight data range */ -+ u32 bw_probe_up_cnt; /* packets delivered per inflight_hi incr */ -+ u32 bw_probe_up_acks; /* packets (S)ACKed since inflight_hi incr */ -+ u32 probe_wait_us; /* PROBE_DOWN until next clock-driven probe */ -+ u32 prior_rcv_nxt; /* tp->rcv_nxt when CE state last changed */ -+ u32 ecn_eligible:1, /* sender can use ECN (RTT, handshake)? */ -+ ecn_alpha:9, /* EWMA delivered_ce/delivered; 0..256 */ -+ bw_probe_samples:1, /* rate samples reflect bw probing? */ -+ prev_probe_too_high:1, /* did last PROBE_UP go too high? */ -+ stopped_risky_probe:1, /* last PROBE_UP stopped due to risk? */ -+ rounds_since_probe:8, /* packet-timed rounds since probed bw */ -+ loss_round_start:1, /* loss_round_delivered round trip? */ -+ loss_in_round:1, /* loss marked in this round trip? */ -+ ecn_in_round:1, /* ECN marked in this round trip? */ -+ ack_phase:3, /* bbr_ack_phase: meaning of ACKs */ -+ loss_events_in_round:4,/* losses in STARTUP round */ -+ initialized:1; /* has bbr_init() been called? 
*/ -+ u32 alpha_last_delivered; /* tp->delivered at alpha update */ -+ u32 alpha_last_delivered_ce; /* tp->delivered_ce at alpha update */ -+ -+ u8 unused_4; /* to preserve alignment */ -+ struct tcp_plb_state plb; - }; - --#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */ -+struct bbr_context { -+ u32 sample_bw; -+}; - --/* Window length of bw filter (in rounds): */ --static const int bbr_bw_rtts = CYCLE_LEN + 2; - /* Window length of min_rtt filter (in sec): */ - static const u32 bbr_min_rtt_win_sec = 10; - /* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */ - static const u32 bbr_probe_rtt_mode_ms = 200; --/* Skip TSO below the following bandwidth (bits/sec): */ --static const int bbr_min_tso_rate = 1200000; -+/* Window length of probe_rtt_min_us filter (in ms), and consequently the -+ * typical interval between PROBE_RTT mode entries. The default is 5000ms. -+ * Note that bbr_probe_rtt_win_ms must be <= bbr_min_rtt_win_sec * MSEC_PER_SEC -+ */ -+static const u32 bbr_probe_rtt_win_ms = 5000; -+/* Proportion of cwnd to estimated BDP in PROBE_RTT, in units of BBR_UNIT: */ -+static const u32 bbr_probe_rtt_cwnd_gain = BBR_UNIT * 1 / 2; -+ -+/* Use min_rtt to help adapt TSO burst size, with smaller min_rtt resulting -+ * in bigger TSO bursts. We cut the RTT-based allowance in half -+ * for every 2^9 usec (aka 512 us) of RTT, so that the RTT-based allowance -+ * is below 1500 bytes after 6 * ~500 usec = 3ms. -+ */ -+static const u32 bbr_tso_rtt_shift = 9; - - /* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck. 
- * In order to help drive the network toward lower queues and low latency while -@@ -146,13 +207,15 @@ static const int bbr_min_tso_rate = 1200000; - */ - static const int bbr_pacing_margin_percent = 1; - --/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain -+/* We use a startup_pacing_gain of 4*ln(2) because it's the smallest value - * that will allow a smoothly increasing pacing rate that will double each RTT - * and send the same number of packets per RTT that an un-paced, slow-starting - * Reno or CUBIC flow would: - */ --static const int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1; --/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain -+static const int bbr_startup_pacing_gain = BBR_UNIT * 277 / 100 + 1; -+/* The gain for deriving startup cwnd: */ -+static const int bbr_startup_cwnd_gain = BBR_UNIT * 2; -+/* The pacing gain in BBR_DRAIN is calculated to typically drain - * the queue created in BBR_STARTUP in a single round: - */ - static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885; -@@ -160,13 +223,17 @@ static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885; - static const int bbr_cwnd_gain = BBR_UNIT * 2; - /* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */ - static const int bbr_pacing_gain[] = { -- BBR_UNIT * 5 / 4, /* probe for more available bw */ -- BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */ -- BBR_UNIT, BBR_UNIT, BBR_UNIT, /* cruise at 1.0*bw to utilize pipe, */ -- BBR_UNIT, BBR_UNIT, BBR_UNIT /* without creating excess queue... 
*/ -+ BBR_UNIT * 5 / 4, /* UP: probe for more available bw */ -+ BBR_UNIT * 91 / 100, /* DOWN: drain queue and/or yield bw */ -+ BBR_UNIT, /* CRUISE: try to use pipe w/ some headroom */ -+ BBR_UNIT, /* REFILL: refill pipe to estimated 100% */ -+}; -+enum bbr_pacing_gain_phase { -+ BBR_BW_PROBE_UP = 0, /* push up inflight to probe for bw/vol */ -+ BBR_BW_PROBE_DOWN = 1, /* drain excess inflight from the queue */ -+ BBR_BW_PROBE_CRUISE = 2, /* use pipe, w/ headroom in queue/pipe */ -+ BBR_BW_PROBE_REFILL = 3, /* v2: refill the pipe again to 100% */ - }; --/* Randomize the starting gain cycling phase over N phases: */ --static const u32 bbr_cycle_rand = 7; - - /* Try to keep at least this many packets in flight, if things go smoothly. For - * smooth functioning, a sliding window protocol ACKing every other packet -@@ -174,24 +241,12 @@ static const u32 bbr_cycle_rand = 7; - */ - static const u32 bbr_cwnd_min_target = 4; - --/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */ -+/* To estimate if BBR_STARTUP or BBR_BW_PROBE_UP has filled pipe... */ - /* If bw has increased significantly (1.25x), there may be more bw available: */ - static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4; - /* But after 3 rounds w/o significant bw growth, estimate pipe is full: */ - static const u32 bbr_full_bw_cnt = 3; - --/* "long-term" ("LT") bandwidth estimator parameters... 
*/ --/* The minimum number of rounds in an LT bw sampling interval: */ --static const u32 bbr_lt_intvl_min_rtts = 4; --/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */ --static const u32 bbr_lt_loss_thresh = 50; --/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */ --static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8; --/* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */ --static const u32 bbr_lt_bw_diff = 4000 / 8; --/* If we estimate we're policed, use lt_bw for this many round trips: */ --static const u32 bbr_lt_bw_max_rtts = 48; -- - /* Gain factor for adding extra_acked to target cwnd: */ - static const int bbr_extra_acked_gain = BBR_UNIT; - /* Window length of extra_acked window. */ -@@ -201,8 +256,121 @@ static const u32 bbr_ack_epoch_acked_reset_thresh = 1U << 20; - /* Time period for clamping cwnd increment due to ack aggregation */ - static const u32 bbr_extra_acked_max_us = 100 * 1000; - -+/* Flags to control BBR ECN-related behavior... */ -+ -+/* Ensure ACKs only ACK packets with consistent ECN CE status? */ -+static const bool bbr_precise_ece_ack = true; -+ -+/* Max RTT (in usec) at which to use sender-side ECN logic. -+ * Disabled when 0 (ECN allowed at any RTT). -+ */ -+static const u32 bbr_ecn_max_rtt_us = 5000; -+ -+/* On losses, scale down inflight and pacing rate by beta scaled by BBR_SCALE. -+ * No loss response when 0. -+ */ -+static const u32 bbr_beta = BBR_UNIT * 30 / 100; -+ -+/* Gain factor for ECN mark ratio samples, scaled by BBR_SCALE (1/16 = 6.25%) */ -+static const u32 bbr_ecn_alpha_gain = BBR_UNIT * 1 / 16; -+ -+/* The initial value for ecn_alpha; 1.0 allows a flow to respond quickly -+ * to congestion if the bottleneck is congested when the flow starts up. -+ */ -+static const u32 bbr_ecn_alpha_init = BBR_UNIT; -+ -+/* On ECN, cut inflight_lo to (1 - ecn_factor * ecn_alpha) scaled by BBR_SCALE. -+ * No ECN based bounding when 0. 
-+ */ -+static const u32 bbr_ecn_factor = BBR_UNIT * 1 / 3; /* 1/3 = 33% */ -+ -+/* Estimate bw probing has gone too far if CE ratio exceeds this threshold. -+ * Scaled by BBR_SCALE. Disabled when 0. -+ */ -+static const u32 bbr_ecn_thresh = BBR_UNIT * 1 / 2; /* 1/2 = 50% */ -+ -+/* If non-zero, if in a cycle with no losses but some ECN marks, after ECN -+ * clears then make the first round's increment to inflight_hi the following -+ * fraction of inflight_hi. -+ */ -+static const u32 bbr_ecn_reprobe_gain = BBR_UNIT * 1 / 2; -+ -+/* Estimate bw probing has gone too far if loss rate exceeds this level. */ -+static const u32 bbr_loss_thresh = BBR_UNIT * 2 / 100; /* 2% loss */ -+ -+/* Slow down for a packet loss recovered by TLP? */ -+static const bool bbr_loss_probe_recovery = true; -+ -+/* Exit STARTUP if number of loss marking events in a Recovery round is >= N, -+ * and loss rate is higher than bbr_loss_thresh. -+ * Disabled if 0. -+ */ -+static const u32 bbr_full_loss_cnt = 6; -+ -+/* Exit STARTUP if number of round trips with ECN mark rate above ecn_thresh -+ * meets this count. -+ */ -+static const u32 bbr_full_ecn_cnt = 2; -+ -+/* Fraction of unutilized headroom to try to leave in path upon high loss. */ -+static const u32 bbr_inflight_headroom = BBR_UNIT * 15 / 100; -+ -+/* How much do we increase cwnd_gain when probing for bandwidth in -+ * BBR_BW_PROBE_UP? This specifies the increment in units of -+ * BBR_UNIT/4. The default is 1, meaning 0.25. -+ * The min value is 0 (meaning 0.0); max is 3 (meaning 0.75). -+ */ -+static const u32 bbr_bw_probe_cwnd_gain = 1; -+ -+/* Max number of packet-timed rounds to wait before probing for bandwidth. If -+ * we want to tolerate 1% random loss per round, and not have this cut our -+ * inflight too much, we must probe for bw periodically on roughly this scale. -+ * If low, limits Reno/CUBIC coexistence; if high, limits loss tolerance. 
-+ * We aim to be fair with Reno/CUBIC up to a BDP of at least: -+ * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets -+ */ -+static const u32 bbr_bw_probe_max_rounds = 63; -+ -+/* Max amount of randomness to inject in round counting for Reno-coexistence. -+ */ -+static const u32 bbr_bw_probe_rand_rounds = 2; -+ -+/* Use BBR-native probe time scale starting at this many usec. -+ * We aim to be fair with Reno/CUBIC up to an inter-loss time epoch of at least: -+ * BDP*RTT = 25Mbps * .030sec /(1514bytes) * 0.030sec = 1.9 secs -+ */ -+static const u32 bbr_bw_probe_base_us = 2 * USEC_PER_SEC; /* 2 secs */ -+ -+/* Use BBR-native probes spread over this many usec: */ -+static const u32 bbr_bw_probe_rand_us = 1 * USEC_PER_SEC; /* 1 secs */ -+ -+/* Use fast path if app-limited, no loss/ECN, and target cwnd was reached? */ -+static const bool bbr_fast_path = true; -+ -+/* Use fast ack mode? */ -+static const bool bbr_fast_ack_mode = true; -+ -+static u32 bbr_max_bw(const struct sock *sk); -+static u32 bbr_bw(const struct sock *sk); -+static void bbr_exit_probe_rtt(struct sock *sk); -+static void bbr_reset_congestion_signals(struct sock *sk); -+static void bbr_run_loss_probe_recovery(struct sock *sk); -+ - static void bbr_check_probe_rtt_done(struct sock *sk); - -+/* This connection can use ECN if both endpoints have signaled ECN support in -+ * the handshake and the per-route settings indicated this is a -+ * shallow-threshold ECN environment, meaning both: -+ * (a) ECN CE marks indicate low-latency/shallow-threshold congestion, and -+ * (b) TCP endpoints provide precise ACKs that only ACK data segments -+ * with consistent ECN CE status -+ */ -+static bool bbr_can_use_ecn(const struct sock *sk) -+{ -+ return (tcp_sk(sk)->ecn_flags & TCP_ECN_OK) && -+ (tcp_sk(sk)->ecn_flags & TCP_ECN_LOW); -+} -+ - /* Do we estimate that STARTUP filled the pipe? 
*/ - static bool bbr_full_bw_reached(const struct sock *sk) - { -@@ -214,17 +382,17 @@ static bool bbr_full_bw_reached(const struct sock *sk) - /* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ - static u32 bbr_max_bw(const struct sock *sk) - { -- struct bbr *bbr = inet_csk_ca(sk); -+ const struct bbr *bbr = inet_csk_ca(sk); - -- return minmax_get(&bbr->bw); -+ return max(bbr->bw_hi[0], bbr->bw_hi[1]); - } - - /* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */ - static u32 bbr_bw(const struct sock *sk) - { -- struct bbr *bbr = inet_csk_ca(sk); -+ const struct bbr *bbr = inet_csk_ca(sk); - -- return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk); -+ return min(bbr_max_bw(sk), bbr->bw_lo); - } - - /* Return maximum extra acked in past k-2k round trips, -@@ -241,15 +409,23 @@ static u16 bbr_extra_acked(const struct sock *sk) - * The order here is chosen carefully to avoid overflow of u64. This should - * work for input rates of up to 2.9Tbit/sec and gain of 2.89x. - */ --static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) -+static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain, -+ int margin) - { - unsigned int mss = tcp_sk(sk)->mss_cache; - - rate *= mss; - rate *= gain; - rate >>= BBR_SCALE; -- rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_margin_percent); -- return rate >> BW_SCALE; -+ rate *= USEC_PER_SEC / 100 * (100 - margin); -+ rate >>= BW_SCALE; -+ rate = max(rate, 1ULL); -+ return rate; -+} -+ -+static u64 bbr_bw_bytes_per_sec(struct sock *sk, u64 rate) -+{ -+ return bbr_rate_bytes_per_sec(sk, rate, BBR_UNIT, 0); - } - - /* Convert a BBR bw and gain factor to a pacing rate in bytes per second. 
*/ -@@ -257,12 +433,13 @@ static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) - { - u64 rate = bw; - -- rate = bbr_rate_bytes_per_sec(sk, rate, gain); -+ rate = bbr_rate_bytes_per_sec(sk, rate, gain, -+ bbr_pacing_margin_percent); - rate = min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate)); - return rate; - } - --/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ -+/* Initialize pacing rate to: startup_pacing_gain * init_cwnd / RTT. */ - static void bbr_init_pacing_rate_from_rtt(struct sock *sk) - { - struct tcp_sock *tp = tcp_sk(sk); -@@ -279,7 +456,7 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk) - bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT; - do_div(bw, rtt_us); - WRITE_ONCE(sk->sk_pacing_rate, -- bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain)); -+ bbr_bw_to_pacing_rate(sk, bw, bbr_param(sk, startup_pacing_gain))); - } - - /* Pace using current bw estimate and a gain factor. */ -@@ -295,26 +472,48 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) - WRITE_ONCE(sk->sk_pacing_rate, rate); - } - --/* override sysctl_tcp_min_tso_segs */ --__bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk) -+/* Return the number of segments BBR would like in a TSO/GSO skb, given a -+ * particular max gso size as a constraint. TODO: make this simpler and more -+ * consistent by switching bbr to just call tcp_tso_autosize(). -+ */ -+static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now, -+ u32 gso_max_size) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 segs, r; -+ u64 bytes; -+ -+ /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */ -+ bytes = READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift); -+ -+ /* Budget a TSO/GSO burst size allowance based on min_rtt. For every -+ * K = 2^tso_rtt_shift microseconds of min_rtt, halve the burst. 
-+ * The min_rtt-based burst allowance is: 64 KBytes / 2^(min_rtt/K) -+ */ -+ if (bbr_param(sk, tso_rtt_shift)) { -+ r = bbr->min_rtt_us >> bbr_param(sk, tso_rtt_shift); -+ if (r < BITS_PER_TYPE(u32)) /* prevent undefined behavior */ -+ bytes += GSO_LEGACY_MAX_SIZE >> r; -+ } -+ -+ bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER); -+ segs = max_t(u32, bytes / mss_now, -+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); -+ return segs; -+} -+ -+/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */ -+__bpf_kfunc static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now) - { -- return READ_ONCE(sk->sk_pacing_rate) < (bbr_min_tso_rate >> 3) ? 1 : 2; -+ return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size); - } - -+/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */ - static u32 bbr_tso_segs_goal(struct sock *sk) - { - struct tcp_sock *tp = tcp_sk(sk); -- u32 segs, bytes; -- -- /* Sort of tcp_tso_autosize() but ignoring -- * driver provided sk_gso_max_size. 
-- */ -- bytes = min_t(unsigned long, -- READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift), -- GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER); -- segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); - -- return min(segs, 0x7FU); -+ return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_LEGACY_MAX_SIZE); - } - - /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ -@@ -334,7 +533,9 @@ __bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - -- if (event == CA_EVENT_TX_START && tp->app_limited) { -+ if (event == CA_EVENT_TX_START) { -+ if (!tp->app_limited) -+ return; - bbr->idle_restart = 1; - bbr->ack_epoch_mstamp = tp->tcp_mstamp; - bbr->ack_epoch_acked = 0; -@@ -345,6 +546,16 @@ __bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) - bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); - else if (bbr->mode == BBR_PROBE_RTT) - bbr_check_probe_rtt_done(sk); -+ } else if ((event == CA_EVENT_ECN_IS_CE || -+ event == CA_EVENT_ECN_NO_CE) && -+ bbr_can_use_ecn(sk) && -+ bbr_param(sk, precise_ece_ack)) { -+ u32 state = bbr->ce_state; -+ dctcp_ece_ack_update(sk, event, &bbr->prior_rcv_nxt, &state); -+ bbr->ce_state = state; -+ } else if (event == CA_EVENT_TLP_RECOVERY && -+ bbr_param(sk, loss_probe_recovery)) { -+ bbr_run_loss_probe_recovery(sk); - } - } - -@@ -367,10 +578,10 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) - * default. This should only happen when the connection is not using TCP - * timestamps and has retransmitted all of the SYN/SYNACK/data packets - * ACKed so far. In this case, an RTO can cut cwnd to 1, in which -- * case we need to slow-start up toward something safe: TCP_INIT_CWND. -+ * case we need to slow-start up toward something safe: initial cwnd. - */ - if (unlikely(bbr->min_rtt_us == ~0U)) /* no valid RTT samples yet? 
*/ -- return TCP_INIT_CWND; /* be safe: cap at default initial cwnd*/ -+ return bbr->init_cwnd; /* be safe: cap at initial cwnd */ - - w = (u64)bw * bbr->min_rtt_us; - -@@ -387,23 +598,23 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) - * - one skb in sending host Qdisc, - * - one skb in sending host TSO/GSO engine - * - one skb being received by receiver host LRO/GRO/delayed-ACK engine -- * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because -- * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, -+ * Don't worry, at low rates this won't bloat cwnd because -+ * in such cases tso_segs_goal is small. The minimum cwnd is 4 packets, - * which allows 2 outstanding 2-packet sequences, to try to keep pipe - * full even with ACK-every-other-packet delayed ACKs. - */ - static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd) - { - struct bbr *bbr = inet_csk_ca(sk); -+ u32 tso_segs_goal; - -- /* Allow enough full-sized skbs in flight to utilize end systems. */ -- cwnd += 3 * bbr_tso_segs_goal(sk); -- -- /* Reduce delayed ACKs by rounding up cwnd to the next even number. */ -- cwnd = (cwnd + 1) & ~1U; -+ tso_segs_goal = 3 * bbr_tso_segs_goal(sk); - -+ /* Allow enough full-sized skbs in flight to utilize end systems. */ -+ cwnd = max_t(u32, cwnd, tso_segs_goal); -+ cwnd = max_t(u32, cwnd, bbr_param(sk, cwnd_min_target)); - /* Ensure gain cycling gets inflight above BDP even for small BDPs. 
*/ -- if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == 0) -+ if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) - cwnd += 2; - - return cwnd; -@@ -458,10 +669,10 @@ static u32 bbr_ack_aggregation_cwnd(struct sock *sk) - { - u32 max_aggr_cwnd, aggr_cwnd = 0; - -- if (bbr_extra_acked_gain && bbr_full_bw_reached(sk)) { -+ if (bbr_param(sk, extra_acked_gain)) { - max_aggr_cwnd = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us) - / BW_UNIT; -- aggr_cwnd = (bbr_extra_acked_gain * bbr_extra_acked(sk)) -+ aggr_cwnd = (bbr_param(sk, extra_acked_gain) * bbr_extra_acked(sk)) - >> BBR_SCALE; - aggr_cwnd = min(aggr_cwnd, max_aggr_cwnd); - } -@@ -469,66 +680,27 @@ static u32 bbr_ack_aggregation_cwnd(struct sock *sk) - return aggr_cwnd; - } - --/* An optimization in BBR to reduce losses: On the first round of recovery, we -- * follow the packet conservation principle: send P packets per P packets acked. -- * After that, we slow-start and send at most 2*P packets per P packets acked. -- * After recovery finishes, or upon undo, we restore the cwnd we had when -- * recovery started (capped by the target cwnd based on estimated BDP). -- * -- * TODO(ycheng/ncardwell): implement a rate-based approach. -- */ --static bool bbr_set_cwnd_to_recover_or_restore( -- struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd) -+/* Returns the cwnd for PROBE_RTT mode. */ -+static u32 bbr_probe_rtt_cwnd(struct sock *sk) - { -- struct tcp_sock *tp = tcp_sk(sk); -- struct bbr *bbr = inet_csk_ca(sk); -- u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state; -- u32 cwnd = tcp_snd_cwnd(tp); -- -- /* An ACK for P pkts should release at most 2*P packets. We do this -- * in two steps. First, here we deduct the number of lost packets. -- * Then, in bbr_set_cwnd() we slow start up toward the target cwnd. 
-- */ -- if (rs->losses > 0) -- cwnd = max_t(s32, cwnd - rs->losses, 1); -- -- if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) { -- /* Starting 1st round of Recovery, so do packet conservation. */ -- bbr->packet_conservation = 1; -- bbr->next_rtt_delivered = tp->delivered; /* start round now */ -- /* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */ -- cwnd = tcp_packets_in_flight(tp) + acked; -- } else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) { -- /* Exiting loss recovery; restore cwnd saved before recovery. */ -- cwnd = max(cwnd, bbr->prior_cwnd); -- bbr->packet_conservation = 0; -- } -- bbr->prev_ca_state = state; -- -- if (bbr->packet_conservation) { -- *new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked); -- return true; /* yes, using packet conservation */ -- } -- *new_cwnd = cwnd; -- return false; -+ return max_t(u32, bbr_param(sk, cwnd_min_target), -+ bbr_bdp(sk, bbr_bw(sk), bbr_param(sk, probe_rtt_cwnd_gain))); - } - - /* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss - * has drawn us down below target), or snap down to target if we're above it. 
- */ - static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, -- u32 acked, u32 bw, int gain) -+ u32 acked, u32 bw, int gain, u32 cwnd, -+ struct bbr_context *ctx) - { - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); -- u32 cwnd = tcp_snd_cwnd(tp), target_cwnd = 0; -+ u32 target_cwnd = 0; - - if (!acked) - goto done; /* no packet fully ACKed; just apply caps */ - -- if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd)) -- goto done; -- - target_cwnd = bbr_bdp(sk, bw, gain); - - /* Increment the cwnd to account for excess ACKed data that seems -@@ -537,74 +709,26 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, - target_cwnd += bbr_ack_aggregation_cwnd(sk); - target_cwnd = bbr_quantization_budget(sk, target_cwnd); - -- /* If we're below target cwnd, slow start cwnd toward target cwnd. */ -- if (bbr_full_bw_reached(sk)) /* only cut cwnd if we filled the pipe */ -- cwnd = min(cwnd + acked, target_cwnd); -- else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND) -- cwnd = cwnd + acked; -- cwnd = max(cwnd, bbr_cwnd_min_target); -+ /* Update cwnd and enable fast path if cwnd reaches target_cwnd. */ -+ bbr->try_fast_path = 0; -+ if (bbr_full_bw_reached(sk)) { /* only cut cwnd if we filled the pipe */ -+ cwnd += acked; -+ if (cwnd >= target_cwnd) { -+ cwnd = target_cwnd; -+ bbr->try_fast_path = 1; -+ } -+ } else if (cwnd < target_cwnd || cwnd < 2 * bbr->init_cwnd) { -+ cwnd += acked; -+ } else { -+ bbr->try_fast_path = 1; -+ } - -+ cwnd = max_t(u32, cwnd, bbr_param(sk, cwnd_min_target)); - done: -- tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* apply global cap */ -+ tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* global cap */ - if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */ -- tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target)); --} -- --/* End cycle phase if it's time and/or we hit the phase's in-flight target. 
*/ --static bool bbr_is_next_cycle_phase(struct sock *sk, -- const struct rate_sample *rs) --{ -- struct tcp_sock *tp = tcp_sk(sk); -- struct bbr *bbr = inet_csk_ca(sk); -- bool is_full_length = -- tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) > -- bbr->min_rtt_us; -- u32 inflight, bw; -- -- /* The pacing_gain of 1.0 paces at the estimated bw to try to fully -- * use the pipe without increasing the queue. -- */ -- if (bbr->pacing_gain == BBR_UNIT) -- return is_full_length; /* just use wall clock time */ -- -- inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight); -- bw = bbr_max_bw(sk); -- -- /* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at -- * least pacing_gain*BDP; this may take more than min_rtt if min_rtt is -- * small (e.g. on a LAN). We do not persist if packets are lost, since -- * a path with small buffers may not hold that much. -- */ -- if (bbr->pacing_gain > BBR_UNIT) -- return is_full_length && -- (rs->losses || /* perhaps pacing_gain*BDP won't fit */ -- inflight >= bbr_inflight(sk, bw, bbr->pacing_gain)); -- -- /* A pacing_gain < 1.0 tries to drain extra queue we added if bw -- * probing didn't find more bw. If inflight falls to match BDP then we -- * estimate queue is drained; persisting would underutilize the pipe. -- */ -- return is_full_length || -- inflight <= bbr_inflight(sk, bw, BBR_UNIT); --} -- --static void bbr_advance_cycle_phase(struct sock *sk) --{ -- struct tcp_sock *tp = tcp_sk(sk); -- struct bbr *bbr = inet_csk_ca(sk); -- -- bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); -- bbr->cycle_mstamp = tp->delivered_mstamp; --} -- --/* Gain cycling: cycle pacing gain to converge to fair share of available bw. 
*/ --static void bbr_update_cycle_phase(struct sock *sk, -- const struct rate_sample *rs) --{ -- struct bbr *bbr = inet_csk_ca(sk); -- -- if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs)) -- bbr_advance_cycle_phase(sk); -+ tcp_snd_cwnd_set(tp, min_t(u32, tcp_snd_cwnd(tp), -+ bbr_probe_rtt_cwnd(sk))); - } - - static void bbr_reset_startup_mode(struct sock *sk) -@@ -614,191 +738,49 @@ static void bbr_reset_startup_mode(struct sock *sk) - bbr->mode = BBR_STARTUP; - } - --static void bbr_reset_probe_bw_mode(struct sock *sk) --{ -- struct bbr *bbr = inet_csk_ca(sk); -- -- bbr->mode = BBR_PROBE_BW; -- bbr->cycle_idx = CYCLE_LEN - 1 - get_random_u32_below(bbr_cycle_rand); -- bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */ --} -- --static void bbr_reset_mode(struct sock *sk) --{ -- if (!bbr_full_bw_reached(sk)) -- bbr_reset_startup_mode(sk); -- else -- bbr_reset_probe_bw_mode(sk); --} -- --/* Start a new long-term sampling interval. */ --static void bbr_reset_lt_bw_sampling_interval(struct sock *sk) --{ -- struct tcp_sock *tp = tcp_sk(sk); -- struct bbr *bbr = inet_csk_ca(sk); -- -- bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC); -- bbr->lt_last_delivered = tp->delivered; -- bbr->lt_last_lost = tp->lost; -- bbr->lt_rtt_cnt = 0; --} -- --/* Completely reset long-term bandwidth sampling. */ --static void bbr_reset_lt_bw_sampling(struct sock *sk) --{ -- struct bbr *bbr = inet_csk_ca(sk); -- -- bbr->lt_bw = 0; -- bbr->lt_use_bw = 0; -- bbr->lt_is_sampling = false; -- bbr_reset_lt_bw_sampling_interval(sk); --} -- --/* Long-term bw sampling interval is done. Estimate whether we're policed. */ --static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw) --{ -- struct bbr *bbr = inet_csk_ca(sk); -- u32 diff; -- -- if (bbr->lt_bw) { /* do we have bw from a previous interval? */ -- /* Is new bw close to the lt_bw from the previous interval? 
*/ -- diff = abs(bw - bbr->lt_bw); -- if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) || -- (bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <= -- bbr_lt_bw_diff)) { -- /* All criteria are met; estimate we're policed. */ -- bbr->lt_bw = (bw + bbr->lt_bw) >> 1; /* avg 2 intvls */ -- bbr->lt_use_bw = 1; -- bbr->pacing_gain = BBR_UNIT; /* try to avoid drops */ -- bbr->lt_rtt_cnt = 0; -- return; -- } -- } -- bbr->lt_bw = bw; -- bbr_reset_lt_bw_sampling_interval(sk); --} -- --/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of -- * Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and -- * explicitly models their policed rate, to reduce unnecessary losses. We -- * estimate that we're policed if we see 2 consecutive sampling intervals with -- * consistent throughput and high packet loss. If we think we're being policed, -- * set lt_bw to the "long-term" average delivery rate from those 2 intervals. -+/* See if we have reached next round trip. Upon start of the new round, -+ * returns packets delivered since previous round start plus this ACK. - */ --static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs) --{ -- struct tcp_sock *tp = tcp_sk(sk); -- struct bbr *bbr = inet_csk_ca(sk); -- u32 lost, delivered; -- u64 bw; -- u32 t; -- -- if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */ -- if (bbr->mode == BBR_PROBE_BW && bbr->round_start && -- ++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) { -- bbr_reset_lt_bw_sampling(sk); /* stop using lt_bw */ -- bbr_reset_probe_bw_mode(sk); /* restart gain cycling */ -- } -- return; -- } -- -- /* Wait for the first loss before sampling, to let the policer exhaust -- * its tokens and estimate the steady-state rate allowed by the policer. -- * Starting samples earlier includes bursts that over-estimate the bw. 
-- */ -- if (!bbr->lt_is_sampling) { -- if (!rs->losses) -- return; -- bbr_reset_lt_bw_sampling_interval(sk); -- bbr->lt_is_sampling = true; -- } -- -- /* To avoid underestimates, reset sampling if we run out of data. */ -- if (rs->is_app_limited) { -- bbr_reset_lt_bw_sampling(sk); -- return; -- } -- -- if (bbr->round_start) -- bbr->lt_rtt_cnt++; /* count round trips in this interval */ -- if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts) -- return; /* sampling interval needs to be longer */ -- if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) { -- bbr_reset_lt_bw_sampling(sk); /* interval is too long */ -- return; -- } -- -- /* End sampling interval when a packet is lost, so we estimate the -- * policer tokens were exhausted. Stopping the sampling before the -- * tokens are exhausted under-estimates the policed rate. -- */ -- if (!rs->losses) -- return; -- -- /* Calculate packets lost and delivered in sampling interval. */ -- lost = tp->lost - bbr->lt_last_lost; -- delivered = tp->delivered - bbr->lt_last_delivered; -- /* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */ -- if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered) -- return; -- -- /* Find average delivery rate in this sampling interval. 
*/ -- t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp; -- if ((s32)t < 1) -- return; /* interval is less than one ms, so wait */ -- /* Check if can multiply without overflow */ -- if (t >= ~0U / USEC_PER_MSEC) { -- bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */ -- return; -- } -- t *= USEC_PER_MSEC; -- bw = (u64)delivered * BW_UNIT; -- do_div(bw, t); -- bbr_lt_bw_interval_done(sk, bw); --} -- --/* Estimate the bandwidth based on how fast packets are delivered */ --static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) -+static u32 bbr_update_round_start(struct sock *sk, -+ const struct rate_sample *rs, struct bbr_context *ctx) - { - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); -- u64 bw; -+ u32 round_delivered = 0; - - bbr->round_start = 0; -- if (rs->delivered < 0 || rs->interval_us <= 0) -- return; /* Not a valid observation */ - - /* See if we've reached the next RTT */ -- if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) { -+ if (rs->interval_us > 0 && -+ !before(rs->prior_delivered, bbr->next_rtt_delivered)) { -+ round_delivered = tp->delivered - bbr->next_rtt_delivered; - bbr->next_rtt_delivered = tp->delivered; -- bbr->rtt_cnt++; - bbr->round_start = 1; -- bbr->packet_conservation = 0; - } -+ return round_delivered; -+} - -- bbr_lt_bw_sampling(sk, rs); -+/* Calculate the bandwidth based on how fast packets are delivered */ -+static void bbr_calculate_bw_sample(struct sock *sk, -+ const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ u64 bw = 0; - - /* Divide delivered by the interval to find a (lower bound) bottleneck - * bandwidth sample. Delivered is in packets and interval_us in uS and - * ratio will be <<1 for most connections. So delivered is first scaled. -+ * Round up to allow growth at low rates, even with integer division. 
- */ -- bw = div64_long((u64)rs->delivered * BW_UNIT, rs->interval_us); -- -- /* If this sample is application-limited, it is likely to have a very -- * low delivered count that represents application behavior rather than -- * the available network rate. Such a sample could drag down estimated -- * bw, causing needless slow-down. Thus, to continue to send at the -- * last measured network rate, we filter out app-limited samples unless -- * they describe the path bw at least as well as our bw model. -- * -- * So the goal during app-limited phase is to proceed with the best -- * network rate no matter how long. We automatically leave this -- * phase when app writes faster than the network can deliver :) -- */ -- if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) { -- /* Incorporate new sample into our max bw filter. */ -- minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw); -+ if (rs->interval_us > 0) { -+ if (WARN_ONCE(rs->delivered < 0, -+ "negative delivered: %d interval_us: %ld\n", -+ rs->delivered, rs->interval_us)) -+ return; -+ -+ bw = DIV_ROUND_UP_ULL((u64)rs->delivered * BW_UNIT, rs->interval_us); - } -+ -+ ctx->sample_bw = bw; - } - - /* Estimates the windowed max degree of ack aggregation. -@@ -812,7 +794,7 @@ static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) - * - * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms). - * Max filter is an approximate sliding window of 5-10 (packet timed) round -- * trips. -+ * trips for non-startup phase, and 1-2 round trips for startup. 
- */ - static void bbr_update_ack_aggregation(struct sock *sk, - const struct rate_sample *rs) -@@ -820,15 +802,19 @@ static void bbr_update_ack_aggregation(struct sock *sk, - u32 epoch_us, expected_acked, extra_acked; - struct bbr *bbr = inet_csk_ca(sk); - struct tcp_sock *tp = tcp_sk(sk); -+ u32 extra_acked_win_rtts_thresh = bbr_param(sk, extra_acked_win_rtts); - -- if (!bbr_extra_acked_gain || rs->acked_sacked <= 0 || -+ if (!bbr_param(sk, extra_acked_gain) || rs->acked_sacked <= 0 || - rs->delivered < 0 || rs->interval_us <= 0) - return; - - if (bbr->round_start) { - bbr->extra_acked_win_rtts = min(0x1F, - bbr->extra_acked_win_rtts + 1); -- if (bbr->extra_acked_win_rtts >= bbr_extra_acked_win_rtts) { -+ if (!bbr_full_bw_reached(sk)) -+ extra_acked_win_rtts_thresh = 1; -+ if (bbr->extra_acked_win_rtts >= -+ extra_acked_win_rtts_thresh) { - bbr->extra_acked_win_rtts = 0; - bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ? - 0 : 1; -@@ -862,49 +848,6 @@ static void bbr_update_ack_aggregation(struct sock *sk, - bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked; - } - --/* Estimate when the pipe is full, using the change in delivery rate: BBR -- * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by -- * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited -- * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the -- * higher rwin, 3: we get higher delivery rate samples. Or transient -- * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar -- * design goal, but uses delay and inter-ACK spacing instead of bandwidth. 
-- */ --static void bbr_check_full_bw_reached(struct sock *sk, -- const struct rate_sample *rs) --{ -- struct bbr *bbr = inet_csk_ca(sk); -- u32 bw_thresh; -- -- if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited) -- return; -- -- bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE; -- if (bbr_max_bw(sk) >= bw_thresh) { -- bbr->full_bw = bbr_max_bw(sk); -- bbr->full_bw_cnt = 0; -- return; -- } -- ++bbr->full_bw_cnt; -- bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt; --} -- --/* If pipe is probably full, drain the queue and then enter steady-state. */ --static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) --{ -- struct bbr *bbr = inet_csk_ca(sk); -- -- if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { -- bbr->mode = BBR_DRAIN; /* drain queue we created */ -- tcp_sk(sk)->snd_ssthresh = -- bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); -- } /* fall through to check if in-flight is already small: */ -- if (bbr->mode == BBR_DRAIN && -- bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= -- bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) -- bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ --} -- - static void bbr_check_probe_rtt_done(struct sock *sk) - { - struct tcp_sock *tp = tcp_sk(sk); -@@ -914,9 +857,9 @@ static void bbr_check_probe_rtt_done(struct sock *sk) - after(tcp_jiffies32, bbr->probe_rtt_done_stamp))) - return; - -- bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */ -+ bbr->probe_rtt_min_stamp = tcp_jiffies32; /* schedule next PROBE_RTT */ - tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd)); -- bbr_reset_mode(sk); -+ bbr_exit_probe_rtt(sk); - } - - /* The goal of PROBE_RTT mode is to have BBR flows cooperatively and -@@ -942,23 +885,35 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) - { - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); -- bool filter_expired; -+ bool 
probe_rtt_expired, min_rtt_expired; -+ u32 expire; - -- /* Track min RTT seen in the min_rtt_win_sec filter window: */ -- filter_expired = after(tcp_jiffies32, -- bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); -+ /* Track min RTT in probe_rtt_win_ms to time next PROBE_RTT state. */ -+ expire = bbr->probe_rtt_min_stamp + -+ msecs_to_jiffies(bbr_param(sk, probe_rtt_win_ms)); -+ probe_rtt_expired = after(tcp_jiffies32, expire); - if (rs->rtt_us >= 0 && -- (rs->rtt_us < bbr->min_rtt_us || -- (filter_expired && !rs->is_ack_delayed))) { -- bbr->min_rtt_us = rs->rtt_us; -- bbr->min_rtt_stamp = tcp_jiffies32; -+ (rs->rtt_us < bbr->probe_rtt_min_us || -+ (probe_rtt_expired && !rs->is_ack_delayed))) { -+ bbr->probe_rtt_min_us = rs->rtt_us; -+ bbr->probe_rtt_min_stamp = tcp_jiffies32; -+ } -+ /* Track min RTT seen in the min_rtt_win_sec filter window: */ -+ expire = bbr->min_rtt_stamp + bbr_param(sk, min_rtt_win_sec) * HZ; -+ min_rtt_expired = after(tcp_jiffies32, expire); -+ if (bbr->probe_rtt_min_us <= bbr->min_rtt_us || -+ min_rtt_expired) { -+ bbr->min_rtt_us = bbr->probe_rtt_min_us; -+ bbr->min_rtt_stamp = bbr->probe_rtt_min_stamp; - } - -- if (bbr_probe_rtt_mode_ms > 0 && filter_expired && -+ if (bbr_param(sk, probe_rtt_mode_ms) > 0 && probe_rtt_expired && - !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) { - bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */ - bbr_save_cwnd(sk); /* note cwnd so we can restore it */ - bbr->probe_rtt_done_stamp = 0; -+ bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; -+ bbr->next_rtt_delivered = tp->delivered; - } - - if (bbr->mode == BBR_PROBE_RTT) { -@@ -967,9 +922,9 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) - (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; - /* Maintain min packets in flight for max(200 ms, 1 round). 
*/ - if (!bbr->probe_rtt_done_stamp && -- tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) { -+ tcp_packets_in_flight(tp) <= bbr_probe_rtt_cwnd(sk)) { - bbr->probe_rtt_done_stamp = tcp_jiffies32 + -- msecs_to_jiffies(bbr_probe_rtt_mode_ms); -+ msecs_to_jiffies(bbr_param(sk, probe_rtt_mode_ms)); - bbr->probe_rtt_round_done = 0; - bbr->next_rtt_delivered = tp->delivered; - } else if (bbr->probe_rtt_done_stamp) { -@@ -990,18 +945,20 @@ static void bbr_update_gains(struct sock *sk) - - switch (bbr->mode) { - case BBR_STARTUP: -- bbr->pacing_gain = bbr_high_gain; -- bbr->cwnd_gain = bbr_high_gain; -+ bbr->pacing_gain = bbr_param(sk, startup_pacing_gain); -+ bbr->cwnd_gain = bbr_param(sk, startup_cwnd_gain); - break; - case BBR_DRAIN: -- bbr->pacing_gain = bbr_drain_gain; /* slow, to drain */ -- bbr->cwnd_gain = bbr_high_gain; /* keep cwnd */ -+ bbr->pacing_gain = bbr_param(sk, drain_gain); /* slow, to drain */ -+ bbr->cwnd_gain = bbr_param(sk, startup_cwnd_gain); /* keep cwnd */ - break; - case BBR_PROBE_BW: -- bbr->pacing_gain = (bbr->lt_use_bw ? -- BBR_UNIT : -- bbr_pacing_gain[bbr->cycle_idx]); -- bbr->cwnd_gain = bbr_cwnd_gain; -+ bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx]; -+ bbr->cwnd_gain = bbr_param(sk, cwnd_gain); -+ if (bbr_param(sk, bw_probe_cwnd_gain) && -+ bbr->cycle_idx == BBR_BW_PROBE_UP) -+ bbr->cwnd_gain += -+ BBR_UNIT * bbr_param(sk, bw_probe_cwnd_gain) / 4; - break; - case BBR_PROBE_RTT: - bbr->pacing_gain = BBR_UNIT; -@@ -1013,144 +970,1387 @@ static void bbr_update_gains(struct sock *sk) - } - } - --static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) -+__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk) - { -- bbr_update_bw(sk, rs); -- bbr_update_ack_aggregation(sk, rs); -- bbr_update_cycle_phase(sk, rs); -- bbr_check_full_bw_reached(sk, rs); -- bbr_check_drain(sk, rs); -- bbr_update_min_rtt(sk, rs); -- bbr_update_gains(sk); -+ /* Provision 3 * cwnd since BBR may slow-start even during recovery. 
*/ -+ return 3; - } - --__bpf_kfunc static void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs) -+/* Incorporate a new bw sample into the current window of our max filter. */ -+static void bbr_take_max_bw_sample(struct sock *sk, u32 bw) - { - struct bbr *bbr = inet_csk_ca(sk); -- u32 bw; -- -- bbr_update_model(sk, rs); - -- bw = bbr_bw(sk); -- bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); -- bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain); -+ bbr->bw_hi[1] = max(bw, bbr->bw_hi[1]); - } - --__bpf_kfunc static void bbr_init(struct sock *sk) -+/* Keep max of last 1-2 cycles. Each PROBE_BW cycle, flip filter window. */ -+static void bbr_advance_max_bw_filter(struct sock *sk) - { -- struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - -- bbr->prior_cwnd = 0; -- tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; -- bbr->rtt_cnt = 0; -- bbr->next_rtt_delivered = tp->delivered; -- bbr->prev_ca_state = TCP_CA_Open; -- bbr->packet_conservation = 0; -- -- bbr->probe_rtt_done_stamp = 0; -- bbr->probe_rtt_round_done = 0; -- bbr->min_rtt_us = tcp_min_rtt(tp); -- bbr->min_rtt_stamp = tcp_jiffies32; -- -- minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ -+ if (!bbr->bw_hi[1]) -+ return; /* no samples in this window; remember old window */ -+ bbr->bw_hi[0] = bbr->bw_hi[1]; -+ bbr->bw_hi[1] = 0; -+} - -- bbr->has_seen_rtt = 0; -- bbr_init_pacing_rate_from_rtt(sk); -+/* Reset the estimator for reaching full bandwidth based on bw plateau. 
*/ -+static void bbr_reset_full_bw(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); - -- bbr->round_start = 0; -- bbr->idle_restart = 0; -- bbr->full_bw_reached = 0; - bbr->full_bw = 0; - bbr->full_bw_cnt = 0; -- bbr->cycle_mstamp = 0; -- bbr->cycle_idx = 0; -- bbr_reset_lt_bw_sampling(sk); -- bbr_reset_startup_mode(sk); -+ bbr->full_bw_now = 0; -+} - -- bbr->ack_epoch_mstamp = tp->tcp_mstamp; -- bbr->ack_epoch_acked = 0; -- bbr->extra_acked_win_rtts = 0; -- bbr->extra_acked_win_idx = 0; -- bbr->extra_acked[0] = 0; -- bbr->extra_acked[1] = 0; -+/* How much do we want in flight? Our BDP, unless congestion cut cwnd. */ -+static u32 bbr_target_inflight(struct sock *sk) -+{ -+ u32 bdp = bbr_inflight(sk, bbr_bw(sk), BBR_UNIT); - -- cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); -+ return min(bdp, tcp_sk(sk)->snd_cwnd); - } - --__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk) -+static bool bbr_is_probing_bandwidth(struct sock *sk) - { -- /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ -- return 3; -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ return (bbr->mode == BBR_STARTUP) || -+ (bbr->mode == BBR_PROBE_BW && -+ (bbr->cycle_idx == BBR_BW_PROBE_REFILL || -+ bbr->cycle_idx == BBR_BW_PROBE_UP)); -+} -+ -+/* Has the given amount of time elapsed since we marked the phase start? 
*/ -+static bool bbr_has_elapsed_in_phase(const struct sock *sk, u32 interval_us) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ const struct bbr *bbr = inet_csk_ca(sk); -+ -+ return tcp_stamp_us_delta(tp->tcp_mstamp, -+ bbr->cycle_mstamp + interval_us) > 0; -+} -+ -+static void bbr_handle_queue_too_high_in_startup(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 bdp; /* estimated BDP in packets, with quantization budget */ -+ -+ bbr->full_bw_reached = 1; -+ -+ bdp = bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); -+ bbr->inflight_hi = max(bdp, bbr->inflight_latest); -+} -+ -+/* Exit STARTUP upon N consecutive rounds with ECN mark rate > ecn_thresh. */ -+static void bbr_check_ecn_too_high_in_startup(struct sock *sk, u32 ce_ratio) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr_full_bw_reached(sk) || !bbr->ecn_eligible || -+ !bbr_param(sk, full_ecn_cnt) || !bbr_param(sk, ecn_thresh)) -+ return; -+ -+ if (ce_ratio >= bbr_param(sk, ecn_thresh)) -+ bbr->startup_ecn_rounds++; -+ else -+ bbr->startup_ecn_rounds = 0; -+ -+ if (bbr->startup_ecn_rounds >= bbr_param(sk, full_ecn_cnt)) { -+ bbr_handle_queue_too_high_in_startup(sk); -+ return; -+ } -+} -+ -+/* Updates ecn_alpha and returns ce_ratio. -1 if not available. */ -+static int bbr_update_ecn_alpha(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct net *net = sock_net(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ s32 delivered, delivered_ce; -+ u64 alpha, ce_ratio; -+ u32 gain; -+ bool want_ecn_alpha; -+ -+ /* See if we should use ECN sender logic for this connection. */ -+ if (!bbr->ecn_eligible && bbr_can_use_ecn(sk) && -+ bbr_param(sk, ecn_factor) && -+ (bbr->min_rtt_us <= bbr_ecn_max_rtt_us || -+ !bbr_ecn_max_rtt_us)) -+ bbr->ecn_eligible = 1; -+ -+ /* Skip updating alpha only if not ECN-eligible and PLB is disabled. 
*/ -+ want_ecn_alpha = (bbr->ecn_eligible || -+ (bbr_can_use_ecn(sk) && -+ READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled))); -+ if (!want_ecn_alpha) -+ return -1; -+ -+ delivered = tp->delivered - bbr->alpha_last_delivered; -+ delivered_ce = tp->delivered_ce - bbr->alpha_last_delivered_ce; -+ -+ if (delivered == 0 || /* avoid divide by zero */ -+ WARN_ON_ONCE(delivered < 0 || delivered_ce < 0)) /* backwards? */ -+ return -1; -+ -+ BUILD_BUG_ON(BBR_SCALE != TCP_PLB_SCALE); -+ ce_ratio = (u64)delivered_ce << BBR_SCALE; -+ do_div(ce_ratio, delivered); -+ -+ gain = bbr_param(sk, ecn_alpha_gain); -+ alpha = ((BBR_UNIT - gain) * bbr->ecn_alpha) >> BBR_SCALE; -+ alpha += (gain * ce_ratio) >> BBR_SCALE; -+ bbr->ecn_alpha = min_t(u32, alpha, BBR_UNIT); -+ -+ bbr->alpha_last_delivered = tp->delivered; -+ bbr->alpha_last_delivered_ce = tp->delivered_ce; -+ -+ bbr_check_ecn_too_high_in_startup(sk, ce_ratio); -+ return (int)ce_ratio; - } - --/* In theory BBR does not need to undo the cwnd since it does not -- * always reduce cwnd on losses (see bbr_main()). Keep it for now. -+/* Protective Load Balancing (PLB). PLB rehashes outgoing data (to a new IPv6 -+ * flow label) if it encounters sustained congestion in the form of ECN marks. - */ --__bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk) -+static void bbr_plb(struct sock *sk, const struct rate_sample *rs, int ce_ratio) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr->round_start && ce_ratio >= 0) -+ tcp_plb_update_state(sk, &bbr->plb, ce_ratio); -+ -+ tcp_plb_check_rehash(sk, &bbr->plb); -+} -+ -+/* Each round trip of BBR_BW_PROBE_UP, double volume of probing data. */ -+static void bbr_raise_inflight_hi_slope(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 growth_this_round, cnt; -+ -+ /* Calculate "slope": packets S/Acked per inflight_hi increment. 
*/ -+ growth_this_round = 1 << bbr->bw_probe_up_rounds; -+ bbr->bw_probe_up_rounds = min(bbr->bw_probe_up_rounds + 1, 30); -+ cnt = tcp_snd_cwnd(tp) / growth_this_round; -+ cnt = max(cnt, 1U); -+ bbr->bw_probe_up_cnt = cnt; -+} -+ -+/* In BBR_BW_PROBE_UP, not seeing high loss/ECN/queue, so raise inflight_hi. */ -+static void bbr_probe_inflight_hi_upward(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 delta; -+ -+ if (!tp->is_cwnd_limited || tcp_snd_cwnd(tp) < bbr->inflight_hi) -+ return; /* not fully using inflight_hi, so don't grow it */ -+ -+ /* For each bw_probe_up_cnt packets ACKed, increase inflight_hi by 1. */ -+ bbr->bw_probe_up_acks += rs->acked_sacked; -+ if (bbr->bw_probe_up_acks >= bbr->bw_probe_up_cnt) { -+ delta = bbr->bw_probe_up_acks / bbr->bw_probe_up_cnt; -+ bbr->bw_probe_up_acks -= delta * bbr->bw_probe_up_cnt; -+ bbr->inflight_hi += delta; -+ bbr->try_fast_path = 0; /* Need to update cwnd */ -+ } -+ -+ if (bbr->round_start) -+ bbr_raise_inflight_hi_slope(sk); -+} -+ -+/* Does loss/ECN rate for this sample say inflight is "too high"? -+ * This is used by both the bbr_check_loss_too_high_in_startup() function, -+ * which can be used in either v1 or v2, and the PROBE_UP phase of v2, which -+ * uses it to notice when loss/ECN rates suggest inflight is too high. 
-+ */ -+static bool bbr_is_inflight_too_high(const struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ const struct bbr *bbr = inet_csk_ca(sk); -+ u32 loss_thresh, ecn_thresh; -+ -+ if (rs->lost > 0 && rs->tx_in_flight) { -+ loss_thresh = (u64)rs->tx_in_flight * bbr_param(sk, loss_thresh) >> -+ BBR_SCALE; -+ if (rs->lost > loss_thresh) { -+ return true; -+ } -+ } -+ -+ if (rs->delivered_ce > 0 && rs->delivered > 0 && -+ bbr->ecn_eligible && bbr_param(sk, ecn_thresh)) { -+ ecn_thresh = (u64)rs->delivered * bbr_param(sk, ecn_thresh) >> -+ BBR_SCALE; -+ if (rs->delivered_ce > ecn_thresh) { -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+/* Calculate the tx_in_flight level that corresponded to excessive loss. -+ * We find "lost_prefix" segs of the skb where loss rate went too high, -+ * by solving for "lost_prefix" in the following equation: -+ * lost / inflight >= loss_thresh -+ * (lost_prev + lost_prefix) / (inflight_prev + lost_prefix) >= loss_thresh -+ * Then we take that equation, convert it to fixed point, and -+ * round up to the nearest packet. -+ */ -+static u32 bbr_inflight_hi_from_lost_skb(const struct sock *sk, -+ const struct rate_sample *rs, -+ const struct sk_buff *skb) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ u32 loss_thresh = bbr_param(sk, loss_thresh); -+ u32 pcount, divisor, inflight_hi; -+ s32 inflight_prev, lost_prev; -+ u64 loss_budget, lost_prefix; -+ -+ pcount = tcp_skb_pcount(skb); -+ -+ /* How much data was in flight before this skb? */ -+ inflight_prev = rs->tx_in_flight - pcount; -+ if (inflight_prev < 0) { -+ WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious( -+ pcount, -+ TCP_SKB_CB(skb)->sacked, -+ rs->tx_in_flight), -+ "tx_in_flight: %u pcount: %u reneg: %u", -+ rs->tx_in_flight, pcount, tcp_sk(sk)->is_sack_reneg); -+ return ~0U; -+ } -+ -+ /* How much inflight data was marked lost before this skb? 
*/ -+ lost_prev = rs->lost - pcount; -+ if (WARN_ONCE(lost_prev < 0, -+ "cwnd: %u ca: %d out: %u lost: %u pif: %u " -+ "tx_in_flight: %u tx.lost: %u tp->lost: %u rs->lost: %d " -+ "lost_prev: %d pcount: %d seq: %u end_seq: %u reneg: %u", -+ tcp_snd_cwnd(tp), inet_csk(sk)->icsk_ca_state, -+ tp->packets_out, tp->lost_out, tcp_packets_in_flight(tp), -+ rs->tx_in_flight, TCP_SKB_CB(skb)->tx.lost, tp->lost, -+ rs->lost, lost_prev, pcount, -+ TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, -+ tp->is_sack_reneg)) -+ return ~0U; -+ -+ /* At what prefix of this lost skb did losss rate exceed loss_thresh? */ -+ loss_budget = (u64)inflight_prev * loss_thresh + BBR_UNIT - 1; -+ loss_budget >>= BBR_SCALE; -+ if (lost_prev >= loss_budget) { -+ lost_prefix = 0; /* previous losses crossed loss_thresh */ -+ } else { -+ lost_prefix = loss_budget - lost_prev; -+ lost_prefix <<= BBR_SCALE; -+ divisor = BBR_UNIT - loss_thresh; -+ if (WARN_ON_ONCE(!divisor)) /* loss_thresh is 8 bits */ -+ return ~0U; -+ do_div(lost_prefix, divisor); -+ } -+ -+ inflight_hi = inflight_prev + lost_prefix; -+ return inflight_hi; -+} -+ -+/* If loss/ECN rates during probing indicated we may have overfilled a -+ * buffer, return an operating point that tries to leave unutilized headroom in -+ * the path for other flows, for fairness convergence and lower RTTs and loss. -+ */ -+static u32 bbr_inflight_with_headroom(const struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 headroom, headroom_fraction; -+ -+ if (bbr->inflight_hi == ~0U) -+ return ~0U; -+ -+ headroom_fraction = bbr_param(sk, inflight_headroom); -+ headroom = ((u64)bbr->inflight_hi * headroom_fraction) >> BBR_SCALE; -+ headroom = max(headroom, 1U); -+ return max_t(s32, bbr->inflight_hi - headroom, -+ bbr_param(sk, cwnd_min_target)); -+} -+ -+/* Bound cwnd to a sensible level, based on our current probing state -+ * machine phase and model of a good inflight level (inflight_lo, inflight_hi). 
-+ */ -+static void bbr_bound_cwnd_for_inflight_model(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 cap; -+ -+ /* tcp_rcv_synsent_state_process() currently calls tcp_ack() -+ * and thus cong_control() without first initializing us(!). -+ */ -+ if (!bbr->initialized) -+ return; -+ -+ cap = ~0U; -+ if (bbr->mode == BBR_PROBE_BW && -+ bbr->cycle_idx != BBR_BW_PROBE_CRUISE) { -+ /* Probe to see if more packets fit in the path. */ -+ cap = bbr->inflight_hi; -+ } else { -+ if (bbr->mode == BBR_PROBE_RTT || -+ (bbr->mode == BBR_PROBE_BW && -+ bbr->cycle_idx == BBR_BW_PROBE_CRUISE)) -+ cap = bbr_inflight_with_headroom(sk); -+ } -+ /* Adapt to any loss/ECN since our last bw probe. */ -+ cap = min(cap, bbr->inflight_lo); -+ -+ cap = max_t(u32, cap, bbr_param(sk, cwnd_min_target)); -+ tcp_snd_cwnd_set(tp, min(cap, tcp_snd_cwnd(tp))); -+} -+ -+/* How should we multiplicatively cut bw or inflight limits based on ECN? */ -+static u32 bbr_ecn_cut(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ return BBR_UNIT - -+ ((bbr->ecn_alpha * bbr_param(sk, ecn_factor)) >> BBR_SCALE); -+} -+ -+/* Init lower bounds if have not inited yet. */ -+static void bbr_init_lower_bounds(struct sock *sk, bool init_bw) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (init_bw && bbr->bw_lo == ~0U) -+ bbr->bw_lo = bbr_max_bw(sk); -+ if (bbr->inflight_lo == ~0U) -+ bbr->inflight_lo = tcp_snd_cwnd(tp); -+} -+ -+/* Reduce bw and inflight to (1 - beta). */ -+static void bbr_loss_lower_bounds(struct sock *sk, u32 *bw, u32 *inflight) -+{ -+ struct bbr* bbr = inet_csk_ca(sk); -+ u32 loss_cut = BBR_UNIT - bbr_param(sk, beta); -+ -+ *bw = max_t(u32, bbr->bw_latest, -+ (u64)bbr->bw_lo * loss_cut >> BBR_SCALE); -+ *inflight = max_t(u32, bbr->inflight_latest, -+ (u64)bbr->inflight_lo * loss_cut >> BBR_SCALE); -+} -+ -+/* Reduce inflight to (1 - alpha*ecn_factor). 
*/ -+static void bbr_ecn_lower_bounds(struct sock *sk, u32 *inflight) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 ecn_cut = bbr_ecn_cut(sk); -+ -+ *inflight = (u64)bbr->inflight_lo * ecn_cut >> BBR_SCALE; -+} -+ -+/* Estimate a short-term lower bound on the capacity available now, based -+ * on measurements of the current delivery process and recent history. When we -+ * are seeing loss/ECN at times when we are not probing bw, then conservatively -+ * move toward flow balance by multiplicatively cutting our short-term -+ * estimated safe rate and volume of data (bw_lo and inflight_lo). We use a -+ * multiplicative decrease in order to converge to a lower capacity in time -+ * logarithmic in the magnitude of the decrease. -+ * -+ * However, we do not cut our short-term estimates lower than the current rate -+ * and volume of delivered data from this round trip, since from the current -+ * delivery process we can estimate the measured capacity available now. -+ * -+ * Anything faster than that approach would knowingly risk high loss, which can -+ * cause low bw for Reno/CUBIC and high loss recovery latency for -+ * request/response flows using any congestion control. -+ */ -+static void bbr_adapt_lower_bounds(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 ecn_inflight_lo = ~0U; -+ -+ /* We only use lower-bound estimates when not probing bw. -+ * When probing we need to push inflight higher to probe bw. -+ */ -+ if (bbr_is_probing_bandwidth(sk)) -+ return; -+ -+ /* ECN response. */ -+ if (bbr->ecn_in_round && bbr_param(sk, ecn_factor)) { -+ bbr_init_lower_bounds(sk, false); -+ bbr_ecn_lower_bounds(sk, &ecn_inflight_lo); -+ } -+ -+ /* Loss response. */ -+ if (bbr->loss_in_round) { -+ bbr_init_lower_bounds(sk, true); -+ bbr_loss_lower_bounds(sk, &bbr->bw_lo, &bbr->inflight_lo); -+ } -+ -+ /* Adjust to the lower of the levels implied by loss/ECN. 
*/ -+ bbr->inflight_lo = min(bbr->inflight_lo, ecn_inflight_lo); -+ bbr->bw_lo = max(1U, bbr->bw_lo); -+} -+ -+/* Reset any short-term lower-bound adaptation to congestion, so that we can -+ * push our inflight up. -+ */ -+static void bbr_reset_lower_bounds(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->bw_lo = ~0U; -+ bbr->inflight_lo = ~0U; -+} -+ -+/* After bw probing (STARTUP/PROBE_UP), reset signals before entering a state -+ * machine phase where we adapt our lower bound based on congestion signals. -+ */ -+static void bbr_reset_congestion_signals(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->loss_in_round = 0; -+ bbr->ecn_in_round = 0; -+ bbr->loss_in_cycle = 0; -+ bbr->ecn_in_cycle = 0; -+ bbr->bw_latest = 0; -+ bbr->inflight_latest = 0; -+} -+ -+static void bbr_exit_loss_recovery(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd)); -+ bbr->try_fast_path = 0; /* bound cwnd using latest model */ -+} -+ -+/* Update rate and volume of delivered data from latest round trip. */ -+static void bbr_update_latest_delivery_signals( -+ struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->loss_round_start = 0; -+ if (rs->interval_us <= 0 || !rs->acked_sacked) -+ return; /* Not a valid observation */ -+ -+ bbr->bw_latest = max_t(u32, bbr->bw_latest, ctx->sample_bw); -+ bbr->inflight_latest = max_t(u32, bbr->inflight_latest, rs->delivered); -+ -+ if (!before(rs->prior_delivered, bbr->loss_round_delivered)) { -+ bbr->loss_round_delivered = tp->delivered; -+ bbr->loss_round_start = 1; /* mark start of new round trip */ -+ } -+} -+ -+/* Once per round, reset filter for latest rate and volume of delivered data. 
*/ -+static void bbr_advance_latest_delivery_signals( -+ struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ /* If ACK matches a TLP retransmit, persist the filter. If we detect -+ * that a TLP retransmit plugged a tail loss, we'll want to remember -+ * how much data the path delivered before the tail loss. -+ */ -+ if (bbr->loss_round_start && !rs->is_acking_tlp_retrans_seq) { -+ bbr->bw_latest = ctx->sample_bw; -+ bbr->inflight_latest = rs->delivered; -+ } -+} -+ -+/* Update (most of) our congestion signals: track the recent rate and volume of -+ * delivered data, presence of loss, and EWMA degree of ECN marking. -+ */ -+static void bbr_update_congestion_signals( -+ struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) - { - struct bbr *bbr = inet_csk_ca(sk); -+ u64 bw; -+ -+ if (rs->interval_us <= 0 || !rs->acked_sacked) -+ return; /* Not a valid observation */ -+ bw = ctx->sample_bw; - -- bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ -+ if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) -+ bbr_take_max_bw_sample(sk, bw); -+ -+ bbr->loss_in_round |= (rs->losses > 0); -+ -+ if (!bbr->loss_round_start) -+ return; /* skip the per-round-trip updates */ -+ /* Now do per-round-trip updates. */ -+ bbr_adapt_lower_bounds(sk, rs); -+ -+ bbr->loss_in_round = 0; -+ bbr->ecn_in_round = 0; -+} -+ -+/* Bandwidth probing can cause loss. To help coexistence with loss-based -+ * congestion control we spread out our probing in a Reno-conscious way. Due to -+ * the shape of the Reno sawtooth, the time required between loss epochs for an -+ * idealized Reno flow is a number of round trips that is the BDP of that -+ * flow. We count packet-timed round trips directly, since measured RTT can -+ * vary widely, and Reno is driven by packet-timed round trips. 
-+ */ -+static bool bbr_is_reno_coexistence_probe_time(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 rounds; -+ -+ /* Random loss can shave some small percentage off of our inflight -+ * in each round. To survive this, flows need robust periodic probes. -+ */ -+ rounds = min_t(u32, bbr_param(sk, bw_probe_max_rounds), bbr_target_inflight(sk)); -+ return bbr->rounds_since_probe >= rounds; -+} -+ -+/* How long do we want to wait before probing for bandwidth (and risking -+ * loss)? We randomize the wait, for better mixing and fairness convergence. -+ * -+ * We bound the Reno-coexistence inter-bw-probe time to be 62-63 round trips. -+ * This is calculated to allow fairness with a 25Mbps, 30ms Reno flow, -+ * (eg 4K video to a broadband user): -+ * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets -+ * -+ * We bound the BBR-native inter-bw-probe wall clock time to be: -+ * (a) higher than 2 sec: to try to avoid causing loss for a long enough time -+ * to allow Reno at 30ms to get 4K video bw, the inter-bw-probe time must -+ * be at least: 25Mbps * .030sec / (1514bytes) * 0.030sec = 1.9secs -+ * (b) lower than 3 sec: to ensure flows can start probing in a reasonable -+ * amount of time to discover unutilized bw on human-scale interactive -+ * time-scales (e.g. perhaps traffic from a web page download that we -+ * were competing with is now complete). 
-+ */ -+static void bbr_pick_probe_wait(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ /* Decide the random round-trip bound for wait until probe: */ -+ bbr->rounds_since_probe = -+ get_random_u32_below(bbr_param(sk, bw_probe_rand_rounds)); -+ /* Decide the random wall clock bound for wait until probe: */ -+ bbr->probe_wait_us = bbr_param(sk, bw_probe_base_us) + -+ get_random_u32_below(bbr_param(sk, bw_probe_rand_us)); -+} -+ -+static void bbr_set_cycle_idx(struct sock *sk, int cycle_idx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->cycle_idx = cycle_idx; -+ /* New phase, so need to update cwnd and pacing rate. */ -+ bbr->try_fast_path = 0; -+} -+ -+/* Send at estimated bw to fill the pipe, but not queue. We need this phase -+ * before PROBE_UP, because as soon as we send faster than the available bw -+ * we will start building a queue, and if the buffer is shallow we can cause -+ * loss. If we do not fill the pipe before we cause this loss, our bw_hi and -+ * inflight_hi estimates will underestimate. -+ */ -+static void bbr_start_bw_probe_refill(struct sock *sk, u32 bw_probe_up_rounds) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr_reset_lower_bounds(sk); -+ bbr->bw_probe_up_rounds = bw_probe_up_rounds; -+ bbr->bw_probe_up_acks = 0; -+ bbr->stopped_risky_probe = 0; -+ bbr->ack_phase = BBR_ACKS_REFILLING; -+ bbr->next_rtt_delivered = tp->delivered; -+ bbr_set_cycle_idx(sk, BBR_BW_PROBE_REFILL); -+} -+ -+/* Now probe max deliverable data rate and volume. 
*/ -+static void bbr_start_bw_probe_up(struct sock *sk, struct bbr_context *ctx) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->ack_phase = BBR_ACKS_PROBE_STARTING; -+ bbr->next_rtt_delivered = tp->delivered; -+ bbr->cycle_mstamp = tp->tcp_mstamp; -+ bbr_reset_full_bw(sk); -+ bbr->full_bw = ctx->sample_bw; -+ bbr_set_cycle_idx(sk, BBR_BW_PROBE_UP); -+ bbr_raise_inflight_hi_slope(sk); -+} -+ -+/* Start a new PROBE_BW probing cycle of some wall clock length. Pick a wall -+ * clock time at which to probe beyond an inflight that we think to be -+ * safe. This will knowingly risk packet loss, so we want to do this rarely, to -+ * keep packet loss rates low. Also start a round-trip counter, to probe faster -+ * if we estimate a Reno flow at our BDP would probe faster. -+ */ -+static void bbr_start_bw_probe_down(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr_reset_congestion_signals(sk); -+ bbr->bw_probe_up_cnt = ~0U; /* not growing inflight_hi any more */ -+ bbr_pick_probe_wait(sk); -+ bbr->cycle_mstamp = tp->tcp_mstamp; /* start wall clock */ -+ bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; -+ bbr->next_rtt_delivered = tp->delivered; -+ bbr_set_cycle_idx(sk, BBR_BW_PROBE_DOWN); -+} -+ -+/* Cruise: maintain what we estimate to be a neutral, conservative -+ * operating point, without attempting to probe up for bandwidth or down for -+ * RTT, and only reducing inflight in response to loss/ECN signals. -+ */ -+static void bbr_start_bw_probe_cruise(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr->inflight_lo != ~0U) -+ bbr->inflight_lo = min(bbr->inflight_lo, bbr->inflight_hi); -+ -+ bbr_set_cycle_idx(sk, BBR_BW_PROBE_CRUISE); -+} -+ -+/* Loss and/or ECN rate is too high while probing. -+ * Adapt (once per bw probe) by cutting inflight_hi and then restarting cycle. 
-+ */ -+static void bbr_handle_inflight_too_high(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ const u32 beta = bbr_param(sk, beta); -+ -+ bbr->prev_probe_too_high = 1; -+ bbr->bw_probe_samples = 0; /* only react once per probe */ -+ /* If we are app-limited then we are not robustly -+ * probing the max volume of inflight data we think -+ * might be safe (analogous to how app-limited bw -+ * samples are not known to be robustly probing bw). -+ */ -+ if (!rs->is_app_limited) { -+ bbr->inflight_hi = max_t(u32, rs->tx_in_flight, -+ (u64)bbr_target_inflight(sk) * -+ (BBR_UNIT - beta) >> BBR_SCALE); -+ } -+ if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) -+ bbr_start_bw_probe_down(sk); -+} -+ -+/* If we're seeing bw and loss samples reflecting our bw probing, adapt -+ * using the signals we see. If loss or ECN mark rate gets too high, then adapt -+ * inflight_hi downward. If we're able to push inflight higher without such -+ * signals, push higher: adapt inflight_hi upward. -+ */ -+static bool bbr_adapt_upper_bounds(struct sock *sk, -+ const struct rate_sample *rs, -+ struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ /* Track when we'll see bw/loss samples resulting from our bw probes. */ -+ if (bbr->ack_phase == BBR_ACKS_PROBE_STARTING && bbr->round_start) -+ bbr->ack_phase = BBR_ACKS_PROBE_FEEDBACK; -+ if (bbr->ack_phase == BBR_ACKS_PROBE_STOPPING && bbr->round_start) { -+ /* End of samples from bw probing phase. */ -+ bbr->bw_probe_samples = 0; -+ bbr->ack_phase = BBR_ACKS_INIT; -+ /* At this point in the cycle, our current bw sample is also -+ * our best recent chance at finding the highest available bw -+ * for this flow. So now is the best time to forget the bw -+ * samples from the previous cycle, by advancing the window. 
-+ */ -+ if (bbr->mode == BBR_PROBE_BW && !rs->is_app_limited) -+ bbr_advance_max_bw_filter(sk); -+ /* If we had an inflight_hi, then probed and pushed inflight all -+ * the way up to hit that inflight_hi without seeing any -+ * high loss/ECN in all the resulting ACKs from that probing, -+ * then probe up again, this time letting inflight persist at -+ * inflight_hi for a round trip, then accelerating beyond. -+ */ -+ if (bbr->mode == BBR_PROBE_BW && -+ bbr->stopped_risky_probe && !bbr->prev_probe_too_high) { -+ bbr_start_bw_probe_refill(sk, 0); -+ return true; /* yes, decided state transition */ -+ } -+ } -+ if (bbr_is_inflight_too_high(sk, rs)) { -+ if (bbr->bw_probe_samples) /* sample is from bw probing? */ -+ bbr_handle_inflight_too_high(sk, rs); -+ } else { -+ /* Loss/ECN rate is declared safe. Adjust upper bound upward. */ -+ -+ if (bbr->inflight_hi == ~0U) -+ return false; /* no excess queue signals yet */ -+ -+ /* To be resilient to random loss, we must raise bw/inflight_hi -+ * if we observe in any phase that a higher level is safe. -+ */ -+ if (rs->tx_in_flight > bbr->inflight_hi) { -+ bbr->inflight_hi = rs->tx_in_flight; -+ } -+ -+ if (bbr->mode == BBR_PROBE_BW && -+ bbr->cycle_idx == BBR_BW_PROBE_UP) -+ bbr_probe_inflight_hi_upward(sk, rs); -+ } -+ -+ return false; -+} -+ -+/* Check if it's time to probe for bandwidth now, and if so, kick it off. */ -+static bool bbr_check_time_to_probe_bw(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 n; -+ -+ /* If we seem to be at an operating point where we are not seeing loss -+ * but we are seeing ECN marks, then when the ECN marks cease we reprobe -+ * quickly (in case cross-traffic has ceased and freed up bw). 
-+ */ -+ if (bbr_param(sk, ecn_reprobe_gain) && bbr->ecn_eligible && -+ bbr->ecn_in_cycle && !bbr->loss_in_cycle && -+ inet_csk(sk)->icsk_ca_state == TCP_CA_Open) { -+ /* Calculate n so that when bbr_raise_inflight_hi_slope() -+ * computes growth_this_round as 2^n it will be roughly the -+ * desired volume of data (inflight_hi*ecn_reprobe_gain). -+ */ -+ n = ilog2((((u64)bbr->inflight_hi * -+ bbr_param(sk, ecn_reprobe_gain)) >> BBR_SCALE)); -+ bbr_start_bw_probe_refill(sk, n); -+ return true; -+ } -+ -+ if (bbr_has_elapsed_in_phase(sk, bbr->probe_wait_us) || -+ bbr_is_reno_coexistence_probe_time(sk)) { -+ bbr_start_bw_probe_refill(sk, 0); -+ return true; -+ } -+ return false; -+} -+ -+/* Is it time to transition from PROBE_DOWN to PROBE_CRUISE? */ -+static bool bbr_check_time_to_cruise(struct sock *sk, u32 inflight, u32 bw) -+{ -+ /* Always need to pull inflight down to leave headroom in queue. */ -+ if (inflight > bbr_inflight_with_headroom(sk)) -+ return false; -+ -+ return inflight <= bbr_inflight(sk, bw, BBR_UNIT); -+} -+ -+/* PROBE_BW state machine: cruise, refill, probe for bw, or drain? */ -+static void bbr_update_cycle_phase(struct sock *sk, -+ const struct rate_sample *rs, -+ struct bbr_context *ctx) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ bool is_bw_probe_done = false; -+ u32 inflight, bw; -+ -+ if (!bbr_full_bw_reached(sk)) -+ return; -+ -+ /* In DRAIN, PROBE_BW, or PROBE_RTT, adjust upper bounds. */ -+ if (bbr_adapt_upper_bounds(sk, rs, ctx)) -+ return; /* already decided state transition */ -+ -+ if (bbr->mode != BBR_PROBE_BW) -+ return; -+ -+ inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight); -+ bw = bbr_max_bw(sk); -+ -+ switch (bbr->cycle_idx) { -+ /* First we spend most of our time cruising with a pacing_gain of 1.0, -+ * which paces at the estimated bw, to try to fully use the pipe -+ * without building queue. If we encounter loss/ECN marks, we adapt -+ * by slowing down. 
-+ */ -+ case BBR_BW_PROBE_CRUISE: -+ if (bbr_check_time_to_probe_bw(sk, rs)) -+ return; /* already decided state transition */ -+ break; -+ -+ /* After cruising, when it's time to probe, we first "refill": we send -+ * at the estimated bw to fill the pipe, before probing higher and -+ * knowingly risking overflowing the bottleneck buffer (causing loss). -+ */ -+ case BBR_BW_PROBE_REFILL: -+ if (bbr->round_start) { -+ /* After one full round trip of sending in REFILL, we -+ * start to see bw samples reflecting our REFILL, which -+ * may be putting too much data in flight. -+ */ -+ bbr->bw_probe_samples = 1; -+ bbr_start_bw_probe_up(sk, ctx); -+ } -+ break; -+ -+ /* After we refill the pipe, we probe by using a pacing_gain > 1.0, to -+ * probe for bw. If we have not seen loss/ECN, we try to raise inflight -+ * to at least pacing_gain*BDP; note that this may take more than -+ * min_rtt if min_rtt is small (e.g. on a LAN). -+ * -+ * We terminate PROBE_UP bandwidth probing upon any of the following: -+ * -+ * (1) We've pushed inflight up to hit the inflight_hi target set in the -+ * most recent previous bw probe phase. Thus we want to start -+ * draining the queue immediately because it's very likely the most -+ * recently sent packets will fill the queue and cause drops. -+ * (2) If inflight_hi has not limited bandwidth growth recently, and -+ * yet delivered bandwidth has not increased much recently -+ * (bbr->full_bw_now). -+ * (3) Loss filter says loss rate is "too high". -+ * (4) ECN filter says ECN mark rate is "too high". 
-+ * -+ * (1) (2) checked here, (3) (4) checked in bbr_is_inflight_too_high() -+ */ -+ case BBR_BW_PROBE_UP: -+ if (bbr->prev_probe_too_high && -+ inflight >= bbr->inflight_hi) { -+ bbr->stopped_risky_probe = 1; -+ is_bw_probe_done = true; -+ } else { -+ if (tp->is_cwnd_limited && -+ tcp_snd_cwnd(tp) >= bbr->inflight_hi) { -+ /* inflight_hi is limiting bw growth */ -+ bbr_reset_full_bw(sk); -+ bbr->full_bw = ctx->sample_bw; -+ } else if (bbr->full_bw_now) { -+ /* Plateau in estimated bw. Pipe looks full. */ -+ is_bw_probe_done = true; -+ } -+ } -+ if (is_bw_probe_done) { -+ bbr->prev_probe_too_high = 0; /* no loss/ECN (yet) */ -+ bbr_start_bw_probe_down(sk); /* restart w/ down */ -+ } -+ break; -+ -+ /* After probing in PROBE_UP, we have usually accumulated some data in -+ * the bottleneck buffer (if bw probing didn't find more bw). We next -+ * enter PROBE_DOWN to try to drain any excess data from the queue. To -+ * do this, we use a pacing_gain < 1.0. We hold this pacing gain until -+ * our inflight is less then that target cruising point, which is the -+ * minimum of (a) the amount needed to leave headroom, and (b) the -+ * estimated BDP. Once inflight falls to match the target, we estimate -+ * the queue is drained; persisting would underutilize the pipe. -+ */ -+ case BBR_BW_PROBE_DOWN: -+ if (bbr_check_time_to_probe_bw(sk, rs)) -+ return; /* already decided state transition */ -+ if (bbr_check_time_to_cruise(sk, inflight, bw)) -+ bbr_start_bw_probe_cruise(sk); -+ break; -+ -+ default: -+ WARN_ONCE(1, "BBR invalid cycle index %u\n", bbr->cycle_idx); -+ } -+} -+ -+/* Exiting PROBE_RTT, so return to bandwidth probing in STARTUP or PROBE_BW. 
*/ -+static void bbr_exit_probe_rtt(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr_reset_lower_bounds(sk); -+ if (bbr_full_bw_reached(sk)) { -+ bbr->mode = BBR_PROBE_BW; -+ /* Raising inflight after PROBE_RTT may cause loss, so reset -+ * the PROBE_BW clock and schedule the next bandwidth probe for -+ * a friendly and randomized future point in time. -+ */ -+ bbr_start_bw_probe_down(sk); -+ /* Since we are exiting PROBE_RTT, we know inflight is -+ * below our estimated BDP, so it is reasonable to cruise. -+ */ -+ bbr_start_bw_probe_cruise(sk); -+ } else { -+ bbr->mode = BBR_STARTUP; -+ } -+} -+ -+/* Exit STARTUP based on loss rate > 1% and loss gaps in round >= N. Wait until -+ * the end of the round in recovery to get a good estimate of how many packets -+ * have been lost, and how many we need to drain with a low pacing rate. -+ */ -+static void bbr_check_loss_too_high_in_startup(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr_full_bw_reached(sk)) -+ return; -+ -+ /* For STARTUP exit, check the loss rate at the end of each round trip -+ * of Recovery episodes in STARTUP. We check the loss rate at the end -+ * of the round trip to filter out noisy/low loss and have a better -+ * sense of inflight (extent of loss), so we can drain more accurately. 
-+ */ -+ if (rs->losses && bbr->loss_events_in_round < 0xf) -+ bbr->loss_events_in_round++; /* update saturating counter */ -+ if (bbr_param(sk, full_loss_cnt) && bbr->loss_round_start && -+ inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery && -+ bbr->loss_events_in_round >= bbr_param(sk, full_loss_cnt) && -+ bbr_is_inflight_too_high(sk, rs)) { -+ bbr_handle_queue_too_high_in_startup(sk); -+ return; -+ } -+ if (bbr->loss_round_start) -+ bbr->loss_events_in_round = 0; -+} -+ -+/* Estimate when the pipe is full, using the change in delivery rate: BBR -+ * estimates bw probing filled the pipe if the estimated bw hasn't changed by -+ * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited -+ * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the -+ * higher rwin, 3: we get higher delivery rate samples. Or transient -+ * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar -+ * design goal, but uses delay and inter-ACK spacing instead of bandwidth. -+ */ -+static void bbr_check_full_bw_reached(struct sock *sk, -+ const struct rate_sample *rs, -+ struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 bw_thresh, full_cnt, thresh; -+ -+ if (bbr->full_bw_now || rs->is_app_limited) -+ return; -+ -+ thresh = bbr_param(sk, full_bw_thresh); -+ full_cnt = bbr_param(sk, full_bw_cnt); -+ bw_thresh = (u64)bbr->full_bw * thresh >> BBR_SCALE; -+ if (ctx->sample_bw >= bw_thresh) { -+ bbr_reset_full_bw(sk); -+ bbr->full_bw = ctx->sample_bw; -+ return; -+ } -+ if (!bbr->round_start) -+ return; -+ ++bbr->full_bw_cnt; -+ bbr->full_bw_now = bbr->full_bw_cnt >= full_cnt; -+ bbr->full_bw_reached |= bbr->full_bw_now; -+} -+ -+/* If pipe is probably full, drain the queue and then enter steady-state. 
*/ -+static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs, -+ struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { -+ bbr->mode = BBR_DRAIN; /* drain queue we created */ -+ /* Set ssthresh to export purely for monitoring, to signal -+ * completion of initial STARTUP by setting to a non- -+ * TCP_INFINITE_SSTHRESH value (ssthresh is not used by BBR). -+ */ -+ tcp_sk(sk)->snd_ssthresh = -+ bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); -+ bbr_reset_congestion_signals(sk); -+ } /* fall through to check if in-flight is already small: */ -+ if (bbr->mode == BBR_DRAIN && -+ bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= -+ bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) { -+ bbr->mode = BBR_PROBE_BW; -+ bbr_start_bw_probe_down(sk); -+ } -+} -+ -+static void bbr_update_model(struct sock *sk, const struct rate_sample *rs, -+ struct bbr_context *ctx) -+{ -+ bbr_update_congestion_signals(sk, rs, ctx); -+ bbr_update_ack_aggregation(sk, rs); -+ bbr_check_loss_too_high_in_startup(sk, rs); -+ bbr_check_full_bw_reached(sk, rs, ctx); -+ bbr_check_drain(sk, rs, ctx); -+ bbr_update_cycle_phase(sk, rs, ctx); -+ bbr_update_min_rtt(sk, rs); -+} -+ -+/* Fast path for app-limited case. -+ * -+ * On each ack, we execute bbr state machine, which primarily consists of: -+ * 1) update model based on new rate sample, and -+ * 2) update control based on updated model or state change. -+ * -+ * There are certain workload/scenarios, e.g. app-limited case, where -+ * either we can skip updating model or we can skip update of both model -+ * as well as control. This provides signifcant softirq cpu savings for -+ * processing incoming acks. 
-+ * -+ * In case of app-limited, if there is no congestion (loss/ecn) and -+ * if observed bw sample is less than current estimated bw, then we can -+ * skip some of the computation in bbr state processing: -+ * -+ * - if there is no rtt/mode/phase change: In this case, since all the -+ * parameters of the network model are constant, we can skip model -+ * as well control update. -+ * -+ * - else we can skip rest of the model update. But we still need to -+ * update the control to account for the new rtt/mode/phase. -+ * -+ * Returns whether we can take fast path or not. -+ */ -+static bool bbr_run_fast_path(struct sock *sk, bool *update_model, -+ const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 prev_min_rtt_us, prev_mode; -+ -+ if (bbr_param(sk, fast_path) && bbr->try_fast_path && -+ rs->is_app_limited && ctx->sample_bw < bbr_max_bw(sk) && -+ !bbr->loss_in_round && !bbr->ecn_in_round ) { -+ prev_mode = bbr->mode; -+ prev_min_rtt_us = bbr->min_rtt_us; -+ bbr_check_drain(sk, rs, ctx); -+ bbr_update_cycle_phase(sk, rs, ctx); -+ bbr_update_min_rtt(sk, rs); -+ -+ if (bbr->mode == prev_mode && -+ bbr->min_rtt_us == prev_min_rtt_us && -+ bbr->try_fast_path) { -+ return true; -+ } -+ -+ /* Skip model update, but control still needs to be updated */ -+ *update_model = false; -+ } -+ return false; -+} -+ -+__bpf_kfunc static void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ struct bbr_context ctx = { 0 }; -+ bool update_model = true; -+ u32 bw, round_delivered; -+ int ce_ratio = -1; -+ -+ round_delivered = bbr_update_round_start(sk, rs, &ctx); -+ if (bbr->round_start) { -+ bbr->rounds_since_probe = -+ min_t(s32, bbr->rounds_since_probe + 1, 0xFF); -+ ce_ratio = bbr_update_ecn_alpha(sk); -+ } -+ bbr_plb(sk, rs, ce_ratio); -+ -+ bbr->ecn_in_round |= (bbr->ecn_eligible && rs->is_ece); -+ 
bbr_calculate_bw_sample(sk, rs, &ctx); -+ bbr_update_latest_delivery_signals(sk, rs, &ctx); -+ -+ if (bbr_run_fast_path(sk, &update_model, rs, &ctx)) -+ goto out; -+ -+ if (update_model) -+ bbr_update_model(sk, rs, &ctx); -+ -+ bbr_update_gains(sk); -+ bw = bbr_bw(sk); -+ bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); -+ bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain, -+ tcp_snd_cwnd(tp), &ctx); -+ bbr_bound_cwnd_for_inflight_model(sk); -+ -+out: -+ bbr_advance_latest_delivery_signals(sk, rs, &ctx); -+ bbr->prev_ca_state = inet_csk(sk)->icsk_ca_state; -+ bbr->loss_in_cycle |= rs->lost > 0; -+ bbr->ecn_in_cycle |= rs->delivered_ce > 0; -+} -+ -+__bpf_kfunc static void bbr_init(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->initialized = 1; -+ -+ bbr->init_cwnd = min(0x7FU, tcp_snd_cwnd(tp)); -+ bbr->prior_cwnd = tp->prior_cwnd; -+ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; -+ bbr->next_rtt_delivered = tp->delivered; -+ bbr->prev_ca_state = TCP_CA_Open; -+ -+ bbr->probe_rtt_done_stamp = 0; -+ bbr->probe_rtt_round_done = 0; -+ bbr->probe_rtt_min_us = tcp_min_rtt(tp); -+ bbr->probe_rtt_min_stamp = tcp_jiffies32; -+ bbr->min_rtt_us = tcp_min_rtt(tp); -+ bbr->min_rtt_stamp = tcp_jiffies32; -+ -+ bbr->has_seen_rtt = 0; -+ bbr_init_pacing_rate_from_rtt(sk); -+ -+ bbr->round_start = 0; -+ bbr->idle_restart = 0; -+ bbr->full_bw_reached = 0; -+ bbr->full_bw = 0; - bbr->full_bw_cnt = 0; -- bbr_reset_lt_bw_sampling(sk); -- return tcp_snd_cwnd(tcp_sk(sk)); -+ bbr->cycle_mstamp = 0; -+ bbr->cycle_idx = 0; -+ -+ bbr_reset_startup_mode(sk); -+ -+ bbr->ack_epoch_mstamp = tp->tcp_mstamp; -+ bbr->ack_epoch_acked = 0; -+ bbr->extra_acked_win_rtts = 0; -+ bbr->extra_acked_win_idx = 0; -+ bbr->extra_acked[0] = 0; -+ bbr->extra_acked[1] = 0; -+ -+ bbr->ce_state = 0; -+ bbr->prior_rcv_nxt = tp->rcv_nxt; -+ bbr->try_fast_path = 0; -+ -+ cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); -+ -+ /* Start 
sampling ECN mark rate after first full flight is ACKed: */ -+ bbr->loss_round_delivered = tp->delivered + 1; -+ bbr->loss_round_start = 0; -+ bbr->undo_bw_lo = 0; -+ bbr->undo_inflight_lo = 0; -+ bbr->undo_inflight_hi = 0; -+ bbr->loss_events_in_round = 0; -+ bbr->startup_ecn_rounds = 0; -+ bbr_reset_congestion_signals(sk); -+ bbr->bw_lo = ~0U; -+ bbr->bw_hi[0] = 0; -+ bbr->bw_hi[1] = 0; -+ bbr->inflight_lo = ~0U; -+ bbr->inflight_hi = ~0U; -+ bbr_reset_full_bw(sk); -+ bbr->bw_probe_up_cnt = ~0U; -+ bbr->bw_probe_up_acks = 0; -+ bbr->bw_probe_up_rounds = 0; -+ bbr->probe_wait_us = 0; -+ bbr->stopped_risky_probe = 0; -+ bbr->ack_phase = BBR_ACKS_INIT; -+ bbr->rounds_since_probe = 0; -+ bbr->bw_probe_samples = 0; -+ bbr->prev_probe_too_high = 0; -+ bbr->ecn_eligible = 0; -+ bbr->ecn_alpha = bbr_param(sk, ecn_alpha_init); -+ bbr->alpha_last_delivered = 0; -+ bbr->alpha_last_delivered_ce = 0; -+ bbr->plb.pause_until = 0; -+ -+ tp->fast_ack_mode = bbr_fast_ack_mode ? 1 : 0; -+ -+ if (bbr_can_use_ecn(sk)) -+ tp->ecn_flags |= TCP_ECN_ECT_PERMANENT; -+} -+ -+/* BBR marks the current round trip as a loss round. */ -+static void bbr_note_loss(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ /* Capture "current" data over the full round trip of loss, to -+ * have a better chance of observing the full capacity of the path. -+ */ -+ if (!bbr->loss_in_round) /* first loss in this round trip? */ -+ bbr->loss_round_delivered = tp->delivered; /* set round trip */ -+ bbr->loss_in_round = 1; -+ bbr->loss_in_cycle = 1; - } - --/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */ -+/* Core TCP stack informs us that the given skb was just marked lost. 
*/ -+__bpf_kfunc static void bbr_skb_marked_lost(struct sock *sk, -+ const struct sk_buff *skb) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ struct tcp_skb_cb *scb = TCP_SKB_CB(skb); -+ struct rate_sample rs = {}; -+ -+ bbr_note_loss(sk); -+ -+ if (!bbr->bw_probe_samples) -+ return; /* not an skb sent while probing for bandwidth */ -+ if (unlikely(!scb->tx.delivered_mstamp)) -+ return; /* skb was SACKed, reneged, marked lost; ignore it */ -+ /* We are probing for bandwidth. Construct a rate sample that -+ * estimates what happened in the flight leading up to this lost skb, -+ * then see if the loss rate went too high, and if so at which packet. -+ */ -+ rs.tx_in_flight = scb->tx.in_flight; -+ rs.lost = tp->lost - scb->tx.lost; -+ rs.is_app_limited = scb->tx.is_app_limited; -+ if (bbr_is_inflight_too_high(sk, &rs)) { -+ rs.tx_in_flight = bbr_inflight_hi_from_lost_skb(sk, &rs, skb); -+ bbr_handle_inflight_too_high(sk, &rs); -+ } -+} -+ -+static void bbr_run_loss_probe_recovery(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ struct rate_sample rs = {0}; -+ -+ bbr_note_loss(sk); -+ -+ if (!bbr->bw_probe_samples) -+ return; /* not sent while probing for bandwidth */ -+ /* We are probing for bandwidth. Construct a rate sample that -+ * estimates what happened in the flight leading up to this -+ * loss, then see if the loss rate went too high. -+ */ -+ rs.lost = 1; /* TLP probe repaired loss of a single segment */ -+ rs.tx_in_flight = bbr->inflight_latest + rs.lost; -+ rs.is_app_limited = tp->tlp_orig_data_app_limited; -+ if (bbr_is_inflight_too_high(sk, &rs)) -+ bbr_handle_inflight_too_high(sk, &rs); -+} -+ -+/* Revert short-term model if current loss recovery event was spurious. 
*/ -+__bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr_reset_full_bw(sk); /* spurious slow-down; reset full bw detector */ -+ bbr->loss_in_round = 0; -+ -+ /* Revert to cwnd and other state saved before loss episode. */ -+ bbr->bw_lo = max(bbr->bw_lo, bbr->undo_bw_lo); -+ bbr->inflight_lo = max(bbr->inflight_lo, bbr->undo_inflight_lo); -+ bbr->inflight_hi = max(bbr->inflight_hi, bbr->undo_inflight_hi); -+ bbr->try_fast_path = 0; /* take slow path to set proper cwnd, pacing */ -+ return bbr->prior_cwnd; -+} -+ -+/* Entering loss recovery, so save state for when we undo recovery. */ - __bpf_kfunc static u32 bbr_ssthresh(struct sock *sk) - { -+ struct bbr *bbr = inet_csk_ca(sk); -+ - bbr_save_cwnd(sk); -+ /* For undo, save state that adapts based on loss signal. */ -+ bbr->undo_bw_lo = bbr->bw_lo; -+ bbr->undo_inflight_lo = bbr->inflight_lo; -+ bbr->undo_inflight_hi = bbr->inflight_hi; - return tcp_sk(sk)->snd_ssthresh; - } - -+static enum tcp_bbr_phase bbr_get_phase(struct bbr *bbr) -+{ -+ switch (bbr->mode) { -+ case BBR_STARTUP: -+ return BBR_PHASE_STARTUP; -+ case BBR_DRAIN: -+ return BBR_PHASE_DRAIN; -+ case BBR_PROBE_BW: -+ break; -+ case BBR_PROBE_RTT: -+ return BBR_PHASE_PROBE_RTT; -+ default: -+ return BBR_PHASE_INVALID; -+ } -+ switch (bbr->cycle_idx) { -+ case BBR_BW_PROBE_UP: -+ return BBR_PHASE_PROBE_BW_UP; -+ case BBR_BW_PROBE_DOWN: -+ return BBR_PHASE_PROBE_BW_DOWN; -+ case BBR_BW_PROBE_CRUISE: -+ return BBR_PHASE_PROBE_BW_CRUISE; -+ case BBR_BW_PROBE_REFILL: -+ return BBR_PHASE_PROBE_BW_REFILL; -+ default: -+ return BBR_PHASE_INVALID; -+ } -+} -+ - static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr, -- union tcp_cc_info *info) -+ union tcp_cc_info *info) - { - if (ext & (1 << (INET_DIAG_BBRINFO - 1)) || - ext & (1 << (INET_DIAG_VEGASINFO - 1))) { -- struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); -- u64 bw = bbr_bw(sk); -- -- bw = bw * tp->mss_cache * 
USEC_PER_SEC >> BW_SCALE; -- memset(&info->bbr, 0, sizeof(info->bbr)); -- info->bbr.bbr_bw_lo = (u32)bw; -- info->bbr.bbr_bw_hi = (u32)(bw >> 32); -- info->bbr.bbr_min_rtt = bbr->min_rtt_us; -- info->bbr.bbr_pacing_gain = bbr->pacing_gain; -- info->bbr.bbr_cwnd_gain = bbr->cwnd_gain; -+ u64 bw = bbr_bw_bytes_per_sec(sk, bbr_bw(sk)); -+ u64 bw_hi = bbr_bw_bytes_per_sec(sk, bbr_max_bw(sk)); -+ u64 bw_lo = bbr->bw_lo == ~0U ? -+ ~0ULL : bbr_bw_bytes_per_sec(sk, bbr->bw_lo); -+ struct tcp_bbr_info *bbr_info = &info->bbr; -+ -+ memset(bbr_info, 0, sizeof(*bbr_info)); -+ bbr_info->bbr_bw_lo = (u32)bw; -+ bbr_info->bbr_bw_hi = (u32)(bw >> 32); -+ bbr_info->bbr_min_rtt = bbr->min_rtt_us; -+ bbr_info->bbr_pacing_gain = bbr->pacing_gain; -+ bbr_info->bbr_cwnd_gain = bbr->cwnd_gain; -+ bbr_info->bbr_bw_hi_lsb = (u32)bw_hi; -+ bbr_info->bbr_bw_hi_msb = (u32)(bw_hi >> 32); -+ bbr_info->bbr_bw_lo_lsb = (u32)bw_lo; -+ bbr_info->bbr_bw_lo_msb = (u32)(bw_lo >> 32); -+ bbr_info->bbr_mode = bbr->mode; -+ bbr_info->bbr_phase = (__u8)bbr_get_phase(bbr); -+ bbr_info->bbr_version = (__u8)BBR_VERSION; -+ bbr_info->bbr_inflight_lo = bbr->inflight_lo; -+ bbr_info->bbr_inflight_hi = bbr->inflight_hi; -+ bbr_info->bbr_extra_acked = bbr_extra_acked(sk); - *attr = INET_DIAG_BBRINFO; -- return sizeof(info->bbr); -+ return sizeof(*bbr_info); - } - return 0; - } - - __bpf_kfunc static void bbr_set_state(struct sock *sk, u8 new_state) - { -+ struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - - if (new_state == TCP_CA_Loss) { -- struct rate_sample rs = { .losses = 1 }; - - bbr->prev_ca_state = TCP_CA_Loss; -- bbr->full_bw = 0; -- bbr->round_start = 1; /* treat RTO like end of a round */ -- bbr_lt_bw_sampling(sk, &rs); -+ tcp_plb_update_state_upon_rto(sk, &bbr->plb); -+ /* The tcp_write_timeout() call to sk_rethink_txhash() likely -+ * repathed this flow, so re-learn the min network RTT on the -+ * new path: -+ */ -+ bbr_reset_full_bw(sk); -+ if (!bbr_is_probing_bandwidth(sk) && 
bbr->inflight_lo == ~0U) { -+ /* bbr_adapt_lower_bounds() needs cwnd before -+ * we suffered an RTO, to update inflight_lo: -+ */ -+ bbr->inflight_lo = -+ max(tcp_snd_cwnd(tp), bbr->prior_cwnd); -+ } -+ } else if (bbr->prev_ca_state == TCP_CA_Loss && -+ new_state != TCP_CA_Loss) { -+ bbr_exit_loss_recovery(sk); - } - } - -+ - static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { -- .flags = TCP_CONG_NON_RESTRICTED, -+ .flags = TCP_CONG_NON_RESTRICTED | TCP_CONG_WANTS_CE_EVENTS, - .name = "bbr", - .owner = THIS_MODULE, - .init = bbr_init, - .cong_control = bbr_main, - .sndbuf_expand = bbr_sndbuf_expand, -+ .skb_marked_lost = bbr_skb_marked_lost, - .undo_cwnd = bbr_undo_cwnd, - .cwnd_event = bbr_cwnd_event, - .ssthresh = bbr_ssthresh, -- .min_tso_segs = bbr_min_tso_segs, -+ .tso_segs = bbr_tso_segs, - .get_info = bbr_get_info, - .set_state = bbr_set_state, - }; -@@ -1159,10 +2359,11 @@ BTF_KFUNCS_START(tcp_bbr_check_kfunc_ids) - BTF_ID_FLAGS(func, bbr_init) - BTF_ID_FLAGS(func, bbr_main) - BTF_ID_FLAGS(func, bbr_sndbuf_expand) -+BTF_ID_FLAGS(func, bbr_skb_marked_lost) - BTF_ID_FLAGS(func, bbr_undo_cwnd) - BTF_ID_FLAGS(func, bbr_cwnd_event) - BTF_ID_FLAGS(func, bbr_ssthresh) --BTF_ID_FLAGS(func, bbr_min_tso_segs) -+BTF_ID_FLAGS(func, bbr_tso_segs) - BTF_ID_FLAGS(func, bbr_set_state) - BTF_KFUNCS_END(tcp_bbr_check_kfunc_ids) - -@@ -1195,5 +2396,12 @@ MODULE_AUTHOR("Van Jacobson "); - MODULE_AUTHOR("Neal Cardwell "); - MODULE_AUTHOR("Yuchung Cheng "); - MODULE_AUTHOR("Soheil Hassas Yeganeh "); -+MODULE_AUTHOR("Priyaranjan Jha "); -+MODULE_AUTHOR("Yousuk Seung "); -+MODULE_AUTHOR("Kevin Yang "); -+MODULE_AUTHOR("Arjun Roy "); -+MODULE_AUTHOR("David Morley "); -+ - MODULE_LICENSE("Dual BSD/GPL"); - MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)"); -+MODULE_VERSION(__stringify(BBR_VERSION)); -diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c -index 28ffcfbeef14..7b13915ba288 100644 ---- a/net/ipv4/tcp_cong.c -+++ b/net/ipv4/tcp_cong.c -@@ 
-237,6 +237,7 @@ void tcp_init_congestion_control(struct sock *sk) - struct inet_connection_sock *icsk = inet_csk(sk); - - tcp_sk(sk)->prior_ssthresh = 0; -+ tcp_sk(sk)->fast_ack_mode = 0; - if (icsk->icsk_ca_ops->init) - icsk->icsk_ca_ops->init(sk); - if (tcp_ca_needs_ecn(sk)) -diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index 38da23f991d6..37d2b393088a 100644 ---- a/net/ipv4/tcp_input.c -+++ b/net/ipv4/tcp_input.c -@@ -365,7 +365,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) - tcp_enter_quickack_mode(sk, 2); - break; - case INET_ECN_CE: -- if (tcp_ca_needs_ecn(sk)) -+ if (tcp_ca_wants_ce_events(sk)) - tcp_ca_event(sk, CA_EVENT_ECN_IS_CE); - - if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { -@@ -376,7 +376,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) - tp->ecn_flags |= TCP_ECN_SEEN; - break; - default: -- if (tcp_ca_needs_ecn(sk)) -+ if (tcp_ca_wants_ce_events(sk)) - tcp_ca_event(sk, CA_EVENT_ECN_NO_CE); - tp->ecn_flags |= TCP_ECN_SEEN; - break; -@@ -1115,7 +1115,12 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) - */ - static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb) - { -+ struct sock *sk = (struct sock *)tp; -+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; -+ - tp->lost += tcp_skb_pcount(skb); -+ if (ca_ops->skb_marked_lost) -+ ca_ops->skb_marked_lost(sk, skb); - } - - void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb) -@@ -1496,6 +1501,17 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, - WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount); - tcp_skb_pcount_add(skb, -pcount); - -+ /* Adjust tx.in_flight as pcount is shifted from skb to prev. 
*/ -+ if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount, -+ "prev in_flight: %u skb in_flight: %u pcount: %u", -+ TCP_SKB_CB(prev)->tx.in_flight, -+ TCP_SKB_CB(skb)->tx.in_flight, -+ pcount)) -+ TCP_SKB_CB(skb)->tx.in_flight = 0; -+ else -+ TCP_SKB_CB(skb)->tx.in_flight -= pcount; -+ TCP_SKB_CB(prev)->tx.in_flight += pcount; -+ - /* When we're adding to gso_segs == 1, gso_size will be zero, - * in theory this shouldn't be necessary but as long as DSACK - * code can come after this skb later on it's better to keep -@@ -3790,7 +3806,8 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) - /* This routine deals with acks during a TLP episode and ends an episode by - * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack - */ --static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) -+static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag, -+ struct rate_sample *rs) - { - struct tcp_sock *tp = tcp_sk(sk); - -@@ -3807,6 +3824,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) - /* ACK advances: there was a loss, so reduce cwnd. Reset - * tlp_high_seq in tcp_init_cwnd_reduction() - */ -+ tcp_ca_event(sk, CA_EVENT_TLP_RECOVERY); - tcp_init_cwnd_reduction(sk); - tcp_set_ca_state(sk, TCP_CA_CWR); - tcp_end_cwnd_reduction(sk); -@@ -3817,6 +3835,11 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) - FLAG_NOT_DUP | FLAG_DATA_SACKED))) { - /* Pure dupack: original and TLP probe arrived; no loss */ - tp->tlp_high_seq = 0; -+ } else { -+ /* This ACK matches a TLP retransmit. We cannot yet tell if -+ * this ACK is for the original or the TLP retransmit. -+ */ -+ rs->is_acking_tlp_retrans_seq = 1; - } - } - -@@ -3925,6 +3948,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) - - prior_fack = tcp_is_sack(tp) ? 
tcp_highest_sack_seq(tp) : tp->snd_una; - rs.prior_in_flight = tcp_packets_in_flight(tp); -+ tcp_rate_check_app_limited(sk); - - /* ts_recent update must be made after we are sure that the packet - * is in window. -@@ -3999,7 +4023,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) - tcp_rack_update_reo_wnd(sk, &rs); - - if (tp->tlp_high_seq) -- tcp_process_tlp_ack(sk, ack, flag); -+ tcp_process_tlp_ack(sk, ack, flag, &rs); - - if (tcp_ack_is_dubious(sk, flag)) { - if (!(flag & (FLAG_SND_UNA_ADVANCED | -@@ -4023,6 +4047,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) - delivered = tcp_newly_delivered(sk, delivered, flag); - lost = tp->lost - lost; /* freshly marked lost */ - rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); -+ rs.is_ece = !!(flag & FLAG_ECE); - tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); - tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); - tcp_xmit_recovery(sk, rexmit); -@@ -4042,7 +4067,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) - tcp_ack_probe(sk); - - if (tp->tlp_high_seq) -- tcp_process_tlp_ack(sk, ack, flag); -+ tcp_process_tlp_ack(sk, ack, flag, &rs); - return 1; - - old_ack: -@@ -5704,13 +5729,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) - - /* More than one full frame received... */ - if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && -+ (tp->fast_ack_mode == 1 || - /* ... and right edge of window advances far enough. - * (tcp_recvmsg() will send ACK otherwise). - * If application uses SO_RCVLOWAT, we want send ack now if - * we have not received enough bytes to satisfy the condition. - */ -- (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || -- __tcp_select_window(sk) >= tp->rcv_wnd)) || -+ (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || -+ __tcp_select_window(sk) >= tp->rcv_wnd))) || - /* We ACK each frame or... 
*/ - tcp_in_quickack_mode(sk) || - /* Protocol state mandates a one-time immediate ACK */ -diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c -index 538c06f95918..e4c861c071ae 100644 ---- a/net/ipv4/tcp_minisocks.c -+++ b/net/ipv4/tcp_minisocks.c -@@ -460,6 +460,8 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) - u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); - bool ca_got_dst = false; - -+ tcp_set_ecn_low_from_dst(sk, dst); -+ - if (ca_key != TCP_CA_UNSPEC) { - const struct tcp_congestion_ops *ca; - -diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c -index 95618d0e78e4..3f4bdd2b6476 100644 ---- a/net/ipv4/tcp_output.c -+++ b/net/ipv4/tcp_output.c -@@ -336,10 +336,9 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) - bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); - bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 || - tcp_ca_needs_ecn(sk) || bpf_needs_ecn; -+ const struct dst_entry *dst = __sk_dst_get(sk); - - if (!use_ecn) { -- const struct dst_entry *dst = __sk_dst_get(sk); -- - if (dst && dst_feature(dst, RTAX_FEATURE_ECN)) - use_ecn = true; - } -@@ -351,6 +350,9 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) - tp->ecn_flags = TCP_ECN_OK; - if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) - INET_ECN_xmit(sk); -+ -+ if (dst) -+ tcp_set_ecn_low_from_dst(sk, dst); - } - } - -@@ -388,7 +390,8 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, - th->cwr = 1; - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - } -- } else if (!tcp_ca_needs_ecn(sk)) { -+ } else if (!(tp->ecn_flags & TCP_ECN_ECT_PERMANENT) && -+ !tcp_ca_needs_ecn(sk)) { - /* ACK or retransmitted segment: clear ECT|CE */ - INET_ECN_dontxmit(sk); - } -@@ -1601,7 +1604,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, - { - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *buff; -- int old_factor; -+ int old_factor, inflight_prev; - long limit; - int nlen; - u8 flags; -@@ 
-1676,6 +1679,30 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, - - if (diff) - tcp_adjust_pcount(sk, skb, diff); -+ -+ inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor; -+ if (inflight_prev < 0) { -+ WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious( -+ old_factor, -+ TCP_SKB_CB(skb)->sacked, -+ TCP_SKB_CB(skb)->tx.in_flight), -+ "inconsistent: tx.in_flight: %u " -+ "old_factor: %d mss: %u sacked: %u " -+ "1st pcount: %d 2nd pcount: %d " -+ "1st len: %u 2nd len: %u ", -+ TCP_SKB_CB(skb)->tx.in_flight, old_factor, -+ mss_now, TCP_SKB_CB(skb)->sacked, -+ tcp_skb_pcount(skb), tcp_skb_pcount(buff), -+ skb->len, buff->len); -+ inflight_prev = 0; -+ } -+ /* Set 1st tx.in_flight as if 1st were sent by itself: */ -+ TCP_SKB_CB(skb)->tx.in_flight = inflight_prev + -+ tcp_skb_pcount(skb); -+ /* Set 2nd tx.in_flight with new 1st and 2nd pcounts: */ -+ TCP_SKB_CB(buff)->tx.in_flight = inflight_prev + -+ tcp_skb_pcount(skb) + -+ tcp_skb_pcount(buff); - } - - /* Link BUFF into the send queue. */ -@@ -2033,13 +2060,12 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, - static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) - { - const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; -- u32 min_tso, tso_segs; -- -- min_tso = ca_ops->min_tso_segs ? -- ca_ops->min_tso_segs(sk) : -- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); -+ u32 tso_segs; - -- tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); -+ tso_segs = ca_ops->tso_segs ? 
-+ ca_ops->tso_segs(sk, mss_now) : -+ tcp_tso_autosize(sk, mss_now, -+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); - return min_t(u32, tso_segs, sk->sk_gso_max_segs); - } - -@@ -2767,6 +2793,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true); - list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); - tcp_init_tso_segs(skb, mss_now); -+ tcp_set_tx_in_flight(sk, skb); - goto repair; /* Skip network transmission */ - } - -@@ -2981,6 +3008,7 @@ void tcp_send_loss_probe(struct sock *sk) - if (WARN_ON(!skb || !tcp_skb_pcount(skb))) - goto rearm_timer; - -+ tp->tlp_orig_data_app_limited = TCP_SKB_CB(skb)->tx.is_app_limited; - if (__tcp_retransmit_skb(sk, skb, 1)) - goto rearm_timer; - -diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c -index a8f6d9d06f2e..8737f2134648 100644 ---- a/net/ipv4/tcp_rate.c -+++ b/net/ipv4/tcp_rate.c -@@ -34,6 +34,24 @@ - * ready to send in the write queue. - */ - -+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ u32 in_flight; -+ -+ /* Check, sanitize, and record packets in flight after skb was sent. */ -+ in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb); -+ if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX, -+ "insane in_flight %u cc %s mss %u " -+ "cwnd %u pif %u %u %u %u\n", -+ in_flight, inet_csk(sk)->icsk_ca_ops->name, -+ tp->mss_cache, tp->snd_cwnd, -+ tp->packets_out, tp->retrans_out, -+ tp->sacked_out, tp->lost_out)) -+ in_flight = TCPCB_IN_FLIGHT_MAX; -+ TCP_SKB_CB(skb)->tx.in_flight = in_flight; -+} -+ - /* Snapshot the current delivery information in the skb, to generate - * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). 
- */ -@@ -66,7 +84,9 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) - TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; - TCP_SKB_CB(skb)->tx.delivered = tp->delivered; - TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce; -+ TCP_SKB_CB(skb)->tx.lost = tp->lost; - TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0; -+ tcp_set_tx_in_flight(sk, skb); - } - - /* When an skb is sacked or acked, we fill in the rate sample with the (prior) -@@ -91,18 +111,21 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, - if (!rs->prior_delivered || - tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp, - scb->end_seq, rs->last_end_seq)) { -+ rs->prior_lost = scb->tx.lost; - rs->prior_delivered_ce = scb->tx.delivered_ce; - rs->prior_delivered = scb->tx.delivered; - rs->prior_mstamp = scb->tx.delivered_mstamp; - rs->is_app_limited = scb->tx.is_app_limited; - rs->is_retrans = scb->sacked & TCPCB_RETRANS; -+ rs->tx_in_flight = scb->tx.in_flight; - rs->last_end_seq = scb->end_seq; - - /* Record send time of most recently ACKed packet: */ - tp->first_tx_mstamp = tx_tstamp; - /* Find the duration of the "send phase" of this window: */ -- rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp, -- scb->tx.first_tx_mstamp); -+ rs->interval_us = tcp_stamp32_us_delta( -+ tp->first_tx_mstamp, -+ scb->tx.first_tx_mstamp); - - } - /* Mark off the skb delivered once it's sacked to avoid being -@@ -144,6 +167,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - return; - } - rs->delivered = tp->delivered - rs->prior_delivered; -+ rs->lost = tp->lost - rs->prior_lost; - - rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce; - /* delivered_ce occupies less than 32 bits in the skb control block */ -@@ -155,7 +179,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - * longer phase. 
- */ - snd_us = rs->interval_us; /* send phase */ -- ack_us = tcp_stamp_us_delta(tp->tcp_mstamp, -+ ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp, - rs->prior_mstamp); /* ack phase */ - rs->interval_us = max(snd_us, ack_us); - -diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c -index 892c86657fbc..33c2c9252364 100644 ---- a/net/ipv4/tcp_timer.c -+++ b/net/ipv4/tcp_timer.c -@@ -693,6 +693,7 @@ void tcp_write_timer_handler(struct sock *sk) - return; - } - -+ tcp_rate_check_app_limited(sk); - tcp_mstamp_refresh(tcp_sk(sk)); - event = icsk->icsk_pending; - --- -2.46.0.rc1 - -From 3bf203491864f9a7c6c234128a2d82fb8f448683 Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Mon, 15 Jul 2024 13:23:33 +0200 -Subject: [PATCH 03/11] block - -Signed-off-by: Peter Jung ---- - block/bfq-iosched.c | 120 ++++++++++++++++++++++++++++++++++++-------- - block/bfq-iosched.h | 16 +++++- - block/mq-deadline.c | 110 +++++++++++++++++++++++++++++++++------- - 3 files changed, 203 insertions(+), 43 deletions(-) - -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -index 4b88a54a9b76..88df08a246fa 100644 ---- a/block/bfq-iosched.c -+++ b/block/bfq-iosched.c -@@ -467,6 +467,21 @@ static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q) - return icq; - } - -+static struct bfq_io_cq *bfq_bic_try_lookup(struct request_queue *q) -+{ -+ if (!current->io_context) -+ return NULL; -+ if (spin_trylock_irq(&q->queue_lock)) { -+ struct bfq_io_cq *icq; -+ -+ icq = icq_to_bic(ioc_lookup_icq(q)); -+ spin_unlock_irq(&q->queue_lock); -+ return icq; -+ } -+ -+ return NULL; -+} -+ - /* - * Scheduler run of queue, if there are requests pending and no one in the - * driver that will restart queueing. -@@ -2454,10 +2469,21 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio, - * returned by bfq_bic_lookup does not go away before - * bfqd->lock is taken. 
- */ -- struct bfq_io_cq *bic = bfq_bic_lookup(q); -+ struct bfq_io_cq *bic = bfq_bic_try_lookup(q); - bool ret; - -- spin_lock_irq(&bfqd->lock); -+ /* -+ * bio merging is called for every bio queued, and it's very easy -+ * to run into contention because of that. If we fail getting -+ * the dd lock, just skip this merge attempt. For related IO, the -+ * plug will be the successful merging point. If we get here, we -+ * already failed doing the obvious merge. Chances of actually -+ * getting a merge off this path is a lot slimmer, so skipping an -+ * occassional lookup that will most likely not succeed anyway should -+ * not be a problem. -+ */ -+ if (!spin_trylock_irq(&bfqd->lock)) -+ return false; - - if (bic) { - /* -@@ -5148,6 +5174,10 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx) - { - struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; - -+ if (!list_empty_careful(&bfqd->at_head) || -+ !list_empty_careful(&bfqd->at_tail)) -+ return true; -+ - /* - * Avoiding lock: a race on bfqd->queued should cause at - * most a call to dispatch for nothing -@@ -5297,15 +5327,61 @@ static inline void bfq_update_dispatch_stats(struct request_queue *q, - bool idle_timer_disabled) {} - #endif /* CONFIG_BFQ_CGROUP_DEBUG */ - -+static void bfq_insert_request(struct request_queue *q, struct request *rq, -+ blk_insert_t flags, struct list_head *free); -+ -+static void __bfq_do_insert(struct request_queue *q, blk_insert_t flags, -+ struct list_head *list, struct list_head *free) -+{ -+ while (!list_empty(list)) { -+ struct request *rq; -+ -+ rq = list_first_entry(list, struct request, queuelist); -+ list_del_init(&rq->queuelist); -+ bfq_insert_request(q, rq, flags, free); -+ } -+} -+ -+static void bfq_do_insert(struct request_queue *q, struct list_head *free) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ LIST_HEAD(at_head); -+ LIST_HEAD(at_tail); -+ -+ spin_lock(&bfqd->insert_lock); -+ list_splice_init(&bfqd->at_head, &at_head); -+ 
list_splice_init(&bfqd->at_tail, &at_tail); -+ spin_unlock(&bfqd->insert_lock); -+ -+ __bfq_do_insert(q, BLK_MQ_INSERT_AT_HEAD, &at_head, free); -+ __bfq_do_insert(q, 0, &at_tail, free); -+} -+ - static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) - { -- struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; -+ struct request_queue *q = hctx->queue; -+ struct bfq_data *bfqd = q->elevator->elevator_data; - struct request *rq; - struct bfq_queue *in_serv_queue; - bool waiting_rq, idle_timer_disabled = false; -+ LIST_HEAD(free); -+ -+ /* -+ * If someone else is already dispatching, skip this one. This will -+ * defer the next dispatch event to when something completes, and could -+ * potentially lower the queue depth for contended cases. -+ * -+ * See the logic in blk_mq_do_dispatch_sched(), which loops and -+ * retries if nothing is dispatched. -+ */ -+ if (test_bit(BFQ_DISPATCHING, &bfqd->run_state) || -+ test_and_set_bit_lock(BFQ_DISPATCHING, &bfqd->run_state)) -+ return NULL; - - spin_lock_irq(&bfqd->lock); - -+ bfq_do_insert(hctx->queue, &free); -+ - in_serv_queue = bfqd->in_service_queue; - waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue); - -@@ -5315,7 +5391,9 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) - waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); - } - -+ clear_bit_unlock(BFQ_DISPATCHING, &bfqd->run_state); - spin_unlock_irq(&bfqd->lock); -+ blk_mq_free_requests(&free); - bfq_update_dispatch_stats(hctx->queue, rq, - idle_timer_disabled ? 
in_serv_queue : NULL, - idle_timer_disabled); -@@ -6236,27 +6314,21 @@ static inline void bfq_update_insert_stats(struct request_queue *q, - - static struct bfq_queue *bfq_init_rq(struct request *rq); - --static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, -- blk_insert_t flags) -+static void bfq_insert_request(struct request_queue *q, struct request *rq, -+ blk_insert_t flags, struct list_head *free) - { -- struct request_queue *q = hctx->queue; - struct bfq_data *bfqd = q->elevator->elevator_data; - struct bfq_queue *bfqq; - bool idle_timer_disabled = false; - blk_opf_t cmd_flags; -- LIST_HEAD(free); - - #ifdef CONFIG_BFQ_GROUP_IOSCHED - if (!cgroup_subsys_on_dfl(io_cgrp_subsys) && rq->bio) - bfqg_stats_update_legacy_io(q, rq); - #endif -- spin_lock_irq(&bfqd->lock); - bfqq = bfq_init_rq(rq); -- if (blk_mq_sched_try_insert_merge(q, rq, &free)) { -- spin_unlock_irq(&bfqd->lock); -- blk_mq_free_requests(&free); -+ if (blk_mq_sched_try_insert_merge(q, rq, free)) - return; -- } - - trace_block_rq_insert(rq); - -@@ -6286,8 +6358,6 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, - * merge). 
- */ - cmd_flags = rq->cmd_flags; -- spin_unlock_irq(&bfqd->lock); -- - bfq_update_insert_stats(q, bfqq, idle_timer_disabled, - cmd_flags); - } -@@ -6296,13 +6366,15 @@ static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx, - struct list_head *list, - blk_insert_t flags) - { -- while (!list_empty(list)) { -- struct request *rq; -+ struct request_queue *q = hctx->queue; -+ struct bfq_data *bfqd = q->elevator->elevator_data; - -- rq = list_first_entry(list, struct request, queuelist); -- list_del_init(&rq->queuelist); -- bfq_insert_request(hctx, rq, flags); -- } -+ spin_lock_irq(&bfqd->insert_lock); -+ if (flags & BLK_MQ_INSERT_AT_HEAD) -+ list_splice_init(list, &bfqd->at_head); -+ else -+ list_splice_init(list, &bfqd->at_tail); -+ spin_unlock_irq(&bfqd->insert_lock); - } - - static void bfq_update_hw_tag(struct bfq_data *bfqd) -@@ -7211,6 +7283,12 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) - q->elevator = eq; - spin_unlock_irq(&q->queue_lock); - -+ spin_lock_init(&bfqd->lock); -+ spin_lock_init(&bfqd->insert_lock); -+ -+ INIT_LIST_HEAD(&bfqd->at_head); -+ INIT_LIST_HEAD(&bfqd->at_tail); -+ - /* - * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues. 
- * Grab a permanent reference to it, so that the normal code flow -@@ -7329,8 +7407,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) - /* see comments on the definition of next field inside bfq_data */ - bfqd->actuator_load_threshold = 4; - -- spin_lock_init(&bfqd->lock); -- - /* - * The invocation of the next bfq_create_group_hierarchy - * function is the head of a chain of function calls -diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h -index 467e8cfc41a2..f44f5d4ec2f4 100644 ---- a/block/bfq-iosched.h -+++ b/block/bfq-iosched.h -@@ -504,12 +504,26 @@ struct bfq_io_cq { - unsigned int requests; /* Number of requests this process has in flight */ - }; - -+enum { -+ BFQ_DISPATCHING = 0, -+}; -+ - /** - * struct bfq_data - per-device data structure. - * - * All the fields are protected by @lock. - */ - struct bfq_data { -+ struct { -+ spinlock_t lock; -+ spinlock_t insert_lock; -+ } ____cacheline_aligned_in_smp; -+ -+ unsigned long run_state; -+ -+ struct list_head at_head; -+ struct list_head at_tail; -+ - /* device request queue */ - struct request_queue *queue; - /* dispatch queue */ -@@ -795,8 +809,6 @@ struct bfq_data { - /* fallback dummy bfqq for extreme OOM conditions */ - struct bfq_queue oom_bfqq; - -- spinlock_t lock; -- - /* - * bic associated with the task issuing current bio for - * merging. 
This and the next field are used as a support to -diff --git a/block/mq-deadline.c b/block/mq-deadline.c -index 94eede4fb9eb..567fd69a146c 100644 ---- a/block/mq-deadline.c -+++ b/block/mq-deadline.c -@@ -79,10 +79,23 @@ struct dd_per_prio { - struct io_stats_per_prio stats; - }; - -+enum { -+ DD_DISPATCHING = 0, -+}; -+ - struct deadline_data { - /* - * run time data - */ -+ struct { -+ spinlock_t lock; -+ spinlock_t insert_lock; -+ } ____cacheline_aligned_in_smp; -+ -+ unsigned long run_state; -+ -+ struct list_head at_head; -+ struct list_head at_tail; - - struct dd_per_prio per_prio[DD_PRIO_COUNT]; - -@@ -100,8 +113,6 @@ struct deadline_data { - int front_merges; - u32 async_depth; - int prio_aging_expire; -- -- spinlock_t lock; - }; - - /* Maps an I/O priority class to a deadline scheduler priority. */ -@@ -112,6 +123,9 @@ static const enum dd_prio ioprio_class_to_prio[] = { - [IOPRIO_CLASS_IDLE] = DD_IDLE_PRIO, - }; - -+static void dd_insert_request(struct request_queue *q, struct request *rq, -+ blk_insert_t flags, struct list_head *free); -+ - static inline struct rb_root * - deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq) - { -@@ -451,6 +465,33 @@ static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd, - return NULL; - } - -+static void __dd_do_insert(struct request_queue *q, blk_insert_t flags, -+ struct list_head *list, struct list_head *free) -+{ -+ while (!list_empty(list)) { -+ struct request *rq; -+ -+ rq = list_first_entry(list, struct request, queuelist); -+ list_del_init(&rq->queuelist); -+ dd_insert_request(q, rq, flags, free); -+ } -+} -+ -+static void dd_do_insert(struct request_queue *q, struct list_head *free) -+{ -+ struct deadline_data *dd = q->elevator->elevator_data; -+ LIST_HEAD(at_head); -+ LIST_HEAD(at_tail); -+ -+ spin_lock(&dd->insert_lock); -+ list_splice_init(&dd->at_head, &at_head); -+ list_splice_init(&dd->at_tail, &at_tail); -+ spin_unlock(&dd->insert_lock); -+ -+ __dd_do_insert(q, 
BLK_MQ_INSERT_AT_HEAD, &at_head, free); -+ __dd_do_insert(q, 0, &at_tail, free); -+} -+ - /* - * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests(). - * -@@ -461,12 +502,27 @@ static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd, - */ - static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) - { -- struct deadline_data *dd = hctx->queue->elevator->elevator_data; -+ struct request_queue *q = hctx->queue; -+ struct deadline_data *dd = q->elevator->elevator_data; - const unsigned long now = jiffies; - struct request *rq; - enum dd_prio prio; -+ LIST_HEAD(free); -+ -+ /* -+ * If someone else is already dispatching, skip this one. This will -+ * defer the next dispatch event to when something completes, and could -+ * potentially lower the queue depth for contended cases. -+ * -+ * See the logic in blk_mq_do_dispatch_sched(), which loops and -+ * retries if nothing is dispatched. -+ */ -+ if (test_bit(DD_DISPATCHING, &dd->run_state) || -+ test_and_set_bit_lock(DD_DISPATCHING, &dd->run_state)) -+ return NULL; - - spin_lock(&dd->lock); -+ dd_do_insert(q, &free); - rq = dd_dispatch_prio_aged_requests(dd, now); - if (rq) - goto unlock; -@@ -482,8 +538,10 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) - } - - unlock: -+ clear_bit_unlock(DD_DISPATCHING, &dd->run_state); - spin_unlock(&dd->lock); - -+ blk_mq_free_requests(&free); - return rq; - } - -@@ -571,6 +629,12 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e) - - eq->elevator_data = dd; - -+ spin_lock_init(&dd->lock); -+ spin_lock_init(&dd->insert_lock); -+ -+ INIT_LIST_HEAD(&dd->at_head); -+ INIT_LIST_HEAD(&dd->at_tail); -+ - for (prio = 0; prio <= DD_PRIO_MAX; prio++) { - struct dd_per_prio *per_prio = &dd->per_prio[prio]; - -@@ -587,7 +651,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e) - dd->last_dir = DD_WRITE; - dd->fifo_batch = fifo_batch; - dd->prio_aging_expire = 
prio_aging_expire; -- spin_lock_init(&dd->lock); - - /* We dispatch from request queue wide instead of hw queue */ - blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q); -@@ -643,7 +706,19 @@ static bool dd_bio_merge(struct request_queue *q, struct bio *bio, - struct request *free = NULL; - bool ret; - -- spin_lock(&dd->lock); -+ /* -+ * bio merging is called for every bio queued, and it's very easy -+ * to run into contention because of that. If we fail getting -+ * the dd lock, just skip this merge attempt. For related IO, the -+ * plug will be the successful merging point. If we get here, we -+ * already failed doing the obvious merge. Chances of actually -+ * getting a merge off this path is a lot slimmer, so skipping an -+ * occassional lookup that will most likely not succeed anyway should -+ * not be a problem. -+ */ -+ if (!spin_trylock(&dd->lock)) -+ return false; -+ - ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); - spin_unlock(&dd->lock); - -@@ -656,10 +731,9 @@ static bool dd_bio_merge(struct request_queue *q, struct bio *bio, - /* - * add rq to rbtree and fifo - */ --static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, -+static void dd_insert_request(struct request_queue *q, struct request *rq, - blk_insert_t flags, struct list_head *free) - { -- struct request_queue *q = hctx->queue; - struct deadline_data *dd = q->elevator->elevator_data; - const enum dd_data_dir data_dir = rq_data_dir(rq); - u16 ioprio = req_get_ioprio(rq); -@@ -713,19 +787,13 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, - { - struct request_queue *q = hctx->queue; - struct deadline_data *dd = q->elevator->elevator_data; -- LIST_HEAD(free); -- -- spin_lock(&dd->lock); -- while (!list_empty(list)) { -- struct request *rq; -- -- rq = list_first_entry(list, struct request, queuelist); -- list_del_init(&rq->queuelist); -- dd_insert_request(hctx, rq, flags, &free); -- } -- spin_unlock(&dd->lock); - -- blk_mq_free_requests(&free); -+ 
spin_lock(&dd->insert_lock); -+ if (flags & BLK_MQ_INSERT_AT_HEAD) -+ list_splice_init(list, &dd->at_head); -+ else -+ list_splice_init(list, &dd->at_tail); -+ spin_unlock(&dd->insert_lock); - } - - /* Callback from inside blk_mq_rq_ctx_init(). */ -@@ -766,6 +834,10 @@ static bool dd_has_work(struct blk_mq_hw_ctx *hctx) - struct deadline_data *dd = hctx->queue->elevator->elevator_data; - enum dd_prio prio; - -+ if (!list_empty_careful(&dd->at_head) || -+ !list_empty_careful(&dd->at_tail)) -+ return true; -+ - for (prio = 0; prio <= DD_PRIO_MAX; prio++) - if (dd_has_work_for_prio(&dd->per_prio[prio])) - return true; --- -2.46.0.rc1 - -From 3eb49a6c890c1da829c0ac8fe76caec909cb2103 Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Fri, 19 Jul 2024 08:04:09 +0200 -Subject: [PATCH 04/11] cachy - -Signed-off-by: Peter Jung ---- - .../admin-guide/kernel-parameters.txt | 12 + - Makefile | 7 +- - arch/x86/Kconfig.cpu | 432 ++- - arch/x86/Makefile | 45 +- - arch/x86/include/asm/pci.h | 6 + - arch/x86/include/asm/vermagic.h | 76 + - arch/x86/pci/common.c | 7 +- - block/bfq-iosched.c | 6 + - block/elevator.c | 10 + - drivers/Makefile | 13 +- - drivers/ata/ahci.c | 23 +- - drivers/cpufreq/Kconfig.x86 | 2 - - drivers/cpufreq/intel_pstate.c | 2 + - drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + - drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 + - drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 53 +- - drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 1 + - drivers/gpu/drm/amd/display/Kconfig | 6 + - .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- - .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 +- - .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 6 +- - .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 6 +- - .../amd/display/dc/optc/dcn10/dcn10_optc.c | 15 +- - .../amd/display/dc/optc/dcn20/dcn20_optc.c | 10 + - drivers/gpu/drm/amd/pm/amdgpu_pm.c | 3 + - drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 14 +- - drivers/gpu/drm/drm_atomic_uapi.c | 11 +- - drivers/i2c/busses/Kconfig | 9 + - 
drivers/i2c/busses/Makefile | 1 + - drivers/i2c/busses/i2c-nct6775.c | 648 ++++ - drivers/i2c/busses/i2c-piix4.c | 4 +- - drivers/input/evdev.c | 19 +- - drivers/md/dm-crypt.c | 5 + - drivers/media/v4l2-core/Kconfig | 5 + - drivers/media/v4l2-core/Makefile | 2 + - drivers/media/v4l2-core/v4l2loopback.c | 3184 +++++++++++++++++ - drivers/media/v4l2-core/v4l2loopback.h | 98 + - .../media/v4l2-core/v4l2loopback_formats.h | 445 +++ - drivers/pci/controller/Makefile | 6 + - drivers/pci/controller/intel-nvme-remap.c | 462 +++ - drivers/pci/quirks.c | 101 + - include/linux/pagemap.h | 2 +- - include/linux/user_namespace.h | 4 + - init/Kconfig | 26 + - kernel/Kconfig.hz | 24 + - kernel/fork.c | 14 + - kernel/sched/fair.c | 13 + - kernel/sched/sched.h | 2 +- - kernel/sysctl.c | 12 + - kernel/user_namespace.c | 7 + - mm/Kconfig | 2 +- - mm/compaction.c | 4 + - mm/huge_memory.c | 4 + - mm/page-writeback.c | 8 + - mm/page_alloc.c | 4 + - mm/swap.c | 5 + - mm/vmpressure.c | 4 + - mm/vmscan.c | 8 + - 58 files changed, 5800 insertions(+), 113 deletions(-) - create mode 100644 drivers/i2c/busses/i2c-nct6775.c - create mode 100644 drivers/media/v4l2-core/v4l2loopback.c - create mode 100644 drivers/media/v4l2-core/v4l2loopback.h - create mode 100644 drivers/media/v4l2-core/v4l2loopback_formats.h - create mode 100644 drivers/pci/controller/intel-nvme-remap.c - -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index 27ec49af1bf2..07ac4c81a7dd 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -2229,6 +2229,9 @@ - disable - Do not enable intel_pstate as the default - scaling driver for the supported processors -+ enable -+ Enable intel_pstate in-case "disable" was passed -+ previously in the kernel boot parameters - active - Use intel_pstate driver to bypass the scaling - governors layer of cpufreq and provides it own -@@ -4447,6 +4450,15 @@ - nomsi [MSI] 
If the PCI_MSI kernel config parameter is - enabled, this kernel boot option can be used to - disable the use of MSI interrupts system-wide. -+ pcie_acs_override = -+ [PCIE] Override missing PCIe ACS support for: -+ downstream -+ All downstream ports - full ACS capabilities -+ multfunction -+ All multifunction devices - multifunction ACS subset -+ id:nnnn:nnnn -+ Specfic device - full ACS capabilities -+ Specified as vid:did (vendor/device ID) in hex - noioapicquirk [APIC] Disable all boot interrupt quirks. - Safety option to keep boot IRQs enabled. This - should never be necessary. -diff --git a/Makefile b/Makefile -index 3d10e3aadeda..b9435cef21b0 100644 ---- a/Makefile -+++ b/Makefile -@@ -817,6 +817,9 @@ KBUILD_CFLAGS += -fno-delete-null-pointer-checks - ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE - KBUILD_CFLAGS += -O2 - KBUILD_RUSTFLAGS += -Copt-level=2 -+else ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3 -+KBUILD_CFLAGS += -O3 -+KBUILD_RUSTFLAGS += -Copt-level=3 - else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE - KBUILD_CFLAGS += -Os - KBUILD_RUSTFLAGS += -Copt-level=s -@@ -1005,9 +1008,9 @@ KBUILD_CFLAGS += -fno-strict-overflow - # Make sure -fstack-check isn't enabled (like gentoo apparently did) - KBUILD_CFLAGS += -fno-stack-check - --# conserve stack if available -+# conserve stack, ivopts and modulo-sched if available - ifdef CONFIG_CC_IS_GCC --KBUILD_CFLAGS += -fconserve-stack -+KBUILD_CFLAGS += -fconserve-stack -fivopts -fmodulo-sched -fno-tree-vectorize - endif - - # change __FILE__ to the relative path from the srctree -diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu -index 2a7279d80460..3b077b9f9291 100644 ---- a/arch/x86/Kconfig.cpu -+++ b/arch/x86/Kconfig.cpu -@@ -157,7 +157,7 @@ config MPENTIUM4 - - - config MK6 -- bool "K6/K6-II/K6-III" -+ bool "AMD K6/K6-II/K6-III" - depends on X86_32 - help - Select this for an AMD K6-family processor. Enables use of -@@ -165,7 +165,7 @@ config MK6 - flags to GCC. 
- - config MK7 -- bool "Athlon/Duron/K7" -+ bool "AMD Athlon/Duron/K7" - depends on X86_32 - help - Select this for an AMD Athlon K7-family processor. Enables use of -@@ -173,12 +173,114 @@ config MK7 - flags to GCC. - - config MK8 -- bool "Opteron/Athlon64/Hammer/K8" -+ bool "AMD Opteron/Athlon64/Hammer/K8" - help - Select this for an AMD Opteron or Athlon64 Hammer-family processor. - Enables use of some extended instructions, and passes appropriate - optimization flags to GCC. - -+config MK8SSE3 -+ bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3" -+ help -+ Select this for improved AMD Opteron or Athlon64 Hammer-family processors. -+ Enables use of some extended instructions, and passes appropriate -+ optimization flags to GCC. -+ -+config MK10 -+ bool "AMD 61xx/7x50/PhenomX3/X4/II/K10" -+ help -+ Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50, -+ Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor. -+ Enables use of some extended instructions, and passes appropriate -+ optimization flags to GCC. -+ -+config MBARCELONA -+ bool "AMD Barcelona" -+ help -+ Select this for AMD Family 10h Barcelona processors. -+ -+ Enables -march=barcelona -+ -+config MBOBCAT -+ bool "AMD Bobcat" -+ help -+ Select this for AMD Family 14h Bobcat processors. -+ -+ Enables -march=btver1 -+ -+config MJAGUAR -+ bool "AMD Jaguar" -+ help -+ Select this for AMD Family 16h Jaguar processors. -+ -+ Enables -march=btver2 -+ -+config MBULLDOZER -+ bool "AMD Bulldozer" -+ help -+ Select this for AMD Family 15h Bulldozer processors. -+ -+ Enables -march=bdver1 -+ -+config MPILEDRIVER -+ bool "AMD Piledriver" -+ help -+ Select this for AMD Family 15h Piledriver processors. -+ -+ Enables -march=bdver2 -+ -+config MSTEAMROLLER -+ bool "AMD Steamroller" -+ help -+ Select this for AMD Family 15h Steamroller processors. -+ -+ Enables -march=bdver3 -+ -+config MEXCAVATOR -+ bool "AMD Excavator" -+ help -+ Select this for AMD Family 15h Excavator processors. 
-+ -+ Enables -march=bdver4 -+ -+config MZEN -+ bool "AMD Zen" -+ help -+ Select this for AMD Family 17h Zen processors. -+ -+ Enables -march=znver1 -+ -+config MZEN2 -+ bool "AMD Zen 2" -+ help -+ Select this for AMD Family 17h Zen 2 processors. -+ -+ Enables -march=znver2 -+ -+config MZEN3 -+ bool "AMD Zen 3" -+ depends on (CC_IS_GCC && GCC_VERSION >= 100300) || (CC_IS_CLANG && CLANG_VERSION >= 120000) -+ help -+ Select this for AMD Family 19h Zen 3 processors. -+ -+ Enables -march=znver3 -+ -+config MZEN4 -+ bool "AMD Zen 4" -+ depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 160000) -+ help -+ Select this for AMD Family 19h Zen 4 processors. -+ -+ Enables -march=znver4 -+ -+config MZEN5 -+ bool "AMD Zen 5" -+ depends on (CC_IS_GCC && GCC_VERSION >= 140000) || (CC_IS_CLANG && CLANG_VERSION >= 180000) -+ help -+ Select this for AMD Family 1Ah Zen 5 processors. -+ -+ Enables -march=znver5 -+ - config MCRUSOE - bool "Crusoe" - depends on X86_32 -@@ -270,7 +372,7 @@ config MPSC - in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. - - config MCORE2 -- bool "Core 2/newer Xeon" -+ bool "Intel Core 2" - help - - Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and -@@ -278,6 +380,8 @@ config MCORE2 - family in /proc/cpuinfo. Newer ones have 6 and older ones 15 - (not a typo) - -+ Enables -march=core2 -+ - config MATOM - bool "Intel Atom" - help -@@ -287,6 +391,212 @@ config MATOM - accordingly optimized code. Use a recent GCC with specific Atom - support in order to fully benefit from selecting this option. - -+config MNEHALEM -+ bool "Intel Nehalem" -+ select X86_P6_NOP -+ help -+ -+ Select this for 1st Gen Core processors in the Nehalem family. -+ -+ Enables -march=nehalem -+ -+config MWESTMERE -+ bool "Intel Westmere" -+ select X86_P6_NOP -+ help -+ -+ Select this for the Intel Westmere formerly Nehalem-C family. 
-+ -+ Enables -march=westmere -+ -+config MSILVERMONT -+ bool "Intel Silvermont" -+ select X86_P6_NOP -+ help -+ -+ Select this for the Intel Silvermont platform. -+ -+ Enables -march=silvermont -+ -+config MGOLDMONT -+ bool "Intel Goldmont" -+ select X86_P6_NOP -+ help -+ -+ Select this for the Intel Goldmont platform including Apollo Lake and Denverton. -+ -+ Enables -march=goldmont -+ -+config MGOLDMONTPLUS -+ bool "Intel Goldmont Plus" -+ select X86_P6_NOP -+ help -+ -+ Select this for the Intel Goldmont Plus platform including Gemini Lake. -+ -+ Enables -march=goldmont-plus -+ -+config MSANDYBRIDGE -+ bool "Intel Sandy Bridge" -+ select X86_P6_NOP -+ help -+ -+ Select this for 2nd Gen Core processors in the Sandy Bridge family. -+ -+ Enables -march=sandybridge -+ -+config MIVYBRIDGE -+ bool "Intel Ivy Bridge" -+ select X86_P6_NOP -+ help -+ -+ Select this for 3rd Gen Core processors in the Ivy Bridge family. -+ -+ Enables -march=ivybridge -+ -+config MHASWELL -+ bool "Intel Haswell" -+ select X86_P6_NOP -+ help -+ -+ Select this for 4th Gen Core processors in the Haswell family. -+ -+ Enables -march=haswell -+ -+config MBROADWELL -+ bool "Intel Broadwell" -+ select X86_P6_NOP -+ help -+ -+ Select this for 5th Gen Core processors in the Broadwell family. -+ -+ Enables -march=broadwell -+ -+config MSKYLAKE -+ bool "Intel Skylake" -+ select X86_P6_NOP -+ help -+ -+ Select this for 6th Gen Core processors in the Skylake family. -+ -+ Enables -march=skylake -+ -+config MSKYLAKEX -+ bool "Intel Skylake X" -+ select X86_P6_NOP -+ help -+ -+ Select this for 6th Gen Core processors in the Skylake X family. -+ -+ Enables -march=skylake-avx512 -+ -+config MCANNONLAKE -+ bool "Intel Cannon Lake" -+ select X86_P6_NOP -+ help -+ -+ Select this for 8th Gen Core processors -+ -+ Enables -march=cannonlake -+ -+config MICELAKE -+ bool "Intel Ice Lake" -+ select X86_P6_NOP -+ help -+ -+ Select this for 10th Gen Core processors in the Ice Lake family. 
-+ -+ Enables -march=icelake-client -+ -+config MCASCADELAKE -+ bool "Intel Cascade Lake" -+ select X86_P6_NOP -+ help -+ -+ Select this for Xeon processors in the Cascade Lake family. -+ -+ Enables -march=cascadelake -+ -+config MCOOPERLAKE -+ bool "Intel Cooper Lake" -+ depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000) -+ select X86_P6_NOP -+ help -+ -+ Select this for Xeon processors in the Cooper Lake family. -+ -+ Enables -march=cooperlake -+ -+config MTIGERLAKE -+ bool "Intel Tiger Lake" -+ depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000) -+ select X86_P6_NOP -+ help -+ -+ Select this for third-generation 10 nm process processors in the Tiger Lake family. -+ -+ Enables -march=tigerlake -+ -+config MSAPPHIRERAPIDS -+ bool "Intel Sapphire Rapids" -+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) -+ select X86_P6_NOP -+ help -+ -+ Select this for fourth-generation 10 nm process processors in the Sapphire Rapids family. -+ -+ Enables -march=sapphirerapids -+ -+config MROCKETLAKE -+ bool "Intel Rocket Lake" -+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) -+ select X86_P6_NOP -+ help -+ -+ Select this for eleventh-generation processors in the Rocket Lake family. -+ -+ Enables -march=rocketlake -+ -+config MALDERLAKE -+ bool "Intel Alder Lake" -+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) -+ select X86_P6_NOP -+ help -+ -+ Select this for twelfth-generation processors in the Alder Lake family. -+ -+ Enables -march=alderlake -+ -+config MRAPTORLAKE -+ bool "Intel Raptor Lake" -+ depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500) -+ select X86_P6_NOP -+ help -+ -+ Select this for thirteenth-generation processors in the Raptor Lake family. 
-+ -+ Enables -march=raptorlake -+ -+config MMETEORLAKE -+ bool "Intel Meteor Lake" -+ depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500) -+ select X86_P6_NOP -+ help -+ -+ Select this for fourteenth-generation processors in the Meteor Lake family. -+ -+ Enables -march=meteorlake -+ -+config MEMERALDRAPIDS -+ bool "Intel Emerald Rapids" -+ depends on (CC_IS_GCC && GCC_VERSION > 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500) -+ select X86_P6_NOP -+ help -+ -+ Select this for fifth-generation 10 nm process processors in the Emerald Rapids family. -+ -+ Enables -march=emeraldrapids -+ - config GENERIC_CPU - bool "Generic-x86-64" - depends on X86_64 -@@ -294,6 +604,50 @@ config GENERIC_CPU - Generic x86-64 CPU. - Run equally well on all x86-64 CPUs. - -+config GENERIC_CPU2 -+ bool "Generic-x86-64-v2" -+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) -+ depends on X86_64 -+ help -+ Generic x86-64 CPU. -+ Run equally well on all x86-64 CPUs with min support of x86-64-v2. -+ -+config GENERIC_CPU3 -+ bool "Generic-x86-64-v3" -+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) -+ depends on X86_64 -+ help -+ Generic x86-64-v3 CPU with v3 instructions. -+ Run equally well on all x86-64 CPUs with min support of x86-64-v3. -+ -+config GENERIC_CPU4 -+ bool "Generic-x86-64-v4" -+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) -+ depends on X86_64 -+ help -+ Generic x86-64 CPU with v4 instructions. -+ Run equally well on all x86-64 CPUs with min support of x86-64-v4. -+ -+config MNATIVE_INTEL -+ bool "Intel-Native optimizations autodetected by the compiler" -+ help -+ -+ Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects -+ the optimum settings to use based on your processor. Do NOT use this -+ for AMD CPUs. Intel Only! 
-+ -+ Enables -march=native -+ -+config MNATIVE_AMD -+ bool "AMD-Native optimizations autodetected by the compiler" -+ help -+ -+ Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects -+ the optimum settings to use based on your processor. Do NOT use this -+ for Intel CPUs. AMD Only! -+ -+ Enables -march=native -+ - endchoice - - config X86_GENERIC -@@ -318,9 +672,17 @@ config X86_INTERNODE_CACHE_SHIFT - config X86_L1_CACHE_SHIFT - int - default "7" if MPENTIUM4 || MPSC -- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU -+ default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || MK8SSE3 || MK10 \ -+ || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER \ -+ || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM || MWESTMERE || MSILVERMONT \ -+ || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL \ -+ || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE \ -+ || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE \ -+ || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD || X86_GENERIC || GENERIC_CPU || GENERIC_CPU2 \ -+ || GENERIC_CPU3 || GENERIC_CPU4 - default "4" if MELAN || M486SX || M486 || MGEODEGX1 -- default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX -+ default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII \ -+ || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX - - config X86_F00F_BUG - def_bool y -@@ -332,15 +694,27 @@ config X86_INVD_BUG - - config X86_ALIGNMENT_16 - def_bool y -- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486SX || M486 || MVIAC3_2 || MGEODEGX1 -+ depends on MWINCHIP3D 
|| MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC \ -+ || M586 || M486SX || M486 || MVIAC3_2 || MGEODEGX1 - - config X86_INTEL_USERCOPY - def_bool y -- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 -+ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC \ -+ || MK8 || MK7 || MEFFICEON || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT \ -+ || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX \ -+ || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS \ -+ || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL - - config X86_USE_PPRO_CHECKSUM - def_bool y -- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM -+ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM \ -+ || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX \ -+ || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER \ -+ || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM \ -+ || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE \ -+ || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE \ -+ || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE \ -+ || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD - - # - # P6_NOPs are a relatively minor optimization that require a family >= -@@ -356,11 +730,22 @@ config X86_USE_PPRO_CHECKSUM - config X86_P6_NOP - def_bool y - depends on X86_64 -- depends on (MCORE2 || MPENTIUM4 || 
MPSC) -+ depends on (MCORE2 || MPENTIUM4 || MPSC || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT \ -+ || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE \ -+ || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE \ -+ || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS \ -+ || MNATIVE_INTEL) - - config X86_TSC - def_bool y -- depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64 -+ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM \ -+ || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 \ -+ || MGEODE_LX || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER \ -+ || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM \ -+ || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL \ -+ || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE \ -+ || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS \ -+ || MNATIVE_INTEL || MNATIVE_AMD) || X86_64 - - config X86_HAVE_PAE - def_bool y -@@ -368,18 +753,37 @@ config X86_HAVE_PAE - - config X86_CMPXCHG64 - def_bool y -- depends on X86_HAVE_PAE || M586TSC || M586MMX || MK6 || MK7 -+ depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 \ -+ || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8 || MK8SSE3 || MK10 \ -+ || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN \ -+ || MZEN2 || MZEN3 || 
MZEN4 || MZEN5 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS \ -+ || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE \ -+ || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE \ -+ || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD - - # this should be set for all -march=.. options where the compiler - # generates cmov. - config X86_CMOV - def_bool y -- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) -+ depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 \ -+ || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX || MK8SSE3 || MK10 \ -+ || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR \ -+ || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT \ -+ || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX \ -+ || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS \ -+ || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD) - - config X86_MINIMUM_CPU_FAMILY - int - default "64" if X86_64 -- default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8) -+ default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 \ -+ || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8 || MK8SSE3 \ -+ || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER \ -+ || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM || MWESTMERE || MSILVERMONT \ -+ || 
MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL \ -+ || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE \ -+ || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MRAPTORLAKE \ -+ || MNATIVE_INTEL || MNATIVE_AMD) - default "5" if X86_32 && X86_CMPXCHG64 - default "4" - -diff --git a/arch/x86/Makefile b/arch/x86/Makefile -index 801fd85c3ef6..93cc88b59cbb 100644 ---- a/arch/x86/Makefile -+++ b/arch/x86/Makefile -@@ -176,8 +176,49 @@ else - # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) - cflags-$(CONFIG_MK8) += -march=k8 - cflags-$(CONFIG_MPSC) += -march=nocona -- cflags-$(CONFIG_MCORE2) += -march=core2 -- cflags-$(CONFIG_MATOM) += -march=atom -+ cflags-$(CONFIG_MK8SSE3) += -march=k8-sse3 -+ cflags-$(CONFIG_MK10) += -march=amdfam10 -+ cflags-$(CONFIG_MBARCELONA) += -march=barcelona -+ cflags-$(CONFIG_MBOBCAT) += -march=btver1 -+ cflags-$(CONFIG_MJAGUAR) += -march=btver2 -+ cflags-$(CONFIG_MBULLDOZER) += -march=bdver1 -+ cflags-$(CONFIG_MPILEDRIVER) += -march=bdver2 -mno-tbm -+ cflags-$(CONFIG_MSTEAMROLLER) += -march=bdver3 -mno-tbm -+ cflags-$(CONFIG_MEXCAVATOR) += -march=bdver4 -mno-tbm -+ cflags-$(CONFIG_MZEN) += -march=znver1 -+ cflags-$(CONFIG_MZEN2) += -march=znver2 -+ cflags-$(CONFIG_MZEN3) += -march=znver3 -+ cflags-$(CONFIG_MZEN4) += -march=znver4 -+ cflags-$(CONFIG_MZEN5) += -march=znver5 -+ cflags-$(CONFIG_MNATIVE_INTEL) += -march=native -+ cflags-$(CONFIG_MNATIVE_AMD) += -march=native -+ cflags-$(CONFIG_MATOM) += -march=bonnell -+ cflags-$(CONFIG_MCORE2) += -march=core2 -+ cflags-$(CONFIG_MNEHALEM) += -march=nehalem -+ cflags-$(CONFIG_MWESTMERE) += -march=westmere -+ cflags-$(CONFIG_MSILVERMONT) += -march=silvermont -+ cflags-$(CONFIG_MGOLDMONT) += -march=goldmont -+ cflags-$(CONFIG_MGOLDMONTPLUS) += -march=goldmont-plus -+ cflags-$(CONFIG_MSANDYBRIDGE) += -march=sandybridge -+ cflags-$(CONFIG_MIVYBRIDGE) += -march=ivybridge -+ 
cflags-$(CONFIG_MHASWELL) += -march=haswell -+ cflags-$(CONFIG_MBROADWELL) += -march=broadwell -+ cflags-$(CONFIG_MSKYLAKE) += -march=skylake -+ cflags-$(CONFIG_MSKYLAKEX) += -march=skylake-avx512 -+ cflags-$(CONFIG_MCANNONLAKE) += -march=cannonlake -+ cflags-$(CONFIG_MICELAKE) += -march=icelake-client -+ cflags-$(CONFIG_MCASCADELAKE) += -march=cascadelake -+ cflags-$(CONFIG_MCOOPERLAKE) += -march=cooperlake -+ cflags-$(CONFIG_MTIGERLAKE) += -march=tigerlake -+ cflags-$(CONFIG_MSAPPHIRERAPIDS) += -march=sapphirerapids -+ cflags-$(CONFIG_MROCKETLAKE) += -march=rocketlake -+ cflags-$(CONFIG_MALDERLAKE) += -march=alderlake -+ cflags-$(CONFIG_MRAPTORLAKE) += -march=raptorlake -+ cflags-$(CONFIG_MMETEORLAKE) += -march=meteorlake -+ cflags-$(CONFIG_MEMERALDRAPIDS) += -march=emeraldrapids -+ cflags-$(CONFIG_GENERIC_CPU2) += -march=x86-64-v2 -+ cflags-$(CONFIG_GENERIC_CPU3) += -march=x86-64-v3 -+ cflags-$(CONFIG_GENERIC_CPU4) += -march=x86-64-v4 - cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic - KBUILD_CFLAGS += $(cflags-y) - -diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h -index b3ab80a03365..5e883b397ff3 100644 ---- a/arch/x86/include/asm/pci.h -+++ b/arch/x86/include/asm/pci.h -@@ -26,6 +26,7 @@ struct pci_sysdata { - #if IS_ENABLED(CONFIG_VMD) - struct pci_dev *vmd_dev; /* VMD Device if in Intel VMD domain */ - #endif -+ struct pci_dev *nvme_remap_dev; /* AHCI Device if NVME remapped bus */ - }; - - extern int pci_routeirq; -@@ -69,6 +70,11 @@ static inline bool is_vmd(struct pci_bus *bus) - #define is_vmd(bus) false - #endif /* CONFIG_VMD */ - -+static inline bool is_nvme_remap(struct pci_bus *bus) -+{ -+ return to_pci_sysdata(bus)->nvme_remap_dev != NULL; -+} -+ - /* Can be used to override the logic in pci_scan_bus for skipping - already-configured bus numbers - to be used for buggy BIOSes - or architectures with incomplete PCI setup by the loader */ -diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h -index 
75884d2cdec3..7acca9b5a9d5 100644 ---- a/arch/x86/include/asm/vermagic.h -+++ b/arch/x86/include/asm/vermagic.h -@@ -17,6 +17,54 @@ - #define MODULE_PROC_FAMILY "586MMX " - #elif defined CONFIG_MCORE2 - #define MODULE_PROC_FAMILY "CORE2 " -+#elif defined CONFIG_MNATIVE_INTEL -+#define MODULE_PROC_FAMILY "NATIVE_INTEL " -+#elif defined CONFIG_MNATIVE_AMD -+#define MODULE_PROC_FAMILY "NATIVE_AMD " -+#elif defined CONFIG_MNEHALEM -+#define MODULE_PROC_FAMILY "NEHALEM " -+#elif defined CONFIG_MWESTMERE -+#define MODULE_PROC_FAMILY "WESTMERE " -+#elif defined CONFIG_MSILVERMONT -+#define MODULE_PROC_FAMILY "SILVERMONT " -+#elif defined CONFIG_MGOLDMONT -+#define MODULE_PROC_FAMILY "GOLDMONT " -+#elif defined CONFIG_MGOLDMONTPLUS -+#define MODULE_PROC_FAMILY "GOLDMONTPLUS " -+#elif defined CONFIG_MSANDYBRIDGE -+#define MODULE_PROC_FAMILY "SANDYBRIDGE " -+#elif defined CONFIG_MIVYBRIDGE -+#define MODULE_PROC_FAMILY "IVYBRIDGE " -+#elif defined CONFIG_MHASWELL -+#define MODULE_PROC_FAMILY "HASWELL " -+#elif defined CONFIG_MBROADWELL -+#define MODULE_PROC_FAMILY "BROADWELL " -+#elif defined CONFIG_MSKYLAKE -+#define MODULE_PROC_FAMILY "SKYLAKE " -+#elif defined CONFIG_MSKYLAKEX -+#define MODULE_PROC_FAMILY "SKYLAKEX " -+#elif defined CONFIG_MCANNONLAKE -+#define MODULE_PROC_FAMILY "CANNONLAKE " -+#elif defined CONFIG_MICELAKE -+#define MODULE_PROC_FAMILY "ICELAKE " -+#elif defined CONFIG_MCASCADELAKE -+#define MODULE_PROC_FAMILY "CASCADELAKE " -+#elif defined CONFIG_MCOOPERLAKE -+#define MODULE_PROC_FAMILY "COOPERLAKE " -+#elif defined CONFIG_MTIGERLAKE -+#define MODULE_PROC_FAMILY "TIGERLAKE " -+#elif defined CONFIG_MSAPPHIRERAPIDS -+#define MODULE_PROC_FAMILY "SAPPHIRERAPIDS " -+#elif defined CONFIG_ROCKETLAKE -+#define MODULE_PROC_FAMILY "ROCKETLAKE " -+#elif defined CONFIG_MALDERLAKE -+#define MODULE_PROC_FAMILY "ALDERLAKE " -+#elif defined CONFIG_MRAPTORLAKE -+#define MODULE_PROC_FAMILY "RAPTORLAKE " -+#elif defined CONFIG_MMETEORLAKE -+#define MODULE_PROC_FAMILY 
"METEORLAKE " -+#elif defined CONFIG_MEMERALDRAPIDS -+#define MODULE_PROC_FAMILY "EMERALDRAPIDS " - #elif defined CONFIG_MATOM - #define MODULE_PROC_FAMILY "ATOM " - #elif defined CONFIG_M686 -@@ -35,6 +83,34 @@ - #define MODULE_PROC_FAMILY "K7 " - #elif defined CONFIG_MK8 - #define MODULE_PROC_FAMILY "K8 " -+#elif defined CONFIG_MK8SSE3 -+#define MODULE_PROC_FAMILY "K8SSE3 " -+#elif defined CONFIG_MK10 -+#define MODULE_PROC_FAMILY "K10 " -+#elif defined CONFIG_MBARCELONA -+#define MODULE_PROC_FAMILY "BARCELONA " -+#elif defined CONFIG_MBOBCAT -+#define MODULE_PROC_FAMILY "BOBCAT " -+#elif defined CONFIG_MBULLDOZER -+#define MODULE_PROC_FAMILY "BULLDOZER " -+#elif defined CONFIG_MPILEDRIVER -+#define MODULE_PROC_FAMILY "PILEDRIVER " -+#elif defined CONFIG_MSTEAMROLLER -+#define MODULE_PROC_FAMILY "STEAMROLLER " -+#elif defined CONFIG_MJAGUAR -+#define MODULE_PROC_FAMILY "JAGUAR " -+#elif defined CONFIG_MEXCAVATOR -+#define MODULE_PROC_FAMILY "EXCAVATOR " -+#elif defined CONFIG_MZEN -+#define MODULE_PROC_FAMILY "ZEN " -+#elif defined CONFIG_MZEN2 -+#define MODULE_PROC_FAMILY "ZEN2 " -+#elif defined CONFIG_MZEN3 -+#define MODULE_PROC_FAMILY "ZEN3 " -+#elif defined CONFIG_MZEN4 -+#define MODULE_PROC_FAMILY "ZEN4 " -+#elif defined CONFIG_MZEN5 -+#define MODULE_PROC_FAMILY "ZEN5 " - #elif defined CONFIG_MELAN - #define MODULE_PROC_FAMILY "ELAN " - #elif defined CONFIG_MCRUSOE -diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c -index ddb798603201..7c20387d8202 100644 ---- a/arch/x86/pci/common.c -+++ b/arch/x86/pci/common.c -@@ -723,12 +723,15 @@ int pci_ext_cfg_avail(void) - return 0; - } - --#if IS_ENABLED(CONFIG_VMD) - struct pci_dev *pci_real_dma_dev(struct pci_dev *dev) - { -+#if IS_ENABLED(CONFIG_VMD) - if (is_vmd(dev->bus)) - return to_pci_sysdata(dev->bus)->vmd_dev; -+#endif -+ -+ if (is_nvme_remap(dev->bus)) -+ return to_pci_sysdata(dev->bus)->nvme_remap_dev; - - return dev; - } --#endif -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -index 
88df08a246fa..deecce63d0fc 100644 ---- a/block/bfq-iosched.c -+++ b/block/bfq-iosched.c -@@ -7703,6 +7703,7 @@ MODULE_ALIAS("bfq-iosched"); - static int __init bfq_init(void) - { - int ret; -+ char msg[60] = "BFQ I/O-scheduler: BFQ-CachyOS v6.10"; - - #ifdef CONFIG_BFQ_GROUP_IOSCHED - ret = blkcg_policy_register(&blkcg_policy_bfq); -@@ -7734,6 +7735,11 @@ static int __init bfq_init(void) - if (ret) - goto slab_kill; - -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ strcat(msg, " (with cgroups support)"); -+#endif -+ pr_info("%s", msg); -+ - return 0; - - slab_kill: -diff --git a/block/elevator.c b/block/elevator.c -index f64ebd726e58..4f1ccf8cf250 100644 ---- a/block/elevator.c -+++ b/block/elevator.c -@@ -567,9 +567,19 @@ static struct elevator_type *elevator_get_default(struct request_queue *q) - - if (q->nr_hw_queues != 1 && - !blk_mq_is_shared_tags(q->tag_set->flags)) -+#if defined(CONFIG_CACHY) && defined(CONFIG_MQ_IOSCHED_KYBER) -+ return elevator_find_get(q, "kyber"); -+#elif defined(CONFIG_CACHY) -+ return elevator_find_get(q, "mq-deadline"); -+#else - return NULL; -+#endif - -+#if defined(CONFIG_CACHY) && defined(CONFIG_IOSCHED_BFQ) -+ return elevator_find_get(q, "bfq"); -+#else - return elevator_find_get(q, "mq-deadline"); -+#endif - } - - /* -diff --git a/drivers/Makefile b/drivers/Makefile -index fe9ceb0d2288..b58955caf19b 100644 ---- a/drivers/Makefile -+++ b/drivers/Makefile -@@ -61,14 +61,8 @@ obj-y += char/ - # iommu/ comes before gpu as gpu are using iommu controllers - obj-y += iommu/ - --# gpu/ comes after char for AGP vs DRM startup and after iommu --obj-y += gpu/ -- - obj-$(CONFIG_CONNECTOR) += connector/ - --# i810fb depends on char/agp/ --obj-$(CONFIG_FB_I810) += video/fbdev/i810/ -- - obj-$(CONFIG_PARPORT) += parport/ - obj-y += base/ block/ misc/ mfd/ nfc/ - obj-$(CONFIG_LIBNVDIMM) += nvdimm/ -@@ -80,6 +74,13 @@ obj-y += macintosh/ - obj-y += scsi/ - obj-y += nvme/ - obj-$(CONFIG_ATA) += ata/ -+ -+# gpu/ comes after char for AGP vs DRM startup and 
after iommu -+obj-y += gpu/ -+ -+# i810fb depends on char/agp/ -+obj-$(CONFIG_FB_I810) += video/fbdev/i810/ -+ - obj-$(CONFIG_TARGET_CORE) += target/ - obj-$(CONFIG_MTD) += mtd/ - obj-$(CONFIG_SPI) += spi/ -diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c -index fc6fd583faf8..f79e205a51dd 100644 ---- a/drivers/ata/ahci.c -+++ b/drivers/ata/ahci.c -@@ -1618,7 +1618,7 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance) - } - #endif - --static void ahci_remap_check(struct pci_dev *pdev, int bar, -+static int ahci_remap_check(struct pci_dev *pdev, int bar, - struct ahci_host_priv *hpriv) - { - int i; -@@ -1631,7 +1631,7 @@ static void ahci_remap_check(struct pci_dev *pdev, int bar, - pci_resource_len(pdev, bar) < SZ_512K || - bar != AHCI_PCI_BAR_STANDARD || - !(readl(hpriv->mmio + AHCI_VSCAP) & 1)) -- return; -+ return 0; - - cap = readq(hpriv->mmio + AHCI_REMAP_CAP); - for (i = 0; i < AHCI_MAX_REMAP; i++) { -@@ -1646,18 +1646,11 @@ static void ahci_remap_check(struct pci_dev *pdev, int bar, - } - - if (!hpriv->remapped_nvme) -- return; -- -- dev_warn(&pdev->dev, "Found %u remapped NVMe devices.\n", -- hpriv->remapped_nvme); -- dev_warn(&pdev->dev, -- "Switch your BIOS from RAID to AHCI mode to use them.\n"); -+ return 0; - -- /* -- * Don't rely on the msi-x capability in the remap case, -- * share the legacy interrupt across ahci and remapped devices. 
-- */ -- hpriv->flags |= AHCI_HFLAG_NO_MSI; -+ /* Abort probe, allowing intel-nvme-remap to step in when available */ -+ dev_info(&pdev->dev, "Device will be handled by intel-nvme-remap.\n"); -+ return -ENODEV; - } - - static int ahci_get_irq_vector(struct ata_host *host, int port) -@@ -1894,7 +1887,9 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) - hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar]; - - /* detect remapped nvme devices */ -- ahci_remap_check(pdev, ahci_pci_bar, hpriv); -+ rc = ahci_remap_check(pdev, ahci_pci_bar, hpriv); -+ if (rc) -+ return rc; - - sysfs_add_file_to_group(&pdev->dev.kobj, - &dev_attr_remapped_nvme.attr, -diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 -index 97c2d4f15d76..5a3af44d785a 100644 ---- a/drivers/cpufreq/Kconfig.x86 -+++ b/drivers/cpufreq/Kconfig.x86 -@@ -9,7 +9,6 @@ config X86_INTEL_PSTATE - select ACPI_PROCESSOR if ACPI - select ACPI_CPPC_LIB if X86_64 && ACPI && SCHED_MC_PRIO - select CPU_FREQ_GOV_PERFORMANCE -- select CPU_FREQ_GOV_SCHEDUTIL if SMP - help - This driver provides a P state for Intel core processors. 
- The driver implements an internal governor and will become -@@ -39,7 +38,6 @@ config X86_AMD_PSTATE - depends on X86 && ACPI - select ACPI_PROCESSOR - select ACPI_CPPC_LIB if X86_64 -- select CPU_FREQ_GOV_SCHEDUTIL if SMP - help - This driver adds a CPUFreq driver which utilizes a fine grain - processor performance frequency control range instead of legacy -diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c -index c31914a9876f..1035c074f36a 100644 ---- a/drivers/cpufreq/intel_pstate.c -+++ b/drivers/cpufreq/intel_pstate.c -@@ -3550,6 +3550,8 @@ static int __init intel_pstate_setup(char *str) - - if (!strcmp(str, "disable")) - no_load = 1; -+ else if (!strcmp(str, "enable")) -+ no_load = 0; - else if (!strcmp(str, "active")) - default_driver = &intel_pstate; - else if (!strcmp(str, "passive")) -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h -index f87d53e183c3..c489d3b2576b 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h -@@ -159,6 +159,7 @@ struct amdgpu_watchdog_timer { - */ - extern int amdgpu_modeset; - extern unsigned int amdgpu_vram_limit; -+extern int amdgpu_ignore_min_pcap; - extern int amdgpu_vis_vram_limit; - extern int amdgpu_gart_size; - extern int amdgpu_gtt_size; -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -index ea14f1c8f430..bb0b636d0d75 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -@@ -132,6 +132,7 @@ enum AMDGPU_DEBUG_MASK { - }; - - unsigned int amdgpu_vram_limit = UINT_MAX; -+int amdgpu_ignore_min_pcap = 0; /* do not ignore by default */ - int amdgpu_vis_vram_limit; - int amdgpu_gart_size = -1; /* auto */ - int amdgpu_gtt_size = -1; /* auto */ -@@ -243,6 +244,15 @@ struct amdgpu_watchdog_timer amdgpu_watchdog_timer = { - .period = 0x0, /* default to 0x0 (timeout disable) */ - }; - -+/** -+ * DOC: ignore_min_pcap (int) -+ * Ignore 
the minimum power cap. -+ * Useful on graphics cards where the minimum power cap is very high. -+ * The default is 0 (Do not ignore). -+ */ -+MODULE_PARM_DESC(ignore_min_pcap, "Ignore the minimum power cap"); -+module_param_named(ignore_min_pcap, amdgpu_ignore_min_pcap, int, 0600); -+ - /** - * DOC: vramlimit (int) - * Restrict the total amount of VRAM in MiB for testing. The default is 0 (Use full VRAM). -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c -index 677eb141554e..ceb3f1e4ed1d 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c -@@ -151,6 +151,10 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) - } - } - -+ /* from vcn4 and above, only unified queue is used */ -+ adev->vcn.using_unified_queue = -+ amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0); -+ - hdr = (const struct common_firmware_header *)adev->vcn.fw[0]->data; - adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); - -@@ -279,18 +283,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) - return 0; - } - --/* from vcn4 and above, only unified queue is used */ --static bool amdgpu_vcn_using_unified_queue(struct amdgpu_ring *ring) --{ -- struct amdgpu_device *adev = ring->adev; -- bool ret = false; -- -- if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) -- ret = true; -- -- return ret; --} -- - bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) - { - bool ret = false; -@@ -401,7 +393,9 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) - for (i = 0; i < adev->vcn.num_enc_rings; ++i) - fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); - -- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { -+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ -+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && -+ !adev->vcn.using_unified_queue) { - struct 
dpg_pause_state new_state; - - if (fence[j] || -@@ -447,7 +441,9 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, - AMD_PG_STATE_UNGATE); - -- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { -+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ -+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && -+ !adev->vcn.using_unified_queue) { - struct dpg_pause_state new_state; - - if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) { -@@ -473,8 +469,12 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) - - void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) - { -+ struct amdgpu_device *adev = ring->adev; -+ -+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ - if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && -- ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) -+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC && -+ !adev->vcn.using_unified_queue) - atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt); - - atomic_dec(&ring->adev->vcn.total_submission_cnt); -@@ -728,12 +728,11 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, - struct amdgpu_job *job; - struct amdgpu_ib *ib; - uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); -- bool sq = amdgpu_vcn_using_unified_queue(ring); - uint32_t *ib_checksum; - uint32_t ib_pack_in_dw; - int i, r; - -- if (sq) -+ if (adev->vcn.using_unified_queue) - ib_size_dw += 8; - - r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, -@@ -746,7 +745,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, - ib->length_dw = 0; - - /* single queue headers */ -- if (sq) { -+ if (adev->vcn.using_unified_queue) { - ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t) - + 4 + 2; /* engine info + decoding ib in dw */ - ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false); -@@ -765,7 +764,7 @@ static int 
amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, - for (i = ib->length_dw; i < ib_size_dw; ++i) - ib->ptr[i] = 0x0; - -- if (sq) -+ if (adev->vcn.using_unified_queue) - amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw); - - r = amdgpu_job_submit_direct(job, ring, &f); -@@ -855,15 +854,15 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand - struct dma_fence **fence) - { - unsigned int ib_size_dw = 16; -+ struct amdgpu_device *adev = ring->adev; - struct amdgpu_job *job; - struct amdgpu_ib *ib; - struct dma_fence *f = NULL; - uint32_t *ib_checksum = NULL; - uint64_t addr; -- bool sq = amdgpu_vcn_using_unified_queue(ring); - int i, r; - -- if (sq) -+ if (adev->vcn.using_unified_queue) - ib_size_dw += 8; - - r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, -@@ -877,7 +876,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand - - ib->length_dw = 0; - -- if (sq) -+ if (adev->vcn.using_unified_queue) - ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); - - ib->ptr[ib->length_dw++] = 0x00000018; -@@ -899,7 +898,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand - for (i = ib->length_dw; i < ib_size_dw; ++i) - ib->ptr[i] = 0x0; - -- if (sq) -+ if (adev->vcn.using_unified_queue) - amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); - - r = amdgpu_job_submit_direct(job, ring, &f); -@@ -922,15 +921,15 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han - struct dma_fence **fence) - { - unsigned int ib_size_dw = 16; -+ struct amdgpu_device *adev = ring->adev; - struct amdgpu_job *job; - struct amdgpu_ib *ib; - struct dma_fence *f = NULL; - uint32_t *ib_checksum = NULL; - uint64_t addr; -- bool sq = amdgpu_vcn_using_unified_queue(ring); - int i, r; - -- if (sq) -+ if (adev->vcn.using_unified_queue) - ib_size_dw += 8; - - r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, -@@ -944,7 +943,7 @@ static int 
amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han - - ib->length_dw = 0; - -- if (sq) -+ if (adev->vcn.using_unified_queue) - ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); - - ib->ptr[ib->length_dw++] = 0x00000018; -@@ -966,7 +965,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han - for (i = ib->length_dw; i < ib_size_dw; ++i) - ib->ptr[i] = 0x0; - -- if (sq) -+ if (adev->vcn.using_unified_queue) - amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); - - r = amdgpu_job_submit_direct(job, ring, &f); -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h -index 9f06def236fd..1a5439abd1a0 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h -@@ -329,6 +329,7 @@ struct amdgpu_vcn { - - uint16_t inst_mask; - uint8_t num_inst_per_aid; -+ bool using_unified_queue; - }; - - struct amdgpu_fw_shared_rb_ptrs_struct { -diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig -index 47b8b49da8a7..943959d1f401 100644 ---- a/drivers/gpu/drm/amd/display/Kconfig -+++ b/drivers/gpu/drm/amd/display/Kconfig -@@ -51,4 +51,10 @@ config DRM_AMD_SECURE_DISPLAY - This option enables the calculation of crc of specific region via - debugfs. Cooperate with specific DMCU FW. - -+config AMD_PRIVATE_COLOR -+ bool "Enable KMS color management by AMD for AMD" -+ default n -+ help -+ This option extends the KMS color management API with AMD driver-specific properties to enhance the color management support on AMD Steam Deck. 
-+ - endmenu -diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -index 3cdcadd41be1..8c0b165ec7fb 100644 ---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -@@ -4118,7 +4118,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) - return r; - } - --#ifdef AMD_PRIVATE_COLOR -+#ifdef CONFIG_AMD_PRIVATE_COLOR - if (amdgpu_dm_create_color_properties(adev)) - return -ENOMEM; - #endif -diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c -index ebabfe3a512f..4d3ebcaacca1 100644 ---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c -+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c -@@ -97,7 +97,7 @@ static inline struct fixed31_32 amdgpu_dm_fixpt_from_s3132(__u64 x) - return val; - } - --#ifdef AMD_PRIVATE_COLOR -+#ifdef CONFIG_AMD_PRIVATE_COLOR - /* Pre-defined Transfer Functions (TF) - * - * AMD driver supports pre-defined mathematical functions for transferring -diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c -index e23a0a276e33..dd83cf50a89b 100644 ---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c -+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c -@@ -338,7 +338,7 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) - } - #endif - --#ifdef AMD_PRIVATE_COLOR -+#ifdef CONFIG_AMD_PRIVATE_COLOR - /** - * dm_crtc_additional_color_mgmt - enable additional color properties - * @crtc: DRM CRTC -@@ -420,7 +420,7 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { - #if defined(CONFIG_DEBUG_FS) - .late_register = amdgpu_dm_crtc_late_register, - #endif --#ifdef AMD_PRIVATE_COLOR -+#ifdef CONFIG_AMD_PRIVATE_COLOR - .atomic_set_property = amdgpu_dm_atomic_crtc_set_property, - .atomic_get_property = 
amdgpu_dm_atomic_crtc_get_property, - #endif -@@ -599,7 +599,7 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, - - drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); - --#ifdef AMD_PRIVATE_COLOR -+#ifdef CONFIG_AMD_PRIVATE_COLOR - dm_crtc_additional_color_mgmt(&acrtc->base); - #endif - return 0; -diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c -index 8a4c40b4c27e..779880c64575 100644 ---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c -+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c -@@ -1468,7 +1468,7 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane, - drm_atomic_helper_plane_destroy_state(plane, state); - } - --#ifdef AMD_PRIVATE_COLOR -+#ifdef CONFIG_AMD_PRIVATE_COLOR - static void - dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, - struct drm_plane *plane) -@@ -1659,7 +1659,7 @@ static const struct drm_plane_funcs dm_plane_funcs = { - .atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state, - .atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state, - .format_mod_supported = amdgpu_dm_plane_format_mod_supported, --#ifdef AMD_PRIVATE_COLOR -+#ifdef CONFIG_AMD_PRIVATE_COLOR - .atomic_set_property = dm_atomic_plane_set_property, - .atomic_get_property = dm_atomic_plane_get_property, - #endif -@@ -1742,7 +1742,7 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, - - drm_plane_helper_add(plane, &dm_plane_helper_funcs); - --#ifdef AMD_PRIVATE_COLOR -+#ifdef CONFIG_AMD_PRIVATE_COLOR - dm_atomic_plane_attach_color_mgmt_properties(dm, plane); - #endif - /* Create (reset) the plane state */ -diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c -index 5574bc628053..f109a101d84f 100644 ---- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c -+++ 
b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c -@@ -945,19 +945,10 @@ void optc1_set_drr( - OTG_FORCE_LOCK_ON_EVENT, 0, - OTG_SET_V_TOTAL_MIN_MASK_EN, 0, - OTG_SET_V_TOTAL_MIN_MASK, 0); -- -- // Setup manual flow control for EOF via TRIG_A -- optc->funcs->setup_manual_trigger(optc); -- -- } else { -- REG_UPDATE_4(OTG_V_TOTAL_CONTROL, -- OTG_SET_V_TOTAL_MIN_MASK, 0, -- OTG_V_TOTAL_MIN_SEL, 0, -- OTG_V_TOTAL_MAX_SEL, 0, -- OTG_FORCE_LOCK_ON_EVENT, 0); -- -- optc->funcs->set_vtotal_min_max(optc, 0, 0); - } -+ -+ // Setup manual flow control for EOF via TRIG_A -+ optc->funcs->setup_manual_trigger(optc); - } - - void optc1_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max) -diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c -index d6f095b4555d..58bdbd859bf9 100644 ---- a/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c -+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c -@@ -462,6 +462,16 @@ void optc2_setup_manual_trigger(struct timing_generator *optc) - { - struct optc *optc1 = DCN10TG_FROM_TG(optc); - -+ /* Set the min/max selectors unconditionally so that -+ * DMCUB fw may change OTG timings when necessary -+ * TODO: Remove the w/a after fixing the issue in DMCUB firmware -+ */ -+ REG_UPDATE_4(OTG_V_TOTAL_CONTROL, -+ OTG_V_TOTAL_MIN_SEL, 1, -+ OTG_V_TOTAL_MAX_SEL, 1, -+ OTG_FORCE_LOCK_ON_EVENT, 0, -+ OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */ -+ - REG_SET_8(OTG_TRIGA_CNTL, 0, - OTG_TRIGA_SOURCE_SELECT, 21, - OTG_TRIGA_SOURCE_PIPE_SELECT, optc->inst, -diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c -index c11952a4389b..52f54a228b39 100644 ---- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c -+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c -@@ -3155,6 +3155,9 @@ static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev, - struct device_attribute *attr, - char *buf) - { -+ if (amdgpu_ignore_min_pcap) -+ 
return sysfs_emit(buf, "%i\n", 0); -+ - return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_MIN); - } - -diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c -index e1796ecf9c05..5e46bd293205 100644 ---- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c -+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c -@@ -2749,7 +2749,10 @@ int smu_get_power_limit(void *handle, - *limit = smu->max_power_limit; - break; - case SMU_PPT_LIMIT_MIN: -- *limit = smu->min_power_limit; -+ if (amdgpu_ignore_min_pcap) -+ *limit = 0; -+ else -+ *limit = smu->min_power_limit; - break; - default: - return -EINVAL; -@@ -2773,7 +2776,14 @@ static int smu_set_power_limit(void *handle, uint32_t limit) - if (smu->ppt_funcs->set_power_limit) - return smu->ppt_funcs->set_power_limit(smu, limit_type, limit); - -- if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) { -+ if (amdgpu_ignore_min_pcap) { -+ if ((limit > smu->max_power_limit)) { -+ dev_err(smu->adev->dev, -+ "New power limit (%d) is over the max allowed %d\n", -+ limit, smu->max_power_limit); -+ return -EINVAL; -+ } -+ } else if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) { - dev_err(smu->adev->dev, - "New power limit (%d) is out of range [%d,%d]\n", - limit, smu->min_power_limit, smu->max_power_limit); -diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c -index fc16fddee5c5..05b21fe9b395 100644 ---- a/drivers/gpu/drm/drm_atomic_uapi.c -+++ b/drivers/gpu/drm/drm_atomic_uapi.c -@@ -1066,21 +1066,14 @@ int drm_atomic_set_property(struct drm_atomic_state *state, - break; - } - -- if (async_flip && prop != config->prop_fb_id) { -+ if (async_flip && (prop != config->prop_fb_id || -+ plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY)) { - ret = drm_atomic_plane_get_property(plane, plane_state, - prop, &old_val); - ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); - break; - } - -- if (async_flip && 
plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY) { -- drm_dbg_atomic(prop->dev, -- "[OBJECT:%d] Only primary planes can be changed during async flip\n", -- obj->id); -- ret = -EINVAL; -- break; -- } -- - ret = drm_atomic_plane_set_property(plane, - plane_state, file_priv, - prop, prop_value); -diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig -index fe6e8a1bb607..1488a904e3bf 100644 ---- a/drivers/i2c/busses/Kconfig -+++ b/drivers/i2c/busses/Kconfig -@@ -238,6 +238,15 @@ config I2C_CHT_WC - combined with a FUSB302 Type-C port-controller as such it is advised - to also select CONFIG_TYPEC_FUSB302=m. - -+config I2C_NCT6775 -+ tristate "Nuvoton NCT6775 and compatible SMBus controller" -+ help -+ If you say yes to this option, support will be included for the -+ Nuvoton NCT6775 and compatible SMBus controllers. -+ -+ This driver can also be built as a module. If so, the module -+ will be called i2c-nct6775. -+ - config I2C_NFORCE2 - tristate "Nvidia nForce2, nForce3 and nForce4" - depends on PCI && HAS_IOPORT -diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile -index 78d0561339e5..9ea3a294f9f0 100644 ---- a/drivers/i2c/busses/Makefile -+++ b/drivers/i2c/busses/Makefile -@@ -20,6 +20,7 @@ obj-$(CONFIG_I2C_CHT_WC) += i2c-cht-wc.o - obj-$(CONFIG_I2C_I801) += i2c-i801.o - obj-$(CONFIG_I2C_ISCH) += i2c-isch.o - obj-$(CONFIG_I2C_ISMT) += i2c-ismt.o -+obj-$(CONFIG_I2C_NCT6775) += i2c-nct6775.o - obj-$(CONFIG_I2C_NFORCE2) += i2c-nforce2.o - obj-$(CONFIG_I2C_NFORCE2_S4985) += i2c-nforce2-s4985.o - obj-$(CONFIG_I2C_NVIDIA_GPU) += i2c-nvidia-gpu.o -diff --git a/drivers/i2c/busses/i2c-nct6775.c b/drivers/i2c/busses/i2c-nct6775.c -new file mode 100644 -index 000000000000..fdbd9a1c8d7a ---- /dev/null -+++ b/drivers/i2c/busses/i2c-nct6775.c -@@ -0,0 +1,648 @@ -+/* -+ * i2c-nct6775 - Driver for the SMBus master functionality of -+ * Nuvoton NCT677x Super-I/O chips -+ * -+ * Copyright (C) 2019 Adam Honse -+ * -+ * Derived from nct6775 hwmon 
driver -+ * Copyright (C) 2012 Guenter Roeck -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -+ * -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define DRVNAME "i2c-nct6775" -+ -+/* Nuvoton SMBus address offsets */ -+#define SMBHSTDAT (0 + nuvoton_nct6793d_smba) -+#define SMBBLKSZ (1 + nuvoton_nct6793d_smba) -+#define SMBHSTCMD (2 + nuvoton_nct6793d_smba) -+#define SMBHSTIDX (3 + nuvoton_nct6793d_smba) //Index field is the Command field on other controllers -+#define SMBHSTCTL (4 + nuvoton_nct6793d_smba) -+#define SMBHSTADD (5 + nuvoton_nct6793d_smba) -+#define SMBHSTERR (9 + nuvoton_nct6793d_smba) -+#define SMBHSTSTS (0xE + nuvoton_nct6793d_smba) -+ -+/* Command register */ -+#define NCT6793D_READ_BYTE 0 -+#define NCT6793D_READ_WORD 1 -+#define NCT6793D_READ_BLOCK 2 -+#define NCT6793D_BLOCK_WRITE_READ_PROC_CALL 3 -+#define NCT6793D_PROC_CALL 4 -+#define NCT6793D_WRITE_BYTE 8 -+#define NCT6793D_WRITE_WORD 9 -+#define NCT6793D_WRITE_BLOCK 10 -+ -+/* Control register */ -+#define NCT6793D_MANUAL_START 128 -+#define NCT6793D_SOFT_RESET 64 -+ -+/* Error register */ -+#define NCT6793D_NO_ACK 32 -+ -+/* Status register */ -+#define 
NCT6793D_FIFO_EMPTY 1 -+#define NCT6793D_FIFO_FULL 2 -+#define NCT6793D_MANUAL_ACTIVE 4 -+ -+#define NCT6775_LD_SMBUS 0x0B -+ -+/* Other settings */ -+#define MAX_RETRIES 400 -+ -+enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792, nct6793, -+ nct6795, nct6796, nct6798 }; -+ -+struct nct6775_sio_data { -+ int sioreg; -+ enum kinds kind; -+}; -+ -+/* used to set data->name = nct6775_device_names[data->sio_kind] */ -+static const char * const nct6775_device_names[] = { -+ "nct6106", -+ "nct6775", -+ "nct6776", -+ "nct6779", -+ "nct6791", -+ "nct6792", -+ "nct6793", -+ "nct6795", -+ "nct6796", -+ "nct6798", -+}; -+ -+static const char * const nct6775_sio_names[] __initconst = { -+ "NCT6106D", -+ "NCT6775F", -+ "NCT6776D/F", -+ "NCT6779D", -+ "NCT6791D", -+ "NCT6792D", -+ "NCT6793D", -+ "NCT6795D", -+ "NCT6796D", -+ "NCT6798D", -+}; -+ -+#define SIO_REG_LDSEL 0x07 /* Logical device select */ -+#define SIO_REG_DEVID 0x20 /* Device ID (2 bytes) */ -+#define SIO_REG_SMBA 0x62 /* SMBus base address register */ -+ -+#define SIO_NCT6106_ID 0xc450 -+#define SIO_NCT6775_ID 0xb470 -+#define SIO_NCT6776_ID 0xc330 -+#define SIO_NCT6779_ID 0xc560 -+#define SIO_NCT6791_ID 0xc800 -+#define SIO_NCT6792_ID 0xc910 -+#define SIO_NCT6793_ID 0xd120 -+#define SIO_NCT6795_ID 0xd350 -+#define SIO_NCT6796_ID 0xd420 -+#define SIO_NCT6798_ID 0xd428 -+#define SIO_ID_MASK 0xFFF0 -+ -+static inline void -+superio_outb(int ioreg, int reg, int val) -+{ -+ outb(reg, ioreg); -+ outb(val, ioreg + 1); -+} -+ -+static inline int -+superio_inb(int ioreg, int reg) -+{ -+ outb(reg, ioreg); -+ return inb(ioreg + 1); -+} -+ -+static inline void -+superio_select(int ioreg, int ld) -+{ -+ outb(SIO_REG_LDSEL, ioreg); -+ outb(ld, ioreg + 1); -+} -+ -+static inline int -+superio_enter(int ioreg) -+{ -+ /* -+ * Try to reserve and for exclusive access. 
-+ */ -+ if (!request_muxed_region(ioreg, 2, DRVNAME)) -+ return -EBUSY; -+ -+ outb(0x87, ioreg); -+ outb(0x87, ioreg); -+ -+ return 0; -+} -+ -+static inline void -+superio_exit(int ioreg) -+{ -+ outb(0xaa, ioreg); -+ outb(0x02, ioreg); -+ outb(0x02, ioreg + 1); -+ release_region(ioreg, 2); -+} -+ -+/* -+ * ISA constants -+ */ -+ -+#define IOREGION_ALIGNMENT (~7) -+#define IOREGION_LENGTH 2 -+#define ADDR_REG_OFFSET 0 -+#define DATA_REG_OFFSET 1 -+ -+#define NCT6775_REG_BANK 0x4E -+#define NCT6775_REG_CONFIG 0x40 -+ -+static struct i2c_adapter *nct6775_adapter; -+ -+struct i2c_nct6775_adapdata { -+ unsigned short smba; -+}; -+ -+/* Return negative errno on error. */ -+static s32 nct6775_access(struct i2c_adapter * adap, u16 addr, -+ unsigned short flags, char read_write, -+ u8 command, int size, union i2c_smbus_data * data) -+{ -+ struct i2c_nct6775_adapdata *adapdata = i2c_get_adapdata(adap); -+ unsigned short nuvoton_nct6793d_smba = adapdata->smba; -+ int i, len, cnt; -+ union i2c_smbus_data tmp_data; -+ int timeout = 0; -+ -+ tmp_data.word = 0; -+ cnt = 0; -+ len = 0; -+ -+ outb_p(NCT6793D_SOFT_RESET, SMBHSTCTL); -+ -+ switch (size) { -+ case I2C_SMBUS_QUICK: -+ outb_p((addr << 1) | read_write, -+ SMBHSTADD); -+ break; -+ case I2C_SMBUS_BYTE_DATA: -+ tmp_data.byte = data->byte; -+ fallthrough; -+ case I2C_SMBUS_BYTE: -+ outb_p((addr << 1) | read_write, -+ SMBHSTADD); -+ outb_p(command, SMBHSTIDX); -+ if (read_write == I2C_SMBUS_WRITE) { -+ outb_p(tmp_data.byte, SMBHSTDAT); -+ outb_p(NCT6793D_WRITE_BYTE, SMBHSTCMD); -+ } -+ else { -+ outb_p(NCT6793D_READ_BYTE, SMBHSTCMD); -+ } -+ break; -+ case I2C_SMBUS_WORD_DATA: -+ outb_p((addr << 1) | read_write, -+ SMBHSTADD); -+ outb_p(command, SMBHSTIDX); -+ if (read_write == I2C_SMBUS_WRITE) { -+ outb_p(data->word & 0xff, SMBHSTDAT); -+ outb_p((data->word & 0xff00) >> 8, SMBHSTDAT); -+ outb_p(NCT6793D_WRITE_WORD, SMBHSTCMD); -+ } -+ else { -+ outb_p(NCT6793D_READ_WORD, SMBHSTCMD); -+ } -+ break; -+ case 
I2C_SMBUS_BLOCK_DATA: -+ outb_p((addr << 1) | read_write, -+ SMBHSTADD); -+ outb_p(command, SMBHSTIDX); -+ if (read_write == I2C_SMBUS_WRITE) { -+ len = data->block[0]; -+ if (len == 0 || len > I2C_SMBUS_BLOCK_MAX) -+ return -EINVAL; -+ outb_p(len, SMBBLKSZ); -+ -+ cnt = 1; -+ if (len >= 4) { -+ for (i = cnt; i <= 4; i++) { -+ outb_p(data->block[i], SMBHSTDAT); -+ } -+ -+ len -= 4; -+ cnt += 4; -+ } -+ else { -+ for (i = cnt; i <= len; i++ ) { -+ outb_p(data->block[i], SMBHSTDAT); -+ } -+ -+ len = 0; -+ } -+ -+ outb_p(NCT6793D_WRITE_BLOCK, SMBHSTCMD); -+ } -+ else { -+ return -ENOTSUPP; -+ } -+ break; -+ default: -+ dev_warn(&adap->dev, "Unsupported transaction %d\n", size); -+ return -EOPNOTSUPP; -+ } -+ -+ outb_p(NCT6793D_MANUAL_START, SMBHSTCTL); -+ -+ while ((size == I2C_SMBUS_BLOCK_DATA) && (len > 0)) { -+ if (read_write == I2C_SMBUS_WRITE) { -+ timeout = 0; -+ while ((inb_p(SMBHSTSTS) & NCT6793D_FIFO_EMPTY) == 0) -+ { -+ if(timeout > MAX_RETRIES) -+ { -+ return -ETIMEDOUT; -+ } -+ usleep_range(250, 500); -+ timeout++; -+ } -+ -+ //Load more bytes into FIFO -+ if (len >= 4) { -+ for (i = cnt; i <= (cnt + 4); i++) { -+ outb_p(data->block[i], SMBHSTDAT); -+ } -+ -+ len -= 4; -+ cnt += 4; -+ } -+ else { -+ for (i = cnt; i <= (cnt + len); i++) { -+ outb_p(data->block[i], SMBHSTDAT); -+ } -+ -+ len = 0; -+ } -+ } -+ else { -+ return -ENOTSUPP; -+ } -+ -+ } -+ -+ //wait for manual mode to complete -+ timeout = 0; -+ while ((inb_p(SMBHSTSTS) & NCT6793D_MANUAL_ACTIVE) != 0) -+ { -+ if(timeout > MAX_RETRIES) -+ { -+ return -ETIMEDOUT; -+ } -+ usleep_range(250, 500); -+ timeout++; -+ } -+ -+ if ((inb_p(SMBHSTERR) & NCT6793D_NO_ACK) != 0) { -+ return -ENXIO; -+ } -+ else if ((read_write == I2C_SMBUS_WRITE) || (size == I2C_SMBUS_QUICK)) { -+ return 0; -+ } -+ -+ switch (size) { -+ case I2C_SMBUS_QUICK: -+ case I2C_SMBUS_BYTE_DATA: -+ data->byte = inb_p(SMBHSTDAT); -+ break; -+ case I2C_SMBUS_WORD_DATA: -+ data->word = inb_p(SMBHSTDAT) + (inb_p(SMBHSTDAT) << 8); -+ break; 
-+ } -+ return 0; -+} -+ -+static u32 nct6775_func(struct i2c_adapter *adapter) -+{ -+ return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE | -+ I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA | -+ I2C_FUNC_SMBUS_BLOCK_DATA; -+} -+ -+static const struct i2c_algorithm smbus_algorithm = { -+ .smbus_xfer = nct6775_access, -+ .functionality = nct6775_func, -+}; -+ -+static int nct6775_add_adapter(unsigned short smba, const char *name, struct i2c_adapter **padap) -+{ -+ struct i2c_adapter *adap; -+ struct i2c_nct6775_adapdata *adapdata; -+ int retval; -+ -+ adap = kzalloc(sizeof(*adap), GFP_KERNEL); -+ if (adap == NULL) { -+ return -ENOMEM; -+ } -+ -+ adap->owner = THIS_MODULE; -+ adap->class = I2C_CLASS_HWMON; -+ adap->algo = &smbus_algorithm; -+ -+ adapdata = kzalloc(sizeof(*adapdata), GFP_KERNEL); -+ if (adapdata == NULL) { -+ kfree(adap); -+ return -ENOMEM; -+ } -+ -+ adapdata->smba = smba; -+ -+ snprintf(adap->name, sizeof(adap->name), -+ "SMBus NCT67xx adapter%s at %04x", name, smba); -+ -+ i2c_set_adapdata(adap, adapdata); -+ -+ retval = i2c_add_adapter(adap); -+ if (retval) { -+ kfree(adapdata); -+ kfree(adap); -+ return retval; -+ } -+ -+ *padap = adap; -+ return 0; -+} -+ -+static void nct6775_remove_adapter(struct i2c_adapter *adap) -+{ -+ struct i2c_nct6775_adapdata *adapdata = i2c_get_adapdata(adap); -+ -+ if (adapdata->smba) { -+ i2c_del_adapter(adap); -+ kfree(adapdata); -+ kfree(adap); -+ } -+} -+ -+//static SIMPLE_DEV_PM_OPS(nct6775_dev_pm_ops, nct6775_suspend, nct6775_resume); -+ -+/* -+ * when Super-I/O functions move to a separate file, the Super-I/O -+ * bus will manage the lifetime of the device and this module will only keep -+ * track of the nct6775 driver. 
But since we use platform_device_alloc(), we -+ * must keep track of the device -+ */ -+static struct platform_device *pdev[2]; -+ -+static int nct6775_probe(struct platform_device *pdev) -+{ -+ struct device *dev = &pdev->dev; -+ struct nct6775_sio_data *sio_data = dev_get_platdata(dev); -+ struct resource *res; -+ -+ res = platform_get_resource(pdev, IORESOURCE_IO, 0); -+ if (!devm_request_region(&pdev->dev, res->start, IOREGION_LENGTH, -+ DRVNAME)) -+ return -EBUSY; -+ -+ switch (sio_data->kind) { -+ case nct6791: -+ case nct6792: -+ case nct6793: -+ case nct6795: -+ case nct6796: -+ case nct6798: -+ nct6775_add_adapter(res->start, "", &nct6775_adapter); -+ break; -+ default: -+ return -ENODEV; -+ } -+ -+ return 0; -+} -+/* -+static void nct6791_enable_io_mapping(int sioaddr) -+{ -+ int val; -+ -+ val = superio_inb(sioaddr, NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE); -+ if (val & 0x10) { -+ pr_info("Enabling hardware monitor logical device mappings.\n"); -+ superio_outb(sioaddr, NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE, -+ val & ~0x10); -+ } -+}*/ -+ -+static struct platform_driver i2c_nct6775_driver = { -+ .driver = { -+ .name = DRVNAME, -+// .pm = &nct6775_dev_pm_ops, -+ }, -+ .probe = nct6775_probe, -+}; -+ -+static void __exit i2c_nct6775_exit(void) -+{ -+ int i; -+ -+ if(nct6775_adapter) -+ nct6775_remove_adapter(nct6775_adapter); -+ -+ for (i = 0; i < ARRAY_SIZE(pdev); i++) { -+ if (pdev[i]) -+ platform_device_unregister(pdev[i]); -+ } -+ platform_driver_unregister(&i2c_nct6775_driver); -+} -+ -+/* nct6775_find() looks for a '627 in the Super-I/O config space */ -+static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data) -+{ -+ u16 val; -+ int err; -+ int addr; -+ -+ err = superio_enter(sioaddr); -+ if (err) -+ return err; -+ -+ val = (superio_inb(sioaddr, SIO_REG_DEVID) << 8) | -+ superio_inb(sioaddr, SIO_REG_DEVID + 1); -+ -+ switch (val & SIO_ID_MASK) { -+ case SIO_NCT6106_ID: -+ sio_data->kind = nct6106; -+ break; -+ case SIO_NCT6775_ID: -+ 
sio_data->kind = nct6775; -+ break; -+ case SIO_NCT6776_ID: -+ sio_data->kind = nct6776; -+ break; -+ case SIO_NCT6779_ID: -+ sio_data->kind = nct6779; -+ break; -+ case SIO_NCT6791_ID: -+ sio_data->kind = nct6791; -+ break; -+ case SIO_NCT6792_ID: -+ sio_data->kind = nct6792; -+ break; -+ case SIO_NCT6793_ID: -+ sio_data->kind = nct6793; -+ break; -+ case SIO_NCT6795_ID: -+ sio_data->kind = nct6795; -+ break; -+ case SIO_NCT6796_ID: -+ sio_data->kind = nct6796; -+ break; -+ case SIO_NCT6798_ID: -+ sio_data->kind = nct6798; -+ break; -+ default: -+ if (val != 0xffff) -+ pr_debug("unsupported chip ID: 0x%04x\n", val); -+ superio_exit(sioaddr); -+ return -ENODEV; -+ } -+ -+ /* We have a known chip, find the SMBus I/O address */ -+ superio_select(sioaddr, NCT6775_LD_SMBUS); -+ val = (superio_inb(sioaddr, SIO_REG_SMBA) << 8) -+ | superio_inb(sioaddr, SIO_REG_SMBA + 1); -+ addr = val & IOREGION_ALIGNMENT; -+ if (addr == 0) { -+ pr_err("Refusing to enable a Super-I/O device with a base I/O port 0\n"); -+ superio_exit(sioaddr); -+ return -ENODEV; -+ } -+ -+ //if (sio_data->kind == nct6791 || sio_data->kind == nct6792 || -+ // sio_data->kind == nct6793 || sio_data->kind == nct6795 || -+ // sio_data->kind == nct6796) -+ // nct6791_enable_io_mapping(sioaddr); -+ -+ superio_exit(sioaddr); -+ pr_info("Found %s or compatible chip at %#x:%#x\n", -+ nct6775_sio_names[sio_data->kind], sioaddr, addr); -+ sio_data->sioreg = sioaddr; -+ -+ return addr; -+} -+ -+static int __init i2c_nct6775_init(void) -+{ -+ int i, err; -+ bool found = false; -+ int address; -+ struct resource res; -+ struct nct6775_sio_data sio_data; -+ int sioaddr[2] = { 0x2e, 0x4e }; -+ -+ err = platform_driver_register(&i2c_nct6775_driver); -+ if (err) -+ return err; -+ -+ /* -+ * initialize sio_data->kind and sio_data->sioreg. 
-+ * -+ * when Super-I/O functions move to a separate file, the Super-I/O -+ * driver will probe 0x2e and 0x4e and auto-detect the presence of a -+ * nct6775 hardware monitor, and call probe() -+ */ -+ for (i = 0; i < ARRAY_SIZE(pdev); i++) { -+ address = nct6775_find(sioaddr[i], &sio_data); -+ if (address <= 0) -+ continue; -+ -+ found = true; -+ -+ pdev[i] = platform_device_alloc(DRVNAME, address); -+ if (!pdev[i]) { -+ err = -ENOMEM; -+ goto exit_device_unregister; -+ } -+ -+ err = platform_device_add_data(pdev[i], &sio_data, -+ sizeof(struct nct6775_sio_data)); -+ if (err) -+ goto exit_device_put; -+ -+ memset(&res, 0, sizeof(res)); -+ res.name = DRVNAME; -+ res.start = address; -+ res.end = address + IOREGION_LENGTH - 1; -+ res.flags = IORESOURCE_IO; -+ -+ err = acpi_check_resource_conflict(&res); -+ if (err) { -+ platform_device_put(pdev[i]); -+ pdev[i] = NULL; -+ continue; -+ } -+ -+ err = platform_device_add_resources(pdev[i], &res, 1); -+ if (err) -+ goto exit_device_put; -+ -+ /* platform_device_add calls probe() */ -+ err = platform_device_add(pdev[i]); -+ if (err) -+ goto exit_device_put; -+ } -+ if (!found) { -+ err = -ENODEV; -+ goto exit_unregister; -+ } -+ -+ return 0; -+ -+exit_device_put: -+ platform_device_put(pdev[i]); -+exit_device_unregister: -+ while (--i >= 0) { -+ if (pdev[i]) -+ platform_device_unregister(pdev[i]); -+ } -+exit_unregister: -+ platform_driver_unregister(&i2c_nct6775_driver); -+ return err; -+} -+ -+MODULE_AUTHOR("Adam Honse "); -+MODULE_DESCRIPTION("SMBus driver for NCT6775F and compatible chips"); -+MODULE_LICENSE("GPL"); -+ -+module_init(i2c_nct6775_init); -+module_exit(i2c_nct6775_exit); -diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c -index 6a0392172b2f..e7dd007bf6b1 100644 ---- a/drivers/i2c/busses/i2c-piix4.c -+++ b/drivers/i2c/busses/i2c-piix4.c -@@ -568,11 +568,11 @@ static int piix4_transaction(struct i2c_adapter *piix4_adapter) - if (srvrworks_csb5_delay) /* Extra delay for 
SERVERWORKS_CSB5 */ - usleep_range(2000, 2100); - else -- usleep_range(250, 500); -+ usleep_range(25, 50); - - while ((++timeout < MAX_TIMEOUT) && - ((temp = inb_p(SMBHSTSTS)) & 0x01)) -- usleep_range(250, 500); -+ usleep_range(25, 50); - - /* If the SMBus is still busy, we give up */ - if (timeout == MAX_TIMEOUT) { -diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c -index 51e0c4954600..35c3ad741870 100644 ---- a/drivers/input/evdev.c -+++ b/drivers/input/evdev.c -@@ -46,6 +46,7 @@ struct evdev_client { - struct fasync_struct *fasync; - struct evdev *evdev; - struct list_head node; -+ struct rcu_head rcu; - enum input_clock_type clk_type; - bool revoked; - unsigned long *evmasks[EV_CNT]; -@@ -377,13 +378,22 @@ static void evdev_attach_client(struct evdev *evdev, - spin_unlock(&evdev->client_lock); - } - -+static void evdev_reclaim_client(struct rcu_head *rp) -+{ -+ struct evdev_client *client = container_of(rp, struct evdev_client, rcu); -+ unsigned int i; -+ for (i = 0; i < EV_CNT; ++i) -+ bitmap_free(client->evmasks[i]); -+ kvfree(client); -+} -+ - static void evdev_detach_client(struct evdev *evdev, - struct evdev_client *client) - { - spin_lock(&evdev->client_lock); - list_del_rcu(&client->node); - spin_unlock(&evdev->client_lock); -- synchronize_rcu(); -+ call_rcu(&client->rcu, evdev_reclaim_client); - } - - static int evdev_open_device(struct evdev *evdev) -@@ -436,7 +446,6 @@ static int evdev_release(struct inode *inode, struct file *file) - { - struct evdev_client *client = file->private_data; - struct evdev *evdev = client->evdev; -- unsigned int i; - - mutex_lock(&evdev->mutex); - -@@ -448,11 +457,6 @@ static int evdev_release(struct inode *inode, struct file *file) - - evdev_detach_client(evdev, client); - -- for (i = 0; i < EV_CNT; ++i) -- bitmap_free(client->evmasks[i]); -- -- kvfree(client); -- - evdev_close_device(evdev); - - return 0; -@@ -495,7 +499,6 @@ static int evdev_open(struct inode *inode, struct file *file) - - err_free_client: - 
evdev_detach_client(evdev, client); -- kvfree(client); - return error; - } - -diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c -index 1b7a97cc3779..37e9e43908ab 100644 ---- a/drivers/md/dm-crypt.c -+++ b/drivers/md/dm-crypt.c -@@ -3284,6 +3284,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) - goto bad; - } - -+#ifdef CONFIG_CACHY -+ set_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags); -+ set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags); -+#endif -+ - ret = crypt_ctr_cipher(ti, argv[0], argv[1]); - if (ret < 0) - goto bad; -diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig -index 331b8e535e5b..80dabeebf580 100644 ---- a/drivers/media/v4l2-core/Kconfig -+++ b/drivers/media/v4l2-core/Kconfig -@@ -40,6 +40,11 @@ config VIDEO_TUNER - config V4L2_JPEG_HELPER - tristate - -+config V4L2_LOOPBACK -+ tristate "V4L2 loopback device" -+ help -+ V4L2 loopback device -+ - # Used by drivers that need v4l2-h264.ko - config V4L2_H264 - tristate -diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile -index 2177b9d63a8f..c179507cedc4 100644 ---- a/drivers/media/v4l2-core/Makefile -+++ b/drivers/media/v4l2-core/Makefile -@@ -33,5 +33,7 @@ obj-$(CONFIG_V4L2_JPEG_HELPER) += v4l2-jpeg.o - obj-$(CONFIG_V4L2_MEM2MEM_DEV) += v4l2-mem2mem.o - obj-$(CONFIG_V4L2_VP9) += v4l2-vp9.o - -+obj-$(CONFIG_V4L2_LOOPBACK) += v4l2loopback.o -+ - obj-$(CONFIG_VIDEO_TUNER) += tuner.o - obj-$(CONFIG_VIDEO_DEV) += v4l2-dv-timings.o videodev.o -diff --git a/drivers/media/v4l2-core/v4l2loopback.c b/drivers/media/v4l2-core/v4l2loopback.c -new file mode 100644 -index 000000000000..25cb1beb26e5 ---- /dev/null -+++ b/drivers/media/v4l2-core/v4l2loopback.c -@@ -0,0 +1,3184 @@ -+/* -*- c-file-style: "linux" -*- */ -+/* -+ * v4l2loopback.c -- video4linux2 loopback driver -+ * -+ * Copyright (C) 2005-2009 Vasily Levin (vasaka@gmail.com) -+ * Copyright (C) 2010-2023 IOhannes m zmoelnig (zmoelnig@iem.at) -+ * Copyright (C) 
2011 Stefan Diewald (stefan.diewald@mytum.de) -+ * Copyright (C) 2012 Anton Novikov (random.plant@gmail.com) -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include "v4l2loopback.h" -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) -+#error This module is not supported on kernels before 4.0.0. -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) -+#define strscpy strlcpy -+#endif -+ -+#if defined(timer_setup) && defined(from_timer) -+#define HAVE_TIMER_SETUP -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 7, 0) -+#define VFL_TYPE_VIDEO VFL_TYPE_GRABBER -+#endif -+ -+#define V4L2LOOPBACK_VERSION_CODE \ -+ KERNEL_VERSION(V4L2LOOPBACK_VERSION_MAJOR, V4L2LOOPBACK_VERSION_MINOR, \ -+ V4L2LOOPBACK_VERSION_BUGFIX) -+ -+MODULE_DESCRIPTION("V4L2 loopback video device"); -+MODULE_AUTHOR("Vasily Levin, " -+ "IOhannes m zmoelnig ," -+ "Stefan Diewald," -+ "Anton Novikov" -+ "et al."); -+#ifdef SNAPSHOT_VERSION -+MODULE_VERSION(__stringify(SNAPSHOT_VERSION)); -+#else -+MODULE_VERSION("" __stringify(V4L2LOOPBACK_VERSION_MAJOR) "." __stringify( -+ V4L2LOOPBACK_VERSION_MINOR) "." __stringify(V4L2LOOPBACK_VERSION_BUGFIX)); -+#endif -+MODULE_LICENSE("GPL"); -+ -+/* -+ * helpers -+ */ -+#define dprintk(fmt, args...) 
\ -+ do { \ -+ if (debug > 0) { \ -+ printk(KERN_INFO "v4l2-loopback[" __stringify( \ -+ __LINE__) "], pid(%d): " fmt, \ -+ task_pid_nr(current), ##args); \ -+ } \ -+ } while (0) -+ -+#define MARK() \ -+ do { \ -+ if (debug > 1) { \ -+ printk(KERN_INFO "%s:%d[%s], pid(%d)\n", __FILE__, \ -+ __LINE__, __func__, task_pid_nr(current)); \ -+ } \ -+ } while (0) -+ -+#define dprintkrw(fmt, args...) \ -+ do { \ -+ if (debug > 2) { \ -+ printk(KERN_INFO "v4l2-loopback[" __stringify( \ -+ __LINE__) "], pid(%d): " fmt, \ -+ task_pid_nr(current), ##args); \ -+ } \ -+ } while (0) -+ -+static inline void v4l2l_get_timestamp(struct v4l2_buffer *b) -+{ -+ struct timespec64 ts; -+ ktime_get_ts64(&ts); -+ -+ b->timestamp.tv_sec = ts.tv_sec; -+ b->timestamp.tv_usec = (ts.tv_nsec / NSEC_PER_USEC); -+ b->flags |= V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; -+} -+ -+#if BITS_PER_LONG == 32 -+#include /* do_div() for 64bit division */ -+static inline int v4l2l_mod64(const s64 A, const u32 B) -+{ -+ u64 a = (u64)A; -+ u32 b = B; -+ -+ if (A > 0) -+ return do_div(a, b); -+ a = -A; -+ return -do_div(a, b); -+} -+#else -+static inline int v4l2l_mod64(const s64 A, const u32 B) -+{ -+ return A % B; -+} -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) -+typedef unsigned __poll_t; -+#endif -+ -+/* module constants -+ * can be overridden during he build process using something like -+ * make KCPPFLAGS="-DMAX_DEVICES=100" -+ */ -+ -+/* maximum number of v4l2loopback devices that can be created */ -+#ifndef MAX_DEVICES -+#define MAX_DEVICES 8 -+#endif -+ -+/* whether the default is to announce capabilities exclusively or not */ -+#ifndef V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS -+#define V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS 0 -+#endif -+ -+/* when a producer is considered to have gone stale */ -+#ifndef MAX_TIMEOUT -+#define MAX_TIMEOUT (100 * 1000) /* in msecs */ -+#endif -+ -+/* max buffers that can be mapped, actually they -+ * are all mapped to max_buffers buffers */ -+#ifndef MAX_BUFFERS 
-+#define MAX_BUFFERS 32 -+#endif -+ -+/* module parameters */ -+static int debug = 0; -+module_param(debug, int, S_IRUGO | S_IWUSR); -+MODULE_PARM_DESC(debug, "debugging level (higher values == more verbose)"); -+ -+#define V4L2LOOPBACK_DEFAULT_MAX_BUFFERS 2 -+static int max_buffers = V4L2LOOPBACK_DEFAULT_MAX_BUFFERS; -+module_param(max_buffers, int, S_IRUGO); -+MODULE_PARM_DESC(max_buffers, -+ "how many buffers should be allocated [DEFAULT: " __stringify( -+ V4L2LOOPBACK_DEFAULT_MAX_BUFFERS) "]"); -+ -+/* how many times a device can be opened -+ * the per-module default value can be overridden on a per-device basis using -+ * the /sys/devices interface -+ * -+ * note that max_openers should be at least 2 in order to get a working system: -+ * one opener for the producer and one opener for the consumer -+ * however, we leave that to the user -+ */ -+#define V4L2LOOPBACK_DEFAULT_MAX_OPENERS 10 -+static int max_openers = V4L2LOOPBACK_DEFAULT_MAX_OPENERS; -+module_param(max_openers, int, S_IRUGO | S_IWUSR); -+MODULE_PARM_DESC( -+ max_openers, -+ "how many users can open the loopback device [DEFAULT: " __stringify( -+ V4L2LOOPBACK_DEFAULT_MAX_OPENERS) "]"); -+ -+static int devices = -1; -+module_param(devices, int, 0); -+MODULE_PARM_DESC(devices, "how many devices should be created"); -+ -+static int video_nr[MAX_DEVICES] = { [0 ...(MAX_DEVICES - 1)] = -1 }; -+module_param_array(video_nr, int, NULL, 0444); -+MODULE_PARM_DESC(video_nr, -+ "video device numbers (-1=auto, 0=/dev/video0, etc.)"); -+ -+static char *card_label[MAX_DEVICES]; -+module_param_array(card_label, charp, NULL, 0000); -+MODULE_PARM_DESC(card_label, "card labels for each device"); -+ -+static bool exclusive_caps[MAX_DEVICES] = { -+ [0 ...(MAX_DEVICES - 1)] = V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS -+}; -+module_param_array(exclusive_caps, bool, NULL, 0444); -+/* FIXXME: wording */ -+MODULE_PARM_DESC( -+ exclusive_caps, -+ "whether to announce OUTPUT/CAPTURE capabilities exclusively or not [DEFAULT: " 
__stringify( -+ V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS) "]"); -+ -+/* format specifications */ -+#define V4L2LOOPBACK_SIZE_MIN_WIDTH 2 -+#define V4L2LOOPBACK_SIZE_MIN_HEIGHT 1 -+#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH 8192 -+#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT 8192 -+ -+#define V4L2LOOPBACK_SIZE_DEFAULT_WIDTH 640 -+#define V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT 480 -+ -+static int max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH; -+module_param(max_width, int, S_IRUGO); -+MODULE_PARM_DESC(max_width, -+ "maximum allowed frame width [DEFAULT: " __stringify( -+ V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH) "]"); -+static int max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT; -+module_param(max_height, int, S_IRUGO); -+MODULE_PARM_DESC(max_height, -+ "maximum allowed frame height [DEFAULT: " __stringify( -+ V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT) "]"); -+ -+static DEFINE_IDR(v4l2loopback_index_idr); -+static DEFINE_MUTEX(v4l2loopback_ctl_mutex); -+ -+/* frame intervals */ -+#define V4L2LOOPBACK_FPS_MIN 0 -+#define V4L2LOOPBACK_FPS_MAX 1000 -+ -+/* control IDs */ -+#define V4L2LOOPBACK_CID_BASE (V4L2_CID_USER_BASE | 0xf000) -+#define CID_KEEP_FORMAT (V4L2LOOPBACK_CID_BASE + 0) -+#define CID_SUSTAIN_FRAMERATE (V4L2LOOPBACK_CID_BASE + 1) -+#define CID_TIMEOUT (V4L2LOOPBACK_CID_BASE + 2) -+#define CID_TIMEOUT_IMAGE_IO (V4L2LOOPBACK_CID_BASE + 3) -+ -+static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl); -+static const struct v4l2_ctrl_ops v4l2loopback_ctrl_ops = { -+ .s_ctrl = v4l2loopback_s_ctrl, -+}; -+static const struct v4l2_ctrl_config v4l2loopback_ctrl_keepformat = { -+ // clang-format off -+ .ops = &v4l2loopback_ctrl_ops, -+ .id = CID_KEEP_FORMAT, -+ .name = "keep_format", -+ .type = V4L2_CTRL_TYPE_BOOLEAN, -+ .min = 0, -+ .max = 1, -+ .step = 1, -+ .def = 0, -+ // clang-format on -+}; -+static const struct v4l2_ctrl_config v4l2loopback_ctrl_sustainframerate = { -+ // clang-format off -+ .ops = &v4l2loopback_ctrl_ops, -+ .id = CID_SUSTAIN_FRAMERATE, -+ .name = 
"sustain_framerate", -+ .type = V4L2_CTRL_TYPE_BOOLEAN, -+ .min = 0, -+ .max = 1, -+ .step = 1, -+ .def = 0, -+ // clang-format on -+}; -+static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeout = { -+ // clang-format off -+ .ops = &v4l2loopback_ctrl_ops, -+ .id = CID_TIMEOUT, -+ .name = "timeout", -+ .type = V4L2_CTRL_TYPE_INTEGER, -+ .min = 0, -+ .max = MAX_TIMEOUT, -+ .step = 1, -+ .def = 0, -+ // clang-format on -+}; -+static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeoutimageio = { -+ // clang-format off -+ .ops = &v4l2loopback_ctrl_ops, -+ .id = CID_TIMEOUT_IMAGE_IO, -+ .name = "timeout_image_io", -+ .type = V4L2_CTRL_TYPE_BUTTON, -+ .min = 0, -+ .max = 1, -+ .step = 1, -+ .def = 0, -+ // clang-format on -+}; -+ -+/* module structures */ -+struct v4l2loopback_private { -+ int device_nr; -+}; -+ -+/* TODO(vasaka) use typenames which are common to kernel, but first find out if -+ * it is needed */ -+/* struct keeping state and settings of loopback device */ -+ -+struct v4l2l_buffer { -+ struct v4l2_buffer buffer; -+ struct list_head list_head; -+ int use_count; -+}; -+ -+struct v4l2_loopback_device { -+ struct v4l2_device v4l2_dev; -+ struct v4l2_ctrl_handler ctrl_handler; -+ struct video_device *vdev; -+ /* pixel and stream format */ -+ struct v4l2_pix_format pix_format; -+ bool pix_format_has_valid_sizeimage; -+ struct v4l2_captureparm capture_param; -+ unsigned long frame_jiffies; -+ -+ /* ctrls */ -+ int keep_format; /* CID_KEEP_FORMAT; stay ready_for_capture even when all -+ openers close() the device */ -+ int sustain_framerate; /* CID_SUSTAIN_FRAMERATE; duplicate frames to maintain -+ (close to) nominal framerate */ -+ -+ /* buffers stuff */ -+ u8 *image; /* pointer to actual buffers data */ -+ unsigned long int imagesize; /* size of buffers data */ -+ int buffers_number; /* should not be big, 4 is a good choice */ -+ struct v4l2l_buffer buffers[MAX_BUFFERS]; /* inner driver buffers */ -+ int used_buffers; /* number of the actually used 
buffers */ -+ int max_openers; /* how many times can this device be opened */ -+ -+ s64 write_position; /* number of last written frame + 1 */ -+ struct list_head outbufs_list; /* buffers in output DQBUF order */ -+ int bufpos2index -+ [MAX_BUFFERS]; /* mapping of (read/write_position % used_buffers) -+ * to inner buffer index */ -+ long buffer_size; -+ -+ /* sustain_framerate stuff */ -+ struct timer_list sustain_timer; -+ unsigned int reread_count; -+ -+ /* timeout stuff */ -+ unsigned long timeout_jiffies; /* CID_TIMEOUT; 0 means disabled */ -+ int timeout_image_io; /* CID_TIMEOUT_IMAGE_IO; next opener will -+ * read/write to timeout_image */ -+ u8 *timeout_image; /* copy of it will be captured when timeout passes */ -+ struct v4l2l_buffer timeout_image_buffer; -+ struct timer_list timeout_timer; -+ int timeout_happened; -+ -+ /* sync stuff */ -+ atomic_t open_count; -+ -+ int ready_for_capture; /* set to the number of writers that opened the -+ * device and negotiated format. */ -+ int ready_for_output; /* set to true when no writer is currently attached -+ * this differs slightly from !ready_for_capture, -+ * e.g. when using fallback images */ -+ int active_readers; /* increase if any reader starts streaming */ -+ int announce_all_caps; /* set to false, if device caps (OUTPUT/CAPTURE) -+ * should only be announced if the resp. 
"ready" -+ * flag is set; default=TRUE */ -+ -+ int min_width, max_width; -+ int min_height, max_height; -+ -+ char card_label[32]; -+ -+ wait_queue_head_t read_event; -+ spinlock_t lock, list_lock; -+}; -+ -+/* types of opener shows what opener wants to do with loopback */ -+enum opener_type { -+ // clang-format off -+ UNNEGOTIATED = 0, -+ READER = 1, -+ WRITER = 2, -+ // clang-format on -+}; -+ -+/* struct keeping state and type of opener */ -+struct v4l2_loopback_opener { -+ enum opener_type type; -+ s64 read_position; /* number of last processed frame + 1 or -+ * write_position - 1 if reader went out of sync */ -+ unsigned int reread_count; -+ struct v4l2_buffer *buffers; -+ int buffers_number; /* should not be big, 4 is a good choice */ -+ int timeout_image_io; -+ -+ struct v4l2_fh fh; -+}; -+ -+#define fh_to_opener(ptr) container_of((ptr), struct v4l2_loopback_opener, fh) -+ -+/* this is heavily inspired by the bttv driver found in the linux kernel */ -+struct v4l2l_format { -+ char *name; -+ int fourcc; /* video4linux 2 */ -+ int depth; /* bit/pixel */ -+ int flags; -+}; -+/* set the v4l2l_format.flags to PLANAR for non-packed formats */ -+#define FORMAT_FLAGS_PLANAR 0x01 -+#define FORMAT_FLAGS_COMPRESSED 0x02 -+ -+#include "v4l2loopback_formats.h" -+ -+#ifndef V4L2_TYPE_IS_CAPTURE -+#define V4L2_TYPE_IS_CAPTURE(type) \ -+ ((type) == V4L2_BUF_TYPE_VIDEO_CAPTURE || \ -+ (type) == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) -+#endif /* V4L2_TYPE_IS_CAPTURE */ -+#ifndef V4L2_TYPE_IS_OUTPUT -+#define V4L2_TYPE_IS_OUTPUT(type) \ -+ ((type) == V4L2_BUF_TYPE_VIDEO_OUTPUT || \ -+ (type) == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) -+#endif /* V4L2_TYPE_IS_OUTPUT */ -+ -+/* whether the format can be changed */ -+/* the format is fixated if we -+ - have writers (ready_for_capture>0) -+ - and/or have readers (active_readers>0) -+*/ -+#define V4L2LOOPBACK_IS_FIXED_FMT(device) \ -+ (device->ready_for_capture > 0 || device->active_readers > 0 || \ -+ device->keep_format) -+ -+static 
const unsigned int FORMATS = ARRAY_SIZE(formats); -+ -+static char *fourcc2str(unsigned int fourcc, char buf[4]) -+{ -+ buf[0] = (fourcc >> 0) & 0xFF; -+ buf[1] = (fourcc >> 8) & 0xFF; -+ buf[2] = (fourcc >> 16) & 0xFF; -+ buf[3] = (fourcc >> 24) & 0xFF; -+ -+ return buf; -+} -+ -+static const struct v4l2l_format *format_by_fourcc(int fourcc) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < FORMATS; i++) { -+ if (formats[i].fourcc == fourcc) -+ return formats + i; -+ } -+ -+ dprintk("unsupported format '%c%c%c%c'\n", (fourcc >> 0) & 0xFF, -+ (fourcc >> 8) & 0xFF, (fourcc >> 16) & 0xFF, -+ (fourcc >> 24) & 0xFF); -+ return NULL; -+} -+ -+static void pix_format_set_size(struct v4l2_pix_format *f, -+ const struct v4l2l_format *fmt, -+ unsigned int width, unsigned int height) -+{ -+ f->width = width; -+ f->height = height; -+ -+ if (fmt->flags & FORMAT_FLAGS_PLANAR) { -+ f->bytesperline = width; /* Y plane */ -+ f->sizeimage = (width * height * fmt->depth) >> 3; -+ } else if (fmt->flags & FORMAT_FLAGS_COMPRESSED) { -+ /* doesn't make sense for compressed formats */ -+ f->bytesperline = 0; -+ f->sizeimage = (width * height * fmt->depth) >> 3; -+ } else { -+ f->bytesperline = (width * fmt->depth) >> 3; -+ f->sizeimage = height * f->bytesperline; -+ } -+} -+ -+static int v4l2l_fill_format(struct v4l2_format *fmt, int capture, -+ const u32 minwidth, const u32 maxwidth, -+ const u32 minheight, const u32 maxheight) -+{ -+ u32 width = fmt->fmt.pix.width, height = fmt->fmt.pix.height; -+ u32 pixelformat = fmt->fmt.pix.pixelformat; -+ struct v4l2_format fmt0 = *fmt; -+ u32 bytesperline = 0, sizeimage = 0; -+ if (!width) -+ width = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH; -+ if (!height) -+ height = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT; -+ if (width < minwidth) -+ width = minwidth; -+ if (width > maxwidth) -+ width = maxwidth; -+ if (height < minheight) -+ height = minheight; -+ if (height > maxheight) -+ height = maxheight; -+ -+ /* sets: width,height,pixelformat,bytesperline,sizeimage */ -+ 
if (!(V4L2_TYPE_IS_MULTIPLANAR(fmt0.type))) { -+ fmt0.fmt.pix.bytesperline = 0; -+ fmt0.fmt.pix.sizeimage = 0; -+ } -+ -+ if (0) { -+ ; -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) -+ } else if (!v4l2_fill_pixfmt(&fmt0.fmt.pix, pixelformat, width, -+ height)) { -+ ; -+ } else if (!v4l2_fill_pixfmt_mp(&fmt0.fmt.pix_mp, pixelformat, width, -+ height)) { -+ ; -+#endif -+ } else { -+ const struct v4l2l_format *format = -+ format_by_fourcc(pixelformat); -+ if (!format) -+ return -EINVAL; -+ pix_format_set_size(&fmt0.fmt.pix, format, width, height); -+ fmt0.fmt.pix.pixelformat = format->fourcc; -+ } -+ -+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt0.type)) { -+ *fmt = fmt0; -+ -+ if ((fmt->fmt.pix_mp.colorspace == V4L2_COLORSPACE_DEFAULT) || -+ (fmt->fmt.pix_mp.colorspace > V4L2_COLORSPACE_DCI_P3)) -+ fmt->fmt.pix_mp.colorspace = V4L2_COLORSPACE_SRGB; -+ if (V4L2_FIELD_ANY == fmt->fmt.pix_mp.field) -+ fmt->fmt.pix_mp.field = V4L2_FIELD_NONE; -+ if (capture) -+ fmt->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; -+ else -+ fmt->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; -+ } else { -+ bytesperline = fmt->fmt.pix.bytesperline; -+ sizeimage = fmt->fmt.pix.sizeimage; -+ -+ *fmt = fmt0; -+ -+ if (!fmt->fmt.pix.bytesperline) -+ fmt->fmt.pix.bytesperline = bytesperline; -+ if (!fmt->fmt.pix.sizeimage) -+ fmt->fmt.pix.sizeimage = sizeimage; -+ -+ if ((fmt->fmt.pix.colorspace == V4L2_COLORSPACE_DEFAULT) || -+ (fmt->fmt.pix.colorspace > V4L2_COLORSPACE_DCI_P3)) -+ fmt->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB; -+ if (V4L2_FIELD_ANY == fmt->fmt.pix.field) -+ fmt->fmt.pix.field = V4L2_FIELD_NONE; -+ if (capture) -+ fmt->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; -+ else -+ fmt->type = V4L2_BUF_TYPE_VIDEO_OUTPUT; -+ } -+ -+ return 0; -+} -+ -+/* Checks if v4l2l_fill_format() has set a valid, fixed sizeimage val. 
*/ -+static bool v4l2l_pix_format_has_valid_sizeimage(struct v4l2_format *fmt) -+{ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) -+ const struct v4l2_format_info *info; -+ -+ info = v4l2_format_info(fmt->fmt.pix.pixelformat); -+ if (info && info->mem_planes == 1) -+ return true; -+#endif -+ -+ return false; -+} -+ -+static int pix_format_eq(const struct v4l2_pix_format *ref, -+ const struct v4l2_pix_format *tgt, int strict) -+{ -+ /* check if the two formats are equivalent. -+ * ANY fields are handled gracefully -+ */ -+#define _pix_format_eq0(x) \ -+ if (ref->x != tgt->x) \ -+ result = 0 -+#define _pix_format_eq1(x, def) \ -+ do { \ -+ if ((def != tgt->x) && (ref->x != tgt->x)) { \ -+ printk(KERN_INFO #x " failed"); \ -+ result = 0; \ -+ } \ -+ } while (0) -+ int result = 1; -+ _pix_format_eq0(width); -+ _pix_format_eq0(height); -+ _pix_format_eq0(pixelformat); -+ if (!strict) -+ return result; -+ _pix_format_eq1(field, V4L2_FIELD_ANY); -+ _pix_format_eq0(bytesperline); -+ _pix_format_eq0(sizeimage); -+ _pix_format_eq1(colorspace, V4L2_COLORSPACE_DEFAULT); -+ return result; -+} -+ -+static struct v4l2_loopback_device *v4l2loopback_getdevice(struct file *f); -+static int inner_try_setfmt(struct file *file, struct v4l2_format *fmt) -+{ -+ int capture = V4L2_TYPE_IS_CAPTURE(fmt->type); -+ struct v4l2_loopback_device *dev; -+ int needschange = 0; -+ char buf[5]; -+ buf[4] = 0; -+ -+ dev = v4l2loopback_getdevice(file); -+ -+ needschange = !(pix_format_eq(&dev->pix_format, &fmt->fmt.pix, 0)); -+ if (V4L2LOOPBACK_IS_FIXED_FMT(dev)) { -+ fmt->fmt.pix = dev->pix_format; -+ if (needschange) { -+ if (dev->active_readers > 0 && capture) { -+ /* cannot call fmt_cap while there are readers */ -+ return -EBUSY; -+ } -+ if (dev->ready_for_capture > 0 && !capture) { -+ /* cannot call fmt_out while there are writers */ -+ return -EBUSY; -+ } -+ } -+ } -+ if (v4l2l_fill_format(fmt, capture, dev->min_width, dev->max_width, -+ dev->min_height, dev->max_height) != 0) { -+ return 
-EINVAL; -+ } -+ -+ if (1) { -+ char buf[5]; -+ buf[4] = 0; -+ dprintk("capFOURCC=%s\n", -+ fourcc2str(dev->pix_format.pixelformat, buf)); -+ } -+ return 0; -+} -+ -+static int set_timeperframe(struct v4l2_loopback_device *dev, -+ struct v4l2_fract *tpf) -+{ -+ if ((tpf->denominator < 1) || (tpf->numerator < 1)) { -+ return -EINVAL; -+ } -+ dev->capture_param.timeperframe = *tpf; -+ dev->frame_jiffies = max(1UL, msecs_to_jiffies(1000) * tpf->numerator / -+ tpf->denominator); -+ return 0; -+} -+ -+static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd); -+ -+/* device attributes */ -+/* available via sysfs: /sys/devices/virtual/video4linux/video* */ -+ -+static ssize_t attr_show_format(struct device *cd, -+ struct device_attribute *attr, char *buf) -+{ -+ /* gets the current format as "FOURCC:WxH@f/s", e.g. "YUYV:320x240@1000/30" */ -+ struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); -+ const struct v4l2_fract *tpf; -+ char buf4cc[5], buf_fps[32]; -+ -+ if (!dev || !V4L2LOOPBACK_IS_FIXED_FMT(dev)) -+ return 0; -+ tpf = &dev->capture_param.timeperframe; -+ -+ fourcc2str(dev->pix_format.pixelformat, buf4cc); -+ buf4cc[4] = 0; -+ if (tpf->numerator == 1) -+ snprintf(buf_fps, sizeof(buf_fps), "%d", tpf->denominator); -+ else -+ snprintf(buf_fps, sizeof(buf_fps), "%d/%d", tpf->denominator, -+ tpf->numerator); -+ return sprintf(buf, "%4s:%dx%d@%s\n", buf4cc, dev->pix_format.width, -+ dev->pix_format.height, buf_fps); -+} -+ -+static ssize_t attr_store_format(struct device *cd, -+ struct device_attribute *attr, const char *buf, -+ size_t len) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); -+ int fps_num = 0, fps_den = 1; -+ -+ if (!dev) -+ return -ENODEV; -+ -+ /* only fps changing is supported */ -+ if (sscanf(buf, "@%d/%d", &fps_num, &fps_den) > 0) { -+ struct v4l2_fract f = { .numerator = fps_den, -+ .denominator = fps_num }; -+ int err = 0; -+ if ((err = set_timeperframe(dev, &f)) < 0) -+ return err; -+ return len; -+ } 
-+ return -EINVAL; -+} -+ -+static DEVICE_ATTR(format, S_IRUGO | S_IWUSR, attr_show_format, -+ attr_store_format); -+ -+static ssize_t attr_show_buffers(struct device *cd, -+ struct device_attribute *attr, char *buf) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); -+ -+ if (!dev) -+ return -ENODEV; -+ -+ return sprintf(buf, "%d\n", dev->used_buffers); -+} -+ -+static DEVICE_ATTR(buffers, S_IRUGO, attr_show_buffers, NULL); -+ -+static ssize_t attr_show_maxopeners(struct device *cd, -+ struct device_attribute *attr, char *buf) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); -+ -+ if (!dev) -+ return -ENODEV; -+ -+ return sprintf(buf, "%d\n", dev->max_openers); -+} -+ -+static ssize_t attr_store_maxopeners(struct device *cd, -+ struct device_attribute *attr, -+ const char *buf, size_t len) -+{ -+ struct v4l2_loopback_device *dev = NULL; -+ unsigned long curr = 0; -+ -+ if (kstrtoul(buf, 0, &curr)) -+ return -EINVAL; -+ -+ dev = v4l2loopback_cd2dev(cd); -+ if (!dev) -+ return -ENODEV; -+ -+ if (dev->max_openers == curr) -+ return len; -+ -+ if (curr > __INT_MAX__ || dev->open_count.counter > curr) { -+ /* request to limit to less openers as are currently attached to us */ -+ return -EINVAL; -+ } -+ -+ dev->max_openers = (int)curr; -+ -+ return len; -+} -+ -+static DEVICE_ATTR(max_openers, S_IRUGO | S_IWUSR, attr_show_maxopeners, -+ attr_store_maxopeners); -+ -+static ssize_t attr_show_state(struct device *cd, struct device_attribute *attr, -+ char *buf) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); -+ -+ if (!dev) -+ return -ENODEV; -+ -+ if (dev->ready_for_capture) -+ return sprintf(buf, "capture\n"); -+ if (dev->ready_for_output) -+ return sprintf(buf, "output\n"); -+ -+ return -EAGAIN; -+} -+ -+static DEVICE_ATTR(state, S_IRUGO, attr_show_state, NULL); -+ -+static void v4l2loopback_remove_sysfs(struct video_device *vdev) -+{ -+#define V4L2_SYSFS_DESTROY(x) device_remove_file(&vdev->dev, &dev_attr_##x) -+ -+ 
if (vdev) { -+ V4L2_SYSFS_DESTROY(format); -+ V4L2_SYSFS_DESTROY(buffers); -+ V4L2_SYSFS_DESTROY(max_openers); -+ V4L2_SYSFS_DESTROY(state); -+ /* ... */ -+ } -+} -+ -+static void v4l2loopback_create_sysfs(struct video_device *vdev) -+{ -+ int res = 0; -+ -+#define V4L2_SYSFS_CREATE(x) \ -+ res = device_create_file(&vdev->dev, &dev_attr_##x); \ -+ if (res < 0) \ -+ break -+ if (!vdev) -+ return; -+ do { -+ V4L2_SYSFS_CREATE(format); -+ V4L2_SYSFS_CREATE(buffers); -+ V4L2_SYSFS_CREATE(max_openers); -+ V4L2_SYSFS_CREATE(state); -+ /* ... */ -+ } while (0); -+ -+ if (res >= 0) -+ return; -+ dev_err(&vdev->dev, "%s error: %d\n", __func__, res); -+} -+ -+/* Event APIs */ -+ -+#define V4L2LOOPBACK_EVENT_BASE (V4L2_EVENT_PRIVATE_START) -+#define V4L2LOOPBACK_EVENT_OFFSET 0x08E00000 -+#define V4L2_EVENT_PRI_CLIENT_USAGE \ -+ (V4L2LOOPBACK_EVENT_BASE + V4L2LOOPBACK_EVENT_OFFSET + 1) -+ -+struct v4l2_event_client_usage { -+ __u32 count; -+}; -+ -+/* global module data */ -+/* find a device based on it's device-number (e.g. 
'3' for /dev/video3) */ -+struct v4l2loopback_lookup_cb_data { -+ int device_nr; -+ struct v4l2_loopback_device *device; -+}; -+static int v4l2loopback_lookup_cb(int id, void *ptr, void *data) -+{ -+ struct v4l2_loopback_device *device = ptr; -+ struct v4l2loopback_lookup_cb_data *cbdata = data; -+ if (cbdata && device && device->vdev) { -+ if (device->vdev->num == cbdata->device_nr) { -+ cbdata->device = device; -+ cbdata->device_nr = id; -+ return 1; -+ } -+ } -+ return 0; -+} -+static int v4l2loopback_lookup(int device_nr, -+ struct v4l2_loopback_device **device) -+{ -+ struct v4l2loopback_lookup_cb_data data = { -+ .device_nr = device_nr, -+ .device = NULL, -+ }; -+ int err = idr_for_each(&v4l2loopback_index_idr, &v4l2loopback_lookup_cb, -+ &data); -+ if (1 == err) { -+ if (device) -+ *device = data.device; -+ return data.device_nr; -+ } -+ return -ENODEV; -+} -+static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd) -+{ -+ struct video_device *loopdev = to_video_device(cd); -+ struct v4l2loopback_private *ptr = -+ (struct v4l2loopback_private *)video_get_drvdata(loopdev); -+ int nr = ptr->device_nr; -+ -+ return idr_find(&v4l2loopback_index_idr, nr); -+} -+ -+static struct v4l2_loopback_device *v4l2loopback_getdevice(struct file *f) -+{ -+ struct v4l2loopback_private *ptr = video_drvdata(f); -+ int nr = ptr->device_nr; -+ -+ return idr_find(&v4l2loopback_index_idr, nr); -+} -+ -+/* forward declarations */ -+static void client_usage_queue_event(struct video_device *vdev); -+static void init_buffers(struct v4l2_loopback_device *dev); -+static int allocate_buffers(struct v4l2_loopback_device *dev); -+static void free_buffers(struct v4l2_loopback_device *dev); -+static void try_free_buffers(struct v4l2_loopback_device *dev); -+static int allocate_timeout_image(struct v4l2_loopback_device *dev); -+static void check_timers(struct v4l2_loopback_device *dev); -+static const struct v4l2_file_operations v4l2_loopback_fops; -+static const struct 
v4l2_ioctl_ops v4l2_loopback_ioctl_ops; -+ -+/* Queue helpers */ -+/* next functions sets buffer flags and adjusts counters accordingly */ -+static inline void set_done(struct v4l2l_buffer *buffer) -+{ -+ buffer->buffer.flags &= ~V4L2_BUF_FLAG_QUEUED; -+ buffer->buffer.flags |= V4L2_BUF_FLAG_DONE; -+} -+ -+static inline void set_queued(struct v4l2l_buffer *buffer) -+{ -+ buffer->buffer.flags &= ~V4L2_BUF_FLAG_DONE; -+ buffer->buffer.flags |= V4L2_BUF_FLAG_QUEUED; -+} -+ -+static inline void unset_flags(struct v4l2l_buffer *buffer) -+{ -+ buffer->buffer.flags &= ~V4L2_BUF_FLAG_QUEUED; -+ buffer->buffer.flags &= ~V4L2_BUF_FLAG_DONE; -+} -+ -+/* V4L2 ioctl caps and params calls */ -+/* returns device capabilities -+ * called on VIDIOC_QUERYCAP -+ */ -+static int vidioc_querycap(struct file *file, void *priv, -+ struct v4l2_capability *cap) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ int device_nr = -+ ((struct v4l2loopback_private *)video_get_drvdata(dev->vdev)) -+ ->device_nr; -+ __u32 capabilities = V4L2_CAP_STREAMING | V4L2_CAP_READWRITE; -+ -+ strscpy(cap->driver, "v4l2 loopback", sizeof(cap->driver)); -+ snprintf(cap->card, sizeof(cap->card), "%s", dev->card_label); -+ snprintf(cap->bus_info, sizeof(cap->bus_info), -+ "platform:v4l2loopback-%03d", device_nr); -+ -+ if (dev->announce_all_caps) { -+ capabilities |= V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT; -+ } else { -+ if (dev->ready_for_capture) { -+ capabilities |= V4L2_CAP_VIDEO_CAPTURE; -+ } -+ if (dev->ready_for_output) { -+ capabilities |= V4L2_CAP_VIDEO_OUTPUT; -+ } -+ } -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) -+ dev->vdev->device_caps = -+#endif /* >=linux-4.7.0 */ -+ cap->device_caps = cap->capabilities = capabilities; -+ -+ cap->capabilities |= V4L2_CAP_DEVICE_CAPS; -+ -+ memset(cap->reserved, 0, sizeof(cap->reserved)); -+ return 0; -+} -+ -+static int vidioc_enum_framesizes(struct file *file, void *fh, -+ struct v4l2_frmsizeenum *argp) -+{ -+ struct 
v4l2_loopback_device *dev; -+ -+ /* there can be only one... */ -+ if (argp->index) -+ return -EINVAL; -+ -+ dev = v4l2loopback_getdevice(file); -+ if (V4L2LOOPBACK_IS_FIXED_FMT(dev)) { -+ /* format has already been negotiated -+ * cannot change during runtime -+ */ -+ if (argp->pixel_format != dev->pix_format.pixelformat) -+ return -EINVAL; -+ -+ argp->type = V4L2_FRMSIZE_TYPE_DISCRETE; -+ -+ argp->discrete.width = dev->pix_format.width; -+ argp->discrete.height = dev->pix_format.height; -+ } else { -+ /* if the format has not been negotiated yet, we accept anything -+ */ -+ if (NULL == format_by_fourcc(argp->pixel_format)) -+ return -EINVAL; -+ -+ if (dev->min_width == dev->max_width && -+ dev->min_height == dev->max_height) { -+ argp->type = V4L2_FRMSIZE_TYPE_DISCRETE; -+ -+ argp->discrete.width = dev->min_width; -+ argp->discrete.height = dev->min_height; -+ } else { -+ argp->type = V4L2_FRMSIZE_TYPE_CONTINUOUS; -+ -+ argp->stepwise.min_width = dev->min_width; -+ argp->stepwise.min_height = dev->min_height; -+ -+ argp->stepwise.max_width = dev->max_width; -+ argp->stepwise.max_height = dev->max_height; -+ -+ argp->stepwise.step_width = 1; -+ argp->stepwise.step_height = 1; -+ } -+ } -+ return 0; -+} -+ -+/* returns frameinterval (fps) for the set resolution -+ * called on VIDIOC_ENUM_FRAMEINTERVALS -+ */ -+static int vidioc_enum_frameintervals(struct file *file, void *fh, -+ struct v4l2_frmivalenum *argp) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ -+ /* there can be only one... 
*/ -+ if (argp->index) -+ return -EINVAL; -+ -+ if (V4L2LOOPBACK_IS_FIXED_FMT(dev)) { -+ if (argp->width != dev->pix_format.width || -+ argp->height != dev->pix_format.height || -+ argp->pixel_format != dev->pix_format.pixelformat) -+ return -EINVAL; -+ -+ argp->type = V4L2_FRMIVAL_TYPE_DISCRETE; -+ argp->discrete = dev->capture_param.timeperframe; -+ } else { -+ if (argp->width < dev->min_width || -+ argp->width > dev->max_width || -+ argp->height < dev->min_height || -+ argp->height > dev->max_height || -+ NULL == format_by_fourcc(argp->pixel_format)) -+ return -EINVAL; -+ -+ argp->type = V4L2_FRMIVAL_TYPE_CONTINUOUS; -+ argp->stepwise.min.numerator = 1; -+ argp->stepwise.min.denominator = V4L2LOOPBACK_FPS_MAX; -+ argp->stepwise.max.numerator = 1; -+ argp->stepwise.max.denominator = V4L2LOOPBACK_FPS_MIN; -+ argp->stepwise.step.numerator = 1; -+ argp->stepwise.step.denominator = 1; -+ } -+ -+ return 0; -+} -+ -+/* ------------------ CAPTURE ----------------------- */ -+ -+/* returns device formats -+ * called on VIDIOC_ENUM_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE -+ */ -+static int vidioc_enum_fmt_cap(struct file *file, void *fh, -+ struct v4l2_fmtdesc *f) -+{ -+ struct v4l2_loopback_device *dev; -+ const struct v4l2l_format *fmt; -+ MARK(); -+ -+ dev = v4l2loopback_getdevice(file); -+ -+ if (f->index) -+ return -EINVAL; -+ -+ if (V4L2LOOPBACK_IS_FIXED_FMT(dev)) { -+ /* format has been fixed, so only one single format is supported */ -+ const __u32 format = dev->pix_format.pixelformat; -+ -+ if ((fmt = format_by_fourcc(format))) { -+ snprintf(f->description, sizeof(f->description), "%s", -+ fmt->name); -+ } else { -+ snprintf(f->description, sizeof(f->description), -+ "[%c%c%c%c]", (format >> 0) & 0xFF, -+ (format >> 8) & 0xFF, (format >> 16) & 0xFF, -+ (format >> 24) & 0xFF); -+ } -+ -+ f->pixelformat = dev->pix_format.pixelformat; -+ } else { -+ return -EINVAL; -+ } -+ f->flags = 0; -+ MARK(); -+ return 0; -+} -+ -+/* returns current video 
format -+ * called on VIDIOC_G_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE -+ */ -+static int vidioc_g_fmt_cap(struct file *file, void *priv, -+ struct v4l2_format *fmt) -+{ -+ struct v4l2_loopback_device *dev; -+ MARK(); -+ -+ dev = v4l2loopback_getdevice(file); -+ if (!dev->ready_for_capture && !dev->ready_for_output) -+ return -EINVAL; -+ -+ fmt->fmt.pix = dev->pix_format; -+ MARK(); -+ return 0; -+} -+ -+/* checks if it is OK to change to format fmt; -+ * actual check is done by inner_try_setfmt -+ * just checking that pixelformat is OK and set other parameters, app should -+ * obey this decision -+ * called on VIDIOC_TRY_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE -+ */ -+static int vidioc_try_fmt_cap(struct file *file, void *priv, -+ struct v4l2_format *fmt) -+{ -+ int ret = 0; -+ if (!V4L2_TYPE_IS_CAPTURE(fmt->type)) -+ return -EINVAL; -+ ret = inner_try_setfmt(file, fmt); -+ if (-EBUSY == ret) -+ return 0; -+ return ret; -+} -+ -+/* sets new output format, if possible -+ * actually format is set by input and we even do not check it, just return -+ * current one, but it is possible to set subregions of input TODO(vasaka) -+ * called on VIDIOC_S_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE -+ */ -+static int vidioc_s_fmt_cap(struct file *file, void *priv, -+ struct v4l2_format *fmt) -+{ -+ int ret; -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ if (!V4L2_TYPE_IS_CAPTURE(fmt->type)) -+ return -EINVAL; -+ ret = inner_try_setfmt(file, fmt); -+ if (!ret) { -+ dev->pix_format = fmt->fmt.pix; -+ } -+ return ret; -+} -+ -+/* ------------------ OUTPUT ----------------------- */ -+ -+/* returns device formats; -+ * LATER: allow all formats -+ * called on VIDIOC_ENUM_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT -+ */ -+static int vidioc_enum_fmt_out(struct file *file, void *fh, -+ struct v4l2_fmtdesc *f) -+{ -+ struct v4l2_loopback_device *dev; -+ const struct v4l2l_format *fmt; -+ -+ dev 
= v4l2loopback_getdevice(file); -+ -+ if (V4L2LOOPBACK_IS_FIXED_FMT(dev)) { -+ /* format has been fixed, so only one single format is supported */ -+ const __u32 format = dev->pix_format.pixelformat; -+ -+ if (f->index) -+ return -EINVAL; -+ -+ if ((fmt = format_by_fourcc(format))) { -+ snprintf(f->description, sizeof(f->description), "%s", -+ fmt->name); -+ } else { -+ snprintf(f->description, sizeof(f->description), -+ "[%c%c%c%c]", (format >> 0) & 0xFF, -+ (format >> 8) & 0xFF, (format >> 16) & 0xFF, -+ (format >> 24) & 0xFF); -+ } -+ -+ f->pixelformat = dev->pix_format.pixelformat; -+ } else { -+ /* fill in a dummy format */ -+ /* coverity[unsigned_compare] */ -+ if (f->index < 0 || f->index >= FORMATS) -+ return -EINVAL; -+ -+ fmt = &formats[f->index]; -+ -+ f->pixelformat = fmt->fourcc; -+ snprintf(f->description, sizeof(f->description), "%s", -+ fmt->name); -+ } -+ f->flags = 0; -+ -+ return 0; -+} -+ -+/* returns current video format format fmt */ -+/* NOTE: this is called from the producer -+ * so if format has not been negotiated yet, -+ * it should return ALL of available formats, -+ * called on VIDIOC_G_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT -+ */ -+static int vidioc_g_fmt_out(struct file *file, void *priv, -+ struct v4l2_format *fmt) -+{ -+ struct v4l2_loopback_device *dev; -+ MARK(); -+ -+ dev = v4l2loopback_getdevice(file); -+ -+ /* -+ * LATER: this should return the currently valid format -+ * gstreamer doesn't like it, if this returns -EINVAL, as it -+ * then concludes that there is _no_ valid format -+ * CHECK whether this assumption is wrong, -+ * or whether we have to always provide a valid format -+ */ -+ -+ fmt->fmt.pix = dev->pix_format; -+ return 0; -+} -+ -+/* checks if it is OK to change to format fmt; -+ * if format is negotiated do not change it -+ * called on VIDIOC_TRY_FMT with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT -+ */ -+static int vidioc_try_fmt_out(struct file *file, void *priv, -+ struct v4l2_format 
*fmt) -+{ -+ int ret = 0; -+ if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) -+ return -EINVAL; -+ ret = inner_try_setfmt(file, fmt); -+ if (-EBUSY == ret) -+ return 0; -+ return ret; -+} -+ -+/* sets new output format, if possible; -+ * allocate data here because we do not know if it will be streaming or -+ * read/write IO -+ * called on VIDIOC_S_FMT with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT -+ */ -+static int vidioc_s_fmt_out(struct file *file, void *priv, -+ struct v4l2_format *fmt) -+{ -+ struct v4l2_loopback_device *dev; -+ int ret; -+ char buf[5]; -+ buf[4] = 0; -+ if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) -+ return -EINVAL; -+ dev = v4l2loopback_getdevice(file); -+ -+ ret = inner_try_setfmt(file, fmt); -+ if (!ret) { -+ dev->pix_format = fmt->fmt.pix; -+ dev->pix_format_has_valid_sizeimage = -+ v4l2l_pix_format_has_valid_sizeimage(fmt); -+ dprintk("s_fmt_out(%d) %d...%d\n", ret, dev->ready_for_capture, -+ dev->pix_format.sizeimage); -+ dprintk("outFOURCC=%s\n", -+ fourcc2str(dev->pix_format.pixelformat, buf)); -+ -+ if (!dev->ready_for_capture) { -+ dev->buffer_size = -+ PAGE_ALIGN(dev->pix_format.sizeimage); -+ // JMZ: TODO get rid of the next line -+ fmt->fmt.pix.sizeimage = dev->buffer_size; -+ ret = allocate_buffers(dev); -+ } -+ } -+ return ret; -+} -+ -+// #define V4L2L_OVERLAY -+#ifdef V4L2L_OVERLAY -+/* ------------------ OVERLAY ----------------------- */ -+/* currently unsupported */ -+/* GSTreamer's v4l2sink is buggy, as it requires the overlay to work -+ * while it should only require it, if overlay is requested -+ * once the gstreamer element is fixed, remove the overlay dummies -+ */ -+#warning OVERLAY dummies -+static int vidioc_g_fmt_overlay(struct file *file, void *priv, -+ struct v4l2_format *fmt) -+{ -+ return 0; -+} -+ -+static int vidioc_s_fmt_overlay(struct file *file, void *priv, -+ struct v4l2_format *fmt) -+{ -+ return 0; -+} -+#endif /* V4L2L_OVERLAY */ -+ -+/* ------------------ PARAMs ----------------------- */ -+ -+/* get some data 
flow parameters, only capability, fps and readbuffers has -+ * effect on this driver -+ * called on VIDIOC_G_PARM -+ */ -+static int vidioc_g_parm(struct file *file, void *priv, -+ struct v4l2_streamparm *parm) -+{ -+ /* do not care about type of opener, hope these enums would always be -+ * compatible */ -+ struct v4l2_loopback_device *dev; -+ MARK(); -+ -+ dev = v4l2loopback_getdevice(file); -+ parm->parm.capture = dev->capture_param; -+ return 0; -+} -+ -+/* get some data flow parameters, only capability, fps and readbuffers has -+ * effect on this driver -+ * called on VIDIOC_S_PARM -+ */ -+static int vidioc_s_parm(struct file *file, void *priv, -+ struct v4l2_streamparm *parm) -+{ -+ struct v4l2_loopback_device *dev; -+ int err = 0; -+ MARK(); -+ -+ dev = v4l2loopback_getdevice(file); -+ dprintk("vidioc_s_parm called frate=%d/%d\n", -+ parm->parm.capture.timeperframe.numerator, -+ parm->parm.capture.timeperframe.denominator); -+ -+ switch (parm->type) { -+ case V4L2_BUF_TYPE_VIDEO_CAPTURE: -+ if ((err = set_timeperframe( -+ dev, &parm->parm.capture.timeperframe)) < 0) -+ return err; -+ break; -+ case V4L2_BUF_TYPE_VIDEO_OUTPUT: -+ if ((err = set_timeperframe( -+ dev, &parm->parm.capture.timeperframe)) < 0) -+ return err; -+ break; -+ default: -+ return -1; -+ } -+ -+ parm->parm.capture = dev->capture_param; -+ return 0; -+} -+ -+#ifdef V4L2LOOPBACK_WITH_STD -+/* sets a tv standard, actually we do not need to handle this any special way -+ * added to support effecttv -+ * called on VIDIOC_S_STD -+ */ -+static int vidioc_s_std(struct file *file, void *fh, v4l2_std_id *_std) -+{ -+ v4l2_std_id req_std = 0, supported_std = 0; -+ const v4l2_std_id all_std = V4L2_STD_ALL, no_std = 0; -+ -+ if (_std) { -+ req_std = *_std; -+ *_std = all_std; -+ } -+ -+ /* we support everything in V4L2_STD_ALL, but not more... 
*/ -+ supported_std = (all_std & req_std); -+ if (no_std == supported_std) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+/* gets a fake video standard -+ * called on VIDIOC_G_STD -+ */ -+static int vidioc_g_std(struct file *file, void *fh, v4l2_std_id *norm) -+{ -+ if (norm) -+ *norm = V4L2_STD_ALL; -+ return 0; -+} -+/* gets a fake video standard -+ * called on VIDIOC_QUERYSTD -+ */ -+static int vidioc_querystd(struct file *file, void *fh, v4l2_std_id *norm) -+{ -+ if (norm) -+ *norm = V4L2_STD_ALL; -+ return 0; -+} -+#endif /* V4L2LOOPBACK_WITH_STD */ -+ -+static int v4l2loopback_set_ctrl(struct v4l2_loopback_device *dev, u32 id, -+ s64 val) -+{ -+ switch (id) { -+ case CID_KEEP_FORMAT: -+ if (val < 0 || val > 1) -+ return -EINVAL; -+ dev->keep_format = val; -+ try_free_buffers( -+ dev); /* will only free buffers if !keep_format */ -+ break; -+ case CID_SUSTAIN_FRAMERATE: -+ if (val < 0 || val > 1) -+ return -EINVAL; -+ spin_lock_bh(&dev->lock); -+ dev->sustain_framerate = val; -+ check_timers(dev); -+ spin_unlock_bh(&dev->lock); -+ break; -+ case CID_TIMEOUT: -+ if (val < 0 || val > MAX_TIMEOUT) -+ return -EINVAL; -+ spin_lock_bh(&dev->lock); -+ dev->timeout_jiffies = msecs_to_jiffies(val); -+ check_timers(dev); -+ spin_unlock_bh(&dev->lock); -+ allocate_timeout_image(dev); -+ break; -+ case CID_TIMEOUT_IMAGE_IO: -+ dev->timeout_image_io = 1; -+ break; -+ default: -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl) -+{ -+ struct v4l2_loopback_device *dev = container_of( -+ ctrl->handler, struct v4l2_loopback_device, ctrl_handler); -+ return v4l2loopback_set_ctrl(dev, ctrl->id, ctrl->val); -+} -+ -+/* returns set of device outputs, in our case there is only one -+ * called on VIDIOC_ENUMOUTPUT -+ */ -+static int vidioc_enum_output(struct file *file, void *fh, -+ struct v4l2_output *outp) -+{ -+ __u32 index = outp->index; -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ MARK(); -+ -+ if 
(!dev->announce_all_caps && !dev->ready_for_output) -+ return -ENOTTY; -+ -+ if (0 != index) -+ return -EINVAL; -+ -+ /* clear all data (including the reserved fields) */ -+ memset(outp, 0, sizeof(*outp)); -+ -+ outp->index = index; -+ strscpy(outp->name, "loopback in", sizeof(outp->name)); -+ outp->type = V4L2_OUTPUT_TYPE_ANALOG; -+ outp->audioset = 0; -+ outp->modulator = 0; -+#ifdef V4L2LOOPBACK_WITH_STD -+ outp->std = V4L2_STD_ALL; -+#ifdef V4L2_OUT_CAP_STD -+ outp->capabilities |= V4L2_OUT_CAP_STD; -+#endif /* V4L2_OUT_CAP_STD */ -+#endif /* V4L2LOOPBACK_WITH_STD */ -+ -+ return 0; -+} -+ -+/* which output is currently active, -+ * called on VIDIOC_G_OUTPUT -+ */ -+static int vidioc_g_output(struct file *file, void *fh, unsigned int *i) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ if (!dev->announce_all_caps && !dev->ready_for_output) -+ return -ENOTTY; -+ if (i) -+ *i = 0; -+ return 0; -+} -+ -+/* set output, can make sense if we have more than one video src, -+ * called on VIDIOC_S_OUTPUT -+ */ -+static int vidioc_s_output(struct file *file, void *fh, unsigned int i) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ if (!dev->announce_all_caps && !dev->ready_for_output) -+ return -ENOTTY; -+ -+ if (i) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+/* returns set of device inputs, in our case there is only one, -+ * but later I may add more -+ * called on VIDIOC_ENUMINPUT -+ */ -+static int vidioc_enum_input(struct file *file, void *fh, -+ struct v4l2_input *inp) -+{ -+ struct v4l2_loopback_device *dev; -+ __u32 index = inp->index; -+ MARK(); -+ -+ if (0 != index) -+ return -EINVAL; -+ -+ /* clear all data (including the reserved fields) */ -+ memset(inp, 0, sizeof(*inp)); -+ -+ inp->index = index; -+ strscpy(inp->name, "loopback", sizeof(inp->name)); -+ inp->type = V4L2_INPUT_TYPE_CAMERA; -+ inp->audioset = 0; -+ inp->tuner = 0; -+ inp->status = 0; -+ -+#ifdef V4L2LOOPBACK_WITH_STD -+ inp->std = 
V4L2_STD_ALL; -+#ifdef V4L2_IN_CAP_STD -+ inp->capabilities |= V4L2_IN_CAP_STD; -+#endif -+#endif /* V4L2LOOPBACK_WITH_STD */ -+ -+ dev = v4l2loopback_getdevice(file); -+ if (!dev->ready_for_capture) { -+ inp->status |= V4L2_IN_ST_NO_SIGNAL; -+ } -+ -+ return 0; -+} -+ -+/* which input is currently active, -+ * called on VIDIOC_G_INPUT -+ */ -+static int vidioc_g_input(struct file *file, void *fh, unsigned int *i) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ if (!dev->announce_all_caps && !dev->ready_for_capture) -+ return -ENOTTY; -+ if (i) -+ *i = 0; -+ return 0; -+} -+ -+/* set input, can make sense if we have more than one video src, -+ * called on VIDIOC_S_INPUT -+ */ -+static int vidioc_s_input(struct file *file, void *fh, unsigned int i) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ if (!dev->announce_all_caps && !dev->ready_for_capture) -+ return -ENOTTY; -+ if (i == 0) -+ return 0; -+ return -EINVAL; -+} -+ -+/* --------------- V4L2 ioctl buffer related calls ----------------- */ -+ -+/* negotiate buffer type -+ * only mmap streaming supported -+ * called on VIDIOC_REQBUFS -+ */ -+static int vidioc_reqbufs(struct file *file, void *fh, -+ struct v4l2_requestbuffers *b) -+{ -+ struct v4l2_loopback_device *dev; -+ struct v4l2_loopback_opener *opener; -+ int i; -+ MARK(); -+ -+ dev = v4l2loopback_getdevice(file); -+ opener = fh_to_opener(fh); -+ -+ dprintk("reqbufs: %d\t%d=%d\n", b->memory, b->count, -+ dev->buffers_number); -+ -+ if (opener->timeout_image_io) { -+ dev->timeout_image_io = 0; -+ if (b->memory != V4L2_MEMORY_MMAP) -+ return -EINVAL; -+ b->count = 2; -+ return 0; -+ } -+ -+ if (V4L2_TYPE_IS_OUTPUT(b->type) && (!dev->ready_for_output)) { -+ return -EBUSY; -+ } -+ -+ init_buffers(dev); -+ switch (b->memory) { -+ case V4L2_MEMORY_MMAP: -+ /* do nothing here, buffers are always allocated */ -+ if (b->count < 1 || dev->buffers_number < 1) -+ return 0; -+ -+ if (b->count > dev->buffers_number) 
-+ b->count = dev->buffers_number; -+ -+ /* make sure that outbufs_list contains buffers from 0 to used_buffers-1 -+ * actually, it will have been already populated via v4l2_loopback_init() -+ * at this point */ -+ if (list_empty(&dev->outbufs_list)) { -+ for (i = 0; i < dev->used_buffers; ++i) -+ list_add_tail(&dev->buffers[i].list_head, -+ &dev->outbufs_list); -+ } -+ -+ /* also, if dev->used_buffers is going to be decreased, we should remove -+ * out-of-range buffers from outbufs_list, and fix bufpos2index mapping */ -+ if (b->count < dev->used_buffers) { -+ struct v4l2l_buffer *pos, *n; -+ -+ list_for_each_entry_safe(pos, n, &dev->outbufs_list, -+ list_head) { -+ if (pos->buffer.index >= b->count) -+ list_del(&pos->list_head); -+ } -+ -+ /* after we update dev->used_buffers, buffers in outbufs_list will -+ * correspond to dev->write_position + [0;b->count-1] range */ -+ i = v4l2l_mod64(dev->write_position, b->count); -+ list_for_each_entry(pos, &dev->outbufs_list, -+ list_head) { -+ dev->bufpos2index[i % b->count] = -+ pos->buffer.index; -+ ++i; -+ } -+ } -+ -+ opener->buffers_number = b->count; -+ if (opener->buffers_number < dev->used_buffers) -+ dev->used_buffers = opener->buffers_number; -+ return 0; -+ default: -+ return -EINVAL; -+ } -+} -+ -+/* returns buffer asked for; -+ * give app as many buffers as it wants, if it less than MAX, -+ * but map them in our inner buffers -+ * called on VIDIOC_QUERYBUF -+ */ -+static int vidioc_querybuf(struct file *file, void *fh, struct v4l2_buffer *b) -+{ -+ enum v4l2_buf_type type; -+ int index; -+ struct v4l2_loopback_device *dev; -+ struct v4l2_loopback_opener *opener; -+ -+ MARK(); -+ -+ type = b->type; -+ index = b->index; -+ dev = v4l2loopback_getdevice(file); -+ opener = fh_to_opener(fh); -+ -+ if ((b->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) && -+ (b->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)) { -+ return -EINVAL; -+ } -+ if (b->index > max_buffers) -+ return -EINVAL; -+ -+ if (opener->timeout_image_io) -+ *b = 
dev->timeout_image_buffer.buffer; -+ else -+ *b = dev->buffers[b->index % dev->used_buffers].buffer; -+ -+ b->type = type; -+ b->index = index; -+ dprintkrw("buffer type: %d (of %d with size=%ld)\n", b->memory, -+ dev->buffers_number, dev->buffer_size); -+ -+ /* Hopefully fix 'DQBUF return bad index if queue bigger then 2 for capture' -+ https://github.com/umlaeute/v4l2loopback/issues/60 */ -+ b->flags &= ~V4L2_BUF_FLAG_DONE; -+ b->flags |= V4L2_BUF_FLAG_QUEUED; -+ -+ return 0; -+} -+ -+static void buffer_written(struct v4l2_loopback_device *dev, -+ struct v4l2l_buffer *buf) -+{ -+ del_timer_sync(&dev->sustain_timer); -+ del_timer_sync(&dev->timeout_timer); -+ -+ spin_lock_bh(&dev->list_lock); -+ list_move_tail(&buf->list_head, &dev->outbufs_list); -+ spin_unlock_bh(&dev->list_lock); -+ -+ spin_lock_bh(&dev->lock); -+ dev->bufpos2index[v4l2l_mod64(dev->write_position, dev->used_buffers)] = -+ buf->buffer.index; -+ ++dev->write_position; -+ dev->reread_count = 0; -+ -+ check_timers(dev); -+ spin_unlock_bh(&dev->lock); -+} -+ -+/* put buffer to queue -+ * called on VIDIOC_QBUF -+ */ -+static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf) -+{ -+ struct v4l2_loopback_device *dev; -+ struct v4l2_loopback_opener *opener; -+ struct v4l2l_buffer *b; -+ int index; -+ -+ dev = v4l2loopback_getdevice(file); -+ opener = fh_to_opener(fh); -+ -+ if (buf->index > max_buffers) -+ return -EINVAL; -+ if (opener->timeout_image_io) -+ return 0; -+ -+ index = buf->index % dev->used_buffers; -+ b = &dev->buffers[index]; -+ -+ switch (buf->type) { -+ case V4L2_BUF_TYPE_VIDEO_CAPTURE: -+ dprintkrw( -+ "qbuf(CAPTURE)#%d: buffer#%d @ %p type=%d bytesused=%d length=%d flags=%x field=%d timestamp=%lld.%06ld sequence=%d\n", -+ index, buf->index, buf, buf->type, buf->bytesused, -+ buf->length, buf->flags, buf->field, -+ (long long)buf->timestamp.tv_sec, -+ (long int)buf->timestamp.tv_usec, buf->sequence); -+ set_queued(b); -+ return 0; -+ case V4L2_BUF_TYPE_VIDEO_OUTPUT: 
-+ dprintkrw( -+ "qbuf(OUTPUT)#%d: buffer#%d @ %p type=%d bytesused=%d length=%d flags=%x field=%d timestamp=%lld.%06ld sequence=%d\n", -+ index, buf->index, buf, buf->type, buf->bytesused, -+ buf->length, buf->flags, buf->field, -+ (long long)buf->timestamp.tv_sec, -+ (long int)buf->timestamp.tv_usec, buf->sequence); -+ if ((!(b->buffer.flags & V4L2_BUF_FLAG_TIMESTAMP_COPY)) && -+ (buf->timestamp.tv_sec == 0 && buf->timestamp.tv_usec == 0)) -+ v4l2l_get_timestamp(&b->buffer); -+ else { -+ b->buffer.timestamp = buf->timestamp; -+ b->buffer.flags |= V4L2_BUF_FLAG_TIMESTAMP_COPY; -+ } -+ if (dev->pix_format_has_valid_sizeimage) { -+ if (buf->bytesused >= dev->pix_format.sizeimage) { -+ b->buffer.bytesused = dev->pix_format.sizeimage; -+ } else { -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) -+ dev_warn_ratelimited( -+ &dev->vdev->dev, -+#else -+ dprintkrw( -+#endif -+ "warning queued output buffer bytesused too small %d < %d\n", -+ buf->bytesused, -+ dev->pix_format.sizeimage); -+ b->buffer.bytesused = buf->bytesused; -+ } -+ } else { -+ b->buffer.bytesused = buf->bytesused; -+ } -+ -+ set_done(b); -+ buffer_written(dev, b); -+ -+ /* Hopefully fix 'DQBUF return bad index if queue bigger then 2 for capture' -+ https://github.com/umlaeute/v4l2loopback/issues/60 */ -+ buf->flags &= ~V4L2_BUF_FLAG_DONE; -+ buf->flags |= V4L2_BUF_FLAG_QUEUED; -+ -+ wake_up_all(&dev->read_event); -+ return 0; -+ default: -+ return -EINVAL; -+ } -+} -+ -+static int can_read(struct v4l2_loopback_device *dev, -+ struct v4l2_loopback_opener *opener) -+{ -+ int ret; -+ -+ spin_lock_bh(&dev->lock); -+ check_timers(dev); -+ ret = dev->write_position > opener->read_position || -+ dev->reread_count > opener->reread_count || dev->timeout_happened; -+ spin_unlock_bh(&dev->lock); -+ return ret; -+} -+ -+static int get_capture_buffer(struct file *file) -+{ -+ struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); -+ struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data); 
-+ int pos, ret; -+ int timeout_happened; -+ -+ if ((file->f_flags & O_NONBLOCK) && -+ (dev->write_position <= opener->read_position && -+ dev->reread_count <= opener->reread_count && -+ !dev->timeout_happened)) -+ return -EAGAIN; -+ wait_event_interruptible(dev->read_event, can_read(dev, opener)); -+ -+ spin_lock_bh(&dev->lock); -+ if (dev->write_position == opener->read_position) { -+ if (dev->reread_count > opener->reread_count + 2) -+ opener->reread_count = dev->reread_count - 1; -+ ++opener->reread_count; -+ pos = v4l2l_mod64(opener->read_position + dev->used_buffers - 1, -+ dev->used_buffers); -+ } else { -+ opener->reread_count = 0; -+ if (dev->write_position > -+ opener->read_position + dev->used_buffers) -+ opener->read_position = dev->write_position - 1; -+ pos = v4l2l_mod64(opener->read_position, dev->used_buffers); -+ ++opener->read_position; -+ } -+ timeout_happened = dev->timeout_happened; -+ dev->timeout_happened = 0; -+ spin_unlock_bh(&dev->lock); -+ -+ ret = dev->bufpos2index[pos]; -+ if (timeout_happened) { -+ if (ret < 0) { -+ dprintk("trying to return not mapped buf[%d]\n", ret); -+ return -EFAULT; -+ } -+ /* although allocated on-demand, timeout_image is freed only -+ * in free_buffers(), so we don't need to worry about it being -+ * deallocated suddenly */ -+ memcpy(dev->image + dev->buffers[ret].buffer.m.offset, -+ dev->timeout_image, dev->buffer_size); -+ } -+ return ret; -+} -+ -+/* put buffer to dequeue -+ * called on VIDIOC_DQBUF -+ */ -+static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf) -+{ -+ struct v4l2_loopback_device *dev; -+ struct v4l2_loopback_opener *opener; -+ int index; -+ struct v4l2l_buffer *b; -+ -+ dev = v4l2loopback_getdevice(file); -+ opener = fh_to_opener(fh); -+ if (opener->timeout_image_io) { -+ *buf = dev->timeout_image_buffer.buffer; -+ return 0; -+ } -+ -+ switch (buf->type) { -+ case V4L2_BUF_TYPE_VIDEO_CAPTURE: -+ index = get_capture_buffer(file); -+ if (index < 0) -+ return index; -+ 
dprintkrw("capture DQBUF pos: %lld index: %d\n", -+ (long long)(opener->read_position - 1), index); -+ if (!(dev->buffers[index].buffer.flags & -+ V4L2_BUF_FLAG_MAPPED)) { -+ dprintk("trying to return not mapped buf[%d]\n", index); -+ return -EINVAL; -+ } -+ unset_flags(&dev->buffers[index]); -+ *buf = dev->buffers[index].buffer; -+ dprintkrw( -+ "dqbuf(CAPTURE)#%d: buffer#%d @ %p type=%d bytesused=%d length=%d flags=%x field=%d timestamp=%lld.%06ld sequence=%d\n", -+ index, buf->index, buf, buf->type, buf->bytesused, -+ buf->length, buf->flags, buf->field, -+ (long long)buf->timestamp.tv_sec, -+ (long int)buf->timestamp.tv_usec, buf->sequence); -+ return 0; -+ case V4L2_BUF_TYPE_VIDEO_OUTPUT: -+ spin_lock_bh(&dev->list_lock); -+ -+ b = list_entry(dev->outbufs_list.prev, struct v4l2l_buffer, -+ list_head); -+ list_move_tail(&b->list_head, &dev->outbufs_list); -+ -+ spin_unlock_bh(&dev->list_lock); -+ dprintkrw("output DQBUF index: %d\n", b->buffer.index); -+ unset_flags(b); -+ *buf = b->buffer; -+ buf->type = V4L2_BUF_TYPE_VIDEO_OUTPUT; -+ dprintkrw( -+ "dqbuf(OUTPUT)#%d: buffer#%d @ %p type=%d bytesused=%d length=%d flags=%x field=%d timestamp=%lld.%06ld sequence=%d\n", -+ index, buf->index, buf, buf->type, buf->bytesused, -+ buf->length, buf->flags, buf->field, -+ (long long)buf->timestamp.tv_sec, -+ (long int)buf->timestamp.tv_usec, buf->sequence); -+ return 0; -+ default: -+ return -EINVAL; -+ } -+} -+ -+/* ------------- STREAMING ------------------- */ -+ -+/* start streaming -+ * called on VIDIOC_STREAMON -+ */ -+static int vidioc_streamon(struct file *file, void *fh, enum v4l2_buf_type type) -+{ -+ struct v4l2_loopback_device *dev; -+ struct v4l2_loopback_opener *opener; -+ MARK(); -+ -+ dev = v4l2loopback_getdevice(file); -+ opener = fh_to_opener(fh); -+ -+ switch (type) { -+ case V4L2_BUF_TYPE_VIDEO_OUTPUT: -+ if (!dev->ready_for_capture) { -+ int ret = allocate_buffers(dev); -+ if (ret < 0) -+ return ret; -+ } -+ opener->type = WRITER; -+ 
dev->ready_for_output = 0; -+ dev->ready_for_capture++; -+ return 0; -+ case V4L2_BUF_TYPE_VIDEO_CAPTURE: -+ if (!dev->ready_for_capture) -+ return -EIO; -+ if (dev->active_readers > 0) -+ return -EBUSY; -+ opener->type = READER; -+ dev->active_readers++; -+ client_usage_queue_event(dev->vdev); -+ return 0; -+ default: -+ return -EINVAL; -+ } -+ return -EINVAL; -+} -+ -+/* stop streaming -+ * called on VIDIOC_STREAMOFF -+ */ -+static int vidioc_streamoff(struct file *file, void *fh, -+ enum v4l2_buf_type type) -+{ -+ struct v4l2_loopback_device *dev; -+ struct v4l2_loopback_opener *opener; -+ -+ MARK(); -+ dprintk("%d\n", type); -+ -+ dev = v4l2loopback_getdevice(file); -+ opener = fh_to_opener(fh); -+ switch (type) { -+ case V4L2_BUF_TYPE_VIDEO_OUTPUT: -+ if (dev->ready_for_capture > 0) -+ dev->ready_for_capture--; -+ return 0; -+ case V4L2_BUF_TYPE_VIDEO_CAPTURE: -+ if (opener->type == READER) { -+ opener->type = 0; -+ dev->active_readers--; -+ client_usage_queue_event(dev->vdev); -+ } -+ return 0; -+ default: -+ return -EINVAL; -+ } -+ return -EINVAL; -+} -+ -+#ifdef CONFIG_VIDEO_V4L1_COMPAT -+static int vidiocgmbuf(struct file *file, void *fh, struct video_mbuf *p) -+{ -+ struct v4l2_loopback_device *dev; -+ MARK(); -+ -+ dev = v4l2loopback_getdevice(file); -+ p->frames = dev->buffers_number; -+ p->offsets[0] = 0; -+ p->offsets[1] = 0; -+ p->size = dev->buffer_size; -+ return 0; -+} -+#endif -+ -+static void client_usage_queue_event(struct video_device *vdev) -+{ -+ struct v4l2_event ev; -+ struct v4l2_loopback_device *dev; -+ -+ dev = container_of(vdev->v4l2_dev, struct v4l2_loopback_device, -+ v4l2_dev); -+ -+ memset(&ev, 0, sizeof(ev)); -+ ev.type = V4L2_EVENT_PRI_CLIENT_USAGE; -+ ((struct v4l2_event_client_usage *)&ev.u)->count = dev->active_readers; -+ -+ v4l2_event_queue(vdev, &ev); -+} -+ -+static int client_usage_ops_add(struct v4l2_subscribed_event *sev, -+ unsigned elems) -+{ -+ if (!(sev->flags & V4L2_EVENT_SUB_FL_SEND_INITIAL)) -+ return 0; -+ -+ 
client_usage_queue_event(sev->fh->vdev); -+ return 0; -+} -+ -+static void client_usage_ops_replace(struct v4l2_event *old, -+ const struct v4l2_event *new) -+{ -+ *((struct v4l2_event_client_usage *)&old->u) = -+ *((struct v4l2_event_client_usage *)&new->u); -+} -+ -+static void client_usage_ops_merge(const struct v4l2_event *old, -+ struct v4l2_event *new) -+{ -+ *((struct v4l2_event_client_usage *)&new->u) = -+ *((struct v4l2_event_client_usage *)&old->u); -+} -+ -+const struct v4l2_subscribed_event_ops client_usage_ops = { -+ .add = client_usage_ops_add, -+ .replace = client_usage_ops_replace, -+ .merge = client_usage_ops_merge, -+}; -+ -+static int vidioc_subscribe_event(struct v4l2_fh *fh, -+ const struct v4l2_event_subscription *sub) -+{ -+ switch (sub->type) { -+ case V4L2_EVENT_CTRL: -+ return v4l2_ctrl_subscribe_event(fh, sub); -+ case V4L2_EVENT_PRI_CLIENT_USAGE: -+ return v4l2_event_subscribe(fh, sub, 0, &client_usage_ops); -+ } -+ -+ return -EINVAL; -+} -+ -+/* file operations */ -+static void vm_open(struct vm_area_struct *vma) -+{ -+ struct v4l2l_buffer *buf; -+ MARK(); -+ -+ buf = vma->vm_private_data; -+ buf->use_count++; -+ -+ buf->buffer.flags |= V4L2_BUF_FLAG_MAPPED; -+} -+ -+static void vm_close(struct vm_area_struct *vma) -+{ -+ struct v4l2l_buffer *buf; -+ MARK(); -+ -+ buf = vma->vm_private_data; -+ buf->use_count--; -+ -+ if (buf->use_count <= 0) -+ buf->buffer.flags &= ~V4L2_BUF_FLAG_MAPPED; -+} -+ -+static struct vm_operations_struct vm_ops = { -+ .open = vm_open, -+ .close = vm_close, -+}; -+ -+static int v4l2_loopback_mmap(struct file *file, struct vm_area_struct *vma) -+{ -+ u8 *addr; -+ unsigned long start; -+ unsigned long size; -+ struct v4l2_loopback_device *dev; -+ struct v4l2_loopback_opener *opener; -+ struct v4l2l_buffer *buffer = NULL; -+ MARK(); -+ -+ start = (unsigned long)vma->vm_start; -+ size = (unsigned long)(vma->vm_end - vma->vm_start); -+ -+ dev = v4l2loopback_getdevice(file); -+ opener = 
fh_to_opener(file->private_data); -+ -+ if (size > dev->buffer_size) { -+ dprintk("userspace tries to mmap too much, fail\n"); -+ return -EINVAL; -+ } -+ if (opener->timeout_image_io) { -+ /* we are going to map the timeout_image_buffer */ -+ if ((vma->vm_pgoff << PAGE_SHIFT) != -+ dev->buffer_size * MAX_BUFFERS) { -+ dprintk("invalid mmap offset for timeout_image_io mode\n"); -+ return -EINVAL; -+ } -+ } else if ((vma->vm_pgoff << PAGE_SHIFT) > -+ dev->buffer_size * (dev->buffers_number - 1)) { -+ dprintk("userspace tries to mmap too far, fail\n"); -+ return -EINVAL; -+ } -+ -+ /* FIXXXXXME: allocation should not happen here! */ -+ if (NULL == dev->image) -+ if (allocate_buffers(dev) < 0) -+ return -EINVAL; -+ -+ if (opener->timeout_image_io) { -+ buffer = &dev->timeout_image_buffer; -+ addr = dev->timeout_image; -+ } else { -+ int i; -+ for (i = 0; i < dev->buffers_number; ++i) { -+ buffer = &dev->buffers[i]; -+ if ((buffer->buffer.m.offset >> PAGE_SHIFT) == -+ vma->vm_pgoff) -+ break; -+ } -+ -+ if (i >= dev->buffers_number) -+ return -EINVAL; -+ -+ addr = dev->image + (vma->vm_pgoff << PAGE_SHIFT); -+ } -+ -+ while (size > 0) { -+ struct page *page; -+ -+ page = vmalloc_to_page(addr); -+ -+ if (vm_insert_page(vma, start, page) < 0) -+ return -EAGAIN; -+ -+ start += PAGE_SIZE; -+ addr += PAGE_SIZE; -+ size -= PAGE_SIZE; -+ } -+ -+ vma->vm_ops = &vm_ops; -+ vma->vm_private_data = buffer; -+ -+ vm_open(vma); -+ -+ MARK(); -+ return 0; -+} -+ -+static unsigned int v4l2_loopback_poll(struct file *file, -+ struct poll_table_struct *pts) -+{ -+ struct v4l2_loopback_opener *opener; -+ struct v4l2_loopback_device *dev; -+ __poll_t req_events = poll_requested_events(pts); -+ int ret_mask = 0; -+ MARK(); -+ -+ opener = fh_to_opener(file->private_data); -+ dev = v4l2loopback_getdevice(file); -+ -+ if (req_events & POLLPRI) { -+ if (!v4l2_event_pending(&opener->fh)) -+ poll_wait(file, &opener->fh.wait, pts); -+ if (v4l2_event_pending(&opener->fh)) { -+ ret_mask |= POLLPRI; 
-+ if (!(req_events & DEFAULT_POLLMASK)) -+ return ret_mask; -+ } -+ } -+ -+ switch (opener->type) { -+ case WRITER: -+ ret_mask |= POLLOUT | POLLWRNORM; -+ break; -+ case READER: -+ if (!can_read(dev, opener)) { -+ if (ret_mask) -+ return ret_mask; -+ poll_wait(file, &dev->read_event, pts); -+ } -+ if (can_read(dev, opener)) -+ ret_mask |= POLLIN | POLLRDNORM; -+ if (v4l2_event_pending(&opener->fh)) -+ ret_mask |= POLLPRI; -+ break; -+ default: -+ break; -+ } -+ -+ MARK(); -+ return ret_mask; -+} -+ -+/* do not want to limit device opens, it can be as many readers as user want, -+ * writers are limited by means of setting writer field */ -+static int v4l2_loopback_open(struct file *file) -+{ -+ struct v4l2_loopback_device *dev; -+ struct v4l2_loopback_opener *opener; -+ MARK(); -+ dev = v4l2loopback_getdevice(file); -+ if (dev->open_count.counter >= dev->max_openers) -+ return -EBUSY; -+ /* kfree on close */ -+ opener = kzalloc(sizeof(*opener), GFP_KERNEL); -+ if (opener == NULL) -+ return -ENOMEM; -+ -+ atomic_inc(&dev->open_count); -+ -+ opener->timeout_image_io = dev->timeout_image_io; -+ if (opener->timeout_image_io) { -+ int r = allocate_timeout_image(dev); -+ -+ if (r < 0) { -+ dprintk("timeout image allocation failed\n"); -+ -+ atomic_dec(&dev->open_count); -+ -+ kfree(opener); -+ return r; -+ } -+ } -+ -+ v4l2_fh_init(&opener->fh, video_devdata(file)); -+ file->private_data = &opener->fh; -+ -+ v4l2_fh_add(&opener->fh); -+ dprintk("opened dev:%p with image:%p\n", dev, dev ? 
dev->image : NULL); -+ MARK(); -+ return 0; -+} -+ -+static int v4l2_loopback_close(struct file *file) -+{ -+ struct v4l2_loopback_opener *opener; -+ struct v4l2_loopback_device *dev; -+ int is_writer = 0, is_reader = 0; -+ MARK(); -+ -+ opener = fh_to_opener(file->private_data); -+ dev = v4l2loopback_getdevice(file); -+ -+ if (WRITER == opener->type) -+ is_writer = 1; -+ if (READER == opener->type) -+ is_reader = 1; -+ -+ atomic_dec(&dev->open_count); -+ if (dev->open_count.counter == 0) { -+ del_timer_sync(&dev->sustain_timer); -+ del_timer_sync(&dev->timeout_timer); -+ } -+ try_free_buffers(dev); -+ -+ v4l2_fh_del(&opener->fh); -+ v4l2_fh_exit(&opener->fh); -+ -+ kfree(opener); -+ if (is_writer) -+ dev->ready_for_output = 1; -+ if (is_reader) { -+ dev->active_readers--; -+ client_usage_queue_event(dev->vdev); -+ } -+ MARK(); -+ return 0; -+} -+ -+static ssize_t v4l2_loopback_read(struct file *file, char __user *buf, -+ size_t count, loff_t *ppos) -+{ -+ int read_index; -+ struct v4l2_loopback_device *dev; -+ struct v4l2_buffer *b; -+ MARK(); -+ -+ dev = v4l2loopback_getdevice(file); -+ -+ read_index = get_capture_buffer(file); -+ if (read_index < 0) -+ return read_index; -+ if (count > dev->buffer_size) -+ count = dev->buffer_size; -+ b = &dev->buffers[read_index].buffer; -+ if (count > b->bytesused) -+ count = b->bytesused; -+ if (copy_to_user((void *)buf, (void *)(dev->image + b->m.offset), -+ count)) { -+ printk(KERN_ERR -+ "v4l2-loopback: failed copy_to_user() in read buf\n"); -+ return -EFAULT; -+ } -+ dprintkrw("leave v4l2_loopback_read()\n"); -+ return count; -+} -+ -+static ssize_t v4l2_loopback_write(struct file *file, const char __user *buf, -+ size_t count, loff_t *ppos) -+{ -+ struct v4l2_loopback_opener *opener; -+ struct v4l2_loopback_device *dev; -+ int write_index; -+ struct v4l2_buffer *b; -+ int err = 0; -+ -+ MARK(); -+ -+ dev = v4l2loopback_getdevice(file); -+ opener = fh_to_opener(file->private_data); -+ -+ if (UNNEGOTIATED == opener->type) 
{ -+ spin_lock(&dev->lock); -+ -+ if (dev->ready_for_output) { -+ err = vidioc_streamon(file, file->private_data, -+ V4L2_BUF_TYPE_VIDEO_OUTPUT); -+ } -+ -+ spin_unlock(&dev->lock); -+ -+ if (err < 0) -+ return err; -+ } -+ -+ if (WRITER != opener->type) -+ return -EINVAL; -+ -+ if (!dev->ready_for_capture) { -+ int ret = allocate_buffers(dev); -+ if (ret < 0) -+ return ret; -+ dev->ready_for_capture = 1; -+ } -+ dprintkrw("v4l2_loopback_write() trying to write %zu bytes\n", count); -+ if (count > dev->buffer_size) -+ count = dev->buffer_size; -+ -+ write_index = v4l2l_mod64(dev->write_position, dev->used_buffers); -+ b = &dev->buffers[write_index].buffer; -+ -+ if (copy_from_user((void *)(dev->image + b->m.offset), (void *)buf, -+ count)) { -+ printk(KERN_ERR -+ "v4l2-loopback: failed copy_from_user() in write buf, could not write %zu\n", -+ count); -+ return -EFAULT; -+ } -+ v4l2l_get_timestamp(b); -+ b->bytesused = count; -+ b->sequence = dev->write_position; -+ buffer_written(dev, &dev->buffers[write_index]); -+ wake_up_all(&dev->read_event); -+ dprintkrw("leave v4l2_loopback_write()\n"); -+ return count; -+} -+ -+/* init functions */ -+/* frees buffers, if already allocated */ -+static void free_buffers(struct v4l2_loopback_device *dev) -+{ -+ MARK(); -+ dprintk("freeing image@%p for dev:%p\n", dev ? 
dev->image : NULL, dev); -+ if (!dev) -+ return; -+ if (dev->image) { -+ vfree(dev->image); -+ dev->image = NULL; -+ } -+ if (dev->timeout_image) { -+ vfree(dev->timeout_image); -+ dev->timeout_image = NULL; -+ } -+ dev->imagesize = 0; -+} -+/* frees buffers, if they are no longer needed */ -+static void try_free_buffers(struct v4l2_loopback_device *dev) -+{ -+ MARK(); -+ if (0 == dev->open_count.counter && !dev->keep_format) { -+ free_buffers(dev); -+ dev->ready_for_capture = 0; -+ dev->buffer_size = 0; -+ dev->write_position = 0; -+ } -+} -+/* allocates buffers, if buffer_size is set */ -+static int allocate_buffers(struct v4l2_loopback_device *dev) -+{ -+ int err; -+ -+ MARK(); -+ /* vfree on close file operation in case no open handles left */ -+ -+ if (dev->buffer_size < 1 || dev->buffers_number < 1) -+ return -EINVAL; -+ -+ if ((__LONG_MAX__ / dev->buffer_size) < dev->buffers_number) -+ return -ENOSPC; -+ -+ if (dev->image) { -+ dprintk("allocating buffers again: %ld %ld\n", -+ dev->buffer_size * dev->buffers_number, dev->imagesize); -+ /* FIXME: prevent double allocation more intelligently! 
*/ -+ if (dev->buffer_size * dev->buffers_number == dev->imagesize) -+ return 0; -+ -+ /* check whether the total number of readers/writers is <=1 */ -+ if ((dev->ready_for_capture + dev->active_readers) <= 1) -+ free_buffers(dev); -+ else -+ return -EINVAL; -+ } -+ -+ dev->imagesize = (unsigned long)dev->buffer_size * -+ (unsigned long)dev->buffers_number; -+ -+ dprintk("allocating %ld = %ldx%d\n", dev->imagesize, dev->buffer_size, -+ dev->buffers_number); -+ err = -ENOMEM; -+ -+ if (dev->timeout_jiffies > 0) { -+ err = allocate_timeout_image(dev); -+ if (err < 0) -+ goto error; -+ } -+ -+ dev->image = vmalloc(dev->imagesize); -+ if (dev->image == NULL) -+ goto error; -+ -+ dprintk("vmallocated %ld bytes\n", dev->imagesize); -+ MARK(); -+ -+ init_buffers(dev); -+ return 0; -+ -+error: -+ free_buffers(dev); -+ return err; -+} -+ -+/* init inner buffers, they are capture mode and flags are set as -+ * for capture mod buffers */ -+static void init_buffers(struct v4l2_loopback_device *dev) -+{ -+ int i; -+ int buffer_size; -+ int bytesused; -+ MARK(); -+ -+ buffer_size = dev->buffer_size; -+ bytesused = dev->pix_format.sizeimage; -+ for (i = 0; i < dev->buffers_number; ++i) { -+ struct v4l2_buffer *b = &dev->buffers[i].buffer; -+ b->index = i; -+ b->bytesused = bytesused; -+ b->length = buffer_size; -+ b->field = V4L2_FIELD_NONE; -+ b->flags = 0; -+ b->m.offset = i * buffer_size; -+ b->memory = V4L2_MEMORY_MMAP; -+ b->sequence = 0; -+ b->timestamp.tv_sec = 0; -+ b->timestamp.tv_usec = 0; -+ b->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; -+ -+ v4l2l_get_timestamp(b); -+ } -+ dev->timeout_image_buffer = dev->buffers[0]; -+ dev->timeout_image_buffer.buffer.m.offset = MAX_BUFFERS * buffer_size; -+ MARK(); -+} -+ -+static int allocate_timeout_image(struct v4l2_loopback_device *dev) -+{ -+ MARK(); -+ if (dev->buffer_size <= 0) { -+ dev->timeout_image_io = 0; -+ return -EINVAL; -+ } -+ -+ if (dev->timeout_image == NULL) { -+ dev->timeout_image = vzalloc(dev->buffer_size); -+ if 
(dev->timeout_image == NULL) { -+ dev->timeout_image_io = 0; -+ return -ENOMEM; -+ } -+ } -+ return 0; -+} -+ -+/* fills and register video device */ -+static void init_vdev(struct video_device *vdev, int nr) -+{ -+ MARK(); -+ -+#ifdef V4L2LOOPBACK_WITH_STD -+ vdev->tvnorms = V4L2_STD_ALL; -+#endif /* V4L2LOOPBACK_WITH_STD */ -+ -+ vdev->vfl_type = VFL_TYPE_VIDEO; -+ vdev->fops = &v4l2_loopback_fops; -+ vdev->ioctl_ops = &v4l2_loopback_ioctl_ops; -+ vdev->release = &video_device_release; -+ vdev->minor = -1; -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) -+ vdev->device_caps = V4L2_CAP_DEVICE_CAPS | V4L2_CAP_VIDEO_CAPTURE | -+ V4L2_CAP_VIDEO_OUTPUT | V4L2_CAP_READWRITE | -+ V4L2_CAP_STREAMING; -+#endif -+ -+ if (debug > 1) -+ vdev->dev_debug = V4L2_DEV_DEBUG_IOCTL | -+ V4L2_DEV_DEBUG_IOCTL_ARG; -+ -+ vdev->vfl_dir = VFL_DIR_M2M; -+ -+ MARK(); -+} -+ -+/* init default capture parameters, only fps may be changed in future */ -+static void init_capture_param(struct v4l2_captureparm *capture_param) -+{ -+ MARK(); -+ capture_param->capability = 0; -+ capture_param->capturemode = 0; -+ capture_param->extendedmode = 0; -+ capture_param->readbuffers = max_buffers; -+ capture_param->timeperframe.numerator = 1; -+ capture_param->timeperframe.denominator = 30; -+} -+ -+static void check_timers(struct v4l2_loopback_device *dev) -+{ -+ if (!dev->ready_for_capture) -+ return; -+ -+ if (dev->timeout_jiffies > 0 && !timer_pending(&dev->timeout_timer)) -+ mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies); -+ if (dev->sustain_framerate && !timer_pending(&dev->sustain_timer)) -+ mod_timer(&dev->sustain_timer, -+ jiffies + dev->frame_jiffies * 3 / 2); -+} -+#ifdef HAVE_TIMER_SETUP -+static void sustain_timer_clb(struct timer_list *t) -+{ -+ struct v4l2_loopback_device *dev = from_timer(dev, t, sustain_timer); -+#else -+static void sustain_timer_clb(unsigned long nr) -+{ -+ struct v4l2_loopback_device *dev = -+ idr_find(&v4l2loopback_index_idr, nr); -+#endif -+ 
spin_lock(&dev->lock); -+ if (dev->sustain_framerate) { -+ dev->reread_count++; -+ dprintkrw("reread: %lld %d\n", (long long)dev->write_position, -+ dev->reread_count); -+ if (dev->reread_count == 1) -+ mod_timer(&dev->sustain_timer, -+ jiffies + max(1UL, dev->frame_jiffies / 2)); -+ else -+ mod_timer(&dev->sustain_timer, -+ jiffies + dev->frame_jiffies); -+ wake_up_all(&dev->read_event); -+ } -+ spin_unlock(&dev->lock); -+} -+#ifdef HAVE_TIMER_SETUP -+static void timeout_timer_clb(struct timer_list *t) -+{ -+ struct v4l2_loopback_device *dev = from_timer(dev, t, timeout_timer); -+#else -+static void timeout_timer_clb(unsigned long nr) -+{ -+ struct v4l2_loopback_device *dev = -+ idr_find(&v4l2loopback_index_idr, nr); -+#endif -+ spin_lock(&dev->lock); -+ if (dev->timeout_jiffies > 0) { -+ dev->timeout_happened = 1; -+ mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies); -+ wake_up_all(&dev->read_event); -+ } -+ spin_unlock(&dev->lock); -+} -+ -+/* init loopback main structure */ -+#define DEFAULT_FROM_CONF(confmember, default_condition, default_value) \ -+ ((conf) ? \ -+ ((conf->confmember default_condition) ? 
(default_value) : \ -+ (conf->confmember)) : \ -+ default_value) -+ -+static int v4l2_loopback_add(struct v4l2_loopback_config *conf, int *ret_nr) -+{ -+ struct v4l2_loopback_device *dev; -+ struct v4l2_ctrl_handler *hdl; -+ struct v4l2loopback_private *vdev_priv = NULL; -+ -+ int err = -ENOMEM; -+ -+ u32 _width = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH; -+ u32 _height = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT; -+ -+ u32 _min_width = DEFAULT_FROM_CONF(min_width, -+ < V4L2LOOPBACK_SIZE_MIN_WIDTH, -+ V4L2LOOPBACK_SIZE_MIN_WIDTH); -+ u32 _min_height = DEFAULT_FROM_CONF(min_height, -+ < V4L2LOOPBACK_SIZE_MIN_HEIGHT, -+ V4L2LOOPBACK_SIZE_MIN_HEIGHT); -+ u32 _max_width = DEFAULT_FROM_CONF(max_width, < _min_width, max_width); -+ u32 _max_height = -+ DEFAULT_FROM_CONF(max_height, < _min_height, max_height); -+ bool _announce_all_caps = (conf && conf->announce_all_caps >= 0) ? -+ (conf->announce_all_caps) : -+ V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS; -+ int _max_buffers = DEFAULT_FROM_CONF(max_buffers, <= 0, max_buffers); -+ int _max_openers = DEFAULT_FROM_CONF(max_openers, <= 0, max_openers); -+ -+ int nr = -1; -+ -+ _announce_all_caps = (!!_announce_all_caps); -+ -+ if (conf) { -+ const int output_nr = conf->output_nr; -+#ifdef SPLIT_DEVICES -+ const int capture_nr = conf->capture_nr; -+#else -+ const int capture_nr = output_nr; -+#endif -+ if (capture_nr >= 0 && output_nr == capture_nr) { -+ nr = output_nr; -+ } else if (capture_nr < 0 && output_nr < 0) { -+ nr = -1; -+ } else if (capture_nr < 0) { -+ nr = output_nr; -+ } else if (output_nr < 0) { -+ nr = capture_nr; -+ } else { -+ printk(KERN_ERR -+ "split OUTPUT and CAPTURE devices not yet supported."); -+ printk(KERN_INFO -+ "both devices must have the same number (%d != %d).", -+ output_nr, capture_nr); -+ return -EINVAL; -+ } -+ } -+ -+ if (idr_find(&v4l2loopback_index_idr, nr)) -+ return -EEXIST; -+ -+ dprintk("creating v4l2loopback-device #%d\n", nr); -+ dev = kzalloc(sizeof(*dev), GFP_KERNEL); -+ if (!dev) -+ return -ENOMEM; -+ -+ 
/* allocate id, if @id >= 0, we're requesting that specific id */ -+ if (nr >= 0) { -+ err = idr_alloc(&v4l2loopback_index_idr, dev, nr, nr + 1, -+ GFP_KERNEL); -+ if (err == -ENOSPC) -+ err = -EEXIST; -+ } else { -+ err = idr_alloc(&v4l2loopback_index_idr, dev, 0, 0, GFP_KERNEL); -+ } -+ if (err < 0) -+ goto out_free_dev; -+ nr = err; -+ err = -ENOMEM; -+ -+ if (conf && conf->card_label[0]) { -+ snprintf(dev->card_label, sizeof(dev->card_label), "%s", -+ conf->card_label); -+ } else { -+ snprintf(dev->card_label, sizeof(dev->card_label), -+ "Dummy video device (0x%04X)", nr); -+ } -+ snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), -+ "v4l2loopback-%03d", nr); -+ -+ err = v4l2_device_register(NULL, &dev->v4l2_dev); -+ if (err) -+ goto out_free_idr; -+ MARK(); -+ -+ dev->vdev = video_device_alloc(); -+ if (dev->vdev == NULL) { -+ err = -ENOMEM; -+ goto out_unregister; -+ } -+ -+ vdev_priv = kzalloc(sizeof(struct v4l2loopback_private), GFP_KERNEL); -+ if (vdev_priv == NULL) { -+ err = -ENOMEM; -+ goto out_unregister; -+ } -+ -+ video_set_drvdata(dev->vdev, vdev_priv); -+ if (video_get_drvdata(dev->vdev) == NULL) { -+ err = -ENOMEM; -+ goto out_unregister; -+ } -+ -+ MARK(); -+ snprintf(dev->vdev->name, sizeof(dev->vdev->name), "%s", -+ dev->card_label); -+ -+ vdev_priv->device_nr = nr; -+ -+ init_vdev(dev->vdev, nr); -+ dev->vdev->v4l2_dev = &dev->v4l2_dev; -+ init_capture_param(&dev->capture_param); -+ err = set_timeperframe(dev, &dev->capture_param.timeperframe); -+ if (err) -+ goto out_unregister; -+ dev->keep_format = 0; -+ dev->sustain_framerate = 0; -+ -+ dev->announce_all_caps = _announce_all_caps; -+ dev->min_width = _min_width; -+ dev->min_height = _min_height; -+ dev->max_width = _max_width; -+ dev->max_height = _max_height; -+ dev->max_openers = _max_openers; -+ dev->buffers_number = dev->used_buffers = _max_buffers; -+ -+ dev->write_position = 0; -+ -+ MARK(); -+ spin_lock_init(&dev->lock); -+ spin_lock_init(&dev->list_lock); -+ 
INIT_LIST_HEAD(&dev->outbufs_list); -+ if (list_empty(&dev->outbufs_list)) { -+ int i; -+ -+ for (i = 0; i < dev->used_buffers; ++i) -+ list_add_tail(&dev->buffers[i].list_head, -+ &dev->outbufs_list); -+ } -+ memset(dev->bufpos2index, 0, sizeof(dev->bufpos2index)); -+ atomic_set(&dev->open_count, 0); -+ dev->ready_for_capture = 0; -+ dev->ready_for_output = 1; -+ -+ dev->buffer_size = 0; -+ dev->image = NULL; -+ dev->imagesize = 0; -+#ifdef HAVE_TIMER_SETUP -+ timer_setup(&dev->sustain_timer, sustain_timer_clb, 0); -+ timer_setup(&dev->timeout_timer, timeout_timer_clb, 0); -+#else -+ setup_timer(&dev->sustain_timer, sustain_timer_clb, nr); -+ setup_timer(&dev->timeout_timer, timeout_timer_clb, nr); -+#endif -+ dev->reread_count = 0; -+ dev->timeout_jiffies = 0; -+ dev->timeout_image = NULL; -+ dev->timeout_happened = 0; -+ -+ hdl = &dev->ctrl_handler; -+ err = v4l2_ctrl_handler_init(hdl, 4); -+ if (err) -+ goto out_unregister; -+ v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_keepformat, NULL); -+ v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_sustainframerate, NULL); -+ v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeout, NULL); -+ v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeoutimageio, NULL); -+ if (hdl->error) { -+ err = hdl->error; -+ goto out_free_handler; -+ } -+ dev->v4l2_dev.ctrl_handler = hdl; -+ -+ err = v4l2_ctrl_handler_setup(hdl); -+ if (err) -+ goto out_free_handler; -+ -+ /* FIXME set buffers to 0 */ -+ -+ /* Set initial format */ -+ if (_width < _min_width) -+ _width = _min_width; -+ if (_width > _max_width) -+ _width = _max_width; -+ if (_height < _min_height) -+ _height = _min_height; -+ if (_height > _max_height) -+ _height = _max_height; -+ -+ dev->pix_format.width = _width; -+ dev->pix_format.height = _height; -+ dev->pix_format.pixelformat = formats[0].fourcc; -+ dev->pix_format.colorspace = -+ V4L2_COLORSPACE_DEFAULT; /* do we need to set this ? 
*/ -+ dev->pix_format.field = V4L2_FIELD_NONE; -+ -+ dev->buffer_size = PAGE_ALIGN(dev->pix_format.sizeimage); -+ dprintk("buffer_size = %ld (=%d)\n", dev->buffer_size, -+ dev->pix_format.sizeimage); -+ -+ if (dev->buffer_size && ((err = allocate_buffers(dev)) < 0)) -+ goto out_free_handler; -+ -+ init_waitqueue_head(&dev->read_event); -+ -+ /* register the device -> it creates /dev/video* */ -+ if (video_register_device(dev->vdev, VFL_TYPE_VIDEO, nr) < 0) { -+ printk(KERN_ERR -+ "v4l2loopback: failed video_register_device()\n"); -+ err = -EFAULT; -+ goto out_free_device; -+ } -+ v4l2loopback_create_sysfs(dev->vdev); -+ -+ MARK(); -+ if (ret_nr) -+ *ret_nr = dev->vdev->num; -+ return 0; -+ -+out_free_device: -+ video_device_release(dev->vdev); -+out_free_handler: -+ v4l2_ctrl_handler_free(&dev->ctrl_handler); -+out_unregister: -+ video_set_drvdata(dev->vdev, NULL); -+ if (vdev_priv != NULL) -+ kfree(vdev_priv); -+ v4l2_device_unregister(&dev->v4l2_dev); -+out_free_idr: -+ idr_remove(&v4l2loopback_index_idr, nr); -+out_free_dev: -+ kfree(dev); -+ return err; -+} -+ -+static void v4l2_loopback_remove(struct v4l2_loopback_device *dev) -+{ -+ free_buffers(dev); -+ v4l2loopback_remove_sysfs(dev->vdev); -+ kfree(video_get_drvdata(dev->vdev)); -+ video_unregister_device(dev->vdev); -+ v4l2_device_unregister(&dev->v4l2_dev); -+ v4l2_ctrl_handler_free(&dev->ctrl_handler); -+ kfree(dev); -+} -+ -+static long v4l2loopback_control_ioctl(struct file *file, unsigned int cmd, -+ unsigned long parm) -+{ -+ struct v4l2_loopback_device *dev; -+ struct v4l2_loopback_config conf; -+ struct v4l2_loopback_config *confptr = &conf; -+ int device_nr, capture_nr, output_nr; -+ int ret; -+ -+ ret = mutex_lock_killable(&v4l2loopback_ctl_mutex); -+ if (ret) -+ return ret; -+ -+ ret = -EINVAL; -+ switch (cmd) { -+ default: -+ ret = -ENOSYS; -+ break; -+ /* add a v4l2loopback device (pair), based on the user-provided specs */ -+ case V4L2LOOPBACK_CTL_ADD: -+ if (parm) { -+ if ((ret = 
copy_from_user(&conf, (void *)parm, -+ sizeof(conf))) < 0) -+ break; -+ } else -+ confptr = NULL; -+ ret = v4l2_loopback_add(confptr, &device_nr); -+ if (ret >= 0) -+ ret = device_nr; -+ break; -+ /* remove a v4l2loopback device (both capture and output) */ -+ case V4L2LOOPBACK_CTL_REMOVE: -+ ret = v4l2loopback_lookup((int)parm, &dev); -+ if (ret >= 0 && dev) { -+ int nr = ret; -+ ret = -EBUSY; -+ if (dev->open_count.counter > 0) -+ break; -+ idr_remove(&v4l2loopback_index_idr, nr); -+ v4l2_loopback_remove(dev); -+ ret = 0; -+ }; -+ break; -+ /* get information for a loopback device. -+ * this is mostly about limits (which cannot be queried directly with VIDIOC_G_FMT and friends -+ */ -+ case V4L2LOOPBACK_CTL_QUERY: -+ if (!parm) -+ break; -+ if ((ret = copy_from_user(&conf, (void *)parm, sizeof(conf))) < -+ 0) -+ break; -+ capture_nr = output_nr = conf.output_nr; -+#ifdef SPLIT_DEVICES -+ capture_nr = conf.capture_nr; -+#endif -+ device_nr = (output_nr < 0) ? capture_nr : output_nr; -+ MARK(); -+ /* get the device from either capture_nr or output_nr (whatever is valid) */ -+ if ((ret = v4l2loopback_lookup(device_nr, &dev)) < 0) -+ break; -+ MARK(); -+ /* if we got the device from output_nr and there is a valid capture_nr, -+ * make sure that both refer to the same device (or bail out) -+ */ -+ if ((device_nr != capture_nr) && (capture_nr >= 0) && -+ ((ret = v4l2loopback_lookup(capture_nr, 0)) < 0)) -+ break; -+ MARK(); -+ /* if otoh, we got the device from capture_nr and there is a valid output_nr, -+ * make sure that both refer to the same device (or bail out) -+ */ -+ if ((device_nr != output_nr) && (output_nr >= 0) && -+ ((ret = v4l2loopback_lookup(output_nr, 0)) < 0)) -+ break; -+ MARK(); -+ -+ /* v4l2_loopback_config identified a single device, so fetch the data */ -+ snprintf(conf.card_label, sizeof(conf.card_label), "%s", -+ dev->card_label); -+ MARK(); -+ conf.output_nr = dev->vdev->num; -+#ifdef SPLIT_DEVICES -+ conf.capture_nr = dev->vdev->num; -+#endif 
-+ conf.min_width = dev->min_width; -+ conf.min_height = dev->min_height; -+ conf.max_width = dev->max_width; -+ conf.max_height = dev->max_height; -+ conf.announce_all_caps = dev->announce_all_caps; -+ conf.max_buffers = dev->buffers_number; -+ conf.max_openers = dev->max_openers; -+ conf.debug = debug; -+ MARK(); -+ if (copy_to_user((void *)parm, &conf, sizeof(conf))) { -+ ret = -EFAULT; -+ break; -+ } -+ MARK(); -+ ret = 0; -+ ; -+ break; -+ } -+ -+ MARK(); -+ mutex_unlock(&v4l2loopback_ctl_mutex); -+ MARK(); -+ return ret; -+} -+ -+/* LINUX KERNEL */ -+ -+static const struct file_operations v4l2loopback_ctl_fops = { -+ // clang-format off -+ .owner = THIS_MODULE, -+ .open = nonseekable_open, -+ .unlocked_ioctl = v4l2loopback_control_ioctl, -+ .compat_ioctl = v4l2loopback_control_ioctl, -+ .llseek = noop_llseek, -+ // clang-format on -+}; -+ -+static struct miscdevice v4l2loopback_misc = { -+ // clang-format off -+ .minor = MISC_DYNAMIC_MINOR, -+ .name = "v4l2loopback", -+ .fops = &v4l2loopback_ctl_fops, -+ // clang-format on -+}; -+ -+static const struct v4l2_file_operations v4l2_loopback_fops = { -+ // clang-format off -+ .owner = THIS_MODULE, -+ .open = v4l2_loopback_open, -+ .release = v4l2_loopback_close, -+ .read = v4l2_loopback_read, -+ .write = v4l2_loopback_write, -+ .poll = v4l2_loopback_poll, -+ .mmap = v4l2_loopback_mmap, -+ .unlocked_ioctl = video_ioctl2, -+ // clang-format on -+}; -+ -+static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops = { -+ // clang-format off -+ .vidioc_querycap = &vidioc_querycap, -+ .vidioc_enum_framesizes = &vidioc_enum_framesizes, -+ .vidioc_enum_frameintervals = &vidioc_enum_frameintervals, -+ -+ .vidioc_enum_output = &vidioc_enum_output, -+ .vidioc_g_output = &vidioc_g_output, -+ .vidioc_s_output = &vidioc_s_output, -+ -+ .vidioc_enum_input = &vidioc_enum_input, -+ .vidioc_g_input = &vidioc_g_input, -+ .vidioc_s_input = &vidioc_s_input, -+ -+ .vidioc_enum_fmt_vid_cap = &vidioc_enum_fmt_cap, -+ .vidioc_g_fmt_vid_cap 
= &vidioc_g_fmt_cap, -+ .vidioc_s_fmt_vid_cap = &vidioc_s_fmt_cap, -+ .vidioc_try_fmt_vid_cap = &vidioc_try_fmt_cap, -+ -+ .vidioc_enum_fmt_vid_out = &vidioc_enum_fmt_out, -+ .vidioc_s_fmt_vid_out = &vidioc_s_fmt_out, -+ .vidioc_g_fmt_vid_out = &vidioc_g_fmt_out, -+ .vidioc_try_fmt_vid_out = &vidioc_try_fmt_out, -+ -+#ifdef V4L2L_OVERLAY -+ .vidioc_s_fmt_vid_overlay = &vidioc_s_fmt_overlay, -+ .vidioc_g_fmt_vid_overlay = &vidioc_g_fmt_overlay, -+#endif -+ -+#ifdef V4L2LOOPBACK_WITH_STD -+ .vidioc_s_std = &vidioc_s_std, -+ .vidioc_g_std = &vidioc_g_std, -+ .vidioc_querystd = &vidioc_querystd, -+#endif /* V4L2LOOPBACK_WITH_STD */ -+ -+ .vidioc_g_parm = &vidioc_g_parm, -+ .vidioc_s_parm = &vidioc_s_parm, -+ -+ .vidioc_reqbufs = &vidioc_reqbufs, -+ .vidioc_querybuf = &vidioc_querybuf, -+ .vidioc_qbuf = &vidioc_qbuf, -+ .vidioc_dqbuf = &vidioc_dqbuf, -+ -+ .vidioc_streamon = &vidioc_streamon, -+ .vidioc_streamoff = &vidioc_streamoff, -+ -+#ifdef CONFIG_VIDEO_V4L1_COMPAT -+ .vidiocgmbuf = &vidiocgmbuf, -+#endif -+ -+ .vidioc_subscribe_event = &vidioc_subscribe_event, -+ .vidioc_unsubscribe_event = &v4l2_event_unsubscribe, -+ // clang-format on -+}; -+ -+static int free_device_cb(int id, void *ptr, void *data) -+{ -+ struct v4l2_loopback_device *dev = ptr; -+ v4l2_loopback_remove(dev); -+ return 0; -+} -+static void free_devices(void) -+{ -+ idr_for_each(&v4l2loopback_index_idr, &free_device_cb, NULL); -+ idr_destroy(&v4l2loopback_index_idr); -+} -+ -+static int __init v4l2loopback_init_module(void) -+{ -+ const u32 min_width = V4L2LOOPBACK_SIZE_MIN_WIDTH; -+ const u32 min_height = V4L2LOOPBACK_SIZE_MIN_HEIGHT; -+ int err; -+ int i; -+ MARK(); -+ -+ err = misc_register(&v4l2loopback_misc); -+ if (err < 0) -+ return err; -+ -+ if (devices < 0) { -+ devices = 1; -+ -+ /* try guessing the devices from the "video_nr" parameter */ -+ for (i = MAX_DEVICES - 1; i >= 0; i--) { -+ if (video_nr[i] >= 0) { -+ devices = i + 1; -+ break; -+ } -+ } -+ } -+ -+ if (devices > MAX_DEVICES) 
{ -+ devices = MAX_DEVICES; -+ printk(KERN_INFO -+ "v4l2loopback: number of initial devices is limited to: %d\n", -+ MAX_DEVICES); -+ } -+ -+ if (max_buffers > MAX_BUFFERS) { -+ max_buffers = MAX_BUFFERS; -+ printk(KERN_INFO -+ "v4l2loopback: number of buffers is limited to: %d\n", -+ MAX_BUFFERS); -+ } -+ -+ if (max_openers < 0) { -+ printk(KERN_INFO -+ "v4l2loopback: allowing %d openers rather than %d\n", -+ 2, max_openers); -+ max_openers = 2; -+ } -+ -+ if (max_width < min_width) { -+ max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH; -+ printk(KERN_INFO "v4l2loopback: using max_width %d\n", -+ max_width); -+ } -+ if (max_height < min_height) { -+ max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT; -+ printk(KERN_INFO "v4l2loopback: using max_height %d\n", -+ max_height); -+ } -+ -+ for (i = 0; i < devices; i++) { -+ struct v4l2_loopback_config cfg = { -+ // clang-format off -+ .output_nr = video_nr[i], -+#ifdef SPLIT_DEVICES -+ .capture_nr = video_nr[i], -+#endif -+ .min_width = min_width, -+ .min_height = min_height, -+ .max_width = max_width, -+ .max_height = max_height, -+ .announce_all_caps = (!exclusive_caps[i]), -+ .max_buffers = max_buffers, -+ .max_openers = max_openers, -+ .debug = debug, -+ // clang-format on -+ }; -+ cfg.card_label[0] = 0; -+ if (card_label[i]) -+ snprintf(cfg.card_label, sizeof(cfg.card_label), "%s", -+ card_label[i]); -+ err = v4l2_loopback_add(&cfg, 0); -+ if (err) { -+ free_devices(); -+ goto error; -+ } -+ } -+ -+ dprintk("module installed\n"); -+ -+ printk(KERN_INFO "v4l2loopback driver version %d.%d.%d%s loaded\n", -+ // clang-format off -+ (V4L2LOOPBACK_VERSION_CODE >> 16) & 0xff, -+ (V4L2LOOPBACK_VERSION_CODE >> 8) & 0xff, -+ (V4L2LOOPBACK_VERSION_CODE ) & 0xff, -+#ifdef SNAPSHOT_VERSION -+ " (" __stringify(SNAPSHOT_VERSION) ")" -+#else -+ "" -+#endif -+ ); -+ // clang-format on -+ -+ return 0; -+error: -+ misc_deregister(&v4l2loopback_misc); -+ return err; -+} -+ -+static void v4l2loopback_cleanup_module(void) -+{ -+ MARK(); 
-+ /* unregister the device -> it deletes /dev/video* */ -+ free_devices(); -+ /* and get rid of /dev/v4l2loopback */ -+ misc_deregister(&v4l2loopback_misc); -+ dprintk("module removed\n"); -+} -+ -+MODULE_ALIAS_MISCDEV(MISC_DYNAMIC_MINOR); -+ -+module_init(v4l2loopback_init_module); -+module_exit(v4l2loopback_cleanup_module); -diff --git a/drivers/media/v4l2-core/v4l2loopback.h b/drivers/media/v4l2-core/v4l2loopback.h -new file mode 100644 -index 000000000000..1bc7e6b747a4 ---- /dev/null -+++ b/drivers/media/v4l2-core/v4l2loopback.h -@@ -0,0 +1,98 @@ -+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ -+/* -+ * v4l2loopback.h -+ * -+ * Written by IOhannes m zmölnig, 7/1/20. -+ * -+ * Copyright 2020 by IOhannes m zmölnig. Redistribution of this file is -+ * permitted under the GNU General Public License. -+ */ -+#ifndef _V4L2LOOPBACK_H -+#define _V4L2LOOPBACK_H -+ -+#define V4L2LOOPBACK_VERSION_MAJOR 0 -+#define V4L2LOOPBACK_VERSION_MINOR 13 -+#define V4L2LOOPBACK_VERSION_BUGFIX 1 -+ -+/* /dev/v4l2loopback interface */ -+ -+struct v4l2_loopback_config { -+ /** -+ * the device-number (/dev/video) -+ * V4L2LOOPBACK_CTL_ADD: -+ * setting this to a value<0, will allocate an available one -+ * if nr>=0 and the device already exists, the ioctl will EEXIST -+ * if output_nr and capture_nr are the same, only a single device will be created -+ * NOTE: currently split-devices (where output_nr and capture_nr differ) -+ * are not implemented yet. -+ * until then, requesting different device-IDs will result in EINVAL. 
-+ * -+ * V4L2LOOPBACK_CTL_QUERY: -+ * either both output_nr and capture_nr must refer to the same loopback, -+ * or one (and only one) of them must be -1 -+ * -+ */ -+ int output_nr; -+ int unused; /*capture_nr;*/ -+ -+ /** -+ * a nice name for your device -+ * if (*card_label)==0, an automatic name is assigned -+ */ -+ char card_label[32]; -+ -+ /** -+ * allowed frame size -+ * if too low, default values are used -+ */ -+ unsigned int min_width; -+ unsigned int max_width; -+ unsigned int min_height; -+ unsigned int max_height; -+ -+ /** -+ * number of buffers to allocate for the queue -+ * if set to <=0, default values are used -+ */ -+ int max_buffers; -+ -+ /** -+ * how many consumers are allowed to open this device concurrently -+ * if set to <=0, default values are used -+ */ -+ int max_openers; -+ -+ /** -+ * set the debugging level for this device -+ */ -+ int debug; -+ -+ /** -+ * whether to announce OUTPUT/CAPTURE capabilities exclusively -+ * for this device or not -+ * (!exclusive_caps) -+ * NOTE: this is going to be removed once separate output/capture -+ * devices are implemented -+ */ -+ int announce_all_caps; -+}; -+ -+/* a pointer to a (struct v4l2_loopback_config) that has all values you wish to impose on the -+ * to-be-created device set. -+ * if the ptr is NULL, a new device is created with default values at the driver's discretion. 
-+ * -+ * returns the device_nr of the OUTPUT device (which can be used with V4L2LOOPBACK_CTL_QUERY, -+ * to get more information on the device) -+ */ -+#define V4L2LOOPBACK_CTL_ADD 0x4C80 -+ -+/* a pointer to a (struct v4l2_loopback_config) that has output_nr and/or capture_nr set -+ * (the two values must either refer to video-devices associated with the same loopback device -+ * or exactly one of them must be <0 -+ */ -+#define V4L2LOOPBACK_CTL_QUERY 0x4C82 -+ -+/* the device-number (either CAPTURE or OUTPUT) associated with the loopback-device */ -+#define V4L2LOOPBACK_CTL_REMOVE 0x4C81 -+ -+#endif /* _V4L2LOOPBACK_H */ -diff --git a/drivers/media/v4l2-core/v4l2loopback_formats.h b/drivers/media/v4l2-core/v4l2loopback_formats.h -new file mode 100644 -index 000000000000..d855a3796554 ---- /dev/null -+++ b/drivers/media/v4l2-core/v4l2loopback_formats.h -@@ -0,0 +1,445 @@ -+static const struct v4l2l_format formats[] = { -+#ifndef V4L2_PIX_FMT_VP9 -+#define V4L2_PIX_FMT_VP9 v4l2_fourcc('V', 'P', '9', '0') -+#endif -+#ifndef V4L2_PIX_FMT_HEVC -+#define V4L2_PIX_FMT_HEVC v4l2_fourcc('H', 'E', 'V', 'C') -+#endif -+ -+ /* here come the packed formats */ -+ { -+ .name = "32 bpp RGB, le", -+ .fourcc = V4L2_PIX_FMT_BGR32, -+ .depth = 32, -+ .flags = 0, -+ }, -+ { -+ .name = "32 bpp RGB, be", -+ .fourcc = V4L2_PIX_FMT_RGB32, -+ .depth = 32, -+ .flags = 0, -+ }, -+ { -+ .name = "24 bpp RGB, le", -+ .fourcc = V4L2_PIX_FMT_BGR24, -+ .depth = 24, -+ .flags = 0, -+ }, -+ { -+ .name = "24 bpp RGB, be", -+ .fourcc = V4L2_PIX_FMT_RGB24, -+ .depth = 24, -+ .flags = 0, -+ }, -+#ifdef V4L2_PIX_FMT_ABGR32 -+ { -+ .name = "32 bpp RGBA, le", -+ .fourcc = V4L2_PIX_FMT_ABGR32, -+ .depth = 32, -+ .flags = 0, -+ }, -+#endif -+#ifdef V4L2_PIX_FMT_RGBA32 -+ { -+ .name = "32 bpp RGBA", -+ .fourcc = V4L2_PIX_FMT_RGBA32, -+ .depth = 32, -+ .flags = 0, -+ }, -+#endif -+#ifdef V4L2_PIX_FMT_RGB332 -+ { -+ .name = "8 bpp RGB-3-3-2", -+ .fourcc = V4L2_PIX_FMT_RGB332, -+ .depth = 8, -+ .flags = 0, -+ 
}, -+#endif /* V4L2_PIX_FMT_RGB332 */ -+#ifdef V4L2_PIX_FMT_RGB444 -+ { -+ .name = "16 bpp RGB (xxxxrrrr ggggbbbb)", -+ .fourcc = V4L2_PIX_FMT_RGB444, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_RGB444 */ -+#ifdef V4L2_PIX_FMT_RGB555 -+ { -+ .name = "16 bpp RGB-5-5-5", -+ .fourcc = V4L2_PIX_FMT_RGB555, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_RGB555 */ -+#ifdef V4L2_PIX_FMT_RGB565 -+ { -+ .name = "16 bpp RGB-5-6-5", -+ .fourcc = V4L2_PIX_FMT_RGB565, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_RGB565 */ -+#ifdef V4L2_PIX_FMT_RGB555X -+ { -+ .name = "16 bpp RGB-5-5-5 BE", -+ .fourcc = V4L2_PIX_FMT_RGB555X, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_RGB555X */ -+#ifdef V4L2_PIX_FMT_RGB565X -+ { -+ .name = "16 bpp RGB-5-6-5 BE", -+ .fourcc = V4L2_PIX_FMT_RGB565X, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_RGB565X */ -+#ifdef V4L2_PIX_FMT_BGR666 -+ { -+ .name = "18 bpp BGR-6-6-6", -+ .fourcc = V4L2_PIX_FMT_BGR666, -+ .depth = 18, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_BGR666 */ -+ { -+ .name = "4:2:2, packed, YUYV", -+ .fourcc = V4L2_PIX_FMT_YUYV, -+ .depth = 16, -+ .flags = 0, -+ }, -+ { -+ .name = "4:2:2, packed, UYVY", -+ .fourcc = V4L2_PIX_FMT_UYVY, -+ .depth = 16, -+ .flags = 0, -+ }, -+#ifdef V4L2_PIX_FMT_YVYU -+ { -+ .name = "4:2:2, packed YVYU", -+ .fourcc = V4L2_PIX_FMT_YVYU, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif -+#ifdef V4L2_PIX_FMT_VYUY -+ { -+ .name = "4:2:2, packed VYUY", -+ .fourcc = V4L2_PIX_FMT_VYUY, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif -+ { -+ .name = "4:2:2, packed YYUV", -+ .fourcc = V4L2_PIX_FMT_YYUV, -+ .depth = 16, -+ .flags = 0, -+ }, -+ { -+ .name = "YUV-8-8-8-8", -+ .fourcc = V4L2_PIX_FMT_YUV32, -+ .depth = 32, -+ .flags = 0, -+ }, -+ { -+ .name = "8 bpp, Greyscale", -+ .fourcc = V4L2_PIX_FMT_GREY, -+ .depth = 8, -+ .flags = 0, -+ }, -+#ifdef V4L2_PIX_FMT_Y4 -+ { -+ .name = "4 bpp Greyscale", -+ .fourcc = V4L2_PIX_FMT_Y4, -+ 
.depth = 4, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_Y4 */ -+#ifdef V4L2_PIX_FMT_Y6 -+ { -+ .name = "6 bpp Greyscale", -+ .fourcc = V4L2_PIX_FMT_Y6, -+ .depth = 6, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_Y6 */ -+#ifdef V4L2_PIX_FMT_Y10 -+ { -+ .name = "10 bpp Greyscale", -+ .fourcc = V4L2_PIX_FMT_Y10, -+ .depth = 10, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_Y10 */ -+#ifdef V4L2_PIX_FMT_Y12 -+ { -+ .name = "12 bpp Greyscale", -+ .fourcc = V4L2_PIX_FMT_Y12, -+ .depth = 12, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_Y12 */ -+ { -+ .name = "16 bpp, Greyscale", -+ .fourcc = V4L2_PIX_FMT_Y16, -+ .depth = 16, -+ .flags = 0, -+ }, -+#ifdef V4L2_PIX_FMT_YUV444 -+ { -+ .name = "16 bpp xxxxyyyy uuuuvvvv", -+ .fourcc = V4L2_PIX_FMT_YUV444, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_YUV444 */ -+#ifdef V4L2_PIX_FMT_YUV555 -+ { -+ .name = "16 bpp YUV-5-5-5", -+ .fourcc = V4L2_PIX_FMT_YUV555, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_YUV555 */ -+#ifdef V4L2_PIX_FMT_YUV565 -+ { -+ .name = "16 bpp YUV-5-6-5", -+ .fourcc = V4L2_PIX_FMT_YUV565, -+ .depth = 16, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_YUV565 */ -+ -+/* bayer formats */ -+#ifdef V4L2_PIX_FMT_SRGGB8 -+ { -+ .name = "Bayer RGGB 8bit", -+ .fourcc = V4L2_PIX_FMT_SRGGB8, -+ .depth = 8, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_SRGGB8 */ -+#ifdef V4L2_PIX_FMT_SGRBG8 -+ { -+ .name = "Bayer GRBG 8bit", -+ .fourcc = V4L2_PIX_FMT_SGRBG8, -+ .depth = 8, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_SGRBG8 */ -+#ifdef V4L2_PIX_FMT_SGBRG8 -+ { -+ .name = "Bayer GBRG 8bit", -+ .fourcc = V4L2_PIX_FMT_SGBRG8, -+ .depth = 8, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_SGBRG8 */ -+#ifdef V4L2_PIX_FMT_SBGGR8 -+ { -+ .name = "Bayer BA81 8bit", -+ .fourcc = V4L2_PIX_FMT_SBGGR8, -+ .depth = 8, -+ .flags = 0, -+ }, -+#endif /* V4L2_PIX_FMT_SBGGR8 */ -+ -+ /* here come the planar formats */ -+ { -+ .name = "4:1:0, planar, Y-Cr-Cb", -+ .fourcc = V4L2_PIX_FMT_YVU410, -+ .depth 
= 9, -+ .flags = FORMAT_FLAGS_PLANAR, -+ }, -+ { -+ .name = "4:2:0, planar, Y-Cr-Cb", -+ .fourcc = V4L2_PIX_FMT_YVU420, -+ .depth = 12, -+ .flags = FORMAT_FLAGS_PLANAR, -+ }, -+ { -+ .name = "4:1:0, planar, Y-Cb-Cr", -+ .fourcc = V4L2_PIX_FMT_YUV410, -+ .depth = 9, -+ .flags = FORMAT_FLAGS_PLANAR, -+ }, -+ { -+ .name = "4:2:0, planar, Y-Cb-Cr", -+ .fourcc = V4L2_PIX_FMT_YUV420, -+ .depth = 12, -+ .flags = FORMAT_FLAGS_PLANAR, -+ }, -+#ifdef V4L2_PIX_FMT_YUV422P -+ { -+ .name = "16 bpp YVU422 planar", -+ .fourcc = V4L2_PIX_FMT_YUV422P, -+ .depth = 16, -+ .flags = FORMAT_FLAGS_PLANAR, -+ }, -+#endif /* V4L2_PIX_FMT_YUV422P */ -+#ifdef V4L2_PIX_FMT_YUV411P -+ { -+ .name = "16 bpp YVU411 planar", -+ .fourcc = V4L2_PIX_FMT_YUV411P, -+ .depth = 16, -+ .flags = FORMAT_FLAGS_PLANAR, -+ }, -+#endif /* V4L2_PIX_FMT_YUV411P */ -+#ifdef V4L2_PIX_FMT_Y41P -+ { -+ .name = "12 bpp YUV 4:1:1", -+ .fourcc = V4L2_PIX_FMT_Y41P, -+ .depth = 12, -+ .flags = FORMAT_FLAGS_PLANAR, -+ }, -+#endif /* V4L2_PIX_FMT_Y41P */ -+#ifdef V4L2_PIX_FMT_NV12 -+ { -+ .name = "12 bpp Y/CbCr 4:2:0 ", -+ .fourcc = V4L2_PIX_FMT_NV12, -+ .depth = 12, -+ .flags = FORMAT_FLAGS_PLANAR, -+ }, -+#endif /* V4L2_PIX_FMT_NV12 */ -+ -+/* here come the compressed formats */ -+ -+#ifdef V4L2_PIX_FMT_MJPEG -+ { -+ .name = "Motion-JPEG", -+ .fourcc = V4L2_PIX_FMT_MJPEG, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_MJPEG */ -+#ifdef V4L2_PIX_FMT_JPEG -+ { -+ .name = "JFIF JPEG", -+ .fourcc = V4L2_PIX_FMT_JPEG, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_JPEG */ -+#ifdef V4L2_PIX_FMT_DV -+ { -+ .name = "DV1394", -+ .fourcc = V4L2_PIX_FMT_DV, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_DV */ -+#ifdef V4L2_PIX_FMT_MPEG -+ { -+ .name = "MPEG-1/2/4 Multiplexed", -+ .fourcc = V4L2_PIX_FMT_MPEG, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_MPEG */ -+#ifdef 
V4L2_PIX_FMT_H264 -+ { -+ .name = "H264 with start codes", -+ .fourcc = V4L2_PIX_FMT_H264, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_H264 */ -+#ifdef V4L2_PIX_FMT_H264_NO_SC -+ { -+ .name = "H264 without start codes", -+ .fourcc = V4L2_PIX_FMT_H264_NO_SC, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_H264_NO_SC */ -+#ifdef V4L2_PIX_FMT_H264_MVC -+ { -+ .name = "H264 MVC", -+ .fourcc = V4L2_PIX_FMT_H264_MVC, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_H264_MVC */ -+#ifdef V4L2_PIX_FMT_H263 -+ { -+ .name = "H263", -+ .fourcc = V4L2_PIX_FMT_H263, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_H263 */ -+#ifdef V4L2_PIX_FMT_MPEG1 -+ { -+ .name = "MPEG-1 ES", -+ .fourcc = V4L2_PIX_FMT_MPEG1, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_MPEG1 */ -+#ifdef V4L2_PIX_FMT_MPEG2 -+ { -+ .name = "MPEG-2 ES", -+ .fourcc = V4L2_PIX_FMT_MPEG2, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_MPEG2 */ -+#ifdef V4L2_PIX_FMT_MPEG4 -+ { -+ .name = "MPEG-4 part 2 ES", -+ .fourcc = V4L2_PIX_FMT_MPEG4, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_MPEG4 */ -+#ifdef V4L2_PIX_FMT_XVID -+ { -+ .name = "Xvid", -+ .fourcc = V4L2_PIX_FMT_XVID, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_XVID */ -+#ifdef V4L2_PIX_FMT_VC1_ANNEX_G -+ { -+ .name = "SMPTE 421M Annex G compliant stream", -+ .fourcc = V4L2_PIX_FMT_VC1_ANNEX_G, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_VC1_ANNEX_G */ -+#ifdef V4L2_PIX_FMT_VC1_ANNEX_L -+ { -+ .name = "SMPTE 421M Annex L compliant stream", -+ .fourcc = V4L2_PIX_FMT_VC1_ANNEX_L, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_VC1_ANNEX_L */ -+#ifdef V4L2_PIX_FMT_VP8 -+ { -+ .name = "VP8", -+ 
.fourcc = V4L2_PIX_FMT_VP8, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_VP8 */ -+#ifdef V4L2_PIX_FMT_VP9 -+ { -+ .name = "VP9", -+ .fourcc = V4L2_PIX_FMT_VP9, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_VP9 */ -+#ifdef V4L2_PIX_FMT_HEVC -+ { -+ .name = "HEVC", -+ .fourcc = V4L2_PIX_FMT_HEVC, -+ .depth = 32, -+ .flags = FORMAT_FLAGS_COMPRESSED, -+ }, -+#endif /* V4L2_PIX_FMT_HEVC */ -+}; -diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile -index f2b19e6174af..4fef4b174321 100644 ---- a/drivers/pci/controller/Makefile -+++ b/drivers/pci/controller/Makefile -@@ -1,4 +1,10 @@ - # SPDX-License-Identifier: GPL-2.0 -+ifdef CONFIG_X86_64 -+ifdef CONFIG_SATA_AHCI -+obj-y += intel-nvme-remap.o -+endif -+endif -+ - obj-$(CONFIG_PCIE_CADENCE) += cadence/ - obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o - obj-$(CONFIG_PCI_IXP4XX) += pci-ixp4xx.o -diff --git a/drivers/pci/controller/intel-nvme-remap.c b/drivers/pci/controller/intel-nvme-remap.c -new file mode 100644 -index 000000000000..e105e6f5cc91 ---- /dev/null -+++ b/drivers/pci/controller/intel-nvme-remap.c -@@ -0,0 +1,462 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Intel remapped NVMe device support. -+ * -+ * Copyright (c) 2019 Endless Mobile, Inc. -+ * Author: Daniel Drake -+ * -+ * Some products ship by default with the SATA controller in "RAID" or -+ * "Intel RST Premium With Intel Optane System Acceleration" mode. Under this -+ * mode, which we refer to as "remapped NVMe" mode, any installed NVMe -+ * devices disappear from the PCI bus, and instead their I/O memory becomes -+ * available within the AHCI device BARs. -+ * -+ * This scheme is understood to be a way of avoiding usage of the standard -+ * Windows NVMe driver under that OS, instead mandating usage of Intel's -+ * driver instead, which has better power management, and presumably offers -+ * some RAID/disk-caching solutions too. 
-+ * -+ * Here in this driver, we support the remapped NVMe mode by claiming the -+ * AHCI device and creating a fake PCIe root port. On the new bus, the -+ * original AHCI device is exposed with only minor tweaks. Then, fake PCI -+ * devices corresponding to the remapped NVMe devices are created. The usual -+ * ahci and nvme drivers are then expected to bind to these devices and -+ * operate as normal. -+ * -+ * The PCI configuration space for the NVMe devices is completely -+ * unavailable, so we fake a minimal one and hope for the best. -+ * -+ * Interrupts are shared between the AHCI and NVMe devices. For simplicity, -+ * we only support the legacy interrupt here, although MSI support -+ * could potentially be added later. -+ */ -+ -+#define MODULE_NAME "intel-nvme-remap" -+ -+#include -+#include -+#include -+#include -+#include -+ -+#define AHCI_PCI_BAR_STANDARD 5 -+ -+struct nvme_remap_dev { -+ struct pci_dev *dev; /* AHCI device */ -+ struct pci_bus *bus; /* our fake PCI bus */ -+ struct pci_sysdata sysdata; -+ int irq_base; /* our fake interrupts */ -+ -+ /* -+ * When we detect an all-ones write to a BAR register, this flag -+ * is set, so that we return the BAR size on the next read (a -+ * standard PCI behaviour). -+ * This includes the assumption that an all-ones BAR write is -+ * immediately followed by a read of the same register. -+ */ -+ bool bar_sizing; -+ -+ /* -+ * Resources copied from the AHCI device, to be regarded as -+ * resources on our fake bus. -+ */ -+ struct resource ahci_resources[PCI_NUM_RESOURCES]; -+ -+ /* Resources corresponding to the NVMe devices. */ -+ struct resource remapped_dev_mem[AHCI_MAX_REMAP]; -+ -+ /* Number of remapped NVMe devices found. 
*/ -+ int num_remapped_devices; -+}; -+ -+static inline struct nvme_remap_dev *nrdev_from_bus(struct pci_bus *bus) -+{ -+ return container_of(bus->sysdata, struct nvme_remap_dev, sysdata); -+} -+ -+ -+/******** PCI configuration space **********/ -+ -+/* -+ * Helper macros for tweaking returned contents of PCI configuration space. -+ * -+ * value contains len bytes of data read from reg. -+ * If fixup_reg is included in that range, fix up the contents of that -+ * register to fixed_value. -+ */ -+#define NR_FIX8(fixup_reg, fixed_value) do { \ -+ if (reg <= fixup_reg && fixup_reg < reg + len) \ -+ ((u8 *) value)[fixup_reg - reg] = (u8) (fixed_value); \ -+ } while (0) -+ -+#define NR_FIX16(fixup_reg, fixed_value) do { \ -+ NR_FIX8(fixup_reg, fixed_value); \ -+ NR_FIX8(fixup_reg + 1, fixed_value >> 8); \ -+ } while (0) -+ -+#define NR_FIX24(fixup_reg, fixed_value) do { \ -+ NR_FIX8(fixup_reg, fixed_value); \ -+ NR_FIX8(fixup_reg + 1, fixed_value >> 8); \ -+ NR_FIX8(fixup_reg + 2, fixed_value >> 16); \ -+ } while (0) -+ -+#define NR_FIX32(fixup_reg, fixed_value) do { \ -+ NR_FIX16(fixup_reg, (u16) fixed_value); \ -+ NR_FIX16(fixup_reg + 2, fixed_value >> 16); \ -+ } while (0) -+ -+/* -+ * Read PCI config space of the slot 0 (AHCI) device. -+ * We pass through the read request to the underlying device, but -+ * tweak the results in some cases. -+ */ -+static int nvme_remap_pci_read_slot0(struct pci_bus *bus, int reg, -+ int len, u32 *value) -+{ -+ struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); -+ struct pci_bus *ahci_dev_bus = nrdev->dev->bus; -+ int ret; -+ -+ ret = ahci_dev_bus->ops->read(ahci_dev_bus, nrdev->dev->devfn, -+ reg, len, value); -+ if (ret) -+ return ret; -+ -+ /* -+ * Adjust the device class, to prevent this driver from attempting to -+ * additionally probe the device we're simulating here. 
-+ */ -+ NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_SATA_AHCI); -+ -+ /* -+ * Unset interrupt pin, otherwise ACPI tries to find routing -+ * info for our virtual IRQ, fails, and complains. -+ */ -+ NR_FIX8(PCI_INTERRUPT_PIN, 0); -+ -+ /* -+ * Truncate the AHCI BAR to not include the region that covers the -+ * hidden devices. This will cause the ahci driver to successfully -+ * probe th new device (instead of handing it over to this driver). -+ */ -+ if (nrdev->bar_sizing) { -+ NR_FIX32(PCI_BASE_ADDRESS_5, ~(SZ_16K - 1)); -+ nrdev->bar_sizing = false; -+ } -+ -+ return PCIBIOS_SUCCESSFUL; -+} -+ -+/* -+ * Read PCI config space of a remapped device. -+ * Since the original PCI config space is inaccessible, we provide a minimal, -+ * fake config space instead. -+ */ -+static int nvme_remap_pci_read_remapped(struct pci_bus *bus, unsigned int port, -+ int reg, int len, u32 *value) -+{ -+ struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); -+ struct resource *remapped_mem; -+ -+ if (port > nrdev->num_remapped_devices) -+ return PCIBIOS_DEVICE_NOT_FOUND; -+ -+ *value = 0; -+ remapped_mem = &nrdev->remapped_dev_mem[port - 1]; -+ -+ /* Set a Vendor ID, otherwise Linux assumes no device is present */ -+ NR_FIX16(PCI_VENDOR_ID, PCI_VENDOR_ID_INTEL); -+ -+ /* Always appear on & bus mastering */ -+ NR_FIX16(PCI_COMMAND, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); -+ -+ /* Set class so that nvme driver probes us */ -+ NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_EXPRESS); -+ -+ if (nrdev->bar_sizing) { -+ NR_FIX32(PCI_BASE_ADDRESS_0, -+ ~(resource_size(remapped_mem) - 1)); -+ nrdev->bar_sizing = false; -+ } else { -+ resource_size_t mem_start = remapped_mem->start; -+ -+ mem_start |= PCI_BASE_ADDRESS_MEM_TYPE_64; -+ NR_FIX32(PCI_BASE_ADDRESS_0, mem_start); -+ mem_start >>= 32; -+ NR_FIX32(PCI_BASE_ADDRESS_1, mem_start); -+ } -+ -+ return PCIBIOS_SUCCESSFUL; -+} -+ -+/* Read PCI configuration space. 
*/ -+static int nvme_remap_pci_read(struct pci_bus *bus, unsigned int devfn, -+ int reg, int len, u32 *value) -+{ -+ if (PCI_SLOT(devfn) == 0) -+ return nvme_remap_pci_read_slot0(bus, reg, len, value); -+ else -+ return nvme_remap_pci_read_remapped(bus, PCI_SLOT(devfn), -+ reg, len, value); -+} -+ -+/* -+ * Write PCI config space of the slot 0 (AHCI) device. -+ * Apart from the special case of BAR sizing, we disable all writes. -+ * Otherwise, the ahci driver could make changes (e.g. unset PCI bus master) -+ * that would affect the operation of the NVMe devices. -+ */ -+static int nvme_remap_pci_write_slot0(struct pci_bus *bus, int reg, -+ int len, u32 value) -+{ -+ struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); -+ struct pci_bus *ahci_dev_bus = nrdev->dev->bus; -+ -+ if (reg >= PCI_BASE_ADDRESS_0 && reg <= PCI_BASE_ADDRESS_5) { -+ /* -+ * Writing all-ones to a BAR means that the size of the -+ * memory region is being checked. Flag this so that we can -+ * reply with an appropriate size on the next read. -+ */ -+ if (value == ~0) -+ nrdev->bar_sizing = true; -+ -+ return ahci_dev_bus->ops->write(ahci_dev_bus, -+ nrdev->dev->devfn, -+ reg, len, value); -+ } -+ -+ return PCIBIOS_SET_FAILED; -+} -+ -+/* -+ * Write PCI config space of a remapped device. -+ * Since the original PCI config space is inaccessible, we reject all -+ * writes, except for the special case of BAR probing. -+ */ -+static int nvme_remap_pci_write_remapped(struct pci_bus *bus, -+ unsigned int port, -+ int reg, int len, u32 value) -+{ -+ struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); -+ -+ if (port > nrdev->num_remapped_devices) -+ return PCIBIOS_DEVICE_NOT_FOUND; -+ -+ /* -+ * Writing all-ones to a BAR means that the size of the memory -+ * region is being checked. Flag this so that we can reply with -+ * an appropriate size on the next read. 
-+ */ -+ if (value == ~0 && reg >= PCI_BASE_ADDRESS_0 -+ && reg <= PCI_BASE_ADDRESS_5) { -+ nrdev->bar_sizing = true; -+ return PCIBIOS_SUCCESSFUL; -+ } -+ -+ return PCIBIOS_SET_FAILED; -+} -+ -+/* Write PCI configuration space. */ -+static int nvme_remap_pci_write(struct pci_bus *bus, unsigned int devfn, -+ int reg, int len, u32 value) -+{ -+ if (PCI_SLOT(devfn) == 0) -+ return nvme_remap_pci_write_slot0(bus, reg, len, value); -+ else -+ return nvme_remap_pci_write_remapped(bus, PCI_SLOT(devfn), -+ reg, len, value); -+} -+ -+static struct pci_ops nvme_remap_pci_ops = { -+ .read = nvme_remap_pci_read, -+ .write = nvme_remap_pci_write, -+}; -+ -+ -+/******** Initialization & exit **********/ -+ -+/* -+ * Find a PCI domain ID to use for our fake bus. -+ * Start at 0x10000 to not clash with ACPI _SEG domains (16 bits). -+ */ -+static int find_free_domain(void) -+{ -+ int domain = 0xffff; -+ struct pci_bus *bus = NULL; -+ -+ while ((bus = pci_find_next_bus(bus)) != NULL) -+ domain = max_t(int, domain, pci_domain_nr(bus)); -+ -+ return domain + 1; -+} -+ -+static int find_remapped_devices(struct nvme_remap_dev *nrdev, -+ struct list_head *resources) -+{ -+ void __iomem *mmio; -+ int i, count = 0; -+ u32 cap; -+ -+ mmio = pcim_iomap(nrdev->dev, AHCI_PCI_BAR_STANDARD, -+ pci_resource_len(nrdev->dev, -+ AHCI_PCI_BAR_STANDARD)); -+ if (!mmio) -+ return -ENODEV; -+ -+ /* Check if this device might have remapped nvme devices. 
*/ -+ if (pci_resource_len(nrdev->dev, AHCI_PCI_BAR_STANDARD) < SZ_512K || -+ !(readl(mmio + AHCI_VSCAP) & 1)) -+ return -ENODEV; -+ -+ cap = readq(mmio + AHCI_REMAP_CAP); -+ for (i = AHCI_MAX_REMAP-1; i >= 0; i--) { -+ struct resource *remapped_mem; -+ -+ if ((cap & (1 << i)) == 0) -+ continue; -+ if (readl(mmio + ahci_remap_dcc(i)) -+ != PCI_CLASS_STORAGE_EXPRESS) -+ continue; -+ -+ /* We've found a remapped device */ -+ remapped_mem = &nrdev->remapped_dev_mem[count++]; -+ remapped_mem->start = -+ pci_resource_start(nrdev->dev, AHCI_PCI_BAR_STANDARD) -+ + ahci_remap_base(i); -+ remapped_mem->end = remapped_mem->start -+ + AHCI_REMAP_N_SIZE - 1; -+ remapped_mem->flags = IORESOURCE_MEM | IORESOURCE_PCI_FIXED; -+ pci_add_resource(resources, remapped_mem); -+ } -+ -+ pcim_iounmap(nrdev->dev, mmio); -+ -+ if (count == 0) -+ return -ENODEV; -+ -+ nrdev->num_remapped_devices = count; -+ dev_info(&nrdev->dev->dev, "Found %d remapped NVMe devices\n", -+ nrdev->num_remapped_devices); -+ return 0; -+} -+ -+static void nvme_remap_remove_root_bus(void *data) -+{ -+ struct pci_bus *bus = data; -+ -+ pci_stop_root_bus(bus); -+ pci_remove_root_bus(bus); -+} -+ -+static int nvme_remap_probe(struct pci_dev *dev, -+ const struct pci_device_id *id) -+{ -+ struct nvme_remap_dev *nrdev; -+ LIST_HEAD(resources); -+ int i; -+ int ret; -+ struct pci_dev *child; -+ -+ nrdev = devm_kzalloc(&dev->dev, sizeof(*nrdev), GFP_KERNEL); -+ nrdev->sysdata.domain = find_free_domain(); -+ nrdev->sysdata.nvme_remap_dev = dev; -+ nrdev->dev = dev; -+ pci_set_drvdata(dev, nrdev); -+ -+ ret = pcim_enable_device(dev); -+ if (ret < 0) -+ return ret; -+ -+ pci_set_master(dev); -+ -+ ret = find_remapped_devices(nrdev, &resources); -+ if (ret) -+ return ret; -+ -+ /* Add resources from the original AHCI device */ -+ for (i = 0; i < PCI_NUM_RESOURCES; i++) { -+ struct resource *res = &dev->resource[i]; -+ -+ if (res->start) { -+ struct resource *nr_res = &nrdev->ahci_resources[i]; -+ -+ nr_res->start = 
res->start; -+ nr_res->end = res->end; -+ nr_res->flags = res->flags; -+ pci_add_resource(&resources, nr_res); -+ } -+ } -+ -+ /* Create virtual interrupts */ -+ nrdev->irq_base = devm_irq_alloc_descs(&dev->dev, -1, 0, -+ nrdev->num_remapped_devices + 1, -+ 0); -+ if (nrdev->irq_base < 0) -+ return nrdev->irq_base; -+ -+ /* Create and populate PCI bus */ -+ nrdev->bus = pci_create_root_bus(&dev->dev, 0, &nvme_remap_pci_ops, -+ &nrdev->sysdata, &resources); -+ if (!nrdev->bus) -+ return -ENODEV; -+ -+ if (devm_add_action_or_reset(&dev->dev, nvme_remap_remove_root_bus, -+ nrdev->bus)) -+ return -ENOMEM; -+ -+ /* We don't support sharing MSI interrupts between these devices */ -+ nrdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_MSI; -+ -+ pci_scan_child_bus(nrdev->bus); -+ -+ list_for_each_entry(child, &nrdev->bus->devices, bus_list) { -+ /* -+ * Prevent PCI core from trying to move memory BARs around. -+ * The hidden NVMe devices are at fixed locations. -+ */ -+ for (i = 0; i < PCI_NUM_RESOURCES; i++) { -+ struct resource *res = &child->resource[i]; -+ -+ if (res->flags & IORESOURCE_MEM) -+ res->flags |= IORESOURCE_PCI_FIXED; -+ } -+ -+ /* Share the legacy IRQ between all devices */ -+ child->irq = dev->irq; -+ } -+ -+ pci_assign_unassigned_bus_resources(nrdev->bus); -+ pci_bus_add_devices(nrdev->bus); -+ -+ return 0; -+} -+ -+static const struct pci_device_id nvme_remap_ids[] = { -+ /* -+ * Match all Intel RAID controllers. -+ * -+ * There's overlap here with the set of devices detected by the ahci -+ * driver, but ahci will only successfully probe when there -+ * *aren't* any remapped NVMe devices, and this driver will only -+ * successfully probe when there *are* remapped NVMe devices that -+ * need handling. 
-+ */ -+ { -+ PCI_VDEVICE(INTEL, PCI_ANY_ID), -+ .class = PCI_CLASS_STORAGE_RAID << 8, -+ .class_mask = 0xffffff00, -+ }, -+ {0,} -+}; -+MODULE_DEVICE_TABLE(pci, nvme_remap_ids); -+ -+static struct pci_driver nvme_remap_drv = { -+ .name = MODULE_NAME, -+ .id_table = nvme_remap_ids, -+ .probe = nvme_remap_probe, -+}; -+module_pci_driver(nvme_remap_drv); -+ -+MODULE_AUTHOR("Daniel Drake "); -+MODULE_LICENSE("GPL v2"); -diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index 568410e64ce6..192d0557fb05 100644 ---- a/drivers/pci/quirks.c -+++ b/drivers/pci/quirks.c -@@ -3732,6 +3732,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev) - dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET; - } - -+static bool acs_on_downstream; -+static bool acs_on_multifunction; -+ -+#define NUM_ACS_IDS 16 -+struct acs_on_id { -+ unsigned short vendor; -+ unsigned short device; -+}; -+static struct acs_on_id acs_on_ids[NUM_ACS_IDS]; -+static u8 max_acs_id; -+ -+static __init int pcie_acs_override_setup(char *p) -+{ -+ if (!p) -+ return -EINVAL; -+ -+ while (*p) { -+ if (!strncmp(p, "downstream", 10)) -+ acs_on_downstream = true; -+ if (!strncmp(p, "multifunction", 13)) -+ acs_on_multifunction = true; -+ if (!strncmp(p, "id:", 3)) { -+ char opt[5]; -+ int ret; -+ long val; -+ -+ if (max_acs_id >= NUM_ACS_IDS - 1) { -+ pr_warn("Out of PCIe ACS override slots (%d)\n", -+ NUM_ACS_IDS); -+ goto next; -+ } -+ -+ p += 3; -+ snprintf(opt, 5, "%s", p); -+ ret = kstrtol(opt, 16, &val); -+ if (ret) { -+ pr_warn("PCIe ACS ID parse error %d\n", ret); -+ goto next; -+ } -+ acs_on_ids[max_acs_id].vendor = val; -+ -+ p += strcspn(p, ":"); -+ if (*p != ':') { -+ pr_warn("PCIe ACS invalid ID\n"); -+ goto next; -+ } -+ -+ p++; -+ snprintf(opt, 5, "%s", p); -+ ret = kstrtol(opt, 16, &val); -+ if (ret) { -+ pr_warn("PCIe ACS ID parse error %d\n", ret); -+ goto next; -+ } -+ acs_on_ids[max_acs_id].device = val; -+ max_acs_id++; -+ } -+next: -+ p += strcspn(p, ","); -+ if (*p == ',') -+ p++; -+ } -+ -+ 
if (acs_on_downstream || acs_on_multifunction || max_acs_id) -+ pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n"); -+ -+ return 0; -+} -+early_param("pcie_acs_override", pcie_acs_override_setup); -+ -+static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags) -+{ -+ int i; -+ -+ /* Never override ACS for legacy devices or devices with ACS caps */ -+ if (!pci_is_pcie(dev) || -+ pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS)) -+ return -ENOTTY; -+ -+ for (i = 0; i < max_acs_id; i++) -+ if (acs_on_ids[i].vendor == dev->vendor && -+ acs_on_ids[i].device == dev->device) -+ return 1; -+ -+ switch (pci_pcie_type(dev)) { -+ case PCI_EXP_TYPE_DOWNSTREAM: -+ case PCI_EXP_TYPE_ROOT_PORT: -+ if (acs_on_downstream) -+ return 1; -+ break; -+ case PCI_EXP_TYPE_ENDPOINT: -+ case PCI_EXP_TYPE_UPSTREAM: -+ case PCI_EXP_TYPE_LEG_END: -+ case PCI_EXP_TYPE_RC_END: -+ if (acs_on_multifunction && dev->multifunction) -+ return 1; -+ } -+ -+ return -ENOTTY; -+} - /* - * Some NVIDIA GPU devices do not work with bus reset, SBR needs to be - * prevented for those affected devices. 
-@@ -5143,6 +5243,7 @@ static const struct pci_dev_acs_enabled { - { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, - /* Wangxun nics */ - { PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs }, -+ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides }, - { 0 } - }; - -diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h -index a0a026d2d244..8bece21a8998 100644 ---- a/include/linux/pagemap.h -+++ b/include/linux/pagemap.h -@@ -1281,7 +1281,7 @@ struct readahead_control { - ._index = i, \ - } - --#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) -+#define VM_READAHEAD_PAGES (SZ_8M / PAGE_SIZE) - - void page_cache_ra_unbounded(struct readahead_control *, - unsigned long nr_to_read, unsigned long lookahead_count); -diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h -index 6030a8235617..60b7fe5fa74a 100644 ---- a/include/linux/user_namespace.h -+++ b/include/linux/user_namespace.h -@@ -156,6 +156,8 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns, - - #ifdef CONFIG_USER_NS - -+extern int unprivileged_userns_clone; -+ - static inline struct user_namespace *get_user_ns(struct user_namespace *ns) - { - if (ns) -@@ -189,6 +191,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns); - struct ns_common *ns_get_owner(struct ns_common *ns); - #else - -+#define unprivileged_userns_clone 0 -+ - static inline struct user_namespace *get_user_ns(struct user_namespace *ns) - { - return &init_user_ns; -diff --git a/init/Kconfig b/init/Kconfig -index febdea2afc3b..3ba6142f2f42 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -132,6 +132,10 @@ config THREAD_INFO_IN_TASK - - menu "General setup" - -+config CACHY -+ bool "Some kernel tweaks by CachyOS" -+ default y -+ - config BROKEN - bool - -@@ -1251,6 +1255,22 @@ config USER_NS - - If unsure, say N. 
- -+config USER_NS_UNPRIVILEGED -+ bool "Allow unprivileged users to create namespaces" -+ default y -+ depends on USER_NS -+ help -+ When disabled, unprivileged users will not be able to create -+ new namespaces. Allowing users to create their own namespaces -+ has been part of several recent local privilege escalation -+ exploits, so if you need user namespaces but are -+ paranoid^Wsecurity-conscious you want to disable this. -+ -+ This setting can be overridden at runtime via the -+ kernel.unprivileged_userns_clone sysctl. -+ -+ If unsure, say Y. -+ - config PID_NS - bool "PID Namespaces" - default y -@@ -1393,6 +1413,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE - with the "-O2" compiler flag for best performance and most - helpful compile-time warnings. - -+config CC_OPTIMIZE_FOR_PERFORMANCE_O3 -+ bool "Optimize more for performance (-O3)" -+ help -+ Choosing this option will pass "-O3" to your compiler to optimize -+ the kernel yet more for performance. -+ - config CC_OPTIMIZE_FOR_SIZE - bool "Optimize for size (-Os)" - help -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 38ef6d06888e..0f78364efd4f 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -40,6 +40,27 @@ choice - on SMP and NUMA systems and exactly dividing by both PAL and - NTSC frame rates for video and multimedia work. - -+ config HZ_500 -+ bool "500 HZ" -+ help -+ 500 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with good smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. -+ -+ config HZ_600 -+ bool "600 HZ" -+ help -+ 600 Hz is a balanced timer frequency. Provides fast interactivity -+ on desktops with good smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. -+ -+ config HZ_750 -+ bool "750 HZ" -+ help -+ 750 Hz is a balanced timer frequency. 
Provides fast interactivity -+ on desktops with good smoothness without increasing CPU power -+ consumption and sacrificing the battery life on laptops. -+ - config HZ_1000 - bool "1000 HZ" - help -@@ -53,6 +74,9 @@ config HZ - default 100 if HZ_100 - default 250 if HZ_250 - default 300 if HZ_300 -+ default 500 if HZ_500 -+ default 600 if HZ_600 -+ default 750 if HZ_750 - default 1000 if HZ_1000 - - config SCHED_HRTICK -diff --git a/kernel/fork.c b/kernel/fork.c -index 99076dbe27d8..18750b83c564 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -104,6 +104,10 @@ - #include - #include - -+#ifdef CONFIG_USER_NS -+#include -+#endif -+ - #include - #include - #include -@@ -2154,6 +2158,10 @@ __latent_entropy struct task_struct *copy_process( - if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) - return ERR_PTR(-EINVAL); - -+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) -+ if (!capable(CAP_SYS_ADMIN)) -+ return ERR_PTR(-EPERM); -+ - /* - * Thread groups must share signals as well, and detached threads - * can only be started up within the thread group. 
-@@ -3301,6 +3309,12 @@ int ksys_unshare(unsigned long unshare_flags) - if (unshare_flags & CLONE_NEWNS) - unshare_flags |= CLONE_FS; - -+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { -+ err = -EPERM; -+ if (!capable(CAP_SYS_ADMIN)) -+ goto bad_unshare_out; -+ } -+ - err = check_unshare_flags(unshare_flags); - if (err) - goto bad_unshare_out; -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 24dda708b699..c2bb8eb1d6ba 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -73,10 +73,19 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; - * - * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_CACHY -+unsigned int sysctl_sched_base_slice = 350000ULL; -+static unsigned int normalized_sysctl_sched_base_slice = 350000ULL; -+#else - unsigned int sysctl_sched_base_slice = 750000ULL; - static unsigned int normalized_sysctl_sched_base_slice = 750000ULL; -+#endif - -+#ifdef CONFIG_CACHY -+const_debug unsigned int sysctl_sched_migration_cost = 300000UL; -+#else - const_debug unsigned int sysctl_sched_migration_cost = 500000UL; -+#endif - - static int __init setup_sched_thermal_decay_shift(char *str) - { -@@ -121,8 +130,12 @@ int __weak arch_asym_cpu_priority(int cpu) - * - * (default: 5 msec, units: microseconds) - */ -+#ifdef CONFIG_CACHY -+static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; -+#else - static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; - #endif -+#endif - - #ifdef CONFIG_NUMA_BALANCING - /* Restrict the NUMA promotion throughput (MB/s) for each target node. 
*/ -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index ef20c61004eb..10c1caff5e06 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2544,7 +2544,7 @@ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags); - - extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags); - --#ifdef CONFIG_PREEMPT_RT -+#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_CACHY) - #define SCHED_NR_MIGRATE_BREAK 8 - #else - #define SCHED_NR_MIGRATE_BREAK 32 -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index e0b917328cf9..e70ae9c11dea 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -80,6 +80,9 @@ - #ifdef CONFIG_RT_MUTEXES - #include - #endif -+#ifdef CONFIG_USER_NS -+#include -+#endif - - /* shared constants to be used in various sysctls */ - const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 }; -@@ -1623,6 +1626,15 @@ static struct ctl_table kern_table[] = { - .mode = 0644, - .proc_handler = proc_dointvec, - }, -+#ifdef CONFIG_USER_NS -+ { -+ .procname = "unprivileged_userns_clone", -+ .data = &unprivileged_userns_clone, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec, -+ }, -+#endif - #ifdef CONFIG_PROC_SYSCTL - { - .procname = "tainted", -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c -index 0b0b95418b16..c4b835b91fc0 100644 ---- a/kernel/user_namespace.c -+++ b/kernel/user_namespace.c -@@ -22,6 +22,13 @@ - #include - #include - -+/* sysctl */ -+#ifdef CONFIG_USER_NS_UNPRIVILEGED -+int unprivileged_userns_clone = 1; -+#else -+int unprivileged_userns_clone; -+#endif -+ - static struct kmem_cache *user_ns_cachep __ro_after_init; - static DEFINE_MUTEX(userns_state_mutex); - -diff --git a/mm/Kconfig b/mm/Kconfig -index b4cb45255a54..8635b3b24739 100644 ---- a/mm/Kconfig -+++ b/mm/Kconfig -@@ -613,7 +613,7 @@ config COMPACTION - config COMPACT_UNEVICTABLE_DEFAULT - int - depends on COMPACTION -- default 0 if PREEMPT_RT -+ default 0 if 
PREEMPT_RT || CACHY - default 1 - - # -diff --git a/mm/compaction.c b/mm/compaction.c -index 739b1bf3d637..3a4269c02fb2 100644 ---- a/mm/compaction.c -+++ b/mm/compaction.c -@@ -1950,7 +1950,11 @@ static int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNE - * aggressively the kernel should compact memory in the - * background. It takes values in the range [0, 100]. - */ -+#ifdef CONFIG_CACHY -+static unsigned int __read_mostly sysctl_compaction_proactiveness; -+#else - static unsigned int __read_mostly sysctl_compaction_proactiveness = 20; -+#endif - static int sysctl_extfrag_threshold = 500; - static int __read_mostly sysctl_compact_memory; - -diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index 2120f7478e55..765ea6197e1e 100644 ---- a/mm/huge_memory.c -+++ b/mm/huge_memory.c -@@ -63,7 +63,11 @@ unsigned long transparent_hugepage_flags __read_mostly = - #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE - (1<> (20 - PAGE_SHIFT); - - /* Use a smaller cluster for small-memory machines */ -@@ -1122,4 +1126,5 @@ void __init swap_setup(void) - * Right now other parts of the system means that we - * _really_ don't want to cluster much more - */ -+#endif - } -diff --git a/mm/vmpressure.c b/mm/vmpressure.c -index bd5183dfd879..3a410f53a07c 100644 ---- a/mm/vmpressure.c -+++ b/mm/vmpressure.c -@@ -43,7 +43,11 @@ static const unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16; - * essence, they are percents: the higher the value, the more number - * unsuccessful reclaims there were. - */ -+#ifdef CONFIG_CACHY -+static const unsigned int vmpressure_level_med = 65; -+#else - static const unsigned int vmpressure_level_med = 60; -+#endif - static const unsigned int vmpressure_level_critical = 95; - - /* -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 2e34de9cd0d4..be9e40acc93b 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -191,7 +191,11 @@ struct scan_control { - /* - * From 0 .. 200. Higher means more swappy. 
- */ -+#ifdef CONFIG_CACHY -+int vm_swappiness = 20; -+#else - int vm_swappiness = 60; -+#endif - - #ifdef CONFIG_MEMCG - -@@ -3949,7 +3953,11 @@ static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc - } - - /* to protect the working set of the last N jiffies */ -+#ifdef CONFIG_CACHY -+static unsigned long lru_gen_min_ttl __read_mostly = 1000; -+#else - static unsigned long lru_gen_min_ttl __read_mostly; -+#endif - - static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) - { --- -2.46.0.rc1 - -From e91af07ae5c96cff206bbbe52c16edb871050bc9 Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Mon, 15 Jul 2024 13:24:26 +0200 -Subject: [PATCH 05/11] crypto - -Signed-off-by: Peter Jung ---- - arch/x86/crypto/Kconfig | 1 + - arch/x86/crypto/Makefile | 8 +- - arch/x86/crypto/aes-gcm-aesni-x86_64.S | 1128 +++++++++ - arch/x86/crypto/aes-gcm-avx10-x86_64.S | 1222 ++++++++++ - arch/x86/crypto/aesni-intel_asm.S | 1503 +----------- - arch/x86/crypto/aesni-intel_avx-x86_64.S | 2804 ---------------------- - arch/x86/crypto/aesni-intel_glue.c | 1269 ++++++---- - 7 files changed, 3125 insertions(+), 4810 deletions(-) - create mode 100644 arch/x86/crypto/aes-gcm-aesni-x86_64.S - create mode 100644 arch/x86/crypto/aes-gcm-avx10-x86_64.S - delete mode 100644 arch/x86/crypto/aesni-intel_avx-x86_64.S - -diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig -index c9e59589a1ce..24875e6295f2 100644 ---- a/arch/x86/crypto/Kconfig -+++ b/arch/x86/crypto/Kconfig -@@ -18,6 +18,7 @@ config CRYPTO_AES_NI_INTEL - depends on X86 - select CRYPTO_AEAD - select CRYPTO_LIB_AES -+ select CRYPTO_LIB_GF128MUL - select CRYPTO_ALGAPI - select CRYPTO_SKCIPHER - select CRYPTO_SIMD -diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile -index 9c5ce5613738..53b4a277809e 100644 ---- a/arch/x86/crypto/Makefile -+++ b/arch/x86/crypto/Makefile -@@ -48,8 +48,12 @@ chacha-x86_64-$(CONFIG_AS_AVX512) += chacha-avx512vl-x86_64.o - - 
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o - aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o --aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o \ -- aes_ctrby8_avx-x86_64.o aes-xts-avx-x86_64.o -+aesni-intel-$(CONFIG_64BIT) += aes_ctrby8_avx-x86_64.o \ -+ aes-gcm-aesni-x86_64.o \ -+ aes-xts-avx-x86_64.o -+ifeq ($(CONFIG_AS_VAES)$(CONFIG_AS_VPCLMULQDQ),yy) -+aesni-intel-$(CONFIG_64BIT) += aes-gcm-avx10-x86_64.o -+endif - - obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o - sha1-ssse3-y := sha1_avx2_x86_64_asm.o sha1_ssse3_asm.o sha1_ssse3_glue.o -diff --git a/arch/x86/crypto/aes-gcm-aesni-x86_64.S b/arch/x86/crypto/aes-gcm-aesni-x86_64.S -new file mode 100644 -index 000000000000..45940e2883a0 ---- /dev/null -+++ b/arch/x86/crypto/aes-gcm-aesni-x86_64.S -@@ -0,0 +1,1128 @@ -+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */ -+// -+// AES-NI optimized AES-GCM for x86_64 -+// -+// Copyright 2024 Google LLC -+// -+// Author: Eric Biggers -+// -+//------------------------------------------------------------------------------ -+// -+// This file is dual-licensed, meaning that you can use it under your choice of -+// either of the following two licenses: -+// -+// Licensed under the Apache License 2.0 (the "License"). You may obtain a copy -+// of the License at -+// -+// http://www.apache.org/licenses/LICENSE-2.0 -+// -+// Unless required by applicable law or agreed to in writing, software -+// distributed under the License is distributed on an "AS IS" BASIS, -+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+// See the License for the specific language governing permissions and -+// limitations under the License. -+// -+// or -+// -+// Redistribution and use in source and binary forms, with or without -+// modification, are permitted provided that the following conditions are met: -+// -+// 1. 
Redistributions of source code must retain the above copyright notice, -+// this list of conditions and the following disclaimer. -+// -+// 2. Redistributions in binary form must reproduce the above copyright -+// notice, this list of conditions and the following disclaimer in the -+// documentation and/or other materials provided with the distribution. -+// -+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+// POSSIBILITY OF SUCH DAMAGE. -+// -+//------------------------------------------------------------------------------ -+// -+// This file implements AES-GCM (Galois/Counter Mode) for x86_64 CPUs that -+// support the original set of AES instructions, i.e. AES-NI. Two -+// implementations are provided, one that uses AVX and one that doesn't. They -+// are very similar, being generated by the same macros. The only difference is -+// that the AVX implementation takes advantage of VEX-coded instructions in some -+// places to avoid some 'movdqu' and 'movdqa' instructions. The AVX -+// implementation does *not* use 256-bit vectors, as AES is not supported on -+// 256-bit vectors until the VAES feature (which this file doesn't target). 
-+// -+// The specific CPU feature prerequisites are AES-NI and PCLMULQDQ, plus SSE4.1 -+// for the *_aesni functions or AVX for the *_aesni_avx ones. (But it seems -+// there are no CPUs that support AES-NI without also PCLMULQDQ and SSE4.1.) -+// -+// The design generally follows that of aes-gcm-avx10-x86_64.S, and that file is -+// more thoroughly commented. This file has the following notable changes: -+// -+// - The vector length is fixed at 128-bit, i.e. xmm registers. This means -+// there is only one AES block (and GHASH block) per register. -+// -+// - Without AVX512 / AVX10, only 16 SIMD registers are available instead of -+// 32. We work around this by being much more careful about using -+// registers, relying heavily on loads to load values as they are needed. -+// -+// - Masking is not available either. We work around this by implementing -+// partial block loads and stores using overlapping scalar loads and stores -+// combined with shifts and SSE4.1 insertion and extraction instructions. -+// -+// - The main loop is organized differently due to the different design -+// constraints. First, with just one AES block per SIMD register, on some -+// CPUs 4 registers don't saturate the 'aesenc' throughput. We therefore -+// do an 8-register wide loop. Considering that and the fact that we have -+// just 16 SIMD registers to work with, it's not feasible to cache AES -+// round keys and GHASH key powers in registers across loop iterations. -+// That's not ideal, but also not actually that bad, since loads can run in -+// parallel with other instructions. Significantly, this also makes it -+// possible to roll up the inner loops, relying on hardware loop unrolling -+// instead of software loop unrolling, greatly reducing code size. -+// -+// - We implement the GHASH multiplications in the main loop using Karatsuba -+// multiplication instead of schoolbook multiplication. 
This saves one -+// pclmulqdq instruction per block, at the cost of one 64-bit load, one -+// pshufd, and 0.25 pxors per block. (This is without the three-argument -+// XOR support that would be provided by AVX512 / AVX10, which would be -+// more beneficial to schoolbook than Karatsuba.) -+// -+// As a rough approximation, we can assume that Karatsuba multiplication is -+// faster than schoolbook multiplication in this context if one pshufd and -+// 0.25 pxors are cheaper than a pclmulqdq. (We assume that the 64-bit -+// load is "free" due to running in parallel with arithmetic instructions.) -+// This is true on AMD CPUs, including all that support pclmulqdq up to at -+// least Zen 3. It's also true on older Intel CPUs: Westmere through -+// Haswell on the Core side, and Silvermont through Goldmont Plus on the -+// low-power side. On some of these CPUs, pclmulqdq is quite slow, and the -+// benefit of Karatsuba should be substantial. On newer Intel CPUs, -+// schoolbook multiplication should be faster, but only marginally. -+// -+// Not all these CPUs were available to be tested. However, benchmarks on -+// available CPUs suggest that this approximation is plausible. Switching -+// to Karatsuba showed negligible change (< 1%) on Intel Broadwell, -+// Skylake, and Cascade Lake, but it improved AMD Zen 1-3 by 6-7%. -+// Considering that and the fact that Karatsuba should be even more -+// beneficial on older Intel CPUs, it seems like the right choice here. -+// -+// An additional 0.25 pclmulqdq per block (2 per 8 blocks) could be -+// saved by using a multiplication-less reduction method. We don't do that -+// because it would require a large number of shift and xor instructions, -+// making it less worthwhile and likely harmful on newer CPUs. 
-+// -+// It does make sense to sometimes use a different reduction optimization -+// that saves a pclmulqdq, though: precompute the hash key times x^64, and -+// multiply the low half of the data block by the hash key with the extra -+// factor of x^64. This eliminates one step of the reduction. However, -+// this is incompatible with Karatsuba multiplication. Therefore, for -+// multi-block processing we use Karatsuba multiplication with a regular -+// reduction. For single-block processing, we use the x^64 optimization. -+ -+#include -+ -+.section .rodata -+.p2align 4 -+.Lbswap_mask: -+ .octa 0x000102030405060708090a0b0c0d0e0f -+.Lgfpoly: -+ .quad 0xc200000000000000 -+.Lone: -+ .quad 1 -+.Lgfpoly_and_internal_carrybit: -+ .octa 0xc2000000000000010000000000000001 -+ // Loading 16 bytes from '.Lzeropad_mask + 16 - len' produces a mask of -+ // 'len' 0xff bytes and the rest zeroes. -+.Lzeropad_mask: -+ .octa 0xffffffffffffffffffffffffffffffff -+ .octa 0 -+ -+// Offsets in struct aes_gcm_key_aesni -+#define OFFSETOF_AESKEYLEN 480 -+#define OFFSETOF_H_POWERS 496 -+#define OFFSETOF_H_POWERS_XORED 624 -+#define OFFSETOF_H_TIMES_X64 688 -+ -+.text -+ -+// Do a vpclmulqdq, or fall back to a movdqa and a pclmulqdq. The fallback -+// assumes that all operands are distinct and that any mem operand is aligned. -+.macro _vpclmulqdq imm, src1, src2, dst -+.if USE_AVX -+ vpclmulqdq \imm, \src1, \src2, \dst -+.else -+ movdqa \src2, \dst -+ pclmulqdq \imm, \src1, \dst -+.endif -+.endm -+ -+// Do a vpshufb, or fall back to a movdqa and a pshufb. The fallback assumes -+// that all operands are distinct and that any mem operand is aligned. -+.macro _vpshufb src1, src2, dst -+.if USE_AVX -+ vpshufb \src1, \src2, \dst -+.else -+ movdqa \src2, \dst -+ pshufb \src1, \dst -+.endif -+.endm -+ -+// Do a vpand, or fall back to a movdqu and a pand. The fallback assumes that -+// all operands are distinct. 
-+.macro _vpand src1, src2, dst -+.if USE_AVX -+ vpand \src1, \src2, \dst -+.else -+ movdqu \src1, \dst -+ pand \src2, \dst -+.endif -+.endm -+ -+// XOR the unaligned memory operand \mem into the xmm register \reg. \tmp must -+// be a temporary xmm register. -+.macro _xor_mem_to_reg mem, reg, tmp -+.if USE_AVX -+ vpxor \mem, \reg, \reg -+.else -+ movdqu \mem, \tmp -+ pxor \tmp, \reg -+.endif -+.endm -+ -+// Test the unaligned memory operand \mem against the xmm register \reg. \tmp -+// must be a temporary xmm register. -+.macro _test_mem mem, reg, tmp -+.if USE_AVX -+ vptest \mem, \reg -+.else -+ movdqu \mem, \tmp -+ ptest \tmp, \reg -+.endif -+.endm -+ -+// Load 1 <= %ecx <= 15 bytes from the pointer \src into the xmm register \dst -+// and zeroize any remaining bytes. Clobbers %rax, %rcx, and \tmp{64,32}. -+.macro _load_partial_block src, dst, tmp64, tmp32 -+ sub $8, %ecx // LEN - 8 -+ jle .Lle8\@ -+ -+ // Load 9 <= LEN <= 15 bytes. -+ movq (\src), \dst // Load first 8 bytes -+ mov (\src, %rcx), %rax // Load last 8 bytes -+ neg %ecx -+ shl $3, %ecx -+ shr %cl, %rax // Discard overlapping bytes -+ pinsrq $1, %rax, \dst -+ jmp .Ldone\@ -+ -+.Lle8\@: -+ add $4, %ecx // LEN - 4 -+ jl .Llt4\@ -+ -+ // Load 4 <= LEN <= 8 bytes. -+ mov (\src), %eax // Load first 4 bytes -+ mov (\src, %rcx), \tmp32 // Load last 4 bytes -+ jmp .Lcombine\@ -+ -+.Llt4\@: -+ // Load 1 <= LEN <= 3 bytes. -+ add $2, %ecx // LEN - 2 -+ movzbl (\src), %eax // Load first byte -+ jl .Lmovq\@ -+ movzwl (\src, %rcx), \tmp32 // Load last 2 bytes -+.Lcombine\@: -+ shl $3, %ecx -+ shl %cl, \tmp64 -+ or \tmp64, %rax // Combine the two parts -+.Lmovq\@: -+ movq %rax, \dst -+.Ldone\@: -+.endm -+ -+// Store 1 <= %ecx <= 15 bytes from the xmm register \src to the pointer \dst. -+// Clobbers %rax, %rcx, and %rsi. -+.macro _store_partial_block src, dst -+ sub $8, %ecx // LEN - 8 -+ jl .Llt8\@ -+ -+ // Store 8 <= LEN <= 15 bytes. 
-+ pextrq $1, \src, %rax -+ mov %ecx, %esi -+ shl $3, %ecx -+ ror %cl, %rax -+ mov %rax, (\dst, %rsi) // Store last LEN - 8 bytes -+ movq \src, (\dst) // Store first 8 bytes -+ jmp .Ldone\@ -+ -+.Llt8\@: -+ add $4, %ecx // LEN - 4 -+ jl .Llt4\@ -+ -+ // Store 4 <= LEN <= 7 bytes. -+ pextrd $1, \src, %eax -+ mov %ecx, %esi -+ shl $3, %ecx -+ ror %cl, %eax -+ mov %eax, (\dst, %rsi) // Store last LEN - 4 bytes -+ movd \src, (\dst) // Store first 4 bytes -+ jmp .Ldone\@ -+ -+.Llt4\@: -+ // Store 1 <= LEN <= 3 bytes. -+ pextrb $0, \src, 0(\dst) -+ cmp $-2, %ecx // LEN - 4 == -2, i.e. LEN == 2? -+ jl .Ldone\@ -+ pextrb $1, \src, 1(\dst) -+ je .Ldone\@ -+ pextrb $2, \src, 2(\dst) -+.Ldone\@: -+.endm -+ -+// Do one step of GHASH-multiplying \a by \b and storing the reduced product in -+// \b. To complete all steps, this must be invoked with \i=0 through \i=9. -+// \a_times_x64 must contain \a * x^64 in reduced form, \gfpoly must contain the -+// .Lgfpoly constant, and \t0-\t1 must be temporary registers. -+.macro _ghash_mul_step i, a, a_times_x64, b, gfpoly, t0, t1 -+ -+ // MI = (a_L * b_H) + ((a*x^64)_L * b_L) -+.if \i == 0 -+ _vpclmulqdq $0x01, \a, \b, \t0 -+.elseif \i == 1 -+ _vpclmulqdq $0x00, \a_times_x64, \b, \t1 -+.elseif \i == 2 -+ pxor \t1, \t0 -+ -+ // HI = (a_H * b_H) + ((a*x^64)_H * b_L) -+.elseif \i == 3 -+ _vpclmulqdq $0x11, \a, \b, \t1 -+.elseif \i == 4 -+ pclmulqdq $0x10, \a_times_x64, \b -+.elseif \i == 5 -+ pxor \t1, \b -+.elseif \i == 6 -+ -+ // Fold MI into HI. -+ pshufd $0x4e, \t0, \t1 // Swap halves of MI -+.elseif \i == 7 -+ pclmulqdq $0x00, \gfpoly, \t0 // MI_L*(x^63 + x^62 + x^57) -+.elseif \i == 8 -+ pxor \t1, \b -+.elseif \i == 9 -+ pxor \t0, \b -+.endif -+.endm -+ -+// GHASH-multiply \a by \b and store the reduced product in \b. -+// See _ghash_mul_step for details. 
-+.macro _ghash_mul a, a_times_x64, b, gfpoly, t0, t1 -+.irp i, 0,1,2,3,4,5,6,7,8,9 -+ _ghash_mul_step \i, \a, \a_times_x64, \b, \gfpoly, \t0, \t1 -+.endr -+.endm -+ -+// GHASH-multiply \a by \b and add the unreduced product to \lo, \mi, and \hi. -+// This does Karatsuba multiplication and must be paired with _ghash_reduce. On -+// the first call, \lo, \mi, and \hi must be zero. \a_xored must contain the -+// two halves of \a XOR'd together, i.e. a_L + a_H. \b is clobbered. -+.macro _ghash_mul_noreduce a, a_xored, b, lo, mi, hi, t0 -+ -+ // LO += a_L * b_L -+ _vpclmulqdq $0x00, \a, \b, \t0 -+ pxor \t0, \lo -+ -+ // b_L + b_H -+ pshufd $0x4e, \b, \t0 -+ pxor \b, \t0 -+ -+ // HI += a_H * b_H -+ pclmulqdq $0x11, \a, \b -+ pxor \b, \hi -+ -+ // MI += (a_L + a_H) * (b_L + b_H) -+ pclmulqdq $0x00, \a_xored, \t0 -+ pxor \t0, \mi -+.endm -+ -+// Reduce the product from \lo, \mi, and \hi, and store the result in \dst. -+// This assumes that _ghash_mul_noreduce was used. -+.macro _ghash_reduce lo, mi, hi, dst, t0 -+ -+ movq .Lgfpoly(%rip), \t0 -+ -+ // MI += LO + HI (needed because we used Karatsuba multiplication) -+ pxor \lo, \mi -+ pxor \hi, \mi -+ -+ // Fold LO into MI. -+ pshufd $0x4e, \lo, \dst -+ pclmulqdq $0x00, \t0, \lo -+ pxor \dst, \mi -+ pxor \lo, \mi -+ -+ // Fold MI into HI. -+ pshufd $0x4e, \mi, \dst -+ pclmulqdq $0x00, \t0, \mi -+ pxor \hi, \dst -+ pxor \mi, \dst -+.endm -+ -+// Do the first step of the GHASH update of a set of 8 ciphertext blocks. -+// -+// The whole GHASH update does: -+// -+// GHASH_ACC = (blk0+GHASH_ACC)*H^8 + blk1*H^7 + blk2*H^6 + blk3*H^5 + -+// blk4*H^4 + blk5*H^3 + blk6*H^2 + blk7*H^1 -+// -+// This macro just does the first step: it does the unreduced multiplication -+// (blk0+GHASH_ACC)*H^8 and starts gathering the unreduced product in the xmm -+// registers LO, MI, and GHASH_ACC a.k.a. HI. 
It also zero-initializes the -+// inner block counter in %rax, which is a value that counts up by 8 for each -+// block in the set of 8 and is used later to index by 8*blknum and 16*blknum. -+// -+// To reduce the number of pclmulqdq instructions required, both this macro and -+// _ghash_update_continue_8x use Karatsuba multiplication instead of schoolbook -+// multiplication. See the file comment for more details about this choice. -+// -+// Both macros expect the ciphertext blocks blk[0-7] to be available at DST if -+// encrypting, or SRC if decrypting. They also expect the precomputed hash key -+// powers H^i and their XOR'd-together halves to be available in the struct -+// pointed to by KEY. Both macros clobber TMP[0-2]. -+.macro _ghash_update_begin_8x enc -+ -+ // Initialize the inner block counter. -+ xor %eax, %eax -+ -+ // Load the highest hash key power, H^8. -+ movdqa OFFSETOF_H_POWERS(KEY), TMP0 -+ -+ // Load the first ciphertext block and byte-reflect it. -+.if \enc -+ movdqu (DST), TMP1 -+.else -+ movdqu (SRC), TMP1 -+.endif -+ pshufb BSWAP_MASK, TMP1 -+ -+ // Add the GHASH accumulator to the ciphertext block to get the block -+ // 'b' that needs to be multiplied with the hash key power 'a'. -+ pxor TMP1, GHASH_ACC -+ -+ // b_L + b_H -+ pshufd $0x4e, GHASH_ACC, MI -+ pxor GHASH_ACC, MI -+ -+ // LO = a_L * b_L -+ _vpclmulqdq $0x00, TMP0, GHASH_ACC, LO -+ -+ // HI = a_H * b_H -+ pclmulqdq $0x11, TMP0, GHASH_ACC -+ -+ // MI = (a_L + a_H) * (b_L + b_H) -+ pclmulqdq $0x00, OFFSETOF_H_POWERS_XORED(KEY), MI -+.endm -+ -+// Continue the GHASH update of 8 ciphertext blocks as described above by doing -+// an unreduced multiplication of the next ciphertext block by the next lowest -+// key power and accumulating the result into LO, MI, and GHASH_ACC a.k.a. HI. -+.macro _ghash_update_continue_8x enc -+ add $8, %eax -+ -+ // Load the next lowest key power. -+ movdqa OFFSETOF_H_POWERS(KEY,%rax,2), TMP0 -+ -+ // Load the next ciphertext block and byte-reflect it. 
-+.if \enc -+ movdqu (DST,%rax,2), TMP1 -+.else -+ movdqu (SRC,%rax,2), TMP1 -+.endif -+ pshufb BSWAP_MASK, TMP1 -+ -+ // LO += a_L * b_L -+ _vpclmulqdq $0x00, TMP0, TMP1, TMP2 -+ pxor TMP2, LO -+ -+ // b_L + b_H -+ pshufd $0x4e, TMP1, TMP2 -+ pxor TMP1, TMP2 -+ -+ // HI += a_H * b_H -+ pclmulqdq $0x11, TMP0, TMP1 -+ pxor TMP1, GHASH_ACC -+ -+ // MI += (a_L + a_H) * (b_L + b_H) -+ movq OFFSETOF_H_POWERS_XORED(KEY,%rax), TMP1 -+ pclmulqdq $0x00, TMP1, TMP2 -+ pxor TMP2, MI -+.endm -+ -+// Reduce LO, MI, and GHASH_ACC a.k.a. HI into GHASH_ACC. This is similar to -+// _ghash_reduce, but it's hardcoded to use the registers of the main loop and -+// it uses the same register for HI and the destination. It's also divided into -+// two steps. TMP1 must be preserved across steps. -+// -+// One pshufd could be saved by shuffling MI and XOR'ing LO into it, instead of -+// shuffling LO, XOR'ing LO into MI, and shuffling MI. However, this would -+// increase the critical path length, and it seems to slightly hurt performance. -+.macro _ghash_update_end_8x_step i -+.if \i == 0 -+ movq .Lgfpoly(%rip), TMP1 -+ pxor LO, MI -+ pxor GHASH_ACC, MI -+ pshufd $0x4e, LO, TMP2 -+ pclmulqdq $0x00, TMP1, LO -+ pxor TMP2, MI -+ pxor LO, MI -+.elseif \i == 1 -+ pshufd $0x4e, MI, TMP2 -+ pclmulqdq $0x00, TMP1, MI -+ pxor TMP2, GHASH_ACC -+ pxor MI, GHASH_ACC -+.endif -+.endm -+ -+// void aes_gcm_precompute_##suffix(struct aes_gcm_key_aesni *key); -+// -+// Given the expanded AES key, derive the GHASH subkey and initialize the GHASH -+// related fields in the key struct. -+.macro _aes_gcm_precompute -+ -+ // Function arguments -+ .set KEY, %rdi -+ -+ // Additional local variables. -+ // %xmm0-%xmm1 and %rax are used as temporaries. -+ .set RNDKEYLAST_PTR, %rsi -+ .set H_CUR, %xmm2 -+ .set H_POW1, %xmm3 // H^1 -+ .set H_POW1_X64, %xmm4 // H^1 * x^64 -+ .set GFPOLY, %xmm5 -+ -+ // Encrypt an all-zeroes block to get the raw hash subkey. 
-+ movl OFFSETOF_AESKEYLEN(KEY), %eax -+ lea 6*16(KEY,%rax,4), RNDKEYLAST_PTR -+ movdqa (KEY), H_POW1 // Zero-th round key XOR all-zeroes block -+ lea 16(KEY), %rax -+1: -+ aesenc (%rax), H_POW1 -+ add $16, %rax -+ cmp %rax, RNDKEYLAST_PTR -+ jne 1b -+ aesenclast (RNDKEYLAST_PTR), H_POW1 -+ -+ // Preprocess the raw hash subkey as needed to operate on GHASH's -+ // bit-reflected values directly: reflect its bytes, then multiply it by -+ // x^-1 (using the backwards interpretation of polynomial coefficients -+ // from the GCM spec) or equivalently x^1 (using the alternative, -+ // natural interpretation of polynomial coefficients). -+ pshufb .Lbswap_mask(%rip), H_POW1 -+ movdqa H_POW1, %xmm0 -+ pshufd $0xd3, %xmm0, %xmm0 -+ psrad $31, %xmm0 -+ paddq H_POW1, H_POW1 -+ pand .Lgfpoly_and_internal_carrybit(%rip), %xmm0 -+ pxor %xmm0, H_POW1 -+ -+ // Store H^1. -+ movdqa H_POW1, OFFSETOF_H_POWERS+7*16(KEY) -+ -+ // Compute and store H^1 * x^64. -+ movq .Lgfpoly(%rip), GFPOLY -+ pshufd $0x4e, H_POW1, %xmm0 -+ _vpclmulqdq $0x00, H_POW1, GFPOLY, H_POW1_X64 -+ pxor %xmm0, H_POW1_X64 -+ movdqa H_POW1_X64, OFFSETOF_H_TIMES_X64(KEY) -+ -+ // Compute and store the halves of H^1 XOR'd together. -+ pxor H_POW1, %xmm0 -+ movq %xmm0, OFFSETOF_H_POWERS_XORED+7*8(KEY) -+ -+ // Compute and store the remaining key powers H^2 through H^8. -+ movdqa H_POW1, H_CUR -+ mov $6*8, %eax -+.Lprecompute_next\@: -+ // Compute H^i = H^{i-1} * H^1. -+ _ghash_mul H_POW1, H_POW1_X64, H_CUR, GFPOLY, %xmm0, %xmm1 -+ // Store H^i. -+ movdqa H_CUR, OFFSETOF_H_POWERS(KEY,%rax,2) -+ // Compute and store the halves of H^i XOR'd together. -+ pshufd $0x4e, H_CUR, %xmm0 -+ pxor H_CUR, %xmm0 -+ movq %xmm0, OFFSETOF_H_POWERS_XORED(KEY,%rax) -+ sub $8, %eax -+ jge .Lprecompute_next\@ -+ -+ RET -+.endm -+ -+// void aes_gcm_aad_update_aesni(const struct aes_gcm_key_aesni *key, -+// u8 ghash_acc[16], const u8 *aad, int aadlen); -+// -+// This function processes the AAD (Additional Authenticated Data) in GCM. 
-+// Using the key |key|, it updates the GHASH accumulator |ghash_acc| with the -+// data given by |aad| and |aadlen|. On the first call, |ghash_acc| must be all -+// zeroes. |aadlen| must be a multiple of 16, except on the last call where it -+// can be any length. The caller must do any buffering needed to ensure this. -+.macro _aes_gcm_aad_update -+ -+ // Function arguments -+ .set KEY, %rdi -+ .set GHASH_ACC_PTR, %rsi -+ .set AAD, %rdx -+ .set AADLEN, %ecx -+ // Note: _load_partial_block relies on AADLEN being in %ecx. -+ -+ // Additional local variables. -+ // %rax, %r10, and %xmm0-%xmm1 are used as temporary registers. -+ .set BSWAP_MASK, %xmm2 -+ .set GHASH_ACC, %xmm3 -+ .set H_POW1, %xmm4 // H^1 -+ .set H_POW1_X64, %xmm5 // H^1 * x^64 -+ .set GFPOLY, %xmm6 -+ -+ movdqa .Lbswap_mask(%rip), BSWAP_MASK -+ movdqu (GHASH_ACC_PTR), GHASH_ACC -+ movdqa OFFSETOF_H_POWERS+7*16(KEY), H_POW1 -+ movdqa OFFSETOF_H_TIMES_X64(KEY), H_POW1_X64 -+ movq .Lgfpoly(%rip), GFPOLY -+ -+ // Process the AAD one full block at a time. -+ sub $16, AADLEN -+ jl .Laad_loop_1x_done\@ -+.Laad_loop_1x\@: -+ movdqu (AAD), %xmm0 -+ pshufb BSWAP_MASK, %xmm0 -+ pxor %xmm0, GHASH_ACC -+ _ghash_mul H_POW1, H_POW1_X64, GHASH_ACC, GFPOLY, %xmm0, %xmm1 -+ add $16, AAD -+ sub $16, AADLEN -+ jge .Laad_loop_1x\@ -+.Laad_loop_1x_done\@: -+ // Check whether there is a partial block at the end. -+ add $16, AADLEN -+ jz .Laad_done\@ -+ -+ // Process a partial block of length 1 <= AADLEN <= 15. -+ // _load_partial_block assumes that %ecx contains AADLEN. -+ _load_partial_block AAD, %xmm0, %r10, %r10d -+ pshufb BSWAP_MASK, %xmm0 -+ pxor %xmm0, GHASH_ACC -+ _ghash_mul H_POW1, H_POW1_X64, GHASH_ACC, GFPOLY, %xmm0, %xmm1 -+ -+.Laad_done\@: -+ movdqu GHASH_ACC, (GHASH_ACC_PTR) -+ RET -+.endm -+ -+// Increment LE_CTR eight times to generate eight little-endian counter blocks, -+// swap each to big-endian, and store them in AESDATA[0-7]. Also XOR them with -+// the zero-th AES round key. Clobbers TMP0 and TMP1. 
-+.macro _ctr_begin_8x -+ movq .Lone(%rip), TMP0 -+ movdqa (KEY), TMP1 // zero-th round key -+.irp i, 0,1,2,3,4,5,6,7 -+ _vpshufb BSWAP_MASK, LE_CTR, AESDATA\i -+ pxor TMP1, AESDATA\i -+ paddd TMP0, LE_CTR -+.endr -+.endm -+ -+// Do a non-last round of AES on AESDATA[0-7] using \round_key. -+.macro _aesenc_8x round_key -+.irp i, 0,1,2,3,4,5,6,7 -+ aesenc \round_key, AESDATA\i -+.endr -+.endm -+ -+// Do the last round of AES on AESDATA[0-7] using \round_key. -+.macro _aesenclast_8x round_key -+.irp i, 0,1,2,3,4,5,6,7 -+ aesenclast \round_key, AESDATA\i -+.endr -+.endm -+ -+// XOR eight blocks from SRC with the keystream blocks in AESDATA[0-7], and -+// store the result to DST. Clobbers TMP0. -+.macro _xor_data_8x -+.irp i, 0,1,2,3,4,5,6,7 -+ _xor_mem_to_reg \i*16(SRC), AESDATA\i, tmp=TMP0 -+.endr -+.irp i, 0,1,2,3,4,5,6,7 -+ movdqu AESDATA\i, \i*16(DST) -+.endr -+.endm -+ -+// void aes_gcm_{enc,dec}_update_##suffix(const struct aes_gcm_key_aesni *key, -+// const u32 le_ctr[4], u8 ghash_acc[16], -+// const u8 *src, u8 *dst, int datalen); -+// -+// This macro generates a GCM encryption or decryption update function with the -+// above prototype (with \enc selecting which one). -+// -+// This function computes the next portion of the CTR keystream, XOR's it with -+// |datalen| bytes from |src|, and writes the resulting encrypted or decrypted -+// data to |dst|. It also updates the GHASH accumulator |ghash_acc| using the -+// next |datalen| ciphertext bytes. -+// -+// |datalen| must be a multiple of 16, except on the last call where it can be -+// any length. The caller must do any buffering needed to ensure this. Both -+// in-place and out-of-place en/decryption are supported. -+// -+// |le_ctr| must give the current counter in little-endian format. For a new -+// message, the low word of the counter must be 2. 
This function loads the -+// counter from |le_ctr| and increments the loaded counter as needed, but it -+// does *not* store the updated counter back to |le_ctr|. The caller must -+// update |le_ctr| if any more data segments follow. Internally, only the low -+// 32-bit word of the counter is incremented, following the GCM standard. -+.macro _aes_gcm_update enc -+ -+ // Function arguments -+ .set KEY, %rdi -+ .set LE_CTR_PTR, %rsi // Note: overlaps with usage as temp reg -+ .set GHASH_ACC_PTR, %rdx -+ .set SRC, %rcx -+ .set DST, %r8 -+ .set DATALEN, %r9d -+ .set DATALEN64, %r9 // Zero-extend DATALEN before using! -+ // Note: the code setting up for _load_partial_block assumes that SRC is -+ // in %rcx (and that DATALEN is *not* in %rcx). -+ -+ // Additional local variables -+ -+ // %rax and %rsi are used as temporary registers. Note: %rsi overlaps -+ // with LE_CTR_PTR, which is used only at the beginning. -+ -+ .set AESKEYLEN, %r10d // AES key length in bytes -+ .set AESKEYLEN64, %r10 -+ .set RNDKEYLAST_PTR, %r11 // Pointer to last AES round key -+ -+ // Put the most frequently used values in %xmm0-%xmm7 to reduce code -+ // size. (%xmm0-%xmm7 take fewer bytes to encode than %xmm8-%xmm15.) 
-+ .set TMP0, %xmm0 -+ .set TMP1, %xmm1 -+ .set TMP2, %xmm2 -+ .set LO, %xmm3 // Low part of unreduced product -+ .set MI, %xmm4 // Middle part of unreduced product -+ .set GHASH_ACC, %xmm5 // GHASH accumulator; in main loop also -+ // the high part of unreduced product -+ .set BSWAP_MASK, %xmm6 // Shuffle mask for reflecting bytes -+ .set LE_CTR, %xmm7 // Little-endian counter value -+ .set AESDATA0, %xmm8 -+ .set AESDATA1, %xmm9 -+ .set AESDATA2, %xmm10 -+ .set AESDATA3, %xmm11 -+ .set AESDATA4, %xmm12 -+ .set AESDATA5, %xmm13 -+ .set AESDATA6, %xmm14 -+ .set AESDATA7, %xmm15 -+ -+ movdqa .Lbswap_mask(%rip), BSWAP_MASK -+ movdqu (GHASH_ACC_PTR), GHASH_ACC -+ movdqu (LE_CTR_PTR), LE_CTR -+ -+ movl OFFSETOF_AESKEYLEN(KEY), AESKEYLEN -+ lea 6*16(KEY,AESKEYLEN64,4), RNDKEYLAST_PTR -+ -+ // If there are at least 8*16 bytes of data, then continue into the main -+ // loop, which processes 8*16 bytes of data per iteration. -+ // -+ // The main loop interleaves AES and GHASH to improve performance on -+ // CPUs that can execute these instructions in parallel. When -+ // decrypting, the GHASH input (the ciphertext) is immediately -+ // available. When encrypting, we instead encrypt a set of 8 blocks -+ // first and then GHASH those blocks while encrypting the next set of 8, -+ // repeat that as needed, and finally GHASH the last set of 8 blocks. -+ // -+ // Code size optimization: Prefer adding or subtracting -8*16 over 8*16, -+ // as this makes the immediate fit in a signed byte, saving 3 bytes. -+ add $-8*16, DATALEN -+ jl .Lcrypt_loop_8x_done\@ -+.if \enc -+ // Encrypt the first 8 plaintext blocks. -+ _ctr_begin_8x -+ lea 16(KEY), %rsi -+ .p2align 4 -+1: -+ movdqa (%rsi), TMP0 -+ _aesenc_8x TMP0 -+ add $16, %rsi -+ cmp %rsi, RNDKEYLAST_PTR -+ jne 1b -+ movdqa (%rsi), TMP0 -+ _aesenclast_8x TMP0 -+ _xor_data_8x -+ // Don't increment DST until the ciphertext blocks have been hashed. 
-+ sub $-8*16, SRC -+ add $-8*16, DATALEN -+ jl .Lghash_last_ciphertext_8x\@ -+.endif -+ -+ .p2align 4 -+.Lcrypt_loop_8x\@: -+ -+ // Generate the next set of 8 counter blocks and start encrypting them. -+ _ctr_begin_8x -+ lea 16(KEY), %rsi -+ -+ // Do a round of AES, and start the GHASH update of 8 ciphertext blocks -+ // by doing the unreduced multiplication for the first ciphertext block. -+ movdqa (%rsi), TMP0 -+ add $16, %rsi -+ _aesenc_8x TMP0 -+ _ghash_update_begin_8x \enc -+ -+ // Do 7 more rounds of AES, and continue the GHASH update by doing the -+ // unreduced multiplication for the remaining ciphertext blocks. -+ .p2align 4 -+1: -+ movdqa (%rsi), TMP0 -+ add $16, %rsi -+ _aesenc_8x TMP0 -+ _ghash_update_continue_8x \enc -+ cmp $7*8, %eax -+ jne 1b -+ -+ // Do the remaining AES rounds. -+ .p2align 4 -+1: -+ movdqa (%rsi), TMP0 -+ add $16, %rsi -+ _aesenc_8x TMP0 -+ cmp %rsi, RNDKEYLAST_PTR -+ jne 1b -+ -+ // Do the GHASH reduction and the last round of AES. -+ movdqa (RNDKEYLAST_PTR), TMP0 -+ _ghash_update_end_8x_step 0 -+ _aesenclast_8x TMP0 -+ _ghash_update_end_8x_step 1 -+ -+ // XOR the data with the AES-CTR keystream blocks. -+.if \enc -+ sub $-8*16, DST -+.endif -+ _xor_data_8x -+ sub $-8*16, SRC -+.if !\enc -+ sub $-8*16, DST -+.endif -+ add $-8*16, DATALEN -+ jge .Lcrypt_loop_8x\@ -+ -+.if \enc -+.Lghash_last_ciphertext_8x\@: -+ // Update GHASH with the last set of 8 ciphertext blocks. -+ _ghash_update_begin_8x \enc -+ .p2align 4 -+1: -+ _ghash_update_continue_8x \enc -+ cmp $7*8, %eax -+ jne 1b -+ _ghash_update_end_8x_step 0 -+ _ghash_update_end_8x_step 1 -+ sub $-8*16, DST -+.endif -+ -+.Lcrypt_loop_8x_done\@: -+ -+ sub $-8*16, DATALEN -+ jz .Ldone\@ -+ -+ // Handle the remainder of length 1 <= DATALEN < 8*16 bytes. We keep -+ // things simple and keep the code size down by just going one block at -+ // a time, again taking advantage of hardware loop unrolling. 
Since -+ // there are enough key powers available for all remaining data, we do -+ // the GHASH multiplications unreduced, and only reduce at the very end. -+ -+ .set HI, TMP2 -+ .set H_POW, AESDATA0 -+ .set H_POW_XORED, AESDATA1 -+ .set ONE, AESDATA2 -+ -+ movq .Lone(%rip), ONE -+ -+ // Start collecting the unreduced GHASH intermediate value LO, MI, HI. -+ pxor LO, LO -+ pxor MI, MI -+ pxor HI, HI -+ -+ // Set up a block counter %rax to contain 8*(8-n), where n is the number -+ // of blocks that remain, counting any partial block. This will be used -+ // to access the key powers H^n through H^1. -+ mov DATALEN, %eax -+ neg %eax -+ and $~15, %eax -+ sar $1, %eax -+ add $64, %eax -+ -+ sub $16, DATALEN -+ jl .Lcrypt_loop_1x_done\@ -+ -+ // Process the data one full block at a time. -+.Lcrypt_loop_1x\@: -+ -+ // Encrypt the next counter block. -+ _vpshufb BSWAP_MASK, LE_CTR, TMP0 -+ paddd ONE, LE_CTR -+ pxor (KEY), TMP0 -+ lea -6*16(RNDKEYLAST_PTR), %rsi // Reduce code size -+ cmp $24, AESKEYLEN -+ jl 128f // AES-128? -+ je 192f // AES-192? -+ // AES-256 -+ aesenc -7*16(%rsi), TMP0 -+ aesenc -6*16(%rsi), TMP0 -+192: -+ aesenc -5*16(%rsi), TMP0 -+ aesenc -4*16(%rsi), TMP0 -+128: -+.irp i, -3,-2,-1,0,1,2,3,4,5 -+ aesenc \i*16(%rsi), TMP0 -+.endr -+ aesenclast (RNDKEYLAST_PTR), TMP0 -+ -+ // Load the next key power H^i. -+ movdqa OFFSETOF_H_POWERS(KEY,%rax,2), H_POW -+ movq OFFSETOF_H_POWERS_XORED(KEY,%rax), H_POW_XORED -+ -+ // XOR the keystream block that was just generated in TMP0 with the next -+ // source data block and store the resulting en/decrypted data to DST. -+.if \enc -+ _xor_mem_to_reg (SRC), TMP0, tmp=TMP1 -+ movdqu TMP0, (DST) -+.else -+ movdqu (SRC), TMP1 -+ pxor TMP1, TMP0 -+ movdqu TMP0, (DST) -+.endif -+ -+ // Update GHASH with the ciphertext block. 
-+.if \enc -+ pshufb BSWAP_MASK, TMP0 -+ pxor TMP0, GHASH_ACC -+.else -+ pshufb BSWAP_MASK, TMP1 -+ pxor TMP1, GHASH_ACC -+.endif -+ _ghash_mul_noreduce H_POW, H_POW_XORED, GHASH_ACC, LO, MI, HI, TMP0 -+ pxor GHASH_ACC, GHASH_ACC -+ -+ add $8, %eax -+ add $16, SRC -+ add $16, DST -+ sub $16, DATALEN -+ jge .Lcrypt_loop_1x\@ -+.Lcrypt_loop_1x_done\@: -+ // Check whether there is a partial block at the end. -+ add $16, DATALEN -+ jz .Lghash_reduce\@ -+ -+ // Process a partial block of length 1 <= DATALEN <= 15. -+ -+ // Encrypt a counter block for the last time. -+ pshufb BSWAP_MASK, LE_CTR -+ pxor (KEY), LE_CTR -+ lea 16(KEY), %rsi -+1: -+ aesenc (%rsi), LE_CTR -+ add $16, %rsi -+ cmp %rsi, RNDKEYLAST_PTR -+ jne 1b -+ aesenclast (RNDKEYLAST_PTR), LE_CTR -+ -+ // Load the lowest key power, H^1. -+ movdqa OFFSETOF_H_POWERS(KEY,%rax,2), H_POW -+ movq OFFSETOF_H_POWERS_XORED(KEY,%rax), H_POW_XORED -+ -+ // Load and zero-pad 1 <= DATALEN <= 15 bytes of data from SRC. SRC is -+ // in %rcx, but _load_partial_block needs DATALEN in %rcx instead. -+ // RNDKEYLAST_PTR is no longer needed, so reuse it for SRC. -+ mov SRC, RNDKEYLAST_PTR -+ mov DATALEN, %ecx -+ _load_partial_block RNDKEYLAST_PTR, TMP0, %rsi, %esi -+ -+ // XOR the keystream block that was just generated in LE_CTR with the -+ // source data block and store the resulting en/decrypted data to DST. -+ pxor TMP0, LE_CTR -+ mov DATALEN, %ecx -+ _store_partial_block LE_CTR, DST -+ -+ // If encrypting, zero-pad the final ciphertext block for GHASH. (If -+ // decrypting, this was already done by _load_partial_block.) -+.if \enc -+ lea .Lzeropad_mask+16(%rip), %rax -+ sub DATALEN64, %rax -+ _vpand (%rax), LE_CTR, TMP0 -+.endif -+ -+ // Update GHASH with the final ciphertext block. -+ pshufb BSWAP_MASK, TMP0 -+ pxor TMP0, GHASH_ACC -+ _ghash_mul_noreduce H_POW, H_POW_XORED, GHASH_ACC, LO, MI, HI, TMP0 -+ -+.Lghash_reduce\@: -+ // Finally, do the GHASH reduction. 
-+ _ghash_reduce LO, MI, HI, GHASH_ACC, TMP0 -+ -+.Ldone\@: -+ // Store the updated GHASH accumulator back to memory. -+ movdqu GHASH_ACC, (GHASH_ACC_PTR) -+ -+ RET -+.endm -+ -+// void aes_gcm_enc_final_##suffix(const struct aes_gcm_key_aesni *key, -+// const u32 le_ctr[4], u8 ghash_acc[16], -+// u64 total_aadlen, u64 total_datalen); -+// bool aes_gcm_dec_final_##suffix(const struct aes_gcm_key_aesni *key, -+// const u32 le_ctr[4], const u8 ghash_acc[16], -+// u64 total_aadlen, u64 total_datalen, -+// const u8 tag[16], int taglen); -+// -+// This macro generates one of the above two functions (with \enc selecting -+// which one). Both functions finish computing the GCM authentication tag by -+// updating GHASH with the lengths block and encrypting the GHASH accumulator. -+// |total_aadlen| and |total_datalen| must be the total length of the additional -+// authenticated data and the en/decrypted data in bytes, respectively. -+// -+// The encryption function then stores the full-length (16-byte) computed -+// authentication tag to |ghash_acc|. The decryption function instead loads the -+// expected authentication tag (the one that was transmitted) from the 16-byte -+// buffer |tag|, compares the first 4 <= |taglen| <= 16 bytes of it to the -+// computed tag in constant time, and returns true if and only if they match. -+.macro _aes_gcm_final enc -+ -+ // Function arguments -+ .set KEY, %rdi -+ .set LE_CTR_PTR, %rsi -+ .set GHASH_ACC_PTR, %rdx -+ .set TOTAL_AADLEN, %rcx -+ .set TOTAL_DATALEN, %r8 -+ .set TAG, %r9 -+ .set TAGLEN, %r10d // Originally at 8(%rsp) -+ .set TAGLEN64, %r10 -+ -+ // Additional local variables. -+ // %rax and %xmm0-%xmm2 are used as temporary registers. 
-+ .set AESKEYLEN, %r11d -+ .set AESKEYLEN64, %r11 -+ .set BSWAP_MASK, %xmm3 -+ .set GHASH_ACC, %xmm4 -+ .set H_POW1, %xmm5 // H^1 -+ .set H_POW1_X64, %xmm6 // H^1 * x^64 -+ .set GFPOLY, %xmm7 -+ -+ movdqa .Lbswap_mask(%rip), BSWAP_MASK -+ movl OFFSETOF_AESKEYLEN(KEY), AESKEYLEN -+ -+ // Set up a counter block with 1 in the low 32-bit word. This is the -+ // counter that produces the ciphertext needed to encrypt the auth tag. -+ movdqu (LE_CTR_PTR), %xmm0 -+ mov $1, %eax -+ pinsrd $0, %eax, %xmm0 -+ -+ // Build the lengths block and XOR it into the GHASH accumulator. -+ movq TOTAL_DATALEN, GHASH_ACC -+ pinsrq $1, TOTAL_AADLEN, GHASH_ACC -+ psllq $3, GHASH_ACC // Bytes to bits -+ _xor_mem_to_reg (GHASH_ACC_PTR), GHASH_ACC, %xmm1 -+ -+ movdqa OFFSETOF_H_POWERS+7*16(KEY), H_POW1 -+ movdqa OFFSETOF_H_TIMES_X64(KEY), H_POW1_X64 -+ movq .Lgfpoly(%rip), GFPOLY -+ -+ // Make %rax point to the 6th from last AES round key. (Using signed -+ // byte offsets -7*16 through 6*16 decreases code size.) -+ lea (KEY,AESKEYLEN64,4), %rax -+ -+ // AES-encrypt the counter block and also multiply GHASH_ACC by H^1. -+ // Interleave the AES and GHASH instructions to improve performance. -+ pshufb BSWAP_MASK, %xmm0 -+ pxor (KEY), %xmm0 -+ cmp $24, AESKEYLEN -+ jl 128f // AES-128? -+ je 192f // AES-192? -+ // AES-256 -+ aesenc -7*16(%rax), %xmm0 -+ aesenc -6*16(%rax), %xmm0 -+192: -+ aesenc -5*16(%rax), %xmm0 -+ aesenc -4*16(%rax), %xmm0 -+128: -+.irp i, 0,1,2,3,4,5,6,7,8 -+ aesenc (\i-3)*16(%rax), %xmm0 -+ _ghash_mul_step \i, H_POW1, H_POW1_X64, GHASH_ACC, GFPOLY, %xmm1, %xmm2 -+.endr -+ aesenclast 6*16(%rax), %xmm0 -+ _ghash_mul_step 9, H_POW1, H_POW1_X64, GHASH_ACC, GFPOLY, %xmm1, %xmm2 -+ -+ // Undo the byte reflection of the GHASH accumulator. -+ pshufb BSWAP_MASK, GHASH_ACC -+ -+ // Encrypt the GHASH accumulator. -+ pxor %xmm0, GHASH_ACC -+ -+.if \enc -+ // Return the computed auth tag. 
-+ movdqu GHASH_ACC, (GHASH_ACC_PTR) -+.else -+ .set ZEROPAD_MASK_PTR, TOTAL_AADLEN // Reusing TOTAL_AADLEN! -+ -+ // Verify the auth tag in constant time by XOR'ing the transmitted and -+ // computed auth tags together and using the ptest instruction to check -+ // whether the first TAGLEN bytes of the result are zero. -+ _xor_mem_to_reg (TAG), GHASH_ACC, tmp=%xmm0 -+ movl 8(%rsp), TAGLEN -+ lea .Lzeropad_mask+16(%rip), ZEROPAD_MASK_PTR -+ sub TAGLEN64, ZEROPAD_MASK_PTR -+ xor %eax, %eax -+ _test_mem (ZEROPAD_MASK_PTR), GHASH_ACC, tmp=%xmm0 -+ sete %al -+.endif -+ RET -+.endm -+ -+.set USE_AVX, 0 -+SYM_FUNC_START(aes_gcm_precompute_aesni) -+ _aes_gcm_precompute -+SYM_FUNC_END(aes_gcm_precompute_aesni) -+SYM_FUNC_START(aes_gcm_aad_update_aesni) -+ _aes_gcm_aad_update -+SYM_FUNC_END(aes_gcm_aad_update_aesni) -+SYM_FUNC_START(aes_gcm_enc_update_aesni) -+ _aes_gcm_update 1 -+SYM_FUNC_END(aes_gcm_enc_update_aesni) -+SYM_FUNC_START(aes_gcm_dec_update_aesni) -+ _aes_gcm_update 0 -+SYM_FUNC_END(aes_gcm_dec_update_aesni) -+SYM_FUNC_START(aes_gcm_enc_final_aesni) -+ _aes_gcm_final 1 -+SYM_FUNC_END(aes_gcm_enc_final_aesni) -+SYM_FUNC_START(aes_gcm_dec_final_aesni) -+ _aes_gcm_final 0 -+SYM_FUNC_END(aes_gcm_dec_final_aesni) -+ -+.set USE_AVX, 1 -+SYM_FUNC_START(aes_gcm_precompute_aesni_avx) -+ _aes_gcm_precompute -+SYM_FUNC_END(aes_gcm_precompute_aesni_avx) -+SYM_FUNC_START(aes_gcm_aad_update_aesni_avx) -+ _aes_gcm_aad_update -+SYM_FUNC_END(aes_gcm_aad_update_aesni_avx) -+SYM_FUNC_START(aes_gcm_enc_update_aesni_avx) -+ _aes_gcm_update 1 -+SYM_FUNC_END(aes_gcm_enc_update_aesni_avx) -+SYM_FUNC_START(aes_gcm_dec_update_aesni_avx) -+ _aes_gcm_update 0 -+SYM_FUNC_END(aes_gcm_dec_update_aesni_avx) -+SYM_FUNC_START(aes_gcm_enc_final_aesni_avx) -+ _aes_gcm_final 1 -+SYM_FUNC_END(aes_gcm_enc_final_aesni_avx) -+SYM_FUNC_START(aes_gcm_dec_final_aesni_avx) -+ _aes_gcm_final 0 -+SYM_FUNC_END(aes_gcm_dec_final_aesni_avx) -diff --git a/arch/x86/crypto/aes-gcm-avx10-x86_64.S 
b/arch/x86/crypto/aes-gcm-avx10-x86_64.S -new file mode 100644 -index 000000000000..97e0ee515fc5 ---- /dev/null -+++ b/arch/x86/crypto/aes-gcm-avx10-x86_64.S -@@ -0,0 +1,1222 @@ -+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */ -+// -+// VAES and VPCLMULQDQ optimized AES-GCM for x86_64 -+// -+// Copyright 2024 Google LLC -+// -+// Author: Eric Biggers -+// -+//------------------------------------------------------------------------------ -+// -+// This file is dual-licensed, meaning that you can use it under your choice of -+// either of the following two licenses: -+// -+// Licensed under the Apache License 2.0 (the "License"). You may obtain a copy -+// of the License at -+// -+// http://www.apache.org/licenses/LICENSE-2.0 -+// -+// Unless required by applicable law or agreed to in writing, software -+// distributed under the License is distributed on an "AS IS" BASIS, -+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+// See the License for the specific language governing permissions and -+// limitations under the License. -+// -+// or -+// -+// Redistribution and use in source and binary forms, with or without -+// modification, are permitted provided that the following conditions are met: -+// -+// 1. Redistributions of source code must retain the above copyright notice, -+// this list of conditions and the following disclaimer. -+// -+// 2. Redistributions in binary form must reproduce the above copyright -+// notice, this list of conditions and the following disclaimer in the -+// documentation and/or other materials provided with the distribution. -+// -+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+// POSSIBILITY OF SUCH DAMAGE. -+// -+//------------------------------------------------------------------------------ -+// -+// This file implements AES-GCM (Galois/Counter Mode) for x86_64 CPUs that -+// support VAES (vector AES), VPCLMULQDQ (vector carryless multiplication), and -+// either AVX512 or AVX10. Some of the functions, notably the encryption and -+// decryption update functions which are the most performance-critical, are -+// provided in two variants generated from a macro: one using 256-bit vectors -+// (suffix: vaes_avx10_256) and one using 512-bit vectors (vaes_avx10_512). The -+// other, "shared" functions (vaes_avx10) use at most 256-bit vectors. -+// -+// The functions that use 512-bit vectors are intended for CPUs that support -+// 512-bit vectors *and* where using them doesn't cause significant -+// downclocking. They require the following CPU features: -+// -+// VAES && VPCLMULQDQ && BMI2 && ((AVX512BW && AVX512VL) || AVX10/512) -+// -+// The other functions require the following CPU features: -+// -+// VAES && VPCLMULQDQ && BMI2 && ((AVX512BW && AVX512VL) || AVX10/256) -+// -+// All functions use the "System V" ABI. The Windows ABI is not supported. -+// -+// Note that we use "avx10" in the names of the functions as a shorthand to -+// really mean "AVX10 or a certain set of AVX512 features". 
Due to Intel's -+// introduction of AVX512 and then its replacement by AVX10, there doesn't seem -+// to be a simple way to name things that makes sense on all CPUs. -+// -+// Note that the macros that support both 256-bit and 512-bit vectors could -+// fairly easily be changed to support 128-bit too. However, this would *not* -+// be sufficient to allow the code to run on CPUs without AVX512 or AVX10, -+// because the code heavily uses several features of these extensions other than -+// the vector length: the increase in the number of SIMD registers from 16 to -+// 32, masking support, and new instructions such as vpternlogd (which can do a -+// three-argument XOR). These features are very useful for AES-GCM. -+ -+#include -+ -+.section .rodata -+.p2align 6 -+ -+ // A shuffle mask that reflects the bytes of 16-byte blocks -+.Lbswap_mask: -+ .octa 0x000102030405060708090a0b0c0d0e0f -+ -+ // This is the GHASH reducing polynomial without its constant term, i.e. -+ // x^128 + x^7 + x^2 + x, represented using the backwards mapping -+ // between bits and polynomial coefficients. -+ // -+ // Alternatively, it can be interpreted as the naturally-ordered -+ // representation of the polynomial x^127 + x^126 + x^121 + 1, i.e. the -+ // "reversed" GHASH reducing polynomial without its x^128 term. -+.Lgfpoly: -+ .octa 0xc2000000000000000000000000000001 -+ -+ // Same as above, but with the (1 << 64) bit set. -+.Lgfpoly_and_internal_carrybit: -+ .octa 0xc2000000000000010000000000000001 -+ -+ // The below constants are used for incrementing the counter blocks. -+ // ctr_pattern points to the four 128-bit values [0, 1, 2, 3]. -+ // inc_2blocks and inc_4blocks point to the single 128-bit values 2 and -+ // 4. Note that the same '2' is reused in ctr_pattern and inc_2blocks. -+.Lctr_pattern: -+ .octa 0 -+ .octa 1 -+.Linc_2blocks: -+ .octa 2 -+ .octa 3 -+.Linc_4blocks: -+ .octa 4 -+ -+// Number of powers of the hash key stored in the key struct. 
The powers are -+// stored from highest (H^NUM_H_POWERS) to lowest (H^1). -+#define NUM_H_POWERS 16 -+ -+// Offset to AES key length (in bytes) in the key struct -+#define OFFSETOF_AESKEYLEN 480 -+ -+// Offset to start of hash key powers array in the key struct -+#define OFFSETOF_H_POWERS 512 -+ -+// Offset to end of hash key powers array in the key struct. -+// -+// This is immediately followed by three zeroized padding blocks, which are -+// included so that partial vectors can be handled more easily. E.g. if VL=64 -+// and two blocks remain, we load the 4 values [H^2, H^1, 0, 0]. The most -+// padding blocks needed is 3, which occurs if [H^1, 0, 0, 0] is loaded. -+#define OFFSETOFEND_H_POWERS (OFFSETOF_H_POWERS + (NUM_H_POWERS * 16)) -+ -+.text -+ -+// Set the vector length in bytes. This sets the VL variable and defines -+// register aliases V0-V31 that map to the ymm or zmm registers. -+.macro _set_veclen vl -+ .set VL, \vl -+.irp i, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ -+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 -+.if VL == 32 -+ .set V\i, %ymm\i -+.elseif VL == 64 -+ .set V\i, %zmm\i -+.else -+ .error "Unsupported vector length" -+.endif -+.endr -+.endm -+ -+// The _ghash_mul_step macro does one step of GHASH multiplication of the -+// 128-bit lanes of \a by the corresponding 128-bit lanes of \b and storing the -+// reduced products in \dst. \t0, \t1, and \t2 are temporary registers of the -+// same size as \a and \b. To complete all steps, this must invoked with \i=0 -+// through \i=9. The division into steps allows users of this macro to -+// optionally interleave the computation with other instructions. Users of this -+// macro must preserve the parameter registers across steps. -+// -+// The multiplications are done in GHASH's representation of the finite field -+// GF(2^128). Elements of GF(2^128) are represented as binary polynomials -+// (i.e. polynomials whose coefficients are bits) modulo a reducing polynomial -+// G. 
The GCM specification uses G = x^128 + x^7 + x^2 + x + 1. Addition is -+// just XOR, while multiplication is more complex and has two parts: (a) do -+// carryless multiplication of two 128-bit input polynomials to get a 256-bit -+// intermediate product polynomial, and (b) reduce the intermediate product to -+// 128 bits by adding multiples of G that cancel out terms in it. (Adding -+// multiples of G doesn't change which field element the polynomial represents.) -+// -+// Unfortunately, the GCM specification maps bits to/from polynomial -+// coefficients backwards from the natural order. In each byte it specifies the -+// highest bit to be the lowest order polynomial coefficient, *not* the highest! -+// This makes it nontrivial to work with the GHASH polynomials. We could -+// reflect the bits, but x86 doesn't have an instruction that does that. -+// -+// Instead, we operate on the values without bit-reflecting them. This *mostly* -+// just works, since XOR and carryless multiplication are symmetric with respect -+// to bit order, but it has some consequences. First, due to GHASH's byte -+// order, by skipping bit reflection, *byte* reflection becomes necessary to -+// give the polynomial terms a consistent order. E.g., considering an N-bit -+// value interpreted using the G = x^128 + x^7 + x^2 + x + 1 convention, bits 0 -+// through N-1 of the byte-reflected value represent the coefficients of x^(N-1) -+// through x^0, whereas bits 0 through N-1 of the non-byte-reflected value -+// represent x^7...x^0, x^15...x^8, ..., x^(N-1)...x^(N-8) which can't be worked -+// with. Fortunately, x86's vpshufb instruction can do byte reflection. -+// -+// Second, forgoing the bit reflection causes an extra multiple of x (still -+// using the G = x^128 + x^7 + x^2 + x + 1 convention) to be introduced by each -+// multiplication. 
This is because an M-bit by N-bit carryless multiplication -+// really produces a (M+N-1)-bit product, but in practice it's zero-extended to -+// M+N bits. In the G = x^128 + x^7 + x^2 + x + 1 convention, which maps bits -+// to polynomial coefficients backwards, this zero-extension actually changes -+// the product by introducing an extra factor of x. Therefore, users of this -+// macro must ensure that one of the inputs has an extra factor of x^-1, i.e. -+// the multiplicative inverse of x, to cancel out the extra x. -+// -+// Third, the backwards coefficients convention is just confusing to work with, -+// since it makes "low" and "high" in the polynomial math mean the opposite of -+// their normal meaning in computer programming. This can be solved by using an -+// alternative interpretation: the polynomial coefficients are understood to be -+// in the natural order, and the multiplication is actually \a * \b * x^-128 mod -+// x^128 + x^127 + x^126 + x^121 + 1. This doesn't change the inputs, outputs, -+// or the implementation at all; it just changes the mathematical interpretation -+// of what each instruction is doing. Starting from here, we'll use this -+// alternative interpretation, as it's easier to understand the code that way. -+// -+// Moving onto the implementation, the vpclmulqdq instruction does 64 x 64 => -+// 128-bit carryless multiplication, so we break the 128 x 128 multiplication -+// into parts as follows (the _L and _H suffixes denote low and high 64 bits): -+// -+// LO = a_L * b_L -+// MI = (a_L * b_H) + (a_H * b_L) -+// HI = a_H * b_H -+// -+// The 256-bit product is x^128*HI + x^64*MI + LO. LO, MI, and HI are 128-bit. -+// Note that MI "overlaps" with LO and HI. We don't consolidate MI into LO and -+// HI right away, since the way the reduction works makes that unnecessary. -+// -+// For the reduction, we cancel out the low 128 bits by adding multiples of G = -+// x^128 + x^127 + x^126 + x^121 + 1. 
This is done by two iterations, each of -+// which cancels out the next lowest 64 bits. Consider a value x^64*A + B, -+// where A and B are 128-bit. Adding B_L*G to that value gives: -+// -+// x^64*A + B + B_L*G -+// = x^64*A + x^64*B_H + B_L + B_L*(x^128 + x^127 + x^126 + x^121 + 1) -+// = x^64*A + x^64*B_H + B_L + x^128*B_L + x^64*B_L*(x^63 + x^62 + x^57) + B_L -+// = x^64*A + x^64*B_H + x^128*B_L + x^64*B_L*(x^63 + x^62 + x^57) + B_L + B_L -+// = x^64*(A + B_H + x^64*B_L + B_L*(x^63 + x^62 + x^57)) -+// -+// So: if we sum A, B with its halves swapped, and the low half of B times x^63 -+// + x^62 + x^57, we get a 128-bit value C where x^64*C is congruent to the -+// original value x^64*A + B. I.e., the low 64 bits got canceled out. -+// -+// We just need to apply this twice: first to fold LO into MI, and second to -+// fold the updated MI into HI. -+// -+// The needed three-argument XORs are done using the vpternlogd instruction with -+// immediate 0x96, since this is faster than two vpxord instructions. -+// -+// A potential optimization, assuming that b is fixed per-key (if a is fixed -+// per-key it would work the other way around), is to use one iteration of the -+// reduction described above to precompute a value c such that x^64*c = b mod G, -+// and then multiply a_L by c (and implicitly by x^64) instead of by b: -+// -+// MI = (a_L * c_L) + (a_H * b_L) -+// HI = (a_L * c_H) + (a_H * b_H) -+// -+// This would eliminate the LO part of the intermediate product, which would -+// eliminate the need to fold LO into MI. This would save two instructions, -+// including a vpclmulqdq. However, we currently don't use this optimization -+// because it would require twice as many per-key precomputed values. -+// -+// Using Karatsuba multiplication instead of "schoolbook" multiplication -+// similarly would save a vpclmulqdq but does not seem to be worth it. 
-+.macro _ghash_mul_step i, a, b, dst, gfpoly, t0, t1, t2 -+.if \i == 0 -+ vpclmulqdq $0x00, \a, \b, \t0 // LO = a_L * b_L -+ vpclmulqdq $0x01, \a, \b, \t1 // MI_0 = a_L * b_H -+.elseif \i == 1 -+ vpclmulqdq $0x10, \a, \b, \t2 // MI_1 = a_H * b_L -+.elseif \i == 2 -+ vpxord \t2, \t1, \t1 // MI = MI_0 + MI_1 -+.elseif \i == 3 -+ vpclmulqdq $0x01, \t0, \gfpoly, \t2 // LO_L*(x^63 + x^62 + x^57) -+.elseif \i == 4 -+ vpshufd $0x4e, \t0, \t0 // Swap halves of LO -+.elseif \i == 5 -+ vpternlogd $0x96, \t2, \t0, \t1 // Fold LO into MI -+.elseif \i == 6 -+ vpclmulqdq $0x11, \a, \b, \dst // HI = a_H * b_H -+.elseif \i == 7 -+ vpclmulqdq $0x01, \t1, \gfpoly, \t0 // MI_L*(x^63 + x^62 + x^57) -+.elseif \i == 8 -+ vpshufd $0x4e, \t1, \t1 // Swap halves of MI -+.elseif \i == 9 -+ vpternlogd $0x96, \t0, \t1, \dst // Fold MI into HI -+.endif -+.endm -+ -+// GHASH-multiply the 128-bit lanes of \a by the 128-bit lanes of \b and store -+// the reduced products in \dst. See _ghash_mul_step for full explanation. -+.macro _ghash_mul a, b, dst, gfpoly, t0, t1, t2 -+.irp i, 0,1,2,3,4,5,6,7,8,9 -+ _ghash_mul_step \i, \a, \b, \dst, \gfpoly, \t0, \t1, \t2 -+.endr -+.endm -+ -+// GHASH-multiply the 128-bit lanes of \a by the 128-bit lanes of \b and add the -+// *unreduced* products to \lo, \mi, and \hi. -+.macro _ghash_mul_noreduce a, b, lo, mi, hi, t0, t1, t2, t3 -+ vpclmulqdq $0x00, \a, \b, \t0 // a_L * b_L -+ vpclmulqdq $0x01, \a, \b, \t1 // a_L * b_H -+ vpclmulqdq $0x10, \a, \b, \t2 // a_H * b_L -+ vpclmulqdq $0x11, \a, \b, \t3 // a_H * b_H -+ vpxord \t0, \lo, \lo -+ vpternlogd $0x96, \t2, \t1, \mi -+ vpxord \t3, \hi, \hi -+.endm -+ -+// Reduce the unreduced products from \lo, \mi, and \hi and store the 128-bit -+// reduced products in \hi. See _ghash_mul_step for explanation of reduction. 
-+.macro _ghash_reduce lo, mi, hi, gfpoly, t0 -+ vpclmulqdq $0x01, \lo, \gfpoly, \t0 -+ vpshufd $0x4e, \lo, \lo -+ vpternlogd $0x96, \t0, \lo, \mi -+ vpclmulqdq $0x01, \mi, \gfpoly, \t0 -+ vpshufd $0x4e, \mi, \mi -+ vpternlogd $0x96, \t0, \mi, \hi -+.endm -+ -+// void aes_gcm_precompute_##suffix(struct aes_gcm_key_avx10 *key); -+// -+// Given the expanded AES key |key->aes_key|, this function derives the GHASH -+// subkey and initializes |key->ghash_key_powers| with powers of it. -+// -+// The number of key powers initialized is NUM_H_POWERS, and they are stored in -+// the order H^NUM_H_POWERS to H^1. The zeroized padding blocks after the key -+// powers themselves are also initialized. -+// -+// This macro supports both VL=32 and VL=64. _set_veclen must have been invoked -+// with the desired length. In the VL=32 case, the function computes twice as -+// many key powers than are actually used by the VL=32 GCM update functions. -+// This is done to keep the key format the same regardless of vector length. -+.macro _aes_gcm_precompute -+ -+ // Function arguments -+ .set KEY, %rdi -+ -+ // Additional local variables. V0-V2 and %rax are used as temporaries. -+ .set POWERS_PTR, %rsi -+ .set RNDKEYLAST_PTR, %rdx -+ .set H_CUR, V3 -+ .set H_CUR_YMM, %ymm3 -+ .set H_CUR_XMM, %xmm3 -+ .set H_INC, V4 -+ .set H_INC_YMM, %ymm4 -+ .set H_INC_XMM, %xmm4 -+ .set GFPOLY, V5 -+ .set GFPOLY_YMM, %ymm5 -+ .set GFPOLY_XMM, %xmm5 -+ -+ // Get pointer to lowest set of key powers (located at end of array). -+ lea OFFSETOFEND_H_POWERS-VL(KEY), POWERS_PTR -+ -+ // Encrypt an all-zeroes block to get the raw hash subkey. -+ movl OFFSETOF_AESKEYLEN(KEY), %eax -+ lea 6*16(KEY,%rax,4), RNDKEYLAST_PTR -+ vmovdqu (KEY), %xmm0 // Zero-th round key XOR all-zeroes block -+ add $16, KEY -+1: -+ vaesenc (KEY), %xmm0, %xmm0 -+ add $16, KEY -+ cmp KEY, RNDKEYLAST_PTR -+ jne 1b -+ vaesenclast (RNDKEYLAST_PTR), %xmm0, %xmm0 -+ -+ // Reflect the bytes of the raw hash subkey. 
-+ vpshufb .Lbswap_mask(%rip), %xmm0, H_CUR_XMM -+ -+ // Zeroize the padding blocks. -+ vpxor %xmm0, %xmm0, %xmm0 -+ vmovdqu %ymm0, VL(POWERS_PTR) -+ vmovdqu %xmm0, VL+2*16(POWERS_PTR) -+ -+ // Finish preprocessing the first key power, H^1. Since this GHASH -+ // implementation operates directly on values with the backwards bit -+ // order specified by the GCM standard, it's necessary to preprocess the -+ // raw key as follows. First, reflect its bytes. Second, multiply it -+ // by x^-1 mod x^128 + x^7 + x^2 + x + 1 (if using the backwards -+ // interpretation of polynomial coefficients), which can also be -+ // interpreted as multiplication by x mod x^128 + x^127 + x^126 + x^121 -+ // + 1 using the alternative, natural interpretation of polynomial -+ // coefficients. For details, see the comment above _ghash_mul_step. -+ // -+ // Either way, for the multiplication the concrete operation performed -+ // is a left shift of the 128-bit value by 1 bit, then an XOR with (0xc2 -+ // << 120) | 1 if a 1 bit was carried out. However, there's no 128-bit -+ // wide shift instruction, so instead double each of the two 64-bit -+ // halves and incorporate the internal carry bit into the value XOR'd. -+ vpshufd $0xd3, H_CUR_XMM, %xmm0 -+ vpsrad $31, %xmm0, %xmm0 -+ vpaddq H_CUR_XMM, H_CUR_XMM, H_CUR_XMM -+ vpand .Lgfpoly_and_internal_carrybit(%rip), %xmm0, %xmm0 -+ vpxor %xmm0, H_CUR_XMM, H_CUR_XMM -+ -+ // Load the gfpoly constant. -+ vbroadcasti32x4 .Lgfpoly(%rip), GFPOLY -+ -+ // Square H^1 to get H^2. -+ // -+ // Note that as with H^1, all higher key powers also need an extra -+ // factor of x^-1 (or x using the natural interpretation). Nothing -+ // special needs to be done to make this happen, though: H^1 * H^1 would -+ // end up with two factors of x^-1, but the multiplication consumes one. -+ // So the product H^2 ends up with the desired one factor of x^-1. 
-+ _ghash_mul H_CUR_XMM, H_CUR_XMM, H_INC_XMM, GFPOLY_XMM, \ -+ %xmm0, %xmm1, %xmm2 -+ -+ // Create H_CUR_YMM = [H^2, H^1] and H_INC_YMM = [H^2, H^2]. -+ vinserti128 $1, H_CUR_XMM, H_INC_YMM, H_CUR_YMM -+ vinserti128 $1, H_INC_XMM, H_INC_YMM, H_INC_YMM -+ -+.if VL == 64 -+ // Create H_CUR = [H^4, H^3, H^2, H^1] and H_INC = [H^4, H^4, H^4, H^4]. -+ _ghash_mul H_INC_YMM, H_CUR_YMM, H_INC_YMM, GFPOLY_YMM, \ -+ %ymm0, %ymm1, %ymm2 -+ vinserti64x4 $1, H_CUR_YMM, H_INC, H_CUR -+ vshufi64x2 $0, H_INC, H_INC, H_INC -+.endif -+ -+ // Store the lowest set of key powers. -+ vmovdqu8 H_CUR, (POWERS_PTR) -+ -+ // Compute and store the remaining key powers. With VL=32, repeatedly -+ // multiply [H^(i+1), H^i] by [H^2, H^2] to get [H^(i+3), H^(i+2)]. -+ // With VL=64, repeatedly multiply [H^(i+3), H^(i+2), H^(i+1), H^i] by -+ // [H^4, H^4, H^4, H^4] to get [H^(i+7), H^(i+6), H^(i+5), H^(i+4)]. -+ mov $(NUM_H_POWERS*16/VL) - 1, %eax -+.Lprecompute_next\@: -+ sub $VL, POWERS_PTR -+ _ghash_mul H_INC, H_CUR, H_CUR, GFPOLY, V0, V1, V2 -+ vmovdqu8 H_CUR, (POWERS_PTR) -+ dec %eax -+ jnz .Lprecompute_next\@ -+ -+ vzeroupper // This is needed after using ymm or zmm registers. -+ RET -+.endm -+ -+// XOR together the 128-bit lanes of \src (whose low lane is \src_xmm) and store -+// the result in \dst_xmm. This implicitly zeroizes the other lanes of dst. -+.macro _horizontal_xor src, src_xmm, dst_xmm, t0_xmm, t1_xmm, t2_xmm -+ vextracti32x4 $1, \src, \t0_xmm -+.if VL == 32 -+ vpxord \t0_xmm, \src_xmm, \dst_xmm -+.elseif VL == 64 -+ vextracti32x4 $2, \src, \t1_xmm -+ vextracti32x4 $3, \src, \t2_xmm -+ vpxord \t0_xmm, \src_xmm, \dst_xmm -+ vpternlogd $0x96, \t1_xmm, \t2_xmm, \dst_xmm -+.else -+ .error "Unsupported vector length" -+.endif -+.endm -+ -+// Do one step of the GHASH update of the data blocks given in the vector -+// registers GHASHDATA[0-3]. \i specifies the step to do, 0 through 9. 
The -+// division into steps allows users of this macro to optionally interleave the -+// computation with other instructions. This macro uses the vector register -+// GHASH_ACC as input/output; GHASHDATA[0-3] as inputs that are clobbered; -+// H_POW[4-1], GFPOLY, and BSWAP_MASK as inputs that aren't clobbered; and -+// GHASHTMP[0-2] as temporaries. This macro handles the byte-reflection of the -+// data blocks. The parameter registers must be preserved across steps. -+// -+// The GHASH update does: GHASH_ACC = H_POW4*(GHASHDATA0 + GHASH_ACC) + -+// H_POW3*GHASHDATA1 + H_POW2*GHASHDATA2 + H_POW1*GHASHDATA3, where the -+// operations are vectorized operations on vectors of 16-byte blocks. E.g., -+// with VL=32 there are 2 blocks per vector and the vectorized terms correspond -+// to the following non-vectorized terms: -+// -+// H_POW4*(GHASHDATA0 + GHASH_ACC) => H^8*(blk0 + GHASH_ACC_XMM) and H^7*(blk1 + 0) -+// H_POW3*GHASHDATA1 => H^6*blk2 and H^5*blk3 -+// H_POW2*GHASHDATA2 => H^4*blk4 and H^3*blk5 -+// H_POW1*GHASHDATA3 => H^2*blk6 and H^1*blk7 -+// -+// With VL=64, we use 4 blocks/vector, H^16 through H^1, and blk0 through blk15. -+// -+// More concretely, this code does: -+// - Do vectorized "schoolbook" multiplications to compute the intermediate -+// 256-bit product of each block and its corresponding hash key power. -+// There are 4*VL/16 of these intermediate products. -+// - Sum (XOR) the intermediate 256-bit products across vectors. This leaves -+// VL/16 256-bit intermediate values. -+// - Do a vectorized reduction of these 256-bit intermediate values to -+// 128-bits each. This leaves VL/16 128-bit intermediate values. -+// - Sum (XOR) these values and store the 128-bit result in GHASH_ACC_XMM. -+// -+// See _ghash_mul_step for the full explanation of the operations performed for -+// each individual finite field multiplication and reduction. 
-+.macro _ghash_step_4x i -+.if \i == 0 -+ vpshufb BSWAP_MASK, GHASHDATA0, GHASHDATA0 -+ vpxord GHASH_ACC, GHASHDATA0, GHASHDATA0 -+ vpshufb BSWAP_MASK, GHASHDATA1, GHASHDATA1 -+ vpshufb BSWAP_MASK, GHASHDATA2, GHASHDATA2 -+.elseif \i == 1 -+ vpshufb BSWAP_MASK, GHASHDATA3, GHASHDATA3 -+ vpclmulqdq $0x00, H_POW4, GHASHDATA0, GHASH_ACC // LO_0 -+ vpclmulqdq $0x00, H_POW3, GHASHDATA1, GHASHTMP0 // LO_1 -+ vpclmulqdq $0x00, H_POW2, GHASHDATA2, GHASHTMP1 // LO_2 -+.elseif \i == 2 -+ vpxord GHASHTMP0, GHASH_ACC, GHASH_ACC // sum(LO_{1,0}) -+ vpclmulqdq $0x00, H_POW1, GHASHDATA3, GHASHTMP2 // LO_3 -+ vpternlogd $0x96, GHASHTMP2, GHASHTMP1, GHASH_ACC // LO = sum(LO_{3,2,1,0}) -+ vpclmulqdq $0x01, H_POW4, GHASHDATA0, GHASHTMP0 // MI_0 -+.elseif \i == 3 -+ vpclmulqdq $0x01, H_POW3, GHASHDATA1, GHASHTMP1 // MI_1 -+ vpclmulqdq $0x01, H_POW2, GHASHDATA2, GHASHTMP2 // MI_2 -+ vpternlogd $0x96, GHASHTMP2, GHASHTMP1, GHASHTMP0 // sum(MI_{2,1,0}) -+ vpclmulqdq $0x01, H_POW1, GHASHDATA3, GHASHTMP1 // MI_3 -+.elseif \i == 4 -+ vpclmulqdq $0x10, H_POW4, GHASHDATA0, GHASHTMP2 // MI_4 -+ vpternlogd $0x96, GHASHTMP2, GHASHTMP1, GHASHTMP0 // sum(MI_{4,3,2,1,0}) -+ vpclmulqdq $0x10, H_POW3, GHASHDATA1, GHASHTMP1 // MI_5 -+ vpclmulqdq $0x10, H_POW2, GHASHDATA2, GHASHTMP2 // MI_6 -+.elseif \i == 5 -+ vpternlogd $0x96, GHASHTMP2, GHASHTMP1, GHASHTMP0 // sum(MI_{6,5,4,3,2,1,0}) -+ vpclmulqdq $0x01, GHASH_ACC, GFPOLY, GHASHTMP2 // LO_L*(x^63 + x^62 + x^57) -+ vpclmulqdq $0x10, H_POW1, GHASHDATA3, GHASHTMP1 // MI_7 -+ vpxord GHASHTMP1, GHASHTMP0, GHASHTMP0 // MI = sum(MI_{7,6,5,4,3,2,1,0}) -+.elseif \i == 6 -+ vpshufd $0x4e, GHASH_ACC, GHASH_ACC // Swap halves of LO -+ vpclmulqdq $0x11, H_POW4, GHASHDATA0, GHASHDATA0 // HI_0 -+ vpclmulqdq $0x11, H_POW3, GHASHDATA1, GHASHDATA1 // HI_1 -+ vpclmulqdq $0x11, H_POW2, GHASHDATA2, GHASHDATA2 // HI_2 -+.elseif \i == 7 -+ vpternlogd $0x96, GHASHTMP2, GHASH_ACC, GHASHTMP0 // Fold LO into MI -+ vpclmulqdq $0x11, H_POW1, GHASHDATA3, GHASHDATA3 // HI_3 -+ 
vpternlogd $0x96, GHASHDATA2, GHASHDATA1, GHASHDATA0 // sum(HI_{2,1,0}) -+ vpclmulqdq $0x01, GHASHTMP0, GFPOLY, GHASHTMP1 // MI_L*(x^63 + x^62 + x^57) -+.elseif \i == 8 -+ vpxord GHASHDATA3, GHASHDATA0, GHASH_ACC // HI = sum(HI_{3,2,1,0}) -+ vpshufd $0x4e, GHASHTMP0, GHASHTMP0 // Swap halves of MI -+ vpternlogd $0x96, GHASHTMP1, GHASHTMP0, GHASH_ACC // Fold MI into HI -+.elseif \i == 9 -+ _horizontal_xor GHASH_ACC, GHASH_ACC_XMM, GHASH_ACC_XMM, \ -+ GHASHDATA0_XMM, GHASHDATA1_XMM, GHASHDATA2_XMM -+.endif -+.endm -+ -+// Do one non-last round of AES encryption on the counter blocks in V0-V3 using -+// the round key that has been broadcast to all 128-bit lanes of \round_key. -+.macro _vaesenc_4x round_key -+ vaesenc \round_key, V0, V0 -+ vaesenc \round_key, V1, V1 -+ vaesenc \round_key, V2, V2 -+ vaesenc \round_key, V3, V3 -+.endm -+ -+// Start the AES encryption of four vectors of counter blocks. -+.macro _ctr_begin_4x -+ -+ // Increment LE_CTR four times to generate four vectors of little-endian -+ // counter blocks, swap each to big-endian, and store them in V0-V3. -+ vpshufb BSWAP_MASK, LE_CTR, V0 -+ vpaddd LE_CTR_INC, LE_CTR, LE_CTR -+ vpshufb BSWAP_MASK, LE_CTR, V1 -+ vpaddd LE_CTR_INC, LE_CTR, LE_CTR -+ vpshufb BSWAP_MASK, LE_CTR, V2 -+ vpaddd LE_CTR_INC, LE_CTR, LE_CTR -+ vpshufb BSWAP_MASK, LE_CTR, V3 -+ vpaddd LE_CTR_INC, LE_CTR, LE_CTR -+ -+ // AES "round zero": XOR in the zero-th round key. -+ vpxord RNDKEY0, V0, V0 -+ vpxord RNDKEY0, V1, V1 -+ vpxord RNDKEY0, V2, V2 -+ vpxord RNDKEY0, V3, V3 -+.endm -+ -+// void aes_gcm_{enc,dec}_update_##suffix(const struct aes_gcm_key_avx10 *key, -+// const u32 le_ctr[4], u8 ghash_acc[16], -+// const u8 *src, u8 *dst, int datalen); -+// -+// This macro generates a GCM encryption or decryption update function with the -+// above prototype (with \enc selecting which one). This macro supports both -+// VL=32 and VL=64. _set_veclen must have been invoked with the desired length. 
-+// -+// This function computes the next portion of the CTR keystream, XOR's it with -+// |datalen| bytes from |src|, and writes the resulting encrypted or decrypted -+// data to |dst|. It also updates the GHASH accumulator |ghash_acc| using the -+// next |datalen| ciphertext bytes. -+// -+// |datalen| must be a multiple of 16, except on the last call where it can be -+// any length. The caller must do any buffering needed to ensure this. Both -+// in-place and out-of-place en/decryption are supported. -+// -+// |le_ctr| must give the current counter in little-endian format. For a new -+// message, the low word of the counter must be 2. This function loads the -+// counter from |le_ctr| and increments the loaded counter as needed, but it -+// does *not* store the updated counter back to |le_ctr|. The caller must -+// update |le_ctr| if any more data segments follow. Internally, only the low -+// 32-bit word of the counter is incremented, following the GCM standard. -+.macro _aes_gcm_update enc -+ -+ // Function arguments -+ .set KEY, %rdi -+ .set LE_CTR_PTR, %rsi -+ .set GHASH_ACC_PTR, %rdx -+ .set SRC, %rcx -+ .set DST, %r8 -+ .set DATALEN, %r9d -+ .set DATALEN64, %r9 // Zero-extend DATALEN before using! -+ -+ // Additional local variables -+ -+ // %rax and %k1 are used as temporary registers. LE_CTR_PTR is also -+ // available as a temporary register after the counter is loaded. -+ -+ // AES key length in bytes -+ .set AESKEYLEN, %r10d -+ .set AESKEYLEN64, %r10 -+ -+ // Pointer to the last AES round key for the chosen AES variant -+ .set RNDKEYLAST_PTR, %r11 -+ -+ // In the main loop, V0-V3 are used as AES input and output. Elsewhere -+ // they are used as temporary registers. -+ -+ // GHASHDATA[0-3] hold the ciphertext blocks and GHASH input data. 
-+ .set GHASHDATA0, V4 -+ .set GHASHDATA0_XMM, %xmm4 -+ .set GHASHDATA1, V5 -+ .set GHASHDATA1_XMM, %xmm5 -+ .set GHASHDATA2, V6 -+ .set GHASHDATA2_XMM, %xmm6 -+ .set GHASHDATA3, V7 -+ -+ // BSWAP_MASK is the shuffle mask for byte-reflecting 128-bit values -+ // using vpshufb, copied to all 128-bit lanes. -+ .set BSWAP_MASK, V8 -+ -+ // RNDKEY temporarily holds the next AES round key. -+ .set RNDKEY, V9 -+ -+ // GHASH_ACC is the accumulator variable for GHASH. When fully reduced, -+ // only the lowest 128-bit lane can be nonzero. When not fully reduced, -+ // more than one lane may be used, and they need to be XOR'd together. -+ .set GHASH_ACC, V10 -+ .set GHASH_ACC_XMM, %xmm10 -+ -+ // LE_CTR_INC is the vector of 32-bit words that need to be added to a -+ // vector of little-endian counter blocks to advance it forwards. -+ .set LE_CTR_INC, V11 -+ -+ // LE_CTR contains the next set of little-endian counter blocks. -+ .set LE_CTR, V12 -+ -+ // RNDKEY0, RNDKEYLAST, and RNDKEY_M[9-5] contain cached AES round keys, -+ // copied to all 128-bit lanes. RNDKEY0 is the zero-th round key, -+ // RNDKEYLAST the last, and RNDKEY_M\i the one \i-th from the last. -+ .set RNDKEY0, V13 -+ .set RNDKEYLAST, V14 -+ .set RNDKEY_M9, V15 -+ .set RNDKEY_M8, V16 -+ .set RNDKEY_M7, V17 -+ .set RNDKEY_M6, V18 -+ .set RNDKEY_M5, V19 -+ -+ // RNDKEYLAST[0-3] temporarily store the last AES round key XOR'd with -+ // the corresponding block of source data. This is useful because -+ // vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a), and key ^ b can -+ // be computed in parallel with the AES rounds. -+ .set RNDKEYLAST0, V20 -+ .set RNDKEYLAST1, V21 -+ .set RNDKEYLAST2, V22 -+ .set RNDKEYLAST3, V23 -+ -+ // GHASHTMP[0-2] are temporary variables used by _ghash_step_4x. These -+ // cannot coincide with anything used for AES encryption, since for -+ // performance reasons GHASH and AES encryption are interleaved. 
-+ .set GHASHTMP0, V24 -+ .set GHASHTMP1, V25 -+ .set GHASHTMP2, V26 -+ -+ // H_POW[4-1] contain the powers of the hash key H^(4*VL/16)...H^1. The -+ // descending numbering reflects the order of the key powers. -+ .set H_POW4, V27 -+ .set H_POW3, V28 -+ .set H_POW2, V29 -+ .set H_POW1, V30 -+ -+ // GFPOLY contains the .Lgfpoly constant, copied to all 128-bit lanes. -+ .set GFPOLY, V31 -+ -+ // Load some constants. -+ vbroadcasti32x4 .Lbswap_mask(%rip), BSWAP_MASK -+ vbroadcasti32x4 .Lgfpoly(%rip), GFPOLY -+ -+ // Load the GHASH accumulator and the starting counter. -+ vmovdqu (GHASH_ACC_PTR), GHASH_ACC_XMM -+ vbroadcasti32x4 (LE_CTR_PTR), LE_CTR -+ -+ // Load the AES key length in bytes. -+ movl OFFSETOF_AESKEYLEN(KEY), AESKEYLEN -+ -+ // Make RNDKEYLAST_PTR point to the last AES round key. This is the -+ // round key with index 10, 12, or 14 for AES-128, AES-192, or AES-256 -+ // respectively. Then load the zero-th and last round keys. -+ lea 6*16(KEY,AESKEYLEN64,4), RNDKEYLAST_PTR -+ vbroadcasti32x4 (KEY), RNDKEY0 -+ vbroadcasti32x4 (RNDKEYLAST_PTR), RNDKEYLAST -+ -+ // Finish initializing LE_CTR by adding [0, 1, ...] to its low words. -+ vpaddd .Lctr_pattern(%rip), LE_CTR, LE_CTR -+ -+ // Initialize LE_CTR_INC to contain VL/16 in all 128-bit lanes. -+.if VL == 32 -+ vbroadcasti32x4 .Linc_2blocks(%rip), LE_CTR_INC -+.elseif VL == 64 -+ vbroadcasti32x4 .Linc_4blocks(%rip), LE_CTR_INC -+.else -+ .error "Unsupported vector length" -+.endif -+ -+ // If there are at least 4*VL bytes of data, then continue into the loop -+ // that processes 4*VL bytes of data at a time. Otherwise skip it. -+ // -+ // Pre-subtracting 4*VL from DATALEN saves an instruction from the main -+ // loop and also ensures that at least one write always occurs to -+ // DATALEN, zero-extending it and allowing DATALEN64 to be used later. -+ sub $4*VL, DATALEN -+ jl .Lcrypt_loop_4x_done\@ -+ -+ // Load powers of the hash key. 
-+ vmovdqu8 OFFSETOFEND_H_POWERS-4*VL(KEY), H_POW4 -+ vmovdqu8 OFFSETOFEND_H_POWERS-3*VL(KEY), H_POW3 -+ vmovdqu8 OFFSETOFEND_H_POWERS-2*VL(KEY), H_POW2 -+ vmovdqu8 OFFSETOFEND_H_POWERS-1*VL(KEY), H_POW1 -+ -+ // Main loop: en/decrypt and hash 4 vectors at a time. -+ // -+ // When possible, interleave the AES encryption of the counter blocks -+ // with the GHASH update of the ciphertext blocks. This improves -+ // performance on many CPUs because the execution ports used by the VAES -+ // instructions often differ from those used by vpclmulqdq and other -+ // instructions used in GHASH. For example, many Intel CPUs dispatch -+ // vaesenc to ports 0 and 1 and vpclmulqdq to port 5. -+ // -+ // The interleaving is easiest to do during decryption, since during -+ // decryption the ciphertext blocks are immediately available. For -+ // encryption, instead encrypt the first set of blocks, then hash those -+ // blocks while encrypting the next set of blocks, repeat that as -+ // needed, and finally hash the last set of blocks. -+ -+.if \enc -+ // Encrypt the first 4 vectors of plaintext blocks. Leave the resulting -+ // ciphertext in GHASHDATA[0-3] for GHASH. -+ _ctr_begin_4x -+ lea 16(KEY), %rax -+1: -+ vbroadcasti32x4 (%rax), RNDKEY -+ _vaesenc_4x RNDKEY -+ add $16, %rax -+ cmp %rax, RNDKEYLAST_PTR -+ jne 1b -+ vpxord 0*VL(SRC), RNDKEYLAST, RNDKEYLAST0 -+ vpxord 1*VL(SRC), RNDKEYLAST, RNDKEYLAST1 -+ vpxord 2*VL(SRC), RNDKEYLAST, RNDKEYLAST2 -+ vpxord 3*VL(SRC), RNDKEYLAST, RNDKEYLAST3 -+ vaesenclast RNDKEYLAST0, V0, GHASHDATA0 -+ vaesenclast RNDKEYLAST1, V1, GHASHDATA1 -+ vaesenclast RNDKEYLAST2, V2, GHASHDATA2 -+ vaesenclast RNDKEYLAST3, V3, GHASHDATA3 -+ vmovdqu8 GHASHDATA0, 0*VL(DST) -+ vmovdqu8 GHASHDATA1, 1*VL(DST) -+ vmovdqu8 GHASHDATA2, 2*VL(DST) -+ vmovdqu8 GHASHDATA3, 3*VL(DST) -+ add $4*VL, SRC -+ add $4*VL, DST -+ sub $4*VL, DATALEN -+ jl .Lghash_last_ciphertext_4x\@ -+.endif -+ -+ // Cache as many additional AES round keys as possible. 
-+.irp i, 9,8,7,6,5 -+ vbroadcasti32x4 -\i*16(RNDKEYLAST_PTR), RNDKEY_M\i -+.endr -+ -+.Lcrypt_loop_4x\@: -+ -+ // If decrypting, load more ciphertext blocks into GHASHDATA[0-3]. If -+ // encrypting, GHASHDATA[0-3] already contain the previous ciphertext. -+.if !\enc -+ vmovdqu8 0*VL(SRC), GHASHDATA0 -+ vmovdqu8 1*VL(SRC), GHASHDATA1 -+ vmovdqu8 2*VL(SRC), GHASHDATA2 -+ vmovdqu8 3*VL(SRC), GHASHDATA3 -+.endif -+ -+ // Start the AES encryption of the counter blocks. -+ _ctr_begin_4x -+ cmp $24, AESKEYLEN -+ jl 128f // AES-128? -+ je 192f // AES-192? -+ // AES-256 -+ vbroadcasti32x4 -13*16(RNDKEYLAST_PTR), RNDKEY -+ _vaesenc_4x RNDKEY -+ vbroadcasti32x4 -12*16(RNDKEYLAST_PTR), RNDKEY -+ _vaesenc_4x RNDKEY -+192: -+ vbroadcasti32x4 -11*16(RNDKEYLAST_PTR), RNDKEY -+ _vaesenc_4x RNDKEY -+ vbroadcasti32x4 -10*16(RNDKEYLAST_PTR), RNDKEY -+ _vaesenc_4x RNDKEY -+128: -+ -+ // XOR the source data with the last round key, saving the result in -+ // RNDKEYLAST[0-3]. This reduces latency by taking advantage of the -+ // property vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a). -+.if \enc -+ vpxord 0*VL(SRC), RNDKEYLAST, RNDKEYLAST0 -+ vpxord 1*VL(SRC), RNDKEYLAST, RNDKEYLAST1 -+ vpxord 2*VL(SRC), RNDKEYLAST, RNDKEYLAST2 -+ vpxord 3*VL(SRC), RNDKEYLAST, RNDKEYLAST3 -+.else -+ vpxord GHASHDATA0, RNDKEYLAST, RNDKEYLAST0 -+ vpxord GHASHDATA1, RNDKEYLAST, RNDKEYLAST1 -+ vpxord GHASHDATA2, RNDKEYLAST, RNDKEYLAST2 -+ vpxord GHASHDATA3, RNDKEYLAST, RNDKEYLAST3 -+.endif -+ -+ // Finish the AES encryption of the counter blocks in V0-V3, interleaved -+ // with the GHASH update of the ciphertext blocks in GHASHDATA[0-3]. -+.irp i, 9,8,7,6,5 -+ _vaesenc_4x RNDKEY_M\i -+ _ghash_step_4x (9 - \i) -+.endr -+.irp i, 4,3,2,1 -+ vbroadcasti32x4 -\i*16(RNDKEYLAST_PTR), RNDKEY -+ _vaesenc_4x RNDKEY -+ _ghash_step_4x (9 - \i) -+.endr -+ _ghash_step_4x 9 -+ -+ // Do the last AES round. This handles the XOR with the source data -+ // too, as per the optimization described above. 
-+ vaesenclast RNDKEYLAST0, V0, GHASHDATA0 -+ vaesenclast RNDKEYLAST1, V1, GHASHDATA1 -+ vaesenclast RNDKEYLAST2, V2, GHASHDATA2 -+ vaesenclast RNDKEYLAST3, V3, GHASHDATA3 -+ -+ // Store the en/decrypted data to DST. -+ vmovdqu8 GHASHDATA0, 0*VL(DST) -+ vmovdqu8 GHASHDATA1, 1*VL(DST) -+ vmovdqu8 GHASHDATA2, 2*VL(DST) -+ vmovdqu8 GHASHDATA3, 3*VL(DST) -+ -+ add $4*VL, SRC -+ add $4*VL, DST -+ sub $4*VL, DATALEN -+ jge .Lcrypt_loop_4x\@ -+ -+.if \enc -+.Lghash_last_ciphertext_4x\@: -+ // Update GHASH with the last set of ciphertext blocks. -+.irp i, 0,1,2,3,4,5,6,7,8,9 -+ _ghash_step_4x \i -+.endr -+.endif -+ -+.Lcrypt_loop_4x_done\@: -+ -+ // Undo the extra subtraction by 4*VL and check whether data remains. -+ add $4*VL, DATALEN -+ jz .Ldone\@ -+ -+ // The data length isn't a multiple of 4*VL. Process the remaining data -+ // of length 1 <= DATALEN < 4*VL, up to one vector (VL bytes) at a time. -+ // Going one vector at a time may seem inefficient compared to having -+ // separate code paths for each possible number of vectors remaining. -+ // However, using a loop keeps the code size down, and it performs -+ // surprising well; modern CPUs will start executing the next iteration -+ // before the previous one finishes and also predict the number of loop -+ // iterations. For a similar reason, we roll up the AES rounds. -+ // -+ // On the last iteration, the remaining length may be less than VL. -+ // Handle this using masking. -+ // -+ // Since there are enough key powers available for all remaining data, -+ // there is no need to do a GHASH reduction after each iteration. -+ // Instead, multiply each remaining block by its own key power, and only -+ // do a GHASH reduction at the very end. -+ -+ // Make POWERS_PTR point to the key powers [H^N, H^(N-1), ...] where N -+ // is the number of blocks that remain. -+ .set POWERS_PTR, LE_CTR_PTR // LE_CTR_PTR is free to be reused. 
-+ mov DATALEN, %eax -+ neg %rax -+ and $~15, %rax // -round_up(DATALEN, 16) -+ lea OFFSETOFEND_H_POWERS(KEY,%rax), POWERS_PTR -+ -+ // Start collecting the unreduced GHASH intermediate value LO, MI, HI. -+ .set LO, GHASHDATA0 -+ .set LO_XMM, GHASHDATA0_XMM -+ .set MI, GHASHDATA1 -+ .set MI_XMM, GHASHDATA1_XMM -+ .set HI, GHASHDATA2 -+ .set HI_XMM, GHASHDATA2_XMM -+ vpxor LO_XMM, LO_XMM, LO_XMM -+ vpxor MI_XMM, MI_XMM, MI_XMM -+ vpxor HI_XMM, HI_XMM, HI_XMM -+ -+.Lcrypt_loop_1x\@: -+ -+ // Select the appropriate mask for this iteration: all 1's if -+ // DATALEN >= VL, otherwise DATALEN 1's. Do this branchlessly using the -+ // bzhi instruction from BMI2. (This relies on DATALEN <= 255.) -+.if VL < 64 -+ mov $-1, %eax -+ bzhi DATALEN, %eax, %eax -+ kmovd %eax, %k1 -+.else -+ mov $-1, %rax -+ bzhi DATALEN64, %rax, %rax -+ kmovq %rax, %k1 -+.endif -+ -+ // Encrypt a vector of counter blocks. This does not need to be masked. -+ vpshufb BSWAP_MASK, LE_CTR, V0 -+ vpaddd LE_CTR_INC, LE_CTR, LE_CTR -+ vpxord RNDKEY0, V0, V0 -+ lea 16(KEY), %rax -+1: -+ vbroadcasti32x4 (%rax), RNDKEY -+ vaesenc RNDKEY, V0, V0 -+ add $16, %rax -+ cmp %rax, RNDKEYLAST_PTR -+ jne 1b -+ vaesenclast RNDKEYLAST, V0, V0 -+ -+ // XOR the data with the appropriate number of keystream bytes. -+ vmovdqu8 (SRC), V1{%k1}{z} -+ vpxord V1, V0, V0 -+ vmovdqu8 V0, (DST){%k1} -+ -+ // Update GHASH with the ciphertext block(s), without reducing. -+ // -+ // In the case of DATALEN < VL, the ciphertext is zero-padded to VL. -+ // (If decrypting, it's done by the above masked load. If encrypting, -+ // it's done by the below masked register-to-register move.) Note that -+ // if DATALEN <= VL - 16, there will be additional padding beyond the -+ // padding of the last block specified by GHASH itself; i.e., there may -+ // be whole block(s) that get processed by the GHASH multiplication and -+ // reduction instructions but should not actually be included in the -+ // GHASH. 
However, any such blocks are all-zeroes, and the values that -+ // they're multiplied with are also all-zeroes. Therefore they just add -+ // 0 * 0 = 0 to the final GHASH result, which makes no difference. -+ vmovdqu8 (POWERS_PTR), H_POW1 -+.if \enc -+ vmovdqu8 V0, V1{%k1}{z} -+.endif -+ vpshufb BSWAP_MASK, V1, V0 -+ vpxord GHASH_ACC, V0, V0 -+ _ghash_mul_noreduce H_POW1, V0, LO, MI, HI, GHASHDATA3, V1, V2, V3 -+ vpxor GHASH_ACC_XMM, GHASH_ACC_XMM, GHASH_ACC_XMM -+ -+ add $VL, POWERS_PTR -+ add $VL, SRC -+ add $VL, DST -+ sub $VL, DATALEN -+ jg .Lcrypt_loop_1x\@ -+ -+ // Finally, do the GHASH reduction. -+ _ghash_reduce LO, MI, HI, GFPOLY, V0 -+ _horizontal_xor HI, HI_XMM, GHASH_ACC_XMM, %xmm0, %xmm1, %xmm2 -+ -+.Ldone\@: -+ // Store the updated GHASH accumulator back to memory. -+ vmovdqu GHASH_ACC_XMM, (GHASH_ACC_PTR) -+ -+ vzeroupper // This is needed after using ymm or zmm registers. -+ RET -+.endm -+ -+// void aes_gcm_enc_final_vaes_avx10(const struct aes_gcm_key_avx10 *key, -+// const u32 le_ctr[4], u8 ghash_acc[16], -+// u64 total_aadlen, u64 total_datalen); -+// bool aes_gcm_dec_final_vaes_avx10(const struct aes_gcm_key_avx10 *key, -+// const u32 le_ctr[4], -+// const u8 ghash_acc[16], -+// u64 total_aadlen, u64 total_datalen, -+// const u8 tag[16], int taglen); -+// -+// This macro generates one of the above two functions (with \enc selecting -+// which one). Both functions finish computing the GCM authentication tag by -+// updating GHASH with the lengths block and encrypting the GHASH accumulator. -+// |total_aadlen| and |total_datalen| must be the total length of the additional -+// authenticated data and the en/decrypted data in bytes, respectively. -+// -+// The encryption function then stores the full-length (16-byte) computed -+// authentication tag to |ghash_acc|. 
The decryption function instead loads the -+// expected authentication tag (the one that was transmitted) from the 16-byte -+// buffer |tag|, compares the first 4 <= |taglen| <= 16 bytes of it to the -+// computed tag in constant time, and returns true if and only if they match. -+.macro _aes_gcm_final enc -+ -+ // Function arguments -+ .set KEY, %rdi -+ .set LE_CTR_PTR, %rsi -+ .set GHASH_ACC_PTR, %rdx -+ .set TOTAL_AADLEN, %rcx -+ .set TOTAL_DATALEN, %r8 -+ .set TAG, %r9 -+ .set TAGLEN, %r10d // Originally at 8(%rsp) -+ -+ // Additional local variables. -+ // %rax, %xmm0-%xmm3, and %k1 are used as temporary registers. -+ .set AESKEYLEN, %r11d -+ .set AESKEYLEN64, %r11 -+ .set GFPOLY, %xmm4 -+ .set BSWAP_MASK, %xmm5 -+ .set LE_CTR, %xmm6 -+ .set GHASH_ACC, %xmm7 -+ .set H_POW1, %xmm8 -+ -+ // Load some constants. -+ vmovdqa .Lgfpoly(%rip), GFPOLY -+ vmovdqa .Lbswap_mask(%rip), BSWAP_MASK -+ -+ // Load the AES key length in bytes. -+ movl OFFSETOF_AESKEYLEN(KEY), AESKEYLEN -+ -+ // Set up a counter block with 1 in the low 32-bit word. This is the -+ // counter that produces the ciphertext needed to encrypt the auth tag. -+ // GFPOLY has 1 in the low word, so grab the 1 from there using a blend. -+ vpblendd $0xe, (LE_CTR_PTR), GFPOLY, LE_CTR -+ -+ // Build the lengths block and XOR it with the GHASH accumulator. -+ // Although the lengths block is defined as the AAD length followed by -+ // the en/decrypted data length, both in big-endian byte order, a byte -+ // reflection of the full block is needed because of the way we compute -+ // GHASH (see _ghash_mul_step). By using little-endian values in the -+ // opposite order, we avoid having to reflect any bytes here. -+ vmovq TOTAL_DATALEN, %xmm0 -+ vpinsrq $1, TOTAL_AADLEN, %xmm0, %xmm0 -+ vpsllq $3, %xmm0, %xmm0 // Bytes to bits -+ vpxor (GHASH_ACC_PTR), %xmm0, GHASH_ACC -+ -+ // Load the first hash key power (H^1), which is stored last. 
-+ vmovdqu8 OFFSETOFEND_H_POWERS-16(KEY), H_POW1 -+ -+.if !\enc -+ // Prepare a mask of TAGLEN one bits. -+ movl 8(%rsp), TAGLEN -+ mov $-1, %eax -+ bzhi TAGLEN, %eax, %eax -+ kmovd %eax, %k1 -+.endif -+ -+ // Make %rax point to the last AES round key for the chosen AES variant. -+ lea 6*16(KEY,AESKEYLEN64,4), %rax -+ -+ // Start the AES encryption of the counter block by swapping the counter -+ // block to big-endian and XOR-ing it with the zero-th AES round key. -+ vpshufb BSWAP_MASK, LE_CTR, %xmm0 -+ vpxor (KEY), %xmm0, %xmm0 -+ -+ // Complete the AES encryption and multiply GHASH_ACC by H^1. -+ // Interleave the AES and GHASH instructions to improve performance. -+ cmp $24, AESKEYLEN -+ jl 128f // AES-128? -+ je 192f // AES-192? -+ // AES-256 -+ vaesenc -13*16(%rax), %xmm0, %xmm0 -+ vaesenc -12*16(%rax), %xmm0, %xmm0 -+192: -+ vaesenc -11*16(%rax), %xmm0, %xmm0 -+ vaesenc -10*16(%rax), %xmm0, %xmm0 -+128: -+.irp i, 0,1,2,3,4,5,6,7,8 -+ _ghash_mul_step \i, H_POW1, GHASH_ACC, GHASH_ACC, GFPOLY, \ -+ %xmm1, %xmm2, %xmm3 -+ vaesenc (\i-9)*16(%rax), %xmm0, %xmm0 -+.endr -+ _ghash_mul_step 9, H_POW1, GHASH_ACC, GHASH_ACC, GFPOLY, \ -+ %xmm1, %xmm2, %xmm3 -+ -+ // Undo the byte reflection of the GHASH accumulator. -+ vpshufb BSWAP_MASK, GHASH_ACC, GHASH_ACC -+ -+ // Do the last AES round and XOR the resulting keystream block with the -+ // GHASH accumulator to produce the full computed authentication tag. -+ // -+ // Reduce latency by taking advantage of the property vaesenclast(key, -+ // a) ^ b == vaesenclast(key ^ b, a). I.e., XOR GHASH_ACC into the last -+ // round key, instead of XOR'ing the final AES output with GHASH_ACC. -+ // -+ // enc_final then returns the computed auth tag, while dec_final -+ // compares it with the transmitted one and returns a bool. To compare -+ // the tags, dec_final XORs them together and uses vptest to check -+ // whether the result is all-zeroes. This should be constant-time. 
-+ // dec_final applies the vaesenclast optimization to this additional -+ // value XOR'd too, using vpternlogd to XOR the last round key, GHASH -+ // accumulator, and transmitted auth tag together in one instruction. -+.if \enc -+ vpxor (%rax), GHASH_ACC, %xmm1 -+ vaesenclast %xmm1, %xmm0, GHASH_ACC -+ vmovdqu GHASH_ACC, (GHASH_ACC_PTR) -+.else -+ vmovdqu (TAG), %xmm1 -+ vpternlogd $0x96, (%rax), GHASH_ACC, %xmm1 -+ vaesenclast %xmm1, %xmm0, %xmm0 -+ xor %eax, %eax -+ vmovdqu8 %xmm0, %xmm0{%k1}{z} // Truncate to TAGLEN bytes -+ vptest %xmm0, %xmm0 -+ sete %al -+.endif -+ // No need for vzeroupper here, since only used xmm registers were used. -+ RET -+.endm -+ -+_set_veclen 32 -+SYM_FUNC_START(aes_gcm_precompute_vaes_avx10_256) -+ _aes_gcm_precompute -+SYM_FUNC_END(aes_gcm_precompute_vaes_avx10_256) -+SYM_FUNC_START(aes_gcm_enc_update_vaes_avx10_256) -+ _aes_gcm_update 1 -+SYM_FUNC_END(aes_gcm_enc_update_vaes_avx10_256) -+SYM_FUNC_START(aes_gcm_dec_update_vaes_avx10_256) -+ _aes_gcm_update 0 -+SYM_FUNC_END(aes_gcm_dec_update_vaes_avx10_256) -+ -+_set_veclen 64 -+SYM_FUNC_START(aes_gcm_precompute_vaes_avx10_512) -+ _aes_gcm_precompute -+SYM_FUNC_END(aes_gcm_precompute_vaes_avx10_512) -+SYM_FUNC_START(aes_gcm_enc_update_vaes_avx10_512) -+ _aes_gcm_update 1 -+SYM_FUNC_END(aes_gcm_enc_update_vaes_avx10_512) -+SYM_FUNC_START(aes_gcm_dec_update_vaes_avx10_512) -+ _aes_gcm_update 0 -+SYM_FUNC_END(aes_gcm_dec_update_vaes_avx10_512) -+ -+// void aes_gcm_aad_update_vaes_avx10(const struct aes_gcm_key_avx10 *key, -+// u8 ghash_acc[16], -+// const u8 *aad, int aadlen); -+// -+// This function processes the AAD (Additional Authenticated Data) in GCM. -+// Using the key |key|, it updates the GHASH accumulator |ghash_acc| with the -+// data given by |aad| and |aadlen|. |key->ghash_key_powers| must have been -+// initialized. On the first call, |ghash_acc| must be all zeroes. |aadlen| -+// must be a multiple of 16, except on the last call where it can be any length. 
-+// The caller must do any buffering needed to ensure this. -+// -+// AES-GCM is almost always used with small amounts of AAD, less than 32 bytes. -+// Therefore, for AAD processing we currently only provide this implementation -+// which uses 256-bit vectors (ymm registers) and only has a 1x-wide loop. This -+// keeps the code size down, and it enables some micro-optimizations, e.g. using -+// VEX-coded instructions instead of EVEX-coded to save some instruction bytes. -+// To optimize for large amounts of AAD, we could implement a 4x-wide loop and -+// provide a version using 512-bit vectors, but that doesn't seem to be useful. -+SYM_FUNC_START(aes_gcm_aad_update_vaes_avx10) -+ -+ // Function arguments -+ .set KEY, %rdi -+ .set GHASH_ACC_PTR, %rsi -+ .set AAD, %rdx -+ .set AADLEN, %ecx -+ .set AADLEN64, %rcx // Zero-extend AADLEN before using! -+ -+ // Additional local variables. -+ // %rax, %ymm0-%ymm3, and %k1 are used as temporary registers. -+ .set BSWAP_MASK, %ymm4 -+ .set GFPOLY, %ymm5 -+ .set GHASH_ACC, %ymm6 -+ .set GHASH_ACC_XMM, %xmm6 -+ .set H_POW1, %ymm7 -+ -+ // Load some constants. -+ vbroadcasti128 .Lbswap_mask(%rip), BSWAP_MASK -+ vbroadcasti128 .Lgfpoly(%rip), GFPOLY -+ -+ // Load the GHASH accumulator. -+ vmovdqu (GHASH_ACC_PTR), GHASH_ACC_XMM -+ -+ // Update GHASH with 32 bytes of AAD at a time. -+ // -+ // Pre-subtracting 32 from AADLEN saves an instruction from the loop and -+ // also ensures that at least one write always occurs to AADLEN, -+ // zero-extending it and allowing AADLEN64 to be used later. 
-+ sub $32, AADLEN -+ jl .Laad_loop_1x_done -+ vmovdqu8 OFFSETOFEND_H_POWERS-32(KEY), H_POW1 // [H^2, H^1] -+.Laad_loop_1x: -+ vmovdqu (AAD), %ymm0 -+ vpshufb BSWAP_MASK, %ymm0, %ymm0 -+ vpxor %ymm0, GHASH_ACC, GHASH_ACC -+ _ghash_mul H_POW1, GHASH_ACC, GHASH_ACC, GFPOLY, \ -+ %ymm0, %ymm1, %ymm2 -+ vextracti128 $1, GHASH_ACC, %xmm0 -+ vpxor %xmm0, GHASH_ACC_XMM, GHASH_ACC_XMM -+ add $32, AAD -+ sub $32, AADLEN -+ jge .Laad_loop_1x -+.Laad_loop_1x_done: -+ add $32, AADLEN -+ jz .Laad_done -+ -+ // Update GHASH with the remaining 1 <= AADLEN < 32 bytes of AAD. -+ mov $-1, %eax -+ bzhi AADLEN, %eax, %eax -+ kmovd %eax, %k1 -+ vmovdqu8 (AAD), %ymm0{%k1}{z} -+ neg AADLEN64 -+ and $~15, AADLEN64 // -round_up(AADLEN, 16) -+ vmovdqu8 OFFSETOFEND_H_POWERS(KEY,AADLEN64), H_POW1 -+ vpshufb BSWAP_MASK, %ymm0, %ymm0 -+ vpxor %ymm0, GHASH_ACC, GHASH_ACC -+ _ghash_mul H_POW1, GHASH_ACC, GHASH_ACC, GFPOLY, \ -+ %ymm0, %ymm1, %ymm2 -+ vextracti128 $1, GHASH_ACC, %xmm0 -+ vpxor %xmm0, GHASH_ACC_XMM, GHASH_ACC_XMM -+ -+.Laad_done: -+ // Store the updated GHASH accumulator back to memory. -+ vmovdqu GHASH_ACC_XMM, (GHASH_ACC_PTR) -+ -+ vzeroupper // This is needed after using ymm or zmm registers. -+ RET -+SYM_FUNC_END(aes_gcm_aad_update_vaes_avx10) -+ -+SYM_FUNC_START(aes_gcm_enc_final_vaes_avx10) -+ _aes_gcm_final 1 -+SYM_FUNC_END(aes_gcm_enc_final_vaes_avx10) -+SYM_FUNC_START(aes_gcm_dec_final_vaes_avx10) -+ _aes_gcm_final 0 -+SYM_FUNC_END(aes_gcm_dec_final_vaes_avx10) -diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S -index 39066b57a70e..eb153eff9331 100644 ---- a/arch/x86/crypto/aesni-intel_asm.S -+++ b/arch/x86/crypto/aesni-intel_asm.S -@@ -10,16 +10,7 @@ - * Vinodh Gopal - * Kahraman Akdemir - * -- * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD -- * interface for 64-bit kernels. 
-- * Authors: Erdinc Ozturk (erdinc.ozturk@intel.com) -- * Aidan O'Mahony (aidan.o.mahony@intel.com) -- * Adrian Hoban -- * James Guilford (james.guilford@intel.com) -- * Gabriele Paoloni -- * Tadeusz Struk (tadeusz.struk@intel.com) -- * Wajdi Feghali (wajdi.k.feghali@intel.com) -- * Copyright (c) 2010, Intel Corporation. -+ * Copyright (c) 2010, Intel Corporation. - * - * Ported x86_64 version to x86: - * Author: Mathias Krause -@@ -27,95 +18,6 @@ - - #include - #include --#include -- --/* -- * The following macros are used to move an (un)aligned 16 byte value to/from -- * an XMM register. This can done for either FP or integer values, for FP use -- * movaps (move aligned packed single) or integer use movdqa (move double quad -- * aligned). It doesn't make a performance difference which instruction is used -- * since Nehalem (original Core i7) was released. However, the movaps is a byte -- * shorter, so that is the one we'll use for now. (same for unaligned). -- */ --#define MOVADQ movaps --#define MOVUDQ movups -- --#ifdef __x86_64__ -- --# constants in mergeable sections, linker can reorder and merge --.section .rodata.cst16.POLY, "aM", @progbits, 16 --.align 16 --POLY: .octa 0xC2000000000000000000000000000001 --.section .rodata.cst16.TWOONE, "aM", @progbits, 16 --.align 16 --TWOONE: .octa 0x00000001000000000000000000000001 -- --.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 --.align 16 --SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F --.section .rodata.cst16.MASK1, "aM", @progbits, 16 --.align 16 --MASK1: .octa 0x0000000000000000ffffffffffffffff --.section .rodata.cst16.MASK2, "aM", @progbits, 16 --.align 16 --MASK2: .octa 0xffffffffffffffff0000000000000000 --.section .rodata.cst16.ONE, "aM", @progbits, 16 --.align 16 --ONE: .octa 0x00000000000000000000000000000001 --.section .rodata.cst16.F_MIN_MASK, "aM", @progbits, 16 --.align 16 --F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0 --.section .rodata.cst16.dec, "aM", @progbits, 16 --.align 16 
--dec: .octa 0x1 --.section .rodata.cst16.enc, "aM", @progbits, 16 --.align 16 --enc: .octa 0x2 -- --# order of these constants should not change. --# more specifically, ALL_F should follow SHIFT_MASK, --# and zero should follow ALL_F --.section .rodata, "a", @progbits --.align 16 --SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 --ALL_F: .octa 0xffffffffffffffffffffffffffffffff -- .octa 0x00000000000000000000000000000000 -- --.text -- --#define AadHash 16*0 --#define AadLen 16*1 --#define InLen (16*1)+8 --#define PBlockEncKey 16*2 --#define OrigIV 16*3 --#define CurCount 16*4 --#define PBlockLen 16*5 --#define HashKey 16*6 // store HashKey <<1 mod poly here --#define HashKey_2 16*7 // store HashKey^2 <<1 mod poly here --#define HashKey_3 16*8 // store HashKey^3 <<1 mod poly here --#define HashKey_4 16*9 // store HashKey^4 <<1 mod poly here --#define HashKey_k 16*10 // store XOR of High 64 bits and Low 64 -- // bits of HashKey <<1 mod poly here -- //(for Karatsuba purposes) --#define HashKey_2_k 16*11 // store XOR of High 64 bits and Low 64 -- // bits of HashKey^2 <<1 mod poly here -- // (for Karatsuba purposes) --#define HashKey_3_k 16*12 // store XOR of High 64 bits and Low 64 -- // bits of HashKey^3 <<1 mod poly here -- // (for Karatsuba purposes) --#define HashKey_4_k 16*13 // store XOR of High 64 bits and Low 64 -- // bits of HashKey^4 <<1 mod poly here -- // (for Karatsuba purposes) -- --#define arg1 rdi --#define arg2 rsi --#define arg3 rdx --#define arg4 rcx --#define arg5 r8 --#define arg6 r9 --#define keysize 2*15*16(%arg1) --#endif -- - - #define STATE1 %xmm0 - #define STATE2 %xmm4 -@@ -162,1409 +64,6 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff - #define TKEYP T1 - #endif - --.macro FUNC_SAVE -- push %r12 -- push %r13 -- push %r14 --# --# states of %xmm registers %xmm6:%xmm15 not saved --# all %xmm registers are clobbered --# --.endm -- -- --.macro FUNC_RESTORE -- pop %r14 -- pop %r13 -- pop %r12 --.endm -- --# Precompute hashkeys. 
--# Input: Hash subkey. --# Output: HashKeys stored in gcm_context_data. Only needs to be called --# once per key. --# clobbers r12, and tmp xmm registers. --.macro PRECOMPUTE SUBKEY TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7 -- mov \SUBKEY, %r12 -- movdqu (%r12), \TMP3 -- movdqa SHUF_MASK(%rip), \TMP2 -- pshufb \TMP2, \TMP3 -- -- # precompute HashKey<<1 mod poly from the HashKey (required for GHASH) -- -- movdqa \TMP3, \TMP2 -- psllq $1, \TMP3 -- psrlq $63, \TMP2 -- movdqa \TMP2, \TMP1 -- pslldq $8, \TMP2 -- psrldq $8, \TMP1 -- por \TMP2, \TMP3 -- -- # reduce HashKey<<1 -- -- pshufd $0x24, \TMP1, \TMP2 -- pcmpeqd TWOONE(%rip), \TMP2 -- pand POLY(%rip), \TMP2 -- pxor \TMP2, \TMP3 -- movdqu \TMP3, HashKey(%arg2) -- -- movdqa \TMP3, \TMP5 -- pshufd $78, \TMP3, \TMP1 -- pxor \TMP3, \TMP1 -- movdqu \TMP1, HashKey_k(%arg2) -- -- GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 --# TMP5 = HashKey^2<<1 (mod poly) -- movdqu \TMP5, HashKey_2(%arg2) --# HashKey_2 = HashKey^2<<1 (mod poly) -- pshufd $78, \TMP5, \TMP1 -- pxor \TMP5, \TMP1 -- movdqu \TMP1, HashKey_2_k(%arg2) -- -- GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 --# TMP5 = HashKey^3<<1 (mod poly) -- movdqu \TMP5, HashKey_3(%arg2) -- pshufd $78, \TMP5, \TMP1 -- pxor \TMP5, \TMP1 -- movdqu \TMP1, HashKey_3_k(%arg2) -- -- GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 --# TMP5 = HashKey^3<<1 (mod poly) -- movdqu \TMP5, HashKey_4(%arg2) -- pshufd $78, \TMP5, \TMP1 -- pxor \TMP5, \TMP1 -- movdqu \TMP1, HashKey_4_k(%arg2) --.endm -- --# GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding. 
--# Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13 --.macro GCM_INIT Iv SUBKEY AAD AADLEN -- mov \AADLEN, %r11 -- mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length -- xor %r11d, %r11d -- mov %r11, InLen(%arg2) # ctx_data.in_length = 0 -- mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0 -- mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0 -- mov \Iv, %rax -- movdqu (%rax), %xmm0 -- movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv -- -- movdqa SHUF_MASK(%rip), %xmm2 -- pshufb %xmm2, %xmm0 -- movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv -- -- PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7 -- movdqu HashKey(%arg2), %xmm13 -- -- CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \ -- %xmm4, %xmm5, %xmm6 --.endm -- --# GCM_ENC_DEC Encodes/Decodes given data. Assumes that the passed gcm_context --# struct has been initialized by GCM_INIT. --# Requires the input data be at least 1 byte long because of READ_PARTIAL_BLOCK --# Clobbers rax, r10-r13, and xmm0-xmm15 --.macro GCM_ENC_DEC operation -- movdqu AadHash(%arg2), %xmm8 -- movdqu HashKey(%arg2), %xmm13 -- add %arg5, InLen(%arg2) -- -- xor %r11d, %r11d # initialise the data pointer offset as zero -- PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation -- -- sub %r11, %arg5 # sub partial block data used -- mov %arg5, %r13 # save the number of bytes -- -- and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) -- mov %r13, %r12 -- # Encrypt/Decrypt first few blocks -- -- and $(3<<4), %r12 -- jz .L_initial_num_blocks_is_0_\@ -- cmp $(2<<4), %r12 -- jb .L_initial_num_blocks_is_1_\@ -- je .L_initial_num_blocks_is_2_\@ --.L_initial_num_blocks_is_3_\@: -- INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ --%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation -- sub $48, %r13 -- jmp .L_initial_blocks_\@ --.L_initial_num_blocks_is_2_\@: -- INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ 
--%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation -- sub $32, %r13 -- jmp .L_initial_blocks_\@ --.L_initial_num_blocks_is_1_\@: -- INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ --%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation -- sub $16, %r13 -- jmp .L_initial_blocks_\@ --.L_initial_num_blocks_is_0_\@: -- INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ --%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation --.L_initial_blocks_\@: -- -- # Main loop - Encrypt/Decrypt remaining blocks -- -- test %r13, %r13 -- je .L_zero_cipher_left_\@ -- sub $64, %r13 -- je .L_four_cipher_left_\@ --.L_crypt_by_4_\@: -- GHASH_4_ENCRYPT_4_PARALLEL_\operation %xmm9, %xmm10, %xmm11, %xmm12, \ -- %xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \ -- %xmm7, %xmm8, enc -- add $64, %r11 -- sub $64, %r13 -- jne .L_crypt_by_4_\@ --.L_four_cipher_left_\@: -- GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ --%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 --.L_zero_cipher_left_\@: -- movdqu %xmm8, AadHash(%arg2) -- movdqu %xmm0, CurCount(%arg2) -- -- mov %arg5, %r13 -- and $15, %r13 # %r13 = arg5 (mod 16) -- je .L_multiple_of_16_bytes_\@ -- -- mov %r13, PBlockLen(%arg2) -- -- # Handle the last <16 Byte block separately -- paddd ONE(%rip), %xmm0 # INCR CNT to get Yn -- movdqu %xmm0, CurCount(%arg2) -- movdqa SHUF_MASK(%rip), %xmm10 -- pshufb %xmm10, %xmm0 -- -- ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) -- movdqu %xmm0, PBlockEncKey(%arg2) -- -- cmp $16, %arg5 -- jge .L_large_enough_update_\@ -- -- lea (%arg4,%r11,1), %r10 -- mov %r13, %r12 -- READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 -- jmp .L_data_read_\@ -- --.L_large_enough_update_\@: -- sub $16, %r11 -- add %r13, %r11 -- -- # receive the last <16 Byte block -- movdqu (%arg4, %r11, 1), %xmm1 -- -- sub %r13, %r11 -- add $16, %r11 -- -- lea SHIFT_MASK+16(%rip), %r12 -- # adjust the shuffle mask pointer to be able to 
shift 16-r13 bytes -- # (r13 is the number of bytes in plaintext mod 16) -- sub %r13, %r12 -- # get the appropriate shuffle mask -- movdqu (%r12), %xmm2 -- # shift right 16-r13 bytes -- pshufb %xmm2, %xmm1 -- --.L_data_read_\@: -- lea ALL_F+16(%rip), %r12 -- sub %r13, %r12 -- --.ifc \operation, dec -- movdqa %xmm1, %xmm2 --.endif -- pxor %xmm1, %xmm0 # XOR Encrypt(K, Yn) -- movdqu (%r12), %xmm1 -- # get the appropriate mask to mask out top 16-r13 bytes of xmm0 -- pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 --.ifc \operation, dec -- pand %xmm1, %xmm2 -- movdqa SHUF_MASK(%rip), %xmm10 -- pshufb %xmm10 ,%xmm2 -- -- pxor %xmm2, %xmm8 --.else -- movdqa SHUF_MASK(%rip), %xmm10 -- pshufb %xmm10,%xmm0 -- -- pxor %xmm0, %xmm8 --.endif -- -- movdqu %xmm8, AadHash(%arg2) --.ifc \operation, enc -- # GHASH computation for the last <16 byte block -- movdqa SHUF_MASK(%rip), %xmm10 -- # shuffle xmm0 back to output as ciphertext -- pshufb %xmm10, %xmm0 --.endif -- -- # Output %r13 bytes -- movq %xmm0, %rax -- cmp $8, %r13 -- jle .L_less_than_8_bytes_left_\@ -- mov %rax, (%arg3 , %r11, 1) -- add $8, %r11 -- psrldq $8, %xmm0 -- movq %xmm0, %rax -- sub $8, %r13 --.L_less_than_8_bytes_left_\@: -- mov %al, (%arg3, %r11, 1) -- add $1, %r11 -- shr $8, %rax -- sub $1, %r13 -- jne .L_less_than_8_bytes_left_\@ --.L_multiple_of_16_bytes_\@: --.endm -- --# GCM_COMPLETE Finishes update of tag of last partial block --# Output: Authorization Tag (AUTH_TAG) --# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15 --.macro GCM_COMPLETE AUTHTAG AUTHTAGLEN -- movdqu AadHash(%arg2), %xmm8 -- movdqu HashKey(%arg2), %xmm13 -- -- mov PBlockLen(%arg2), %r12 -- -- test %r12, %r12 -- je .L_partial_done\@ -- -- GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 -- --.L_partial_done\@: -- mov AadLen(%arg2), %r12 # %r13 = aadLen (number of bytes) -- shl $3, %r12 # convert into number of bits -- movd %r12d, %xmm15 # len(A) in %xmm15 -- mov InLen(%arg2), %r12 -- shl $3, %r12 # len(C) in bits 
(*128) -- movq %r12, %xmm1 -- -- pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 -- pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) -- pxor %xmm15, %xmm8 -- GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 -- # final GHASH computation -- movdqa SHUF_MASK(%rip), %xmm10 -- pshufb %xmm10, %xmm8 -- -- movdqu OrigIV(%arg2), %xmm0 # %xmm0 = Y0 -- ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) -- pxor %xmm8, %xmm0 --.L_return_T_\@: -- mov \AUTHTAG, %r10 # %r10 = authTag -- mov \AUTHTAGLEN, %r11 # %r11 = auth_tag_len -- cmp $16, %r11 -- je .L_T_16_\@ -- cmp $8, %r11 -- jl .L_T_4_\@ --.L_T_8_\@: -- movq %xmm0, %rax -- mov %rax, (%r10) -- add $8, %r10 -- sub $8, %r11 -- psrldq $8, %xmm0 -- test %r11, %r11 -- je .L_return_T_done_\@ --.L_T_4_\@: -- movd %xmm0, %eax -- mov %eax, (%r10) -- add $4, %r10 -- sub $4, %r11 -- psrldq $4, %xmm0 -- test %r11, %r11 -- je .L_return_T_done_\@ --.L_T_123_\@: -- movd %xmm0, %eax -- cmp $2, %r11 -- jl .L_T_1_\@ -- mov %ax, (%r10) -- cmp $2, %r11 -- je .L_return_T_done_\@ -- add $2, %r10 -- sar $16, %eax --.L_T_1_\@: -- mov %al, (%r10) -- jmp .L_return_T_done_\@ --.L_T_16_\@: -- movdqu %xmm0, (%r10) --.L_return_T_done_\@: --.endm -- --#ifdef __x86_64__ --/* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) --* --* --* Input: A and B (128-bits each, bit-reflected) --* Output: C = A*B*x mod poly, (i.e. >>1 ) --* To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input --* GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. 
--* --*/ --.macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5 -- movdqa \GH, \TMP1 -- pshufd $78, \GH, \TMP2 -- pshufd $78, \HK, \TMP3 -- pxor \GH, \TMP2 # TMP2 = a1+a0 -- pxor \HK, \TMP3 # TMP3 = b1+b0 -- pclmulqdq $0x11, \HK, \TMP1 # TMP1 = a1*b1 -- pclmulqdq $0x00, \HK, \GH # GH = a0*b0 -- pclmulqdq $0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0) -- pxor \GH, \TMP2 -- pxor \TMP1, \TMP2 # TMP2 = (a0*b0)+(a1*b0) -- movdqa \TMP2, \TMP3 -- pslldq $8, \TMP3 # left shift TMP3 2 DWs -- psrldq $8, \TMP2 # right shift TMP2 2 DWs -- pxor \TMP3, \GH -- pxor \TMP2, \TMP1 # TMP2:GH holds the result of GH*HK -- -- # first phase of the reduction -- -- movdqa \GH, \TMP2 -- movdqa \GH, \TMP3 -- movdqa \GH, \TMP4 # copy GH into TMP2,TMP3 and TMP4 -- # in in order to perform -- # independent shifts -- pslld $31, \TMP2 # packed right shift <<31 -- pslld $30, \TMP3 # packed right shift <<30 -- pslld $25, \TMP4 # packed right shift <<25 -- pxor \TMP3, \TMP2 # xor the shifted versions -- pxor \TMP4, \TMP2 -- movdqa \TMP2, \TMP5 -- psrldq $4, \TMP5 # right shift TMP5 1 DW -- pslldq $12, \TMP2 # left shift TMP2 3 DWs -- pxor \TMP2, \GH -- -- # second phase of the reduction -- -- movdqa \GH,\TMP2 # copy GH into TMP2,TMP3 and TMP4 -- # in in order to perform -- # independent shifts -- movdqa \GH,\TMP3 -- movdqa \GH,\TMP4 -- psrld $1,\TMP2 # packed left shift >>1 -- psrld $2,\TMP3 # packed left shift >>2 -- psrld $7,\TMP4 # packed left shift >>7 -- pxor \TMP3,\TMP2 # xor the shifted versions -- pxor \TMP4,\TMP2 -- pxor \TMP5, \TMP2 -- pxor \TMP2, \GH -- pxor \TMP1, \GH # result is in TMP1 --.endm -- --# Reads DLEN bytes starting at DPTR and stores in XMMDst --# where 0 < DLEN < 16 --# Clobbers %rax, DLEN and XMM1 --.macro READ_PARTIAL_BLOCK DPTR DLEN XMM1 XMMDst -- cmp $8, \DLEN -- jl .L_read_lt8_\@ -- mov (\DPTR), %rax -- movq %rax, \XMMDst -- sub $8, \DLEN -- jz .L_done_read_partial_block_\@ -- xor %eax, %eax --.L_read_next_byte_\@: -- shl $8, %rax -- mov 7(\DPTR, \DLEN, 1), %al -- dec \DLEN 
-- jnz .L_read_next_byte_\@ -- movq %rax, \XMM1 -- pslldq $8, \XMM1 -- por \XMM1, \XMMDst -- jmp .L_done_read_partial_block_\@ --.L_read_lt8_\@: -- xor %eax, %eax --.L_read_next_byte_lt8_\@: -- shl $8, %rax -- mov -1(\DPTR, \DLEN, 1), %al -- dec \DLEN -- jnz .L_read_next_byte_lt8_\@ -- movq %rax, \XMMDst --.L_done_read_partial_block_\@: --.endm -- --# CALC_AAD_HASH: Calculates the hash of the data which will not be encrypted. --# clobbers r10-11, xmm14 --.macro CALC_AAD_HASH HASHKEY AAD AADLEN TMP1 TMP2 TMP3 TMP4 TMP5 \ -- TMP6 TMP7 -- MOVADQ SHUF_MASK(%rip), %xmm14 -- mov \AAD, %r10 # %r10 = AAD -- mov \AADLEN, %r11 # %r11 = aadLen -- pxor \TMP7, \TMP7 -- pxor \TMP6, \TMP6 -- -- cmp $16, %r11 -- jl .L_get_AAD_rest\@ --.L_get_AAD_blocks\@: -- movdqu (%r10), \TMP7 -- pshufb %xmm14, \TMP7 # byte-reflect the AAD data -- pxor \TMP7, \TMP6 -- GHASH_MUL \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5 -- add $16, %r10 -- sub $16, %r11 -- cmp $16, %r11 -- jge .L_get_AAD_blocks\@ -- -- movdqu \TMP6, \TMP7 -- -- /* read the last <16B of AAD */ --.L_get_AAD_rest\@: -- test %r11, %r11 -- je .L_get_AAD_done\@ -- -- READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7 -- pshufb %xmm14, \TMP7 # byte-reflect the AAD data -- pxor \TMP6, \TMP7 -- GHASH_MUL \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5 -- movdqu \TMP7, \TMP6 -- --.L_get_AAD_done\@: -- movdqu \TMP6, AadHash(%arg2) --.endm -- --# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks --# between update calls. 
--# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK --# Outputs encrypted bytes, and updates hash and partial info in gcm_data_context --# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13 --.macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ -- AAD_HASH operation -- mov PBlockLen(%arg2), %r13 -- test %r13, %r13 -- je .L_partial_block_done_\@ # Leave Macro if no partial blocks -- # Read in input data without over reading -- cmp $16, \PLAIN_CYPH_LEN -- jl .L_fewer_than_16_bytes_\@ -- movups (\PLAIN_CYPH_IN), %xmm1 # If more than 16 bytes, just fill xmm -- jmp .L_data_read_\@ -- --.L_fewer_than_16_bytes_\@: -- lea (\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10 -- mov \PLAIN_CYPH_LEN, %r12 -- READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1 -- -- mov PBlockLen(%arg2), %r13 -- --.L_data_read_\@: # Finished reading in data -- -- movdqu PBlockEncKey(%arg2), %xmm9 -- movdqu HashKey(%arg2), %xmm13 -- -- lea SHIFT_MASK(%rip), %r12 -- -- # adjust the shuffle mask pointer to be able to shift r13 bytes -- # r16-r13 is the number of bytes in plaintext mod 16) -- add %r13, %r12 -- movdqu (%r12), %xmm2 # get the appropriate shuffle mask -- pshufb %xmm2, %xmm9 # shift right r13 bytes -- --.ifc \operation, dec -- movdqa %xmm1, %xmm3 -- pxor %xmm1, %xmm9 # Ciphertext XOR E(K, Yn) -- -- mov \PLAIN_CYPH_LEN, %r10 -- add %r13, %r10 -- # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling -- sub $16, %r10 -- # Determine if partial block is not being filled and -- # shift mask accordingly -- jge .L_no_extra_mask_1_\@ -- sub %r10, %r12 --.L_no_extra_mask_1_\@: -- -- movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 -- # get the appropriate mask to mask out bottom r13 bytes of xmm9 -- pand %xmm1, %xmm9 # mask out bottom r13 bytes of xmm9 -- -- pand %xmm1, %xmm3 -- movdqa SHUF_MASK(%rip), %xmm10 -- pshufb %xmm10, %xmm3 -- pshufb %xmm2, %xmm3 -- pxor %xmm3, \AAD_HASH -- -- test %r10, %r10 -- jl .L_partial_incomplete_1_\@ -- -- # GHASH computation for the 
last <16 Byte block -- GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 -- xor %eax, %eax -- -- mov %rax, PBlockLen(%arg2) -- jmp .L_dec_done_\@ --.L_partial_incomplete_1_\@: -- add \PLAIN_CYPH_LEN, PBlockLen(%arg2) --.L_dec_done_\@: -- movdqu \AAD_HASH, AadHash(%arg2) --.else -- pxor %xmm1, %xmm9 # Plaintext XOR E(K, Yn) -- -- mov \PLAIN_CYPH_LEN, %r10 -- add %r13, %r10 -- # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling -- sub $16, %r10 -- # Determine if partial block is not being filled and -- # shift mask accordingly -- jge .L_no_extra_mask_2_\@ -- sub %r10, %r12 --.L_no_extra_mask_2_\@: -- -- movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 -- # get the appropriate mask to mask out bottom r13 bytes of xmm9 -- pand %xmm1, %xmm9 -- -- movdqa SHUF_MASK(%rip), %xmm1 -- pshufb %xmm1, %xmm9 -- pshufb %xmm2, %xmm9 -- pxor %xmm9, \AAD_HASH -- -- test %r10, %r10 -- jl .L_partial_incomplete_2_\@ -- -- # GHASH computation for the last <16 Byte block -- GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 -- xor %eax, %eax -- -- mov %rax, PBlockLen(%arg2) -- jmp .L_encode_done_\@ --.L_partial_incomplete_2_\@: -- add \PLAIN_CYPH_LEN, PBlockLen(%arg2) --.L_encode_done_\@: -- movdqu \AAD_HASH, AadHash(%arg2) -- -- movdqa SHUF_MASK(%rip), %xmm10 -- # shuffle xmm9 back to output as ciphertext -- pshufb %xmm10, %xmm9 -- pshufb %xmm2, %xmm9 --.endif -- # output encrypted Bytes -- test %r10, %r10 -- jl .L_partial_fill_\@ -- mov %r13, %r12 -- mov $16, %r13 -- # Set r13 to be the number of bytes to write out -- sub %r12, %r13 -- jmp .L_count_set_\@ --.L_partial_fill_\@: -- mov \PLAIN_CYPH_LEN, %r13 --.L_count_set_\@: -- movdqa %xmm9, %xmm0 -- movq %xmm0, %rax -- cmp $8, %r13 -- jle .L_less_than_8_bytes_left_\@ -- -- mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) -- add $8, \DATA_OFFSET -- psrldq $8, %xmm0 -- movq %xmm0, %rax -- sub $8, %r13 --.L_less_than_8_bytes_left_\@: -- movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) -- add $1, \DATA_OFFSET -- 
shr $8, %rax -- sub $1, %r13 -- jne .L_less_than_8_bytes_left_\@ --.L_partial_block_done_\@: --.endm # PARTIAL_BLOCK -- --/* --* if a = number of total plaintext bytes --* b = floor(a/16) --* num_initial_blocks = b mod 4 --* encrypt the initial num_initial_blocks blocks and apply ghash on --* the ciphertext --* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers --* are clobbered --* arg1, %arg2, %arg3 are used as a pointer only, not modified --*/ -- -- --.macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ -- XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation -- MOVADQ SHUF_MASK(%rip), %xmm14 -- -- movdqu AadHash(%arg2), %xmm\i # XMM0 = Y0 -- -- # start AES for num_initial_blocks blocks -- -- movdqu CurCount(%arg2), \XMM0 # XMM0 = Y0 -- --.if (\i == 5) || (\i == 6) || (\i == 7) -- -- MOVADQ ONE(%RIP),\TMP1 -- MOVADQ 0(%arg1),\TMP2 --.irpc index, \i_seq -- paddd \TMP1, \XMM0 # INCR Y0 --.ifc \operation, dec -- movdqa \XMM0, %xmm\index --.else -- MOVADQ \XMM0, %xmm\index --.endif -- pshufb %xmm14, %xmm\index # perform a 16 byte swap -- pxor \TMP2, %xmm\index --.endr -- lea 0x10(%arg1),%r10 -- mov keysize,%eax -- shr $2,%eax # 128->4, 192->6, 256->8 -- add $5,%eax # 128->9, 192->11, 256->13 -- --.Laes_loop_initial_\@: -- MOVADQ (%r10),\TMP1 --.irpc index, \i_seq -- aesenc \TMP1, %xmm\index --.endr -- add $16,%r10 -- sub $1,%eax -- jnz .Laes_loop_initial_\@ -- -- MOVADQ (%r10), \TMP1 --.irpc index, \i_seq -- aesenclast \TMP1, %xmm\index # Last Round --.endr --.irpc index, \i_seq -- movdqu (%arg4 , %r11, 1), \TMP1 -- pxor \TMP1, %xmm\index -- movdqu %xmm\index, (%arg3 , %r11, 1) -- # write back plaintext/ciphertext for num_initial_blocks -- add $16, %r11 -- --.ifc \operation, dec -- movdqa \TMP1, %xmm\index --.endif -- pshufb %xmm14, %xmm\index -- -- # prepare plaintext/ciphertext for GHASH computation --.endr --.endif -- -- # apply GHASH on num_initial_blocks blocks -- --.if \i == 5 -- pxor %xmm5, %xmm6 -- GHASH_MUL %xmm6, \TMP3, \TMP1, 
\TMP2, \TMP4, \TMP5, \XMM1 -- pxor %xmm6, %xmm7 -- GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 -- pxor %xmm7, %xmm8 -- GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 --.elseif \i == 6 -- pxor %xmm6, %xmm7 -- GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 -- pxor %xmm7, %xmm8 -- GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 --.elseif \i == 7 -- pxor %xmm7, %xmm8 -- GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 --.endif -- cmp $64, %r13 -- jl .L_initial_blocks_done\@ -- # no need for precomputed values --/* --* --* Precomputations for HashKey parallel with encryption of first 4 blocks. --* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i --*/ -- MOVADQ ONE(%RIP),\TMP1 -- paddd \TMP1, \XMM0 # INCR Y0 -- MOVADQ \XMM0, \XMM1 -- pshufb %xmm14, \XMM1 # perform a 16 byte swap -- -- paddd \TMP1, \XMM0 # INCR Y0 -- MOVADQ \XMM0, \XMM2 -- pshufb %xmm14, \XMM2 # perform a 16 byte swap -- -- paddd \TMP1, \XMM0 # INCR Y0 -- MOVADQ \XMM0, \XMM3 -- pshufb %xmm14, \XMM3 # perform a 16 byte swap -- -- paddd \TMP1, \XMM0 # INCR Y0 -- MOVADQ \XMM0, \XMM4 -- pshufb %xmm14, \XMM4 # perform a 16 byte swap -- -- MOVADQ 0(%arg1),\TMP1 -- pxor \TMP1, \XMM1 -- pxor \TMP1, \XMM2 -- pxor \TMP1, \XMM3 -- pxor \TMP1, \XMM4 --.irpc index, 1234 # do 4 rounds -- movaps 0x10*\index(%arg1), \TMP1 -- aesenc \TMP1, \XMM1 -- aesenc \TMP1, \XMM2 -- aesenc \TMP1, \XMM3 -- aesenc \TMP1, \XMM4 --.endr --.irpc index, 56789 # do next 5 rounds -- movaps 0x10*\index(%arg1), \TMP1 -- aesenc \TMP1, \XMM1 -- aesenc \TMP1, \XMM2 -- aesenc \TMP1, \XMM3 -- aesenc \TMP1, \XMM4 --.endr -- lea 0xa0(%arg1),%r10 -- mov keysize,%eax -- shr $2,%eax # 128->4, 192->6, 256->8 -- sub $4,%eax # 128->0, 192->2, 256->4 -- jz .Laes_loop_pre_done\@ -- --.Laes_loop_pre_\@: -- MOVADQ (%r10),\TMP2 --.irpc index, 1234 -- aesenc \TMP2, %xmm\index --.endr -- add $16,%r10 -- sub $1,%eax -- jnz .Laes_loop_pre_\@ -- --.Laes_loop_pre_done\@: -- MOVADQ (%r10), 
\TMP2 -- aesenclast \TMP2, \XMM1 -- aesenclast \TMP2, \XMM2 -- aesenclast \TMP2, \XMM3 -- aesenclast \TMP2, \XMM4 -- movdqu 16*0(%arg4 , %r11 , 1), \TMP1 -- pxor \TMP1, \XMM1 --.ifc \operation, dec -- movdqu \XMM1, 16*0(%arg3 , %r11 , 1) -- movdqa \TMP1, \XMM1 --.endif -- movdqu 16*1(%arg4 , %r11 , 1), \TMP1 -- pxor \TMP1, \XMM2 --.ifc \operation, dec -- movdqu \XMM2, 16*1(%arg3 , %r11 , 1) -- movdqa \TMP1, \XMM2 --.endif -- movdqu 16*2(%arg4 , %r11 , 1), \TMP1 -- pxor \TMP1, \XMM3 --.ifc \operation, dec -- movdqu \XMM3, 16*2(%arg3 , %r11 , 1) -- movdqa \TMP1, \XMM3 --.endif -- movdqu 16*3(%arg4 , %r11 , 1), \TMP1 -- pxor \TMP1, \XMM4 --.ifc \operation, dec -- movdqu \XMM4, 16*3(%arg3 , %r11 , 1) -- movdqa \TMP1, \XMM4 --.else -- movdqu \XMM1, 16*0(%arg3 , %r11 , 1) -- movdqu \XMM2, 16*1(%arg3 , %r11 , 1) -- movdqu \XMM3, 16*2(%arg3 , %r11 , 1) -- movdqu \XMM4, 16*3(%arg3 , %r11 , 1) --.endif -- -- add $64, %r11 -- pshufb %xmm14, \XMM1 # perform a 16 byte swap -- pxor \XMMDst, \XMM1 --# combine GHASHed value with the corresponding ciphertext -- pshufb %xmm14, \XMM2 # perform a 16 byte swap -- pshufb %xmm14, \XMM3 # perform a 16 byte swap -- pshufb %xmm14, \XMM4 # perform a 16 byte swap -- --.L_initial_blocks_done\@: -- --.endm -- --/* --* encrypt 4 blocks at a time --* ghash the 4 previously encrypted ciphertext blocks --* arg1, %arg3, %arg4 are used as pointers only, not modified --* %r11 is the data offset value --*/ --.macro GHASH_4_ENCRYPT_4_PARALLEL_enc TMP1 TMP2 TMP3 TMP4 TMP5 \ --TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation -- -- movdqa \XMM1, \XMM5 -- movdqa \XMM2, \XMM6 -- movdqa \XMM3, \XMM7 -- movdqa \XMM4, \XMM8 -- -- movdqa SHUF_MASK(%rip), %xmm15 -- # multiply TMP5 * HashKey using karatsuba -- -- movdqa \XMM5, \TMP4 -- pshufd $78, \XMM5, \TMP6 -- pxor \XMM5, \TMP6 -- paddd ONE(%rip), \XMM0 # INCR CNT -- movdqu HashKey_4(%arg2), \TMP5 -- pclmulqdq $0x11, \TMP5, \TMP4 # TMP4 = a1*b1 -- movdqa \XMM0, \XMM1 -- paddd ONE(%rip), \XMM0 # INCR 
CNT -- movdqa \XMM0, \XMM2 -- paddd ONE(%rip), \XMM0 # INCR CNT -- movdqa \XMM0, \XMM3 -- paddd ONE(%rip), \XMM0 # INCR CNT -- movdqa \XMM0, \XMM4 -- pshufb %xmm15, \XMM1 # perform a 16 byte swap -- pclmulqdq $0x00, \TMP5, \XMM5 # XMM5 = a0*b0 -- pshufb %xmm15, \XMM2 # perform a 16 byte swap -- pshufb %xmm15, \XMM3 # perform a 16 byte swap -- pshufb %xmm15, \XMM4 # perform a 16 byte swap -- -- pxor (%arg1), \XMM1 -- pxor (%arg1), \XMM2 -- pxor (%arg1), \XMM3 -- pxor (%arg1), \XMM4 -- movdqu HashKey_4_k(%arg2), \TMP5 -- pclmulqdq $0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) -- movaps 0x10(%arg1), \TMP1 -- aesenc \TMP1, \XMM1 # Round 1 -- aesenc \TMP1, \XMM2 -- aesenc \TMP1, \XMM3 -- aesenc \TMP1, \XMM4 -- movaps 0x20(%arg1), \TMP1 -- aesenc \TMP1, \XMM1 # Round 2 -- aesenc \TMP1, \XMM2 -- aesenc \TMP1, \XMM3 -- aesenc \TMP1, \XMM4 -- movdqa \XMM6, \TMP1 -- pshufd $78, \XMM6, \TMP2 -- pxor \XMM6, \TMP2 -- movdqu HashKey_3(%arg2), \TMP5 -- pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 -- movaps 0x30(%arg1), \TMP3 -- aesenc \TMP3, \XMM1 # Round 3 -- aesenc \TMP3, \XMM2 -- aesenc \TMP3, \XMM3 -- aesenc \TMP3, \XMM4 -- pclmulqdq $0x00, \TMP5, \XMM6 # XMM6 = a0*b0 -- movaps 0x40(%arg1), \TMP3 -- aesenc \TMP3, \XMM1 # Round 4 -- aesenc \TMP3, \XMM2 -- aesenc \TMP3, \XMM3 -- aesenc \TMP3, \XMM4 -- movdqu HashKey_3_k(%arg2), \TMP5 -- pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) -- movaps 0x50(%arg1), \TMP3 -- aesenc \TMP3, \XMM1 # Round 5 -- aesenc \TMP3, \XMM2 -- aesenc \TMP3, \XMM3 -- aesenc \TMP3, \XMM4 -- pxor \TMP1, \TMP4 --# accumulate the results in TMP4:XMM5, TMP6 holds the middle part -- pxor \XMM6, \XMM5 -- pxor \TMP2, \TMP6 -- movdqa \XMM7, \TMP1 -- pshufd $78, \XMM7, \TMP2 -- pxor \XMM7, \TMP2 -- movdqu HashKey_2(%arg2), \TMP5 -- -- # Multiply TMP5 * HashKey using karatsuba -- -- pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 -- movaps 0x60(%arg1), \TMP3 -- aesenc \TMP3, \XMM1 # Round 6 -- aesenc \TMP3, \XMM2 -- aesenc \TMP3, \XMM3 -- aesenc \TMP3, \XMM4 
-- pclmulqdq $0x00, \TMP5, \XMM7 # XMM7 = a0*b0 -- movaps 0x70(%arg1), \TMP3 -- aesenc \TMP3, \XMM1 # Round 7 -- aesenc \TMP3, \XMM2 -- aesenc \TMP3, \XMM3 -- aesenc \TMP3, \XMM4 -- movdqu HashKey_2_k(%arg2), \TMP5 -- pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) -- movaps 0x80(%arg1), \TMP3 -- aesenc \TMP3, \XMM1 # Round 8 -- aesenc \TMP3, \XMM2 -- aesenc \TMP3, \XMM3 -- aesenc \TMP3, \XMM4 -- pxor \TMP1, \TMP4 --# accumulate the results in TMP4:XMM5, TMP6 holds the middle part -- pxor \XMM7, \XMM5 -- pxor \TMP2, \TMP6 -- -- # Multiply XMM8 * HashKey -- # XMM8 and TMP5 hold the values for the two operands -- -- movdqa \XMM8, \TMP1 -- pshufd $78, \XMM8, \TMP2 -- pxor \XMM8, \TMP2 -- movdqu HashKey(%arg2), \TMP5 -- pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 -- movaps 0x90(%arg1), \TMP3 -- aesenc \TMP3, \XMM1 # Round 9 -- aesenc \TMP3, \XMM2 -- aesenc \TMP3, \XMM3 -- aesenc \TMP3, \XMM4 -- pclmulqdq $0x00, \TMP5, \XMM8 # XMM8 = a0*b0 -- lea 0xa0(%arg1),%r10 -- mov keysize,%eax -- shr $2,%eax # 128->4, 192->6, 256->8 -- sub $4,%eax # 128->0, 192->2, 256->4 -- jz .Laes_loop_par_enc_done\@ -- --.Laes_loop_par_enc\@: -- MOVADQ (%r10),\TMP3 --.irpc index, 1234 -- aesenc \TMP3, %xmm\index --.endr -- add $16,%r10 -- sub $1,%eax -- jnz .Laes_loop_par_enc\@ -- --.Laes_loop_par_enc_done\@: -- MOVADQ (%r10), \TMP3 -- aesenclast \TMP3, \XMM1 # Round 10 -- aesenclast \TMP3, \XMM2 -- aesenclast \TMP3, \XMM3 -- aesenclast \TMP3, \XMM4 -- movdqu HashKey_k(%arg2), \TMP5 -- pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) -- movdqu (%arg4,%r11,1), \TMP3 -- pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK -- movdqu 16(%arg4,%r11,1), \TMP3 -- pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK -- movdqu 32(%arg4,%r11,1), \TMP3 -- pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK -- movdqu 48(%arg4,%r11,1), \TMP3 -- pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK -- movdqu \XMM1, (%arg3,%r11,1) # Write to the ciphertext buffer -- movdqu \XMM2, 16(%arg3,%r11,1) # Write to the 
ciphertext buffer -- movdqu \XMM3, 32(%arg3,%r11,1) # Write to the ciphertext buffer -- movdqu \XMM4, 48(%arg3,%r11,1) # Write to the ciphertext buffer -- pshufb %xmm15, \XMM1 # perform a 16 byte swap -- pshufb %xmm15, \XMM2 # perform a 16 byte swap -- pshufb %xmm15, \XMM3 # perform a 16 byte swap -- pshufb %xmm15, \XMM4 # perform a 16 byte swap -- -- pxor \TMP4, \TMP1 -- pxor \XMM8, \XMM5 -- pxor \TMP6, \TMP2 -- pxor \TMP1, \TMP2 -- pxor \XMM5, \TMP2 -- movdqa \TMP2, \TMP3 -- pslldq $8, \TMP3 # left shift TMP3 2 DWs -- psrldq $8, \TMP2 # right shift TMP2 2 DWs -- pxor \TMP3, \XMM5 -- pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5 -- -- # first phase of reduction -- -- movdqa \XMM5, \TMP2 -- movdqa \XMM5, \TMP3 -- movdqa \XMM5, \TMP4 --# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently -- pslld $31, \TMP2 # packed right shift << 31 -- pslld $30, \TMP3 # packed right shift << 30 -- pslld $25, \TMP4 # packed right shift << 25 -- pxor \TMP3, \TMP2 # xor the shifted versions -- pxor \TMP4, \TMP2 -- movdqa \TMP2, \TMP5 -- psrldq $4, \TMP5 # right shift T5 1 DW -- pslldq $12, \TMP2 # left shift T2 3 DWs -- pxor \TMP2, \XMM5 -- -- # second phase of reduction -- -- movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4 -- movdqa \XMM5,\TMP3 -- movdqa \XMM5,\TMP4 -- psrld $1, \TMP2 # packed left shift >>1 -- psrld $2, \TMP3 # packed left shift >>2 -- psrld $7, \TMP4 # packed left shift >>7 -- pxor \TMP3,\TMP2 # xor the shifted versions -- pxor \TMP4,\TMP2 -- pxor \TMP5, \TMP2 -- pxor \TMP2, \XMM5 -- pxor \TMP1, \XMM5 # result is in TMP1 -- -- pxor \XMM5, \XMM1 --.endm -- --/* --* decrypt 4 blocks at a time --* ghash the 4 previously decrypted ciphertext blocks --* arg1, %arg3, %arg4 are used as pointers only, not modified --* %r11 is the data offset value --*/ --.macro GHASH_4_ENCRYPT_4_PARALLEL_dec TMP1 TMP2 TMP3 TMP4 TMP5 \ --TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation -- -- movdqa \XMM1, \XMM5 -- movdqa \XMM2, 
\XMM6 -- movdqa \XMM3, \XMM7 -- movdqa \XMM4, \XMM8 -- -- movdqa SHUF_MASK(%rip), %xmm15 -- # multiply TMP5 * HashKey using karatsuba -- -- movdqa \XMM5, \TMP4 -- pshufd $78, \XMM5, \TMP6 -- pxor \XMM5, \TMP6 -- paddd ONE(%rip), \XMM0 # INCR CNT -- movdqu HashKey_4(%arg2), \TMP5 -- pclmulqdq $0x11, \TMP5, \TMP4 # TMP4 = a1*b1 -- movdqa \XMM0, \XMM1 -- paddd ONE(%rip), \XMM0 # INCR CNT -- movdqa \XMM0, \XMM2 -- paddd ONE(%rip), \XMM0 # INCR CNT -- movdqa \XMM0, \XMM3 -- paddd ONE(%rip), \XMM0 # INCR CNT -- movdqa \XMM0, \XMM4 -- pshufb %xmm15, \XMM1 # perform a 16 byte swap -- pclmulqdq $0x00, \TMP5, \XMM5 # XMM5 = a0*b0 -- pshufb %xmm15, \XMM2 # perform a 16 byte swap -- pshufb %xmm15, \XMM3 # perform a 16 byte swap -- pshufb %xmm15, \XMM4 # perform a 16 byte swap -- -- pxor (%arg1), \XMM1 -- pxor (%arg1), \XMM2 -- pxor (%arg1), \XMM3 -- pxor (%arg1), \XMM4 -- movdqu HashKey_4_k(%arg2), \TMP5 -- pclmulqdq $0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) -- movaps 0x10(%arg1), \TMP1 -- aesenc \TMP1, \XMM1 # Round 1 -- aesenc \TMP1, \XMM2 -- aesenc \TMP1, \XMM3 -- aesenc \TMP1, \XMM4 -- movaps 0x20(%arg1), \TMP1 -- aesenc \TMP1, \XMM1 # Round 2 -- aesenc \TMP1, \XMM2 -- aesenc \TMP1, \XMM3 -- aesenc \TMP1, \XMM4 -- movdqa \XMM6, \TMP1 -- pshufd $78, \XMM6, \TMP2 -- pxor \XMM6, \TMP2 -- movdqu HashKey_3(%arg2), \TMP5 -- pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 -- movaps 0x30(%arg1), \TMP3 -- aesenc \TMP3, \XMM1 # Round 3 -- aesenc \TMP3, \XMM2 -- aesenc \TMP3, \XMM3 -- aesenc \TMP3, \XMM4 -- pclmulqdq $0x00, \TMP5, \XMM6 # XMM6 = a0*b0 -- movaps 0x40(%arg1), \TMP3 -- aesenc \TMP3, \XMM1 # Round 4 -- aesenc \TMP3, \XMM2 -- aesenc \TMP3, \XMM3 -- aesenc \TMP3, \XMM4 -- movdqu HashKey_3_k(%arg2), \TMP5 -- pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) -- movaps 0x50(%arg1), \TMP3 -- aesenc \TMP3, \XMM1 # Round 5 -- aesenc \TMP3, \XMM2 -- aesenc \TMP3, \XMM3 -- aesenc \TMP3, \XMM4 -- pxor \TMP1, \TMP4 --# accumulate the results in TMP4:XMM5, TMP6 holds the 
middle part -- pxor \XMM6, \XMM5 -- pxor \TMP2, \TMP6 -- movdqa \XMM7, \TMP1 -- pshufd $78, \XMM7, \TMP2 -- pxor \XMM7, \TMP2 -- movdqu HashKey_2(%arg2), \TMP5 -- -- # Multiply TMP5 * HashKey using karatsuba -- -- pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 -- movaps 0x60(%arg1), \TMP3 -- aesenc \TMP3, \XMM1 # Round 6 -- aesenc \TMP3, \XMM2 -- aesenc \TMP3, \XMM3 -- aesenc \TMP3, \XMM4 -- pclmulqdq $0x00, \TMP5, \XMM7 # XMM7 = a0*b0 -- movaps 0x70(%arg1), \TMP3 -- aesenc \TMP3, \XMM1 # Round 7 -- aesenc \TMP3, \XMM2 -- aesenc \TMP3, \XMM3 -- aesenc \TMP3, \XMM4 -- movdqu HashKey_2_k(%arg2), \TMP5 -- pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) -- movaps 0x80(%arg1), \TMP3 -- aesenc \TMP3, \XMM1 # Round 8 -- aesenc \TMP3, \XMM2 -- aesenc \TMP3, \XMM3 -- aesenc \TMP3, \XMM4 -- pxor \TMP1, \TMP4 --# accumulate the results in TMP4:XMM5, TMP6 holds the middle part -- pxor \XMM7, \XMM5 -- pxor \TMP2, \TMP6 -- -- # Multiply XMM8 * HashKey -- # XMM8 and TMP5 hold the values for the two operands -- -- movdqa \XMM8, \TMP1 -- pshufd $78, \XMM8, \TMP2 -- pxor \XMM8, \TMP2 -- movdqu HashKey(%arg2), \TMP5 -- pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 -- movaps 0x90(%arg1), \TMP3 -- aesenc \TMP3, \XMM1 # Round 9 -- aesenc \TMP3, \XMM2 -- aesenc \TMP3, \XMM3 -- aesenc \TMP3, \XMM4 -- pclmulqdq $0x00, \TMP5, \XMM8 # XMM8 = a0*b0 -- lea 0xa0(%arg1),%r10 -- mov keysize,%eax -- shr $2,%eax # 128->4, 192->6, 256->8 -- sub $4,%eax # 128->0, 192->2, 256->4 -- jz .Laes_loop_par_dec_done\@ -- --.Laes_loop_par_dec\@: -- MOVADQ (%r10),\TMP3 --.irpc index, 1234 -- aesenc \TMP3, %xmm\index --.endr -- add $16,%r10 -- sub $1,%eax -- jnz .Laes_loop_par_dec\@ -- --.Laes_loop_par_dec_done\@: -- MOVADQ (%r10), \TMP3 -- aesenclast \TMP3, \XMM1 # last round -- aesenclast \TMP3, \XMM2 -- aesenclast \TMP3, \XMM3 -- aesenclast \TMP3, \XMM4 -- movdqu HashKey_k(%arg2), \TMP5 -- pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) -- movdqu (%arg4,%r11,1), \TMP3 -- pxor \TMP3, \XMM1 # 
Ciphertext/Plaintext XOR EK -- movdqu \XMM1, (%arg3,%r11,1) # Write to plaintext buffer -- movdqa \TMP3, \XMM1 -- movdqu 16(%arg4,%r11,1), \TMP3 -- pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK -- movdqu \XMM2, 16(%arg3,%r11,1) # Write to plaintext buffer -- movdqa \TMP3, \XMM2 -- movdqu 32(%arg4,%r11,1), \TMP3 -- pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK -- movdqu \XMM3, 32(%arg3,%r11,1) # Write to plaintext buffer -- movdqa \TMP3, \XMM3 -- movdqu 48(%arg4,%r11,1), \TMP3 -- pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK -- movdqu \XMM4, 48(%arg3,%r11,1) # Write to plaintext buffer -- movdqa \TMP3, \XMM4 -- pshufb %xmm15, \XMM1 # perform a 16 byte swap -- pshufb %xmm15, \XMM2 # perform a 16 byte swap -- pshufb %xmm15, \XMM3 # perform a 16 byte swap -- pshufb %xmm15, \XMM4 # perform a 16 byte swap -- -- pxor \TMP4, \TMP1 -- pxor \XMM8, \XMM5 -- pxor \TMP6, \TMP2 -- pxor \TMP1, \TMP2 -- pxor \XMM5, \TMP2 -- movdqa \TMP2, \TMP3 -- pslldq $8, \TMP3 # left shift TMP3 2 DWs -- psrldq $8, \TMP2 # right shift TMP2 2 DWs -- pxor \TMP3, \XMM5 -- pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5 -- -- # first phase of reduction -- -- movdqa \XMM5, \TMP2 -- movdqa \XMM5, \TMP3 -- movdqa \XMM5, \TMP4 --# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently -- pslld $31, \TMP2 # packed right shift << 31 -- pslld $30, \TMP3 # packed right shift << 30 -- pslld $25, \TMP4 # packed right shift << 25 -- pxor \TMP3, \TMP2 # xor the shifted versions -- pxor \TMP4, \TMP2 -- movdqa \TMP2, \TMP5 -- psrldq $4, \TMP5 # right shift T5 1 DW -- pslldq $12, \TMP2 # left shift T2 3 DWs -- pxor \TMP2, \XMM5 -- -- # second phase of reduction -- -- movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4 -- movdqa \XMM5,\TMP3 -- movdqa \XMM5,\TMP4 -- psrld $1, \TMP2 # packed left shift >>1 -- psrld $2, \TMP3 # packed left shift >>2 -- psrld $7, \TMP4 # packed left shift >>7 -- pxor \TMP3,\TMP2 # xor the shifted versions -- pxor \TMP4,\TMP2 -- pxor \TMP5, 
\TMP2 -- pxor \TMP2, \XMM5 -- pxor \TMP1, \XMM5 # result is in TMP1 -- -- pxor \XMM5, \XMM1 --.endm -- --/* GHASH the last 4 ciphertext blocks. */ --.macro GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \ --TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst -- -- # Multiply TMP6 * HashKey (using Karatsuba) -- -- movdqa \XMM1, \TMP6 -- pshufd $78, \XMM1, \TMP2 -- pxor \XMM1, \TMP2 -- movdqu HashKey_4(%arg2), \TMP5 -- pclmulqdq $0x11, \TMP5, \TMP6 # TMP6 = a1*b1 -- pclmulqdq $0x00, \TMP5, \XMM1 # XMM1 = a0*b0 -- movdqu HashKey_4_k(%arg2), \TMP4 -- pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) -- movdqa \XMM1, \XMMDst -- movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1 -- -- # Multiply TMP1 * HashKey (using Karatsuba) -- -- movdqa \XMM2, \TMP1 -- pshufd $78, \XMM2, \TMP2 -- pxor \XMM2, \TMP2 -- movdqu HashKey_3(%arg2), \TMP5 -- pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 -- pclmulqdq $0x00, \TMP5, \XMM2 # XMM2 = a0*b0 -- movdqu HashKey_3_k(%arg2), \TMP4 -- pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) -- pxor \TMP1, \TMP6 -- pxor \XMM2, \XMMDst -- pxor \TMP2, \XMM1 --# results accumulated in TMP6, XMMDst, XMM1 -- -- # Multiply TMP1 * HashKey (using Karatsuba) -- -- movdqa \XMM3, \TMP1 -- pshufd $78, \XMM3, \TMP2 -- pxor \XMM3, \TMP2 -- movdqu HashKey_2(%arg2), \TMP5 -- pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 -- pclmulqdq $0x00, \TMP5, \XMM3 # XMM3 = a0*b0 -- movdqu HashKey_2_k(%arg2), \TMP4 -- pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) -- pxor \TMP1, \TMP6 -- pxor \XMM3, \XMMDst -- pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1 -- -- # Multiply TMP1 * HashKey (using Karatsuba) -- movdqa \XMM4, \TMP1 -- pshufd $78, \XMM4, \TMP2 -- pxor \XMM4, \TMP2 -- movdqu HashKey(%arg2), \TMP5 -- pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 -- pclmulqdq $0x00, \TMP5, \XMM4 # XMM4 = a0*b0 -- movdqu HashKey_k(%arg2), \TMP4 -- pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) -- pxor \TMP1, \TMP6 -- pxor \XMM4, \XMMDst -- pxor \XMM1, \TMP2 -- pxor 
\TMP6, \TMP2 -- pxor \XMMDst, \TMP2 -- # middle section of the temp results combined as in karatsuba algorithm -- movdqa \TMP2, \TMP4 -- pslldq $8, \TMP4 # left shift TMP4 2 DWs -- psrldq $8, \TMP2 # right shift TMP2 2 DWs -- pxor \TMP4, \XMMDst -- pxor \TMP2, \TMP6 --# TMP6:XMMDst holds the result of the accumulated carry-less multiplications -- # first phase of the reduction -- movdqa \XMMDst, \TMP2 -- movdqa \XMMDst, \TMP3 -- movdqa \XMMDst, \TMP4 --# move XMMDst into TMP2, TMP3, TMP4 in order to perform 3 shifts independently -- pslld $31, \TMP2 # packed right shifting << 31 -- pslld $30, \TMP3 # packed right shifting << 30 -- pslld $25, \TMP4 # packed right shifting << 25 -- pxor \TMP3, \TMP2 # xor the shifted versions -- pxor \TMP4, \TMP2 -- movdqa \TMP2, \TMP7 -- psrldq $4, \TMP7 # right shift TMP7 1 DW -- pslldq $12, \TMP2 # left shift TMP2 3 DWs -- pxor \TMP2, \XMMDst -- -- # second phase of the reduction -- movdqa \XMMDst, \TMP2 -- # make 3 copies of XMMDst for doing 3 shift operations -- movdqa \XMMDst, \TMP3 -- movdqa \XMMDst, \TMP4 -- psrld $1, \TMP2 # packed left shift >> 1 -- psrld $2, \TMP3 # packed left shift >> 2 -- psrld $7, \TMP4 # packed left shift >> 7 -- pxor \TMP3, \TMP2 # xor the shifted versions -- pxor \TMP4, \TMP2 -- pxor \TMP7, \TMP2 -- pxor \TMP2, \XMMDst -- pxor \TMP6, \XMMDst # reduced result is in XMMDst --.endm -- -- --/* Encryption of a single block --* uses eax & r10 --*/ -- --.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1 -- -- pxor (%arg1), \XMM0 -- mov keysize,%eax -- shr $2,%eax # 128->4, 192->6, 256->8 -- add $5,%eax # 128->9, 192->11, 256->13 -- lea 16(%arg1), %r10 # get first expanded key address -- --_esb_loop_\@: -- MOVADQ (%r10),\TMP1 -- aesenc \TMP1,\XMM0 -- add $16,%r10 -- sub $1,%eax -- jnz _esb_loop_\@ -- -- MOVADQ (%r10),\TMP1 -- aesenclast \TMP1,\XMM0 --.endm -- --/***************************************************************************** --* void aesni_gcm_init(void *aes_ctx, // AES Key schedule. 
Starts on a 16 byte boundary. --* struct gcm_context_data *data, --* // context data --* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) --* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) --* // concatenated with 0x00000001. 16-byte aligned pointer. --* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. --* const u8 *aad, // Additional Authentication Data (AAD) --* u64 aad_len) // Length of AAD in bytes. --*/ --SYM_FUNC_START(aesni_gcm_init) -- FUNC_SAVE -- GCM_INIT %arg3, %arg4,%arg5, %arg6 -- FUNC_RESTORE -- RET --SYM_FUNC_END(aesni_gcm_init) -- --/***************************************************************************** --* void aesni_gcm_enc_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. --* struct gcm_context_data *data, --* // context data --* u8 *out, // Ciphertext output. Encrypt in-place is allowed. --* const u8 *in, // Plaintext input --* u64 plaintext_len, // Length of data in bytes for encryption. --*/ --SYM_FUNC_START(aesni_gcm_enc_update) -- FUNC_SAVE -- GCM_ENC_DEC enc -- FUNC_RESTORE -- RET --SYM_FUNC_END(aesni_gcm_enc_update) -- --/***************************************************************************** --* void aesni_gcm_dec_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. --* struct gcm_context_data *data, --* // context data --* u8 *out, // Ciphertext output. Encrypt in-place is allowed. --* const u8 *in, // Plaintext input --* u64 plaintext_len, // Length of data in bytes for encryption. --*/ --SYM_FUNC_START(aesni_gcm_dec_update) -- FUNC_SAVE -- GCM_ENC_DEC dec -- FUNC_RESTORE -- RET --SYM_FUNC_END(aesni_gcm_dec_update) -- --/***************************************************************************** --* void aesni_gcm_finalize(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. --* struct gcm_context_data *data, --* // context data --* u8 *auth_tag, // Authenticated Tag output. 
--* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), --* // 12 or 8. --*/ --SYM_FUNC_START(aesni_gcm_finalize) -- FUNC_SAVE -- GCM_COMPLETE %arg3 %arg4 -- FUNC_RESTORE -- RET --SYM_FUNC_END(aesni_gcm_finalize) -- --#endif -- - SYM_FUNC_START_LOCAL(_key_expansion_256a) - pshufd $0b11111111, %xmm1, %xmm1 - shufps $0b00010000, %xmm0, %xmm4 -diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S -deleted file mode 100644 -index 8c9749ed0651..000000000000 ---- a/arch/x86/crypto/aesni-intel_avx-x86_64.S -+++ /dev/null -@@ -1,2804 +0,0 @@ --######################################################################## --# Copyright (c) 2013, Intel Corporation --# --# This software is available to you under a choice of one of two --# licenses. You may choose to be licensed under the terms of the GNU --# General Public License (GPL) Version 2, available from the file --# COPYING in the main directory of this source tree, or the --# OpenIB.org BSD license below: --# --# Redistribution and use in source and binary forms, with or without --# modification, are permitted provided that the following conditions are --# met: --# --# * Redistributions of source code must retain the above copyright --# notice, this list of conditions and the following disclaimer. --# --# * Redistributions in binary form must reproduce the above copyright --# notice, this list of conditions and the following disclaimer in the --# documentation and/or other materials provided with the --# distribution. --# --# * Neither the name of the Intel Corporation nor the names of its --# contributors may be used to endorse or promote products derived from --# this software without specific prior written permission. 
--# --# --# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY --# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE --# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR --# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR --# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, --# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, --# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES# LOSS OF USE, DATA, OR --# PROFITS# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF --# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING --# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS --# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. --######################################################################## --## --## Authors: --## Erdinc Ozturk --## Vinodh Gopal --## James Guilford --## Tim Chen --## --## References: --## This code was derived and highly optimized from the code described in paper: --## Vinodh Gopal et. al. Optimized Galois-Counter-Mode Implementation --## on Intel Architecture Processors. August, 2010 --## The details of the implementation is explained in: --## Erdinc Ozturk et. al. Enabling High-Performance Galois-Counter-Mode --## on Intel Architecture Processors. October, 2012. 
--## --## Assumptions: --## --## --## --## iv: --## 0 1 2 3 --## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 --## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ --## | Salt (From the SA) | --## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ --## | Initialization Vector | --## | (This is the sequence number from IPSec header) | --## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ --## | 0x1 | --## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ --## --## --## --## AAD: --## AAD padded to 128 bits with 0 --## for example, assume AAD is a u32 vector --## --## if AAD is 8 bytes: --## AAD[3] = {A0, A1}# --## padded AAD in xmm register = {A1 A0 0 0} --## --## 0 1 2 3 --## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 --## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ --## | SPI (A1) | --## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ --## | 32-bit Sequence Number (A0) | --## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ --## | 0x0 | --## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ --## --## AAD Format with 32-bit Sequence Number --## --## if AAD is 12 bytes: --## AAD[3] = {A0, A1, A2}# --## padded AAD in xmm register = {A2 A1 A0 0} --## --## 0 1 2 3 --## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 --## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ --## | SPI (A2) | --## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ --## | 64-bit Extended Sequence Number {A1,A0} | --## | | --## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ --## | 0x0 | --## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ --## --## AAD Format with 64-bit Extended Sequence Number --## --## --## aadLen: --## from the definition of the spec, aadLen can only be 8 or 12 bytes. 
--## The code additionally supports aadLen of length 16 bytes. --## --## TLen: --## from the definition of the spec, TLen can only be 8, 12 or 16 bytes. --## --## poly = x^128 + x^127 + x^126 + x^121 + 1 --## throughout the code, one tab and two tab indentations are used. one tab is --## for GHASH part, two tabs is for AES part. --## -- --#include -- --# constants in mergeable sections, linker can reorder and merge --.section .rodata.cst16.POLY, "aM", @progbits, 16 --.align 16 --POLY: .octa 0xC2000000000000000000000000000001 -- --.section .rodata.cst16.POLY2, "aM", @progbits, 16 --.align 16 --POLY2: .octa 0xC20000000000000000000001C2000000 -- --.section .rodata.cst16.TWOONE, "aM", @progbits, 16 --.align 16 --TWOONE: .octa 0x00000001000000000000000000000001 -- --.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 --.align 16 --SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F -- --.section .rodata.cst16.ONE, "aM", @progbits, 16 --.align 16 --ONE: .octa 0x00000000000000000000000000000001 -- --.section .rodata.cst16.ONEf, "aM", @progbits, 16 --.align 16 --ONEf: .octa 0x01000000000000000000000000000000 -- --# order of these constants should not change. 
--# more specifically, ALL_F should follow SHIFT_MASK, and zero should follow ALL_F --.section .rodata, "a", @progbits --.align 16 --SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 --ALL_F: .octa 0xffffffffffffffffffffffffffffffff -- .octa 0x00000000000000000000000000000000 -- --.text -- -- --#define AadHash 16*0 --#define AadLen 16*1 --#define InLen (16*1)+8 --#define PBlockEncKey 16*2 --#define OrigIV 16*3 --#define CurCount 16*4 --#define PBlockLen 16*5 -- --HashKey = 16*6 # store HashKey <<1 mod poly here --HashKey_2 = 16*7 # store HashKey^2 <<1 mod poly here --HashKey_3 = 16*8 # store HashKey^3 <<1 mod poly here --HashKey_4 = 16*9 # store HashKey^4 <<1 mod poly here --HashKey_5 = 16*10 # store HashKey^5 <<1 mod poly here --HashKey_6 = 16*11 # store HashKey^6 <<1 mod poly here --HashKey_7 = 16*12 # store HashKey^7 <<1 mod poly here --HashKey_8 = 16*13 # store HashKey^8 <<1 mod poly here --HashKey_k = 16*14 # store XOR of HashKey <<1 mod poly here (for Karatsuba purposes) --HashKey_2_k = 16*15 # store XOR of HashKey^2 <<1 mod poly here (for Karatsuba purposes) --HashKey_3_k = 16*16 # store XOR of HashKey^3 <<1 mod poly here (for Karatsuba purposes) --HashKey_4_k = 16*17 # store XOR of HashKey^4 <<1 mod poly here (for Karatsuba purposes) --HashKey_5_k = 16*18 # store XOR of HashKey^5 <<1 mod poly here (for Karatsuba purposes) --HashKey_6_k = 16*19 # store XOR of HashKey^6 <<1 mod poly here (for Karatsuba purposes) --HashKey_7_k = 16*20 # store XOR of HashKey^7 <<1 mod poly here (for Karatsuba purposes) --HashKey_8_k = 16*21 # store XOR of HashKey^8 <<1 mod poly here (for Karatsuba purposes) -- --#define arg1 %rdi --#define arg2 %rsi --#define arg3 %rdx --#define arg4 %rcx --#define arg5 %r8 --#define arg6 %r9 --#define keysize 2*15*16(arg1) -- --i = 0 --j = 0 -- --out_order = 0 --in_order = 1 --DEC = 0 --ENC = 1 -- --.macro define_reg r n --reg_\r = %xmm\n --.endm -- --.macro setreg --.altmacro --define_reg i %i --define_reg j %j --.noaltmacro --.endm -- 
--TMP1 = 16*0 # Temporary storage for AAD --TMP2 = 16*1 # Temporary storage for AES State 2 (State 1 is stored in an XMM register) --TMP3 = 16*2 # Temporary storage for AES State 3 --TMP4 = 16*3 # Temporary storage for AES State 4 --TMP5 = 16*4 # Temporary storage for AES State 5 --TMP6 = 16*5 # Temporary storage for AES State 6 --TMP7 = 16*6 # Temporary storage for AES State 7 --TMP8 = 16*7 # Temporary storage for AES State 8 -- --VARIABLE_OFFSET = 16*8 -- --################################ --# Utility Macros --################################ -- --.macro FUNC_SAVE -- push %r12 -- push %r13 -- push %r15 -- -- push %rbp -- mov %rsp, %rbp -- -- sub $VARIABLE_OFFSET, %rsp -- and $~63, %rsp # align rsp to 64 bytes --.endm -- --.macro FUNC_RESTORE -- mov %rbp, %rsp -- pop %rbp -- -- pop %r15 -- pop %r13 -- pop %r12 --.endm -- --# Encryption of a single block --.macro ENCRYPT_SINGLE_BLOCK REP XMM0 -- vpxor (arg1), \XMM0, \XMM0 -- i = 1 -- setreg --.rep \REP -- vaesenc 16*i(arg1), \XMM0, \XMM0 -- i = (i+1) -- setreg --.endr -- vaesenclast 16*i(arg1), \XMM0, \XMM0 --.endm -- --# combined for GCM encrypt and decrypt functions --# clobbering all xmm registers --# clobbering r10, r11, r12, r13, r15, rax --.macro GCM_ENC_DEC INITIAL_BLOCKS GHASH_8_ENCRYPT_8_PARALLEL GHASH_LAST_8 GHASH_MUL ENC_DEC REP -- vmovdqu AadHash(arg2), %xmm8 -- vmovdqu HashKey(arg2), %xmm13 # xmm13 = HashKey -- add arg5, InLen(arg2) -- -- # initialize the data pointer offset as zero -- xor %r11d, %r11d -- -- PARTIAL_BLOCK \GHASH_MUL, arg3, arg4, arg5, %r11, %xmm8, \ENC_DEC -- sub %r11, arg5 -- -- mov arg5, %r13 # save the number of bytes of plaintext/ciphertext -- and $-16, %r13 # r13 = r13 - (r13 mod 16) -- -- mov %r13, %r12 -- shr $4, %r12 -- and $7, %r12 -- jz .L_initial_num_blocks_is_0\@ -- -- cmp $7, %r12 -- je .L_initial_num_blocks_is_7\@ -- cmp $6, %r12 -- je .L_initial_num_blocks_is_6\@ -- cmp $5, %r12 -- je .L_initial_num_blocks_is_5\@ -- cmp $4, %r12 -- je .L_initial_num_blocks_is_4\@ -- cmp 
$3, %r12 -- je .L_initial_num_blocks_is_3\@ -- cmp $2, %r12 -- je .L_initial_num_blocks_is_2\@ -- -- jmp .L_initial_num_blocks_is_1\@ -- --.L_initial_num_blocks_is_7\@: -- \INITIAL_BLOCKS \REP, 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC -- sub $16*7, %r13 -- jmp .L_initial_blocks_encrypted\@ -- --.L_initial_num_blocks_is_6\@: -- \INITIAL_BLOCKS \REP, 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC -- sub $16*6, %r13 -- jmp .L_initial_blocks_encrypted\@ -- --.L_initial_num_blocks_is_5\@: -- \INITIAL_BLOCKS \REP, 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC -- sub $16*5, %r13 -- jmp .L_initial_blocks_encrypted\@ -- --.L_initial_num_blocks_is_4\@: -- \INITIAL_BLOCKS \REP, 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC -- sub $16*4, %r13 -- jmp .L_initial_blocks_encrypted\@ -- --.L_initial_num_blocks_is_3\@: -- \INITIAL_BLOCKS \REP, 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC -- sub $16*3, %r13 -- jmp .L_initial_blocks_encrypted\@ -- --.L_initial_num_blocks_is_2\@: -- \INITIAL_BLOCKS \REP, 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC -- sub $16*2, %r13 -- jmp .L_initial_blocks_encrypted\@ -- --.L_initial_num_blocks_is_1\@: -- \INITIAL_BLOCKS \REP, 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC -- sub $16*1, %r13 -- jmp .L_initial_blocks_encrypted\@ -- --.L_initial_num_blocks_is_0\@: -- \INITIAL_BLOCKS \REP, 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, 
%xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC -- -- --.L_initial_blocks_encrypted\@: -- test %r13, %r13 -- je .L_zero_cipher_left\@ -- -- sub $128, %r13 -- je .L_eight_cipher_left\@ -- -- -- -- -- vmovd %xmm9, %r15d -- and $255, %r15d -- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 -- -- --.L_encrypt_by_8_new\@: -- cmp $(255-8), %r15d -- jg .L_encrypt_by_8\@ -- -- -- -- add $8, %r15b -- \GHASH_8_ENCRYPT_8_PARALLEL \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC -- add $128, %r11 -- sub $128, %r13 -- jne .L_encrypt_by_8_new\@ -- -- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 -- jmp .L_eight_cipher_left\@ -- --.L_encrypt_by_8\@: -- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 -- add $8, %r15b -- \GHASH_8_ENCRYPT_8_PARALLEL \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC -- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 -- add $128, %r11 -- sub $128, %r13 -- jne .L_encrypt_by_8_new\@ -- -- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 -- -- -- -- --.L_eight_cipher_left\@: -- \GHASH_LAST_8 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8 -- -- --.L_zero_cipher_left\@: -- vmovdqu %xmm14, AadHash(arg2) -- vmovdqu %xmm9, CurCount(arg2) -- -- # check for 0 length -- mov arg5, %r13 -- and $15, %r13 # r13 = (arg5 mod 16) -- -- je .L_multiple_of_16_bytes\@ -- -- # handle the last <16 Byte block separately -- -- mov %r13, PBlockLen(arg2) -- -- vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn -- vmovdqu %xmm9, CurCount(arg2) -- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 -- -- ENCRYPT_SINGLE_BLOCK \REP, %xmm9 # E(K, Yn) -- vmovdqu %xmm9, PBlockEncKey(arg2) -- -- cmp $16, arg5 -- jge .L_large_enough_update\@ -- -- lea (arg4,%r11,1), %r10 -- mov %r13, %r12 -- -- READ_PARTIAL_BLOCK %r10 %r12 %xmm1 -- -- lea SHIFT_MASK+16(%rip), %r12 -- sub %r13, %r12 # adjust the 
shuffle mask pointer to be -- # able to shift 16-r13 bytes (r13 is the -- # number of bytes in plaintext mod 16) -- -- jmp .L_final_ghash_mul\@ -- --.L_large_enough_update\@: -- sub $16, %r11 -- add %r13, %r11 -- -- # receive the last <16 Byte block -- vmovdqu (arg4, %r11, 1), %xmm1 -- -- sub %r13, %r11 -- add $16, %r11 -- -- lea SHIFT_MASK+16(%rip), %r12 -- # adjust the shuffle mask pointer to be able to shift 16-r13 bytes -- # (r13 is the number of bytes in plaintext mod 16) -- sub %r13, %r12 -- # get the appropriate shuffle mask -- vmovdqu (%r12), %xmm2 -- # shift right 16-r13 bytes -- vpshufb %xmm2, %xmm1, %xmm1 -- --.L_final_ghash_mul\@: -- .if \ENC_DEC == DEC -- vmovdqa %xmm1, %xmm2 -- vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) -- vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to -- # mask out top 16-r13 bytes of xmm9 -- vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 -- vpand %xmm1, %xmm2, %xmm2 -- vpshufb SHUF_MASK(%rip), %xmm2, %xmm2 -- vpxor %xmm2, %xmm14, %xmm14 -- -- vmovdqu %xmm14, AadHash(arg2) -- .else -- vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) -- vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to -- # mask out top 16-r13 bytes of xmm9 -- vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 -- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 -- vpxor %xmm9, %xmm14, %xmm14 -- -- vmovdqu %xmm14, AadHash(arg2) -- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext -- .endif -- -- -- ############################# -- # output r13 Bytes -- vmovq %xmm9, %rax -- cmp $8, %r13 -- jle .L_less_than_8_bytes_left\@ -- -- mov %rax, (arg3 , %r11) -- add $8, %r11 -- vpsrldq $8, %xmm9, %xmm9 -- vmovq %xmm9, %rax -- sub $8, %r13 -- --.L_less_than_8_bytes_left\@: -- movb %al, (arg3 , %r11) -- add $1, %r11 -- shr $8, %rax -- sub $1, %r13 -- jne .L_less_than_8_bytes_left\@ -- ############################# -- --.L_multiple_of_16_bytes\@: --.endm -- -- --# GCM_COMPLETE Finishes 
update of tag of last partial block --# Output: Authorization Tag (AUTH_TAG) --# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15 --.macro GCM_COMPLETE GHASH_MUL REP AUTH_TAG AUTH_TAG_LEN -- vmovdqu AadHash(arg2), %xmm14 -- vmovdqu HashKey(arg2), %xmm13 -- -- mov PBlockLen(arg2), %r12 -- test %r12, %r12 -- je .L_partial_done\@ -- -- #GHASH computation for the last <16 Byte block -- \GHASH_MUL %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 -- --.L_partial_done\@: -- mov AadLen(arg2), %r12 # r12 = aadLen (number of bytes) -- shl $3, %r12 # convert into number of bits -- vmovd %r12d, %xmm15 # len(A) in xmm15 -- -- mov InLen(arg2), %r12 -- shl $3, %r12 # len(C) in bits (*128) -- vmovq %r12, %xmm1 -- vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000 -- vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C) -- -- vpxor %xmm15, %xmm14, %xmm14 -- \GHASH_MUL %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation -- vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap -- -- vmovdqu OrigIV(arg2), %xmm9 -- -- ENCRYPT_SINGLE_BLOCK \REP, %xmm9 # E(K, Y0) -- -- vpxor %xmm14, %xmm9, %xmm9 -- -- -- --.L_return_T\@: -- mov \AUTH_TAG, %r10 # r10 = authTag -- mov \AUTH_TAG_LEN, %r11 # r11 = auth_tag_len -- -- cmp $16, %r11 -- je .L_T_16\@ -- -- cmp $8, %r11 -- jl .L_T_4\@ -- --.L_T_8\@: -- vmovq %xmm9, %rax -- mov %rax, (%r10) -- add $8, %r10 -- sub $8, %r11 -- vpsrldq $8, %xmm9, %xmm9 -- test %r11, %r11 -- je .L_return_T_done\@ --.L_T_4\@: -- vmovd %xmm9, %eax -- mov %eax, (%r10) -- add $4, %r10 -- sub $4, %r11 -- vpsrldq $4, %xmm9, %xmm9 -- test %r11, %r11 -- je .L_return_T_done\@ --.L_T_123\@: -- vmovd %xmm9, %eax -- cmp $2, %r11 -- jl .L_T_1\@ -- mov %ax, (%r10) -- cmp $2, %r11 -- je .L_return_T_done\@ -- add $2, %r10 -- sar $16, %eax --.L_T_1\@: -- mov %al, (%r10) -- jmp .L_return_T_done\@ -- --.L_T_16\@: -- vmovdqu %xmm9, (%r10) -- --.L_return_T_done\@: --.endm -- --.macro CALC_AAD_HASH GHASH_MUL AAD AADLEN T1 T2 T3 T4 T5 T6 
T7 T8 -- -- mov \AAD, %r10 # r10 = AAD -- mov \AADLEN, %r12 # r12 = aadLen -- -- -- mov %r12, %r11 -- -- vpxor \T8, \T8, \T8 -- vpxor \T7, \T7, \T7 -- cmp $16, %r11 -- jl .L_get_AAD_rest8\@ --.L_get_AAD_blocks\@: -- vmovdqu (%r10), \T7 -- vpshufb SHUF_MASK(%rip), \T7, \T7 -- vpxor \T7, \T8, \T8 -- \GHASH_MUL \T8, \T2, \T1, \T3, \T4, \T5, \T6 -- add $16, %r10 -- sub $16, %r12 -- sub $16, %r11 -- cmp $16, %r11 -- jge .L_get_AAD_blocks\@ -- vmovdqu \T8, \T7 -- test %r11, %r11 -- je .L_get_AAD_done\@ -- -- vpxor \T7, \T7, \T7 -- -- /* read the last <16B of AAD. since we have at least 4B of -- data right after the AAD (the ICV, and maybe some CT), we can -- read 4B/8B blocks safely, and then get rid of the extra stuff */ --.L_get_AAD_rest8\@: -- cmp $4, %r11 -- jle .L_get_AAD_rest4\@ -- movq (%r10), \T1 -- add $8, %r10 -- sub $8, %r11 -- vpslldq $8, \T1, \T1 -- vpsrldq $8, \T7, \T7 -- vpxor \T1, \T7, \T7 -- jmp .L_get_AAD_rest8\@ --.L_get_AAD_rest4\@: -- test %r11, %r11 -- jle .L_get_AAD_rest0\@ -- mov (%r10), %eax -- movq %rax, \T1 -- add $4, %r10 -- sub $4, %r11 -- vpslldq $12, \T1, \T1 -- vpsrldq $4, \T7, \T7 -- vpxor \T1, \T7, \T7 --.L_get_AAD_rest0\@: -- /* finalize: shift out the extra bytes we read, and align -- left. 
since pslldq can only shift by an immediate, we use -- vpshufb and a pair of shuffle masks */ -- leaq ALL_F(%rip), %r11 -- subq %r12, %r11 -- vmovdqu 16(%r11), \T1 -- andq $~3, %r11 -- vpshufb (%r11), \T7, \T7 -- vpand \T1, \T7, \T7 --.L_get_AAD_rest_final\@: -- vpshufb SHUF_MASK(%rip), \T7, \T7 -- vpxor \T8, \T7, \T7 -- \GHASH_MUL \T7, \T2, \T1, \T3, \T4, \T5, \T6 -- --.L_get_AAD_done\@: -- vmovdqu \T7, AadHash(arg2) --.endm -- --.macro INIT GHASH_MUL PRECOMPUTE -- mov arg6, %r11 -- mov %r11, AadLen(arg2) # ctx_data.aad_length = aad_length -- xor %r11d, %r11d -- mov %r11, InLen(arg2) # ctx_data.in_length = 0 -- -- mov %r11, PBlockLen(arg2) # ctx_data.partial_block_length = 0 -- mov %r11, PBlockEncKey(arg2) # ctx_data.partial_block_enc_key = 0 -- mov arg3, %rax -- movdqu (%rax), %xmm0 -- movdqu %xmm0, OrigIV(arg2) # ctx_data.orig_IV = iv -- -- vpshufb SHUF_MASK(%rip), %xmm0, %xmm0 -- movdqu %xmm0, CurCount(arg2) # ctx_data.current_counter = iv -- -- vmovdqu (arg4), %xmm6 # xmm6 = HashKey -- -- vpshufb SHUF_MASK(%rip), %xmm6, %xmm6 -- ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey -- vmovdqa %xmm6, %xmm2 -- vpsllq $1, %xmm6, %xmm6 -- vpsrlq $63, %xmm2, %xmm2 -- vmovdqa %xmm2, %xmm1 -- vpslldq $8, %xmm2, %xmm2 -- vpsrldq $8, %xmm1, %xmm1 -- vpor %xmm2, %xmm6, %xmm6 -- #reduction -- vpshufd $0b00100100, %xmm1, %xmm2 -- vpcmpeqd TWOONE(%rip), %xmm2, %xmm2 -- vpand POLY(%rip), %xmm2, %xmm2 -- vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly -- ####################################################################### -- vmovdqu %xmm6, HashKey(arg2) # store HashKey<<1 mod poly -- -- CALC_AAD_HASH \GHASH_MUL, arg5, arg6, %xmm2, %xmm6, %xmm3, %xmm4, %xmm5, %xmm7, %xmm1, %xmm0 -- -- \PRECOMPUTE %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5 --.endm -- -- --# Reads DLEN bytes starting at DPTR and stores in XMMDst --# where 0 < DLEN < 16 --# Clobbers %rax, DLEN --.macro READ_PARTIAL_BLOCK DPTR DLEN XMMDst -- vpxor \XMMDst, \XMMDst, \XMMDst 
-- -- cmp $8, \DLEN -- jl .L_read_lt8_\@ -- mov (\DPTR), %rax -- vpinsrq $0, %rax, \XMMDst, \XMMDst -- sub $8, \DLEN -- jz .L_done_read_partial_block_\@ -- xor %eax, %eax --.L_read_next_byte_\@: -- shl $8, %rax -- mov 7(\DPTR, \DLEN, 1), %al -- dec \DLEN -- jnz .L_read_next_byte_\@ -- vpinsrq $1, %rax, \XMMDst, \XMMDst -- jmp .L_done_read_partial_block_\@ --.L_read_lt8_\@: -- xor %eax, %eax --.L_read_next_byte_lt8_\@: -- shl $8, %rax -- mov -1(\DPTR, \DLEN, 1), %al -- dec \DLEN -- jnz .L_read_next_byte_lt8_\@ -- vpinsrq $0, %rax, \XMMDst, \XMMDst --.L_done_read_partial_block_\@: --.endm -- --# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks --# between update calls. --# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK --# Outputs encrypted bytes, and updates hash and partial info in gcm_data_context --# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13 --.macro PARTIAL_BLOCK GHASH_MUL CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ -- AAD_HASH ENC_DEC -- mov PBlockLen(arg2), %r13 -- test %r13, %r13 -- je .L_partial_block_done_\@ # Leave Macro if no partial blocks -- # Read in input data without over reading -- cmp $16, \PLAIN_CYPH_LEN -- jl .L_fewer_than_16_bytes_\@ -- vmovdqu (\PLAIN_CYPH_IN), %xmm1 # If more than 16 bytes, just fill xmm -- jmp .L_data_read_\@ -- --.L_fewer_than_16_bytes_\@: -- lea (\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10 -- mov \PLAIN_CYPH_LEN, %r12 -- READ_PARTIAL_BLOCK %r10 %r12 %xmm1 -- -- mov PBlockLen(arg2), %r13 -- --.L_data_read_\@: # Finished reading in data -- -- vmovdqu PBlockEncKey(arg2), %xmm9 -- vmovdqu HashKey(arg2), %xmm13 -- -- lea SHIFT_MASK(%rip), %r12 -- -- # adjust the shuffle mask pointer to be able to shift r13 bytes -- # r16-r13 is the number of bytes in plaintext mod 16) -- add %r13, %r12 -- vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask -- vpshufb %xmm2, %xmm9, %xmm9 # shift right r13 bytes -- --.if \ENC_DEC == DEC -- vmovdqa %xmm1, %xmm3 -- pxor %xmm1, %xmm9 
# Ciphertext XOR E(K, Yn) -- -- mov \PLAIN_CYPH_LEN, %r10 -- add %r13, %r10 -- # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling -- sub $16, %r10 -- # Determine if partial block is not being filled and -- # shift mask accordingly -- jge .L_no_extra_mask_1_\@ -- sub %r10, %r12 --.L_no_extra_mask_1_\@: -- -- vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 -- # get the appropriate mask to mask out bottom r13 bytes of xmm9 -- vpand %xmm1, %xmm9, %xmm9 # mask out bottom r13 bytes of xmm9 -- -- vpand %xmm1, %xmm3, %xmm3 -- vmovdqa SHUF_MASK(%rip), %xmm10 -- vpshufb %xmm10, %xmm3, %xmm3 -- vpshufb %xmm2, %xmm3, %xmm3 -- vpxor %xmm3, \AAD_HASH, \AAD_HASH -- -- test %r10, %r10 -- jl .L_partial_incomplete_1_\@ -- -- # GHASH computation for the last <16 Byte block -- \GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 -- xor %eax,%eax -- -- mov %rax, PBlockLen(arg2) -- jmp .L_dec_done_\@ --.L_partial_incomplete_1_\@: -- add \PLAIN_CYPH_LEN, PBlockLen(arg2) --.L_dec_done_\@: -- vmovdqu \AAD_HASH, AadHash(arg2) --.else -- vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) -- -- mov \PLAIN_CYPH_LEN, %r10 -- add %r13, %r10 -- # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling -- sub $16, %r10 -- # Determine if partial block is not being filled and -- # shift mask accordingly -- jge .L_no_extra_mask_2_\@ -- sub %r10, %r12 --.L_no_extra_mask_2_\@: -- -- vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 -- # get the appropriate mask to mask out bottom r13 bytes of xmm9 -- vpand %xmm1, %xmm9, %xmm9 -- -- vmovdqa SHUF_MASK(%rip), %xmm1 -- vpshufb %xmm1, %xmm9, %xmm9 -- vpshufb %xmm2, %xmm9, %xmm9 -- vpxor %xmm9, \AAD_HASH, \AAD_HASH -- -- test %r10, %r10 -- jl .L_partial_incomplete_2_\@ -- -- # GHASH computation for the last <16 Byte block -- \GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 -- xor %eax,%eax -- -- mov %rax, PBlockLen(arg2) -- jmp .L_encode_done_\@ --.L_partial_incomplete_2_\@: -- add \PLAIN_CYPH_LEN, PBlockLen(arg2) 
--.L_encode_done_\@: -- vmovdqu \AAD_HASH, AadHash(arg2) -- -- vmovdqa SHUF_MASK(%rip), %xmm10 -- # shuffle xmm9 back to output as ciphertext -- vpshufb %xmm10, %xmm9, %xmm9 -- vpshufb %xmm2, %xmm9, %xmm9 --.endif -- # output encrypted Bytes -- test %r10, %r10 -- jl .L_partial_fill_\@ -- mov %r13, %r12 -- mov $16, %r13 -- # Set r13 to be the number of bytes to write out -- sub %r12, %r13 -- jmp .L_count_set_\@ --.L_partial_fill_\@: -- mov \PLAIN_CYPH_LEN, %r13 --.L_count_set_\@: -- vmovdqa %xmm9, %xmm0 -- vmovq %xmm0, %rax -- cmp $8, %r13 -- jle .L_less_than_8_bytes_left_\@ -- -- mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) -- add $8, \DATA_OFFSET -- psrldq $8, %xmm0 -- vmovq %xmm0, %rax -- sub $8, %r13 --.L_less_than_8_bytes_left_\@: -- movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) -- add $1, \DATA_OFFSET -- shr $8, %rax -- sub $1, %r13 -- jne .L_less_than_8_bytes_left_\@ --.L_partial_block_done_\@: --.endm # PARTIAL_BLOCK -- --############################################################################### --# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) --# Input: A and B (128-bits each, bit-reflected) --# Output: C = A*B*x mod poly, (i.e. >>1 ) --# To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input --# GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. 
--############################################################################### --.macro GHASH_MUL_AVX GH HK T1 T2 T3 T4 T5 -- -- vpshufd $0b01001110, \GH, \T2 -- vpshufd $0b01001110, \HK, \T3 -- vpxor \GH , \T2, \T2 # T2 = (a1+a0) -- vpxor \HK , \T3, \T3 # T3 = (b1+b0) -- -- vpclmulqdq $0x11, \HK, \GH, \T1 # T1 = a1*b1 -- vpclmulqdq $0x00, \HK, \GH, \GH # GH = a0*b0 -- vpclmulqdq $0x00, \T3, \T2, \T2 # T2 = (a1+a0)*(b1+b0) -- vpxor \GH, \T2,\T2 -- vpxor \T1, \T2,\T2 # T2 = a0*b1+a1*b0 -- -- vpslldq $8, \T2,\T3 # shift-L T3 2 DWs -- vpsrldq $8, \T2,\T2 # shift-R T2 2 DWs -- vpxor \T3, \GH, \GH -- vpxor \T2, \T1, \T1 # = GH x HK -- -- #first phase of the reduction -- vpslld $31, \GH, \T2 # packed right shifting << 31 -- vpslld $30, \GH, \T3 # packed right shifting shift << 30 -- vpslld $25, \GH, \T4 # packed right shifting shift << 25 -- -- vpxor \T3, \T2, \T2 # xor the shifted versions -- vpxor \T4, \T2, \T2 -- -- vpsrldq $4, \T2, \T5 # shift-R T5 1 DW -- -- vpslldq $12, \T2, \T2 # shift-L T2 3 DWs -- vpxor \T2, \GH, \GH # first phase of the reduction complete -- -- #second phase of the reduction -- -- vpsrld $1,\GH, \T2 # packed left shifting >> 1 -- vpsrld $2,\GH, \T3 # packed left shifting >> 2 -- vpsrld $7,\GH, \T4 # packed left shifting >> 7 -- vpxor \T3, \T2, \T2 # xor the shifted versions -- vpxor \T4, \T2, \T2 -- -- vpxor \T5, \T2, \T2 -- vpxor \T2, \GH, \GH -- vpxor \T1, \GH, \GH # the result is in GH -- -- --.endm -- --.macro PRECOMPUTE_AVX HK T1 T2 T3 T4 T5 T6 -- -- # Haskey_i_k holds XORed values of the low and high parts of the Haskey_i -- vmovdqa \HK, \T5 -- -- vpshufd $0b01001110, \T5, \T1 -- vpxor \T5, \T1, \T1 -- vmovdqu \T1, HashKey_k(arg2) -- -- GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly -- vmovdqu \T5, HashKey_2(arg2) # [HashKey_2] = HashKey^2<<1 mod poly -- vpshufd $0b01001110, \T5, \T1 -- vpxor \T5, \T1, \T1 -- vmovdqu \T1, HashKey_2_k(arg2) -- -- GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = 
HashKey^3<<1 mod poly -- vmovdqu \T5, HashKey_3(arg2) -- vpshufd $0b01001110, \T5, \T1 -- vpxor \T5, \T1, \T1 -- vmovdqu \T1, HashKey_3_k(arg2) -- -- GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly -- vmovdqu \T5, HashKey_4(arg2) -- vpshufd $0b01001110, \T5, \T1 -- vpxor \T5, \T1, \T1 -- vmovdqu \T1, HashKey_4_k(arg2) -- -- GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly -- vmovdqu \T5, HashKey_5(arg2) -- vpshufd $0b01001110, \T5, \T1 -- vpxor \T5, \T1, \T1 -- vmovdqu \T1, HashKey_5_k(arg2) -- -- GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly -- vmovdqu \T5, HashKey_6(arg2) -- vpshufd $0b01001110, \T5, \T1 -- vpxor \T5, \T1, \T1 -- vmovdqu \T1, HashKey_6_k(arg2) -- -- GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly -- vmovdqu \T5, HashKey_7(arg2) -- vpshufd $0b01001110, \T5, \T1 -- vpxor \T5, \T1, \T1 -- vmovdqu \T1, HashKey_7_k(arg2) -- -- GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly -- vmovdqu \T5, HashKey_8(arg2) -- vpshufd $0b01001110, \T5, \T1 -- vpxor \T5, \T1, \T1 -- vmovdqu \T1, HashKey_8_k(arg2) -- --.endm -- --## if a = number of total plaintext bytes --## b = floor(a/16) --## num_initial_blocks = b mod 4# --## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext --## r10, r11, r12, rax are clobbered --## arg1, arg2, arg3, arg4 are used as pointers only, not modified -- --.macro INITIAL_BLOCKS_AVX REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC -- i = (8-\num_initial_blocks) -- setreg -- vmovdqu AadHash(arg2), reg_i -- -- # start AES for num_initial_blocks blocks -- vmovdqu CurCount(arg2), \CTR -- -- i = (9-\num_initial_blocks) -- setreg --.rep \num_initial_blocks -- vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 -- vmovdqa \CTR, reg_i -- vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap -- i = (i+1) -- setreg --.endr -- -- 
vmovdqa (arg1), \T_key -- i = (9-\num_initial_blocks) -- setreg --.rep \num_initial_blocks -- vpxor \T_key, reg_i, reg_i -- i = (i+1) -- setreg --.endr -- -- j = 1 -- setreg --.rep \REP -- vmovdqa 16*j(arg1), \T_key -- i = (9-\num_initial_blocks) -- setreg --.rep \num_initial_blocks -- vaesenc \T_key, reg_i, reg_i -- i = (i+1) -- setreg --.endr -- -- j = (j+1) -- setreg --.endr -- -- vmovdqa 16*j(arg1), \T_key -- i = (9-\num_initial_blocks) -- setreg --.rep \num_initial_blocks -- vaesenclast \T_key, reg_i, reg_i -- i = (i+1) -- setreg --.endr -- -- i = (9-\num_initial_blocks) -- setreg --.rep \num_initial_blocks -- vmovdqu (arg4, %r11), \T1 -- vpxor \T1, reg_i, reg_i -- vmovdqu reg_i, (arg3 , %r11) # write back ciphertext for num_initial_blocks blocks -- add $16, %r11 --.if \ENC_DEC == DEC -- vmovdqa \T1, reg_i --.endif -- vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations -- i = (i+1) -- setreg --.endr -- -- -- i = (8-\num_initial_blocks) -- j = (9-\num_initial_blocks) -- setreg -- --.rep \num_initial_blocks -- vpxor reg_i, reg_j, reg_j -- GHASH_MUL_AVX reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks -- i = (i+1) -- j = (j+1) -- setreg --.endr -- # XMM8 has the combined result here -- -- vmovdqa \XMM8, TMP1(%rsp) -- vmovdqa \XMM8, \T3 -- -- cmp $128, %r13 -- jl .L_initial_blocks_done\@ # no need for precomputed constants -- --############################################################################### --# Haskey_i_k holds XORed values of the low and high parts of the Haskey_i -- vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 -- vmovdqa \CTR, \XMM1 -- vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap -- -- vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 -- vmovdqa \CTR, \XMM2 -- vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap -- -- vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 -- vmovdqa \CTR, \XMM3 -- vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap -- -- vpaddd ONE(%rip), \CTR, \CTR # 
INCR Y0 -- vmovdqa \CTR, \XMM4 -- vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap -- -- vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 -- vmovdqa \CTR, \XMM5 -- vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap -- -- vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 -- vmovdqa \CTR, \XMM6 -- vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap -- -- vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 -- vmovdqa \CTR, \XMM7 -- vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap -- -- vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 -- vmovdqa \CTR, \XMM8 -- vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap -- -- vmovdqa (arg1), \T_key -- vpxor \T_key, \XMM1, \XMM1 -- vpxor \T_key, \XMM2, \XMM2 -- vpxor \T_key, \XMM3, \XMM3 -- vpxor \T_key, \XMM4, \XMM4 -- vpxor \T_key, \XMM5, \XMM5 -- vpxor \T_key, \XMM6, \XMM6 -- vpxor \T_key, \XMM7, \XMM7 -- vpxor \T_key, \XMM8, \XMM8 -- -- i = 1 -- setreg --.rep \REP # do REP rounds -- vmovdqa 16*i(arg1), \T_key -- vaesenc \T_key, \XMM1, \XMM1 -- vaesenc \T_key, \XMM2, \XMM2 -- vaesenc \T_key, \XMM3, \XMM3 -- vaesenc \T_key, \XMM4, \XMM4 -- vaesenc \T_key, \XMM5, \XMM5 -- vaesenc \T_key, \XMM6, \XMM6 -- vaesenc \T_key, \XMM7, \XMM7 -- vaesenc \T_key, \XMM8, \XMM8 -- i = (i+1) -- setreg --.endr -- -- vmovdqa 16*i(arg1), \T_key -- vaesenclast \T_key, \XMM1, \XMM1 -- vaesenclast \T_key, \XMM2, \XMM2 -- vaesenclast \T_key, \XMM3, \XMM3 -- vaesenclast \T_key, \XMM4, \XMM4 -- vaesenclast \T_key, \XMM5, \XMM5 -- vaesenclast \T_key, \XMM6, \XMM6 -- vaesenclast \T_key, \XMM7, \XMM7 -- vaesenclast \T_key, \XMM8, \XMM8 -- -- vmovdqu (arg4, %r11), \T1 -- vpxor \T1, \XMM1, \XMM1 -- vmovdqu \XMM1, (arg3 , %r11) -- .if \ENC_DEC == DEC -- vmovdqa \T1, \XMM1 -- .endif -- -- vmovdqu 16*1(arg4, %r11), \T1 -- vpxor \T1, \XMM2, \XMM2 -- vmovdqu \XMM2, 16*1(arg3 , %r11) -- .if \ENC_DEC == DEC -- vmovdqa \T1, \XMM2 -- .endif -- -- vmovdqu 16*2(arg4, %r11), \T1 -- vpxor \T1, \XMM3, \XMM3 -- vmovdqu \XMM3, 16*2(arg3 , %r11) -- .if \ENC_DEC 
== DEC -- vmovdqa \T1, \XMM3 -- .endif -- -- vmovdqu 16*3(arg4, %r11), \T1 -- vpxor \T1, \XMM4, \XMM4 -- vmovdqu \XMM4, 16*3(arg3 , %r11) -- .if \ENC_DEC == DEC -- vmovdqa \T1, \XMM4 -- .endif -- -- vmovdqu 16*4(arg4, %r11), \T1 -- vpxor \T1, \XMM5, \XMM5 -- vmovdqu \XMM5, 16*4(arg3 , %r11) -- .if \ENC_DEC == DEC -- vmovdqa \T1, \XMM5 -- .endif -- -- vmovdqu 16*5(arg4, %r11), \T1 -- vpxor \T1, \XMM6, \XMM6 -- vmovdqu \XMM6, 16*5(arg3 , %r11) -- .if \ENC_DEC == DEC -- vmovdqa \T1, \XMM6 -- .endif -- -- vmovdqu 16*6(arg4, %r11), \T1 -- vpxor \T1, \XMM7, \XMM7 -- vmovdqu \XMM7, 16*6(arg3 , %r11) -- .if \ENC_DEC == DEC -- vmovdqa \T1, \XMM7 -- .endif -- -- vmovdqu 16*7(arg4, %r11), \T1 -- vpxor \T1, \XMM8, \XMM8 -- vmovdqu \XMM8, 16*7(arg3 , %r11) -- .if \ENC_DEC == DEC -- vmovdqa \T1, \XMM8 -- .endif -- -- add $128, %r11 -- -- vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap -- vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with the corresponding ciphertext -- vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap -- --############################################################################### -- --.L_initial_blocks_done\@: -- --.endm -- --# encrypt 8 blocks at a time --# ghash the 8 previously encrypted ciphertext blocks --# arg1, arg2, arg3, arg4 are used as pointers only, not modified --# r11 is the data offset value --.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC -- -- vmovdqa \XMM1, \T2 -- vmovdqa \XMM2, TMP2(%rsp) -- vmovdqa \XMM3, TMP3(%rsp) -- vmovdqa \XMM4, TMP4(%rsp) 
-- vmovdqa \XMM5, TMP5(%rsp) -- vmovdqa \XMM6, TMP6(%rsp) -- vmovdqa \XMM7, TMP7(%rsp) -- vmovdqa \XMM8, TMP8(%rsp) -- --.if \loop_idx == in_order -- vpaddd ONE(%rip), \CTR, \XMM1 # INCR CNT -- vpaddd ONE(%rip), \XMM1, \XMM2 -- vpaddd ONE(%rip), \XMM2, \XMM3 -- vpaddd ONE(%rip), \XMM3, \XMM4 -- vpaddd ONE(%rip), \XMM4, \XMM5 -- vpaddd ONE(%rip), \XMM5, \XMM6 -- vpaddd ONE(%rip), \XMM6, \XMM7 -- vpaddd ONE(%rip), \XMM7, \XMM8 -- vmovdqa \XMM8, \CTR -- -- vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap --.else -- vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT -- vpaddd ONEf(%rip), \XMM1, \XMM2 -- vpaddd ONEf(%rip), \XMM2, \XMM3 -- vpaddd ONEf(%rip), \XMM3, \XMM4 -- vpaddd ONEf(%rip), \XMM4, \XMM5 -- vpaddd ONEf(%rip), \XMM5, \XMM6 -- vpaddd ONEf(%rip), \XMM6, \XMM7 -- vpaddd ONEf(%rip), \XMM7, \XMM8 -- vmovdqa \XMM8, \CTR --.endif -- -- -- ####################################################################### -- -- vmovdqu (arg1), \T1 -- vpxor \T1, \XMM1, \XMM1 -- vpxor \T1, \XMM2, \XMM2 -- vpxor \T1, \XMM3, \XMM3 -- vpxor \T1, \XMM4, \XMM4 -- vpxor \T1, \XMM5, \XMM5 -- vpxor \T1, \XMM6, \XMM6 -- vpxor \T1, \XMM7, \XMM7 -- vpxor \T1, \XMM8, \XMM8 -- -- ####################################################################### -- -- -- -- -- -- vmovdqu 16*1(arg1), \T1 -- vaesenc \T1, \XMM1, \XMM1 -- vaesenc \T1, \XMM2, \XMM2 -- vaesenc \T1, \XMM3, \XMM3 -- vaesenc \T1, \XMM4, \XMM4 -- vaesenc \T1, \XMM5, \XMM5 -- vaesenc \T1, \XMM6, \XMM6 -- vaesenc \T1, \XMM7, \XMM7 -- vaesenc \T1, \XMM8, \XMM8 -- -- vmovdqu 
16*2(arg1), \T1 -- vaesenc \T1, \XMM1, \XMM1 -- vaesenc \T1, \XMM2, \XMM2 -- vaesenc \T1, \XMM3, \XMM3 -- vaesenc \T1, \XMM4, \XMM4 -- vaesenc \T1, \XMM5, \XMM5 -- vaesenc \T1, \XMM6, \XMM6 -- vaesenc \T1, \XMM7, \XMM7 -- vaesenc \T1, \XMM8, \XMM8 -- -- -- ####################################################################### -- -- vmovdqu HashKey_8(arg2), \T5 -- vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1 -- vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0 -- -- vpshufd $0b01001110, \T2, \T6 -- vpxor \T2, \T6, \T6 -- -- vmovdqu HashKey_8_k(arg2), \T5 -- vpclmulqdq $0x00, \T5, \T6, \T6 -- -- vmovdqu 16*3(arg1), \T1 -- vaesenc \T1, \XMM1, \XMM1 -- vaesenc \T1, \XMM2, \XMM2 -- vaesenc \T1, \XMM3, \XMM3 -- vaesenc \T1, \XMM4, \XMM4 -- vaesenc \T1, \XMM5, \XMM5 -- vaesenc \T1, \XMM6, \XMM6 -- vaesenc \T1, \XMM7, \XMM7 -- vaesenc \T1, \XMM8, \XMM8 -- -- vmovdqa TMP2(%rsp), \T1 -- vmovdqu HashKey_7(arg2), \T5 -- vpclmulqdq $0x11, \T5, \T1, \T3 -- vpxor \T3, \T4, \T4 -- vpclmulqdq $0x00, \T5, \T1, \T3 -- vpxor \T3, \T7, \T7 -- -- vpshufd $0b01001110, \T1, \T3 -- vpxor \T1, \T3, \T3 -- vmovdqu HashKey_7_k(arg2), \T5 -- vpclmulqdq $0x10, \T5, \T3, \T3 -- vpxor \T3, \T6, \T6 -- -- vmovdqu 16*4(arg1), \T1 -- vaesenc \T1, \XMM1, \XMM1 -- vaesenc \T1, \XMM2, \XMM2 -- vaesenc \T1, \XMM3, \XMM3 -- vaesenc \T1, \XMM4, \XMM4 -- vaesenc \T1, \XMM5, \XMM5 -- vaesenc \T1, \XMM6, \XMM6 -- vaesenc \T1, \XMM7, \XMM7 -- vaesenc \T1, \XMM8, \XMM8 -- -- ####################################################################### -- -- vmovdqa TMP3(%rsp), \T1 -- vmovdqu HashKey_6(arg2), \T5 -- vpclmulqdq $0x11, \T5, \T1, \T3 -- vpxor \T3, \T4, \T4 -- vpclmulqdq $0x00, \T5, \T1, \T3 -- vpxor \T3, \T7, \T7 -- -- vpshufd $0b01001110, \T1, \T3 -- vpxor \T1, \T3, \T3 -- vmovdqu HashKey_6_k(arg2), \T5 -- vpclmulqdq $0x10, \T5, \T3, \T3 -- vpxor \T3, \T6, \T6 -- -- vmovdqu 16*5(arg1), \T1 -- vaesenc \T1, \XMM1, \XMM1 -- vaesenc \T1, \XMM2, \XMM2 -- vaesenc \T1, \XMM3, \XMM3 -- vaesenc \T1, \XMM4, \XMM4 -- 
vaesenc \T1, \XMM5, \XMM5 -- vaesenc \T1, \XMM6, \XMM6 -- vaesenc \T1, \XMM7, \XMM7 -- vaesenc \T1, \XMM8, \XMM8 -- -- vmovdqa TMP4(%rsp), \T1 -- vmovdqu HashKey_5(arg2), \T5 -- vpclmulqdq $0x11, \T5, \T1, \T3 -- vpxor \T3, \T4, \T4 -- vpclmulqdq $0x00, \T5, \T1, \T3 -- vpxor \T3, \T7, \T7 -- -- vpshufd $0b01001110, \T1, \T3 -- vpxor \T1, \T3, \T3 -- vmovdqu HashKey_5_k(arg2), \T5 -- vpclmulqdq $0x10, \T5, \T3, \T3 -- vpxor \T3, \T6, \T6 -- -- vmovdqu 16*6(arg1), \T1 -- vaesenc \T1, \XMM1, \XMM1 -- vaesenc \T1, \XMM2, \XMM2 -- vaesenc \T1, \XMM3, \XMM3 -- vaesenc \T1, \XMM4, \XMM4 -- vaesenc \T1, \XMM5, \XMM5 -- vaesenc \T1, \XMM6, \XMM6 -- vaesenc \T1, \XMM7, \XMM7 -- vaesenc \T1, \XMM8, \XMM8 -- -- -- vmovdqa TMP5(%rsp), \T1 -- vmovdqu HashKey_4(arg2), \T5 -- vpclmulqdq $0x11, \T5, \T1, \T3 -- vpxor \T3, \T4, \T4 -- vpclmulqdq $0x00, \T5, \T1, \T3 -- vpxor \T3, \T7, \T7 -- -- vpshufd $0b01001110, \T1, \T3 -- vpxor \T1, \T3, \T3 -- vmovdqu HashKey_4_k(arg2), \T5 -- vpclmulqdq $0x10, \T5, \T3, \T3 -- vpxor \T3, \T6, \T6 -- -- vmovdqu 16*7(arg1), \T1 -- vaesenc \T1, \XMM1, \XMM1 -- vaesenc \T1, \XMM2, \XMM2 -- vaesenc \T1, \XMM3, \XMM3 -- vaesenc \T1, \XMM4, \XMM4 -- vaesenc \T1, \XMM5, \XMM5 -- vaesenc \T1, \XMM6, \XMM6 -- vaesenc \T1, \XMM7, \XMM7 -- vaesenc \T1, \XMM8, \XMM8 -- -- vmovdqa TMP6(%rsp), \T1 -- vmovdqu HashKey_3(arg2), \T5 -- vpclmulqdq $0x11, \T5, \T1, \T3 -- vpxor \T3, \T4, \T4 -- vpclmulqdq $0x00, \T5, \T1, \T3 -- vpxor \T3, \T7, \T7 -- -- vpshufd $0b01001110, \T1, \T3 -- vpxor \T1, \T3, \T3 -- vmovdqu HashKey_3_k(arg2), \T5 -- vpclmulqdq $0x10, \T5, \T3, \T3 -- vpxor \T3, \T6, \T6 -- -- -- vmovdqu 16*8(arg1), \T1 -- vaesenc \T1, \XMM1, \XMM1 -- vaesenc \T1, \XMM2, \XMM2 -- vaesenc \T1, \XMM3, \XMM3 -- vaesenc \T1, \XMM4, \XMM4 -- vaesenc \T1, \XMM5, \XMM5 -- vaesenc \T1, \XMM6, \XMM6 -- vaesenc \T1, \XMM7, \XMM7 -- vaesenc \T1, \XMM8, \XMM8 -- -- vmovdqa TMP7(%rsp), \T1 -- vmovdqu HashKey_2(arg2), \T5 -- vpclmulqdq $0x11, \T5, \T1, \T3 -- vpxor 
\T3, \T4, \T4 -- vpclmulqdq $0x00, \T5, \T1, \T3 -- vpxor \T3, \T7, \T7 -- -- vpshufd $0b01001110, \T1, \T3 -- vpxor \T1, \T3, \T3 -- vmovdqu HashKey_2_k(arg2), \T5 -- vpclmulqdq $0x10, \T5, \T3, \T3 -- vpxor \T3, \T6, \T6 -- -- ####################################################################### -- -- vmovdqu 16*9(arg1), \T5 -- vaesenc \T5, \XMM1, \XMM1 -- vaesenc \T5, \XMM2, \XMM2 -- vaesenc \T5, \XMM3, \XMM3 -- vaesenc \T5, \XMM4, \XMM4 -- vaesenc \T5, \XMM5, \XMM5 -- vaesenc \T5, \XMM6, \XMM6 -- vaesenc \T5, \XMM7, \XMM7 -- vaesenc \T5, \XMM8, \XMM8 -- -- vmovdqa TMP8(%rsp), \T1 -- vmovdqu HashKey(arg2), \T5 -- vpclmulqdq $0x11, \T5, \T1, \T3 -- vpxor \T3, \T4, \T4 -- vpclmulqdq $0x00, \T5, \T1, \T3 -- vpxor \T3, \T7, \T7 -- -- vpshufd $0b01001110, \T1, \T3 -- vpxor \T1, \T3, \T3 -- vmovdqu HashKey_k(arg2), \T5 -- vpclmulqdq $0x10, \T5, \T3, \T3 -- vpxor \T3, \T6, \T6 -- -- vpxor \T4, \T6, \T6 -- vpxor \T7, \T6, \T6 -- -- vmovdqu 16*10(arg1), \T5 -- -- i = 11 -- setreg --.rep (\REP-9) -- -- vaesenc \T5, \XMM1, \XMM1 -- vaesenc \T5, \XMM2, \XMM2 -- vaesenc \T5, \XMM3, \XMM3 -- vaesenc \T5, \XMM4, \XMM4 -- vaesenc \T5, \XMM5, \XMM5 -- vaesenc \T5, \XMM6, \XMM6 -- vaesenc \T5, \XMM7, \XMM7 -- vaesenc \T5, \XMM8, \XMM8 -- -- vmovdqu 16*i(arg1), \T5 -- i = i + 1 -- setreg --.endr -- -- i = 0 -- j = 1 -- setreg --.rep 8 -- vpxor 16*i(arg4, %r11), \T5, \T2 -- .if \ENC_DEC == ENC -- vaesenclast \T2, reg_j, reg_j -- .else -- vaesenclast \T2, reg_j, \T3 -- vmovdqu 16*i(arg4, %r11), reg_j -- vmovdqu \T3, 16*i(arg3, %r11) -- .endif -- i = (i+1) -- j = (j+1) -- setreg --.endr -- ####################################################################### -- -- -- vpslldq $8, \T6, \T3 # shift-L T3 2 DWs -- vpsrldq $8, \T6, \T6 # shift-R T2 2 DWs -- vpxor \T3, \T7, \T7 -- vpxor \T4, \T6, \T6 # accumulate the results in T6:T7 -- -- -- -- ####################################################################### -- #first phase of the reduction -- 
####################################################################### -- vpslld $31, \T7, \T2 # packed right shifting << 31 -- vpslld $30, \T7, \T3 # packed right shifting shift << 30 -- vpslld $25, \T7, \T4 # packed right shifting shift << 25 -- -- vpxor \T3, \T2, \T2 # xor the shifted versions -- vpxor \T4, \T2, \T2 -- -- vpsrldq $4, \T2, \T1 # shift-R T1 1 DW -- -- vpslldq $12, \T2, \T2 # shift-L T2 3 DWs -- vpxor \T2, \T7, \T7 # first phase of the reduction complete -- ####################################################################### -- .if \ENC_DEC == ENC -- vmovdqu \XMM1, 16*0(arg3,%r11) # Write to the Ciphertext buffer -- vmovdqu \XMM2, 16*1(arg3,%r11) # Write to the Ciphertext buffer -- vmovdqu \XMM3, 16*2(arg3,%r11) # Write to the Ciphertext buffer -- vmovdqu \XMM4, 16*3(arg3,%r11) # Write to the Ciphertext buffer -- vmovdqu \XMM5, 16*4(arg3,%r11) # Write to the Ciphertext buffer -- vmovdqu \XMM6, 16*5(arg3,%r11) # Write to the Ciphertext buffer -- vmovdqu \XMM7, 16*6(arg3,%r11) # Write to the Ciphertext buffer -- vmovdqu \XMM8, 16*7(arg3,%r11) # Write to the Ciphertext buffer -- .endif -- -- ####################################################################### -- #second phase of the reduction -- vpsrld $1, \T7, \T2 # packed left shifting >> 1 -- vpsrld $2, \T7, \T3 # packed left shifting >> 2 -- vpsrld $7, \T7, \T4 # packed left shifting >> 7 -- vpxor \T3, \T2, \T2 # xor the shifted versions -- vpxor \T4, \T2, \T2 -- -- vpxor \T1, \T2, \T2 -- vpxor \T2, \T7, \T7 -- vpxor \T7, \T6, \T6 # the result is in T6 -- ####################################################################### -- -- vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # 
perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap -- -- -- vpxor \T6, \XMM1, \XMM1 -- -- -- --.endm -- -- --# GHASH the last 4 ciphertext blocks. --.macro GHASH_LAST_8_AVX T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 -- -- ## Karatsuba Method -- -- -- vpshufd $0b01001110, \XMM1, \T2 -- vpxor \XMM1, \T2, \T2 -- vmovdqu HashKey_8(arg2), \T5 -- vpclmulqdq $0x11, \T5, \XMM1, \T6 -- vpclmulqdq $0x00, \T5, \XMM1, \T7 -- -- vmovdqu HashKey_8_k(arg2), \T3 -- vpclmulqdq $0x00, \T3, \T2, \XMM1 -- -- ###################### -- -- vpshufd $0b01001110, \XMM2, \T2 -- vpxor \XMM2, \T2, \T2 -- vmovdqu HashKey_7(arg2), \T5 -- vpclmulqdq $0x11, \T5, \XMM2, \T4 -- vpxor \T4, \T6, \T6 -- -- vpclmulqdq $0x00, \T5, \XMM2, \T4 -- vpxor \T4, \T7, \T7 -- -- vmovdqu HashKey_7_k(arg2), \T3 -- vpclmulqdq $0x00, \T3, \T2, \T2 -- vpxor \T2, \XMM1, \XMM1 -- -- ###################### -- -- vpshufd $0b01001110, \XMM3, \T2 -- vpxor \XMM3, \T2, \T2 -- vmovdqu HashKey_6(arg2), \T5 -- vpclmulqdq $0x11, \T5, \XMM3, \T4 -- vpxor \T4, \T6, \T6 -- -- vpclmulqdq $0x00, \T5, \XMM3, \T4 -- vpxor \T4, \T7, \T7 -- -- vmovdqu HashKey_6_k(arg2), \T3 -- vpclmulqdq $0x00, \T3, \T2, \T2 -- vpxor \T2, \XMM1, \XMM1 -- -- ###################### -- -- vpshufd $0b01001110, \XMM4, \T2 -- vpxor \XMM4, \T2, \T2 -- vmovdqu HashKey_5(arg2), \T5 -- vpclmulqdq $0x11, \T5, \XMM4, \T4 -- vpxor \T4, \T6, \T6 -- -- vpclmulqdq $0x00, \T5, \XMM4, \T4 -- vpxor \T4, \T7, \T7 -- -- vmovdqu HashKey_5_k(arg2), \T3 -- vpclmulqdq $0x00, \T3, \T2, \T2 -- vpxor \T2, \XMM1, \XMM1 -- -- ###################### -- -- vpshufd $0b01001110, \XMM5, \T2 -- vpxor \XMM5, \T2, \T2 -- vmovdqu HashKey_4(arg2), \T5 -- vpclmulqdq $0x11, \T5, \XMM5, \T4 -- vpxor \T4, \T6, \T6 -- -- vpclmulqdq $0x00, \T5, \XMM5, \T4 -- vpxor \T4, \T7, \T7 -- -- vmovdqu HashKey_4_k(arg2), \T3 -- vpclmulqdq $0x00, \T3, \T2, \T2 -- vpxor \T2, \XMM1, \XMM1 -- -- 
###################### -- -- vpshufd $0b01001110, \XMM6, \T2 -- vpxor \XMM6, \T2, \T2 -- vmovdqu HashKey_3(arg2), \T5 -- vpclmulqdq $0x11, \T5, \XMM6, \T4 -- vpxor \T4, \T6, \T6 -- -- vpclmulqdq $0x00, \T5, \XMM6, \T4 -- vpxor \T4, \T7, \T7 -- -- vmovdqu HashKey_3_k(arg2), \T3 -- vpclmulqdq $0x00, \T3, \T2, \T2 -- vpxor \T2, \XMM1, \XMM1 -- -- ###################### -- -- vpshufd $0b01001110, \XMM7, \T2 -- vpxor \XMM7, \T2, \T2 -- vmovdqu HashKey_2(arg2), \T5 -- vpclmulqdq $0x11, \T5, \XMM7, \T4 -- vpxor \T4, \T6, \T6 -- -- vpclmulqdq $0x00, \T5, \XMM7, \T4 -- vpxor \T4, \T7, \T7 -- -- vmovdqu HashKey_2_k(arg2), \T3 -- vpclmulqdq $0x00, \T3, \T2, \T2 -- vpxor \T2, \XMM1, \XMM1 -- -- ###################### -- -- vpshufd $0b01001110, \XMM8, \T2 -- vpxor \XMM8, \T2, \T2 -- vmovdqu HashKey(arg2), \T5 -- vpclmulqdq $0x11, \T5, \XMM8, \T4 -- vpxor \T4, \T6, \T6 -- -- vpclmulqdq $0x00, \T5, \XMM8, \T4 -- vpxor \T4, \T7, \T7 -- -- vmovdqu HashKey_k(arg2), \T3 -- vpclmulqdq $0x00, \T3, \T2, \T2 -- -- vpxor \T2, \XMM1, \XMM1 -- vpxor \T6, \XMM1, \XMM1 -- vpxor \T7, \XMM1, \T2 -- -- -- -- -- vpslldq $8, \T2, \T4 -- vpsrldq $8, \T2, \T2 -- -- vpxor \T4, \T7, \T7 -- vpxor \T2, \T6, \T6 # holds the result of -- # the accumulated carry-less multiplications -- -- ####################################################################### -- #first phase of the reduction -- vpslld $31, \T7, \T2 # packed right shifting << 31 -- vpslld $30, \T7, \T3 # packed right shifting shift << 30 -- vpslld $25, \T7, \T4 # packed right shifting shift << 25 -- -- vpxor \T3, \T2, \T2 # xor the shifted versions -- vpxor \T4, \T2, \T2 -- -- vpsrldq $4, \T2, \T1 # shift-R T1 1 DW -- -- vpslldq $12, \T2, \T2 # shift-L T2 3 DWs -- vpxor \T2, \T7, \T7 # first phase of the reduction complete -- ####################################################################### -- -- -- #second phase of the reduction -- vpsrld $1, \T7, \T2 # packed left shifting >> 1 -- vpsrld $2, \T7, \T3 # packed left shifting >> 2 -- 
vpsrld $7, \T7, \T4 # packed left shifting >> 7 -- vpxor \T3, \T2, \T2 # xor the shifted versions -- vpxor \T4, \T2, \T2 -- -- vpxor \T1, \T2, \T2 -- vpxor \T2, \T7, \T7 -- vpxor \T7, \T6, \T6 # the result is in T6 -- --.endm -- --############################################################# --#void aesni_gcm_precomp_avx_gen2 --# (gcm_data *my_ctx_data, --# gcm_context_data *data, --# u8 *hash_subkey# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */ --# u8 *iv, /* Pre-counter block j0: 4 byte salt --# (from Security Association) concatenated with 8 byte --# Initialisation Vector (from IPSec ESP Payload) --# concatenated with 0x00000001. 16-byte aligned pointer. */ --# const u8 *aad, /* Additional Authentication Data (AAD)*/ --# u64 aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ --############################################################# --SYM_FUNC_START(aesni_gcm_init_avx_gen2) -- FUNC_SAVE -- INIT GHASH_MUL_AVX, PRECOMPUTE_AVX -- FUNC_RESTORE -- RET --SYM_FUNC_END(aesni_gcm_init_avx_gen2) -- --############################################################################### --#void aesni_gcm_enc_update_avx_gen2( --# gcm_data *my_ctx_data, /* aligned to 16 Bytes */ --# gcm_context_data *data, --# u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */ --# const u8 *in, /* Plaintext input */ --# u64 plaintext_len) /* Length of data in Bytes for encryption. 
*/ --############################################################################### --SYM_FUNC_START(aesni_gcm_enc_update_avx_gen2) -- FUNC_SAVE -- mov keysize, %eax -- cmp $32, %eax -- je key_256_enc_update -- cmp $16, %eax -- je key_128_enc_update -- # must be 192 -- GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 11 -- FUNC_RESTORE -- RET --key_128_enc_update: -- GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 9 -- FUNC_RESTORE -- RET --key_256_enc_update: -- GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13 -- FUNC_RESTORE -- RET --SYM_FUNC_END(aesni_gcm_enc_update_avx_gen2) -- --############################################################################### --#void aesni_gcm_dec_update_avx_gen2( --# gcm_data *my_ctx_data, /* aligned to 16 Bytes */ --# gcm_context_data *data, --# u8 *out, /* Plaintext output. Decrypt in-place is allowed. */ --# const u8 *in, /* Ciphertext input */ --# u64 plaintext_len) /* Length of data in Bytes for encryption. 
*/ --############################################################################### --SYM_FUNC_START(aesni_gcm_dec_update_avx_gen2) -- FUNC_SAVE -- mov keysize,%eax -- cmp $32, %eax -- je key_256_dec_update -- cmp $16, %eax -- je key_128_dec_update -- # must be 192 -- GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 11 -- FUNC_RESTORE -- RET --key_128_dec_update: -- GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 9 -- FUNC_RESTORE -- RET --key_256_dec_update: -- GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13 -- FUNC_RESTORE -- RET --SYM_FUNC_END(aesni_gcm_dec_update_avx_gen2) -- --############################################################################### --#void aesni_gcm_finalize_avx_gen2( --# gcm_data *my_ctx_data, /* aligned to 16 Bytes */ --# gcm_context_data *data, --# u8 *auth_tag, /* Authenticated Tag output. */ --# u64 auth_tag_len)# /* Authenticated Tag Length in bytes. --# Valid values are 16 (most likely), 12 or 8. */ --############################################################################### --SYM_FUNC_START(aesni_gcm_finalize_avx_gen2) -- FUNC_SAVE -- mov keysize,%eax -- cmp $32, %eax -- je key_256_finalize -- cmp $16, %eax -- je key_128_finalize -- # must be 192 -- GCM_COMPLETE GHASH_MUL_AVX, 11, arg3, arg4 -- FUNC_RESTORE -- RET --key_128_finalize: -- GCM_COMPLETE GHASH_MUL_AVX, 9, arg3, arg4 -- FUNC_RESTORE -- RET --key_256_finalize: -- GCM_COMPLETE GHASH_MUL_AVX, 13, arg3, arg4 -- FUNC_RESTORE -- RET --SYM_FUNC_END(aesni_gcm_finalize_avx_gen2) -- --############################################################################### --# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) --# Input: A and B (128-bits each, bit-reflected) --# Output: C = A*B*x mod poly, (i.e. 
>>1 ) --# To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input --# GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. --############################################################################### --.macro GHASH_MUL_AVX2 GH HK T1 T2 T3 T4 T5 -- -- vpclmulqdq $0x11,\HK,\GH,\T1 # T1 = a1*b1 -- vpclmulqdq $0x00,\HK,\GH,\T2 # T2 = a0*b0 -- vpclmulqdq $0x01,\HK,\GH,\T3 # T3 = a1*b0 -- vpclmulqdq $0x10,\HK,\GH,\GH # GH = a0*b1 -- vpxor \T3, \GH, \GH -- -- -- vpsrldq $8 , \GH, \T3 # shift-R GH 2 DWs -- vpslldq $8 , \GH, \GH # shift-L GH 2 DWs -- -- vpxor \T3, \T1, \T1 -- vpxor \T2, \GH, \GH -- -- ####################################################################### -- #first phase of the reduction -- vmovdqa POLY2(%rip), \T3 -- -- vpclmulqdq $0x01, \GH, \T3, \T2 -- vpslldq $8, \T2, \T2 # shift-L T2 2 DWs -- -- vpxor \T2, \GH, \GH # first phase of the reduction complete -- ####################################################################### -- #second phase of the reduction -- vpclmulqdq $0x00, \GH, \T3, \T2 -- vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) -- -- vpclmulqdq $0x10, \GH, \T3, \GH -- vpslldq $4, \GH, \GH # shift-L GH 1 DW (Shift-L 1-DW to obtain result with no shifts) -- -- vpxor \T2, \GH, \GH # second phase of the reduction complete -- ####################################################################### -- vpxor \T1, \GH, \GH # the result is in GH -- -- --.endm -- --.macro PRECOMPUTE_AVX2 HK T1 T2 T3 T4 T5 T6 -- -- # Haskey_i_k holds XORed values of the low and high parts of the Haskey_i -- vmovdqa \HK, \T5 -- GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly -- vmovdqu \T5, HashKey_2(arg2) # [HashKey_2] = HashKey^2<<1 mod poly -- -- GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly -- vmovdqu \T5, HashKey_3(arg2) -- -- GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly -- vmovdqu \T5, 
HashKey_4(arg2) -- -- GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly -- vmovdqu \T5, HashKey_5(arg2) -- -- GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly -- vmovdqu \T5, HashKey_6(arg2) -- -- GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly -- vmovdqu \T5, HashKey_7(arg2) -- -- GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly -- vmovdqu \T5, HashKey_8(arg2) -- --.endm -- --## if a = number of total plaintext bytes --## b = floor(a/16) --## num_initial_blocks = b mod 4# --## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext --## r10, r11, r12, rax are clobbered --## arg1, arg2, arg3, arg4 are used as pointers only, not modified -- --.macro INITIAL_BLOCKS_AVX2 REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER -- i = (8-\num_initial_blocks) -- setreg -- vmovdqu AadHash(arg2), reg_i -- -- # start AES for num_initial_blocks blocks -- vmovdqu CurCount(arg2), \CTR -- -- i = (9-\num_initial_blocks) -- setreg --.rep \num_initial_blocks -- vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 -- vmovdqa \CTR, reg_i -- vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap -- i = (i+1) -- setreg --.endr -- -- vmovdqa (arg1), \T_key -- i = (9-\num_initial_blocks) -- setreg --.rep \num_initial_blocks -- vpxor \T_key, reg_i, reg_i -- i = (i+1) -- setreg --.endr -- -- j = 1 -- setreg --.rep \REP -- vmovdqa 16*j(arg1), \T_key -- i = (9-\num_initial_blocks) -- setreg --.rep \num_initial_blocks -- vaesenc \T_key, reg_i, reg_i -- i = (i+1) -- setreg --.endr -- -- j = (j+1) -- setreg --.endr -- -- -- vmovdqa 16*j(arg1), \T_key -- i = (9-\num_initial_blocks) -- setreg --.rep \num_initial_blocks -- vaesenclast \T_key, reg_i, reg_i -- i = (i+1) -- setreg --.endr -- -- i = (9-\num_initial_blocks) -- setreg --.rep \num_initial_blocks -- vmovdqu (arg4, %r11), \T1 -- vpxor \T1, reg_i, reg_i -- 
vmovdqu reg_i, (arg3 , %r11) # write back ciphertext for -- # num_initial_blocks blocks -- add $16, %r11 --.if \ENC_DEC == DEC -- vmovdqa \T1, reg_i --.endif -- vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations -- i = (i+1) -- setreg --.endr -- -- -- i = (8-\num_initial_blocks) -- j = (9-\num_initial_blocks) -- setreg -- --.rep \num_initial_blocks -- vpxor reg_i, reg_j, reg_j -- GHASH_MUL_AVX2 reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks -- i = (i+1) -- j = (j+1) -- setreg --.endr -- # XMM8 has the combined result here -- -- vmovdqa \XMM8, TMP1(%rsp) -- vmovdqa \XMM8, \T3 -- -- cmp $128, %r13 -- jl .L_initial_blocks_done\@ # no need for precomputed constants -- --############################################################################### --# Haskey_i_k holds XORed values of the low and high parts of the Haskey_i -- vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 -- vmovdqa \CTR, \XMM1 -- vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap -- -- vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 -- vmovdqa \CTR, \XMM2 -- vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap -- -- vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 -- vmovdqa \CTR, \XMM3 -- vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap -- -- vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 -- vmovdqa \CTR, \XMM4 -- vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap -- -- vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 -- vmovdqa \CTR, \XMM5 -- vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap -- -- vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 -- vmovdqa \CTR, \XMM6 -- vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap -- -- vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 -- vmovdqa \CTR, \XMM7 -- vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap -- -- vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 -- vmovdqa \CTR, \XMM8 -- vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap -- -- vmovdqa (arg1), \T_key -- vpxor 
\T_key, \XMM1, \XMM1 -- vpxor \T_key, \XMM2, \XMM2 -- vpxor \T_key, \XMM3, \XMM3 -- vpxor \T_key, \XMM4, \XMM4 -- vpxor \T_key, \XMM5, \XMM5 -- vpxor \T_key, \XMM6, \XMM6 -- vpxor \T_key, \XMM7, \XMM7 -- vpxor \T_key, \XMM8, \XMM8 -- -- i = 1 -- setreg --.rep \REP # do REP rounds -- vmovdqa 16*i(arg1), \T_key -- vaesenc \T_key, \XMM1, \XMM1 -- vaesenc \T_key, \XMM2, \XMM2 -- vaesenc \T_key, \XMM3, \XMM3 -- vaesenc \T_key, \XMM4, \XMM4 -- vaesenc \T_key, \XMM5, \XMM5 -- vaesenc \T_key, \XMM6, \XMM6 -- vaesenc \T_key, \XMM7, \XMM7 -- vaesenc \T_key, \XMM8, \XMM8 -- i = (i+1) -- setreg --.endr -- -- -- vmovdqa 16*i(arg1), \T_key -- vaesenclast \T_key, \XMM1, \XMM1 -- vaesenclast \T_key, \XMM2, \XMM2 -- vaesenclast \T_key, \XMM3, \XMM3 -- vaesenclast \T_key, \XMM4, \XMM4 -- vaesenclast \T_key, \XMM5, \XMM5 -- vaesenclast \T_key, \XMM6, \XMM6 -- vaesenclast \T_key, \XMM7, \XMM7 -- vaesenclast \T_key, \XMM8, \XMM8 -- -- vmovdqu (arg4, %r11), \T1 -- vpxor \T1, \XMM1, \XMM1 -- vmovdqu \XMM1, (arg3 , %r11) -- .if \ENC_DEC == DEC -- vmovdqa \T1, \XMM1 -- .endif -- -- vmovdqu 16*1(arg4, %r11), \T1 -- vpxor \T1, \XMM2, \XMM2 -- vmovdqu \XMM2, 16*1(arg3 , %r11) -- .if \ENC_DEC == DEC -- vmovdqa \T1, \XMM2 -- .endif -- -- vmovdqu 16*2(arg4, %r11), \T1 -- vpxor \T1, \XMM3, \XMM3 -- vmovdqu \XMM3, 16*2(arg3 , %r11) -- .if \ENC_DEC == DEC -- vmovdqa \T1, \XMM3 -- .endif -- -- vmovdqu 16*3(arg4, %r11), \T1 -- vpxor \T1, \XMM4, \XMM4 -- vmovdqu \XMM4, 16*3(arg3 , %r11) -- .if \ENC_DEC == DEC -- vmovdqa \T1, \XMM4 -- .endif -- -- vmovdqu 16*4(arg4, %r11), \T1 -- vpxor \T1, \XMM5, \XMM5 -- vmovdqu \XMM5, 16*4(arg3 , %r11) -- .if \ENC_DEC == DEC -- vmovdqa \T1, \XMM5 -- .endif -- -- vmovdqu 16*5(arg4, %r11), \T1 -- vpxor \T1, \XMM6, \XMM6 -- vmovdqu \XMM6, 16*5(arg3 , %r11) -- .if \ENC_DEC == DEC -- vmovdqa \T1, \XMM6 -- .endif -- -- vmovdqu 16*6(arg4, %r11), \T1 -- vpxor \T1, \XMM7, \XMM7 -- vmovdqu \XMM7, 16*6(arg3 , %r11) -- .if \ENC_DEC == DEC -- vmovdqa \T1, \XMM7 -- .endif -- -- 
vmovdqu 16*7(arg4, %r11), \T1 -- vpxor \T1, \XMM8, \XMM8 -- vmovdqu \XMM8, 16*7(arg3 , %r11) -- .if \ENC_DEC == DEC -- vmovdqa \T1, \XMM8 -- .endif -- -- add $128, %r11 -- -- vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap -- vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with -- # the corresponding ciphertext -- vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap -- --############################################################################### -- --.L_initial_blocks_done\@: -- -- --.endm -- -- -- --# encrypt 8 blocks at a time --# ghash the 8 previously encrypted ciphertext blocks --# arg1, arg2, arg3, arg4 are used as pointers only, not modified --# r11 is the data offset value --.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC -- -- vmovdqa \XMM1, \T2 -- vmovdqa \XMM2, TMP2(%rsp) -- vmovdqa \XMM3, TMP3(%rsp) -- vmovdqa \XMM4, TMP4(%rsp) -- vmovdqa \XMM5, TMP5(%rsp) -- vmovdqa \XMM6, TMP6(%rsp) -- vmovdqa \XMM7, TMP7(%rsp) -- vmovdqa \XMM8, TMP8(%rsp) -- --.if \loop_idx == in_order -- vpaddd ONE(%rip), \CTR, \XMM1 # INCR CNT -- vpaddd ONE(%rip), \XMM1, \XMM2 -- vpaddd ONE(%rip), \XMM2, \XMM3 -- vpaddd ONE(%rip), \XMM3, \XMM4 -- vpaddd ONE(%rip), \XMM4, \XMM5 -- vpaddd ONE(%rip), \XMM5, \XMM6 -- vpaddd ONE(%rip), \XMM6, \XMM7 -- vpaddd ONE(%rip), \XMM7, \XMM8 -- vmovdqa \XMM8, \CTR -- -- vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte 
swap -- vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap --.else -- vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT -- vpaddd ONEf(%rip), \XMM1, \XMM2 -- vpaddd ONEf(%rip), \XMM2, \XMM3 -- vpaddd ONEf(%rip), \XMM3, \XMM4 -- vpaddd ONEf(%rip), \XMM4, \XMM5 -- vpaddd ONEf(%rip), \XMM5, \XMM6 -- vpaddd ONEf(%rip), \XMM6, \XMM7 -- vpaddd ONEf(%rip), \XMM7, \XMM8 -- vmovdqa \XMM8, \CTR --.endif -- -- -- ####################################################################### -- -- vmovdqu (arg1), \T1 -- vpxor \T1, \XMM1, \XMM1 -- vpxor \T1, \XMM2, \XMM2 -- vpxor \T1, \XMM3, \XMM3 -- vpxor \T1, \XMM4, \XMM4 -- vpxor \T1, \XMM5, \XMM5 -- vpxor \T1, \XMM6, \XMM6 -- vpxor \T1, \XMM7, \XMM7 -- vpxor \T1, \XMM8, \XMM8 -- -- ####################################################################### -- -- -- -- -- -- vmovdqu 16*1(arg1), \T1 -- vaesenc \T1, \XMM1, \XMM1 -- vaesenc \T1, \XMM2, \XMM2 -- vaesenc \T1, \XMM3, \XMM3 -- vaesenc \T1, \XMM4, \XMM4 -- vaesenc \T1, \XMM5, \XMM5 -- vaesenc \T1, \XMM6, \XMM6 -- vaesenc \T1, \XMM7, \XMM7 -- vaesenc \T1, \XMM8, \XMM8 -- -- vmovdqu 16*2(arg1), \T1 -- vaesenc \T1, \XMM1, \XMM1 -- vaesenc \T1, \XMM2, \XMM2 -- vaesenc \T1, \XMM3, \XMM3 -- vaesenc \T1, \XMM4, \XMM4 -- vaesenc \T1, \XMM5, \XMM5 -- vaesenc \T1, \XMM6, \XMM6 -- vaesenc \T1, \XMM7, \XMM7 -- vaesenc \T1, \XMM8, \XMM8 -- -- -- ####################################################################### -- -- vmovdqu HashKey_8(arg2), \T5 -- vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1 -- vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0 -- vpclmulqdq $0x01, \T5, \T2, \T6 # T6 = a1*b0 -- vpclmulqdq $0x10, \T5, \T2, \T5 # T5 = a0*b1 -- vpxor \T5, \T6, \T6 -- -- vmovdqu 16*3(arg1), \T1 -- vaesenc \T1, \XMM1, \XMM1 -- vaesenc 
\T1, \XMM2, \XMM2 -- vaesenc \T1, \XMM3, \XMM3 -- vaesenc \T1, \XMM4, \XMM4 -- vaesenc \T1, \XMM5, \XMM5 -- vaesenc \T1, \XMM6, \XMM6 -- vaesenc \T1, \XMM7, \XMM7 -- vaesenc \T1, \XMM8, \XMM8 -- -- vmovdqa TMP2(%rsp), \T1 -- vmovdqu HashKey_7(arg2), \T5 -- vpclmulqdq $0x11, \T5, \T1, \T3 -- vpxor \T3, \T4, \T4 -- -- vpclmulqdq $0x00, \T5, \T1, \T3 -- vpxor \T3, \T7, \T7 -- -- vpclmulqdq $0x01, \T5, \T1, \T3 -- vpxor \T3, \T6, \T6 -- -- vpclmulqdq $0x10, \T5, \T1, \T3 -- vpxor \T3, \T6, \T6 -- -- vmovdqu 16*4(arg1), \T1 -- vaesenc \T1, \XMM1, \XMM1 -- vaesenc \T1, \XMM2, \XMM2 -- vaesenc \T1, \XMM3, \XMM3 -- vaesenc \T1, \XMM4, \XMM4 -- vaesenc \T1, \XMM5, \XMM5 -- vaesenc \T1, \XMM6, \XMM6 -- vaesenc \T1, \XMM7, \XMM7 -- vaesenc \T1, \XMM8, \XMM8 -- -- ####################################################################### -- -- vmovdqa TMP3(%rsp), \T1 -- vmovdqu HashKey_6(arg2), \T5 -- vpclmulqdq $0x11, \T5, \T1, \T3 -- vpxor \T3, \T4, \T4 -- -- vpclmulqdq $0x00, \T5, \T1, \T3 -- vpxor \T3, \T7, \T7 -- -- vpclmulqdq $0x01, \T5, \T1, \T3 -- vpxor \T3, \T6, \T6 -- -- vpclmulqdq $0x10, \T5, \T1, \T3 -- vpxor \T3, \T6, \T6 -- -- vmovdqu 16*5(arg1), \T1 -- vaesenc \T1, \XMM1, \XMM1 -- vaesenc \T1, \XMM2, \XMM2 -- vaesenc \T1, \XMM3, \XMM3 -- vaesenc \T1, \XMM4, \XMM4 -- vaesenc \T1, \XMM5, \XMM5 -- vaesenc \T1, \XMM6, \XMM6 -- vaesenc \T1, \XMM7, \XMM7 -- vaesenc \T1, \XMM8, \XMM8 -- -- vmovdqa TMP4(%rsp), \T1 -- vmovdqu HashKey_5(arg2), \T5 -- vpclmulqdq $0x11, \T5, \T1, \T3 -- vpxor \T3, \T4, \T4 -- -- vpclmulqdq $0x00, \T5, \T1, \T3 -- vpxor \T3, \T7, \T7 -- -- vpclmulqdq $0x01, \T5, \T1, \T3 -- vpxor \T3, \T6, \T6 -- -- vpclmulqdq $0x10, \T5, \T1, \T3 -- vpxor \T3, \T6, \T6 -- -- vmovdqu 16*6(arg1), \T1 -- vaesenc \T1, \XMM1, \XMM1 -- vaesenc \T1, \XMM2, \XMM2 -- vaesenc \T1, \XMM3, \XMM3 -- vaesenc \T1, \XMM4, \XMM4 -- vaesenc \T1, \XMM5, \XMM5 -- vaesenc \T1, \XMM6, \XMM6 -- vaesenc \T1, \XMM7, \XMM7 -- vaesenc \T1, \XMM8, \XMM8 -- -- -- vmovdqa TMP5(%rsp), \T1 
-- vmovdqu HashKey_4(arg2), \T5 -- vpclmulqdq $0x11, \T5, \T1, \T3 -- vpxor \T3, \T4, \T4 -- -- vpclmulqdq $0x00, \T5, \T1, \T3 -- vpxor \T3, \T7, \T7 -- -- vpclmulqdq $0x01, \T5, \T1, \T3 -- vpxor \T3, \T6, \T6 -- -- vpclmulqdq $0x10, \T5, \T1, \T3 -- vpxor \T3, \T6, \T6 -- -- vmovdqu 16*7(arg1), \T1 -- vaesenc \T1, \XMM1, \XMM1 -- vaesenc \T1, \XMM2, \XMM2 -- vaesenc \T1, \XMM3, \XMM3 -- vaesenc \T1, \XMM4, \XMM4 -- vaesenc \T1, \XMM5, \XMM5 -- vaesenc \T1, \XMM6, \XMM6 -- vaesenc \T1, \XMM7, \XMM7 -- vaesenc \T1, \XMM8, \XMM8 -- -- vmovdqa TMP6(%rsp), \T1 -- vmovdqu HashKey_3(arg2), \T5 -- vpclmulqdq $0x11, \T5, \T1, \T3 -- vpxor \T3, \T4, \T4 -- -- vpclmulqdq $0x00, \T5, \T1, \T3 -- vpxor \T3, \T7, \T7 -- -- vpclmulqdq $0x01, \T5, \T1, \T3 -- vpxor \T3, \T6, \T6 -- -- vpclmulqdq $0x10, \T5, \T1, \T3 -- vpxor \T3, \T6, \T6 -- -- vmovdqu 16*8(arg1), \T1 -- vaesenc \T1, \XMM1, \XMM1 -- vaesenc \T1, \XMM2, \XMM2 -- vaesenc \T1, \XMM3, \XMM3 -- vaesenc \T1, \XMM4, \XMM4 -- vaesenc \T1, \XMM5, \XMM5 -- vaesenc \T1, \XMM6, \XMM6 -- vaesenc \T1, \XMM7, \XMM7 -- vaesenc \T1, \XMM8, \XMM8 -- -- vmovdqa TMP7(%rsp), \T1 -- vmovdqu HashKey_2(arg2), \T5 -- vpclmulqdq $0x11, \T5, \T1, \T3 -- vpxor \T3, \T4, \T4 -- -- vpclmulqdq $0x00, \T5, \T1, \T3 -- vpxor \T3, \T7, \T7 -- -- vpclmulqdq $0x01, \T5, \T1, \T3 -- vpxor \T3, \T6, \T6 -- -- vpclmulqdq $0x10, \T5, \T1, \T3 -- vpxor \T3, \T6, \T6 -- -- -- ####################################################################### -- -- vmovdqu 16*9(arg1), \T5 -- vaesenc \T5, \XMM1, \XMM1 -- vaesenc \T5, \XMM2, \XMM2 -- vaesenc \T5, \XMM3, \XMM3 -- vaesenc \T5, \XMM4, \XMM4 -- vaesenc \T5, \XMM5, \XMM5 -- vaesenc \T5, \XMM6, \XMM6 -- vaesenc \T5, \XMM7, \XMM7 -- vaesenc \T5, \XMM8, \XMM8 -- -- vmovdqa TMP8(%rsp), \T1 -- vmovdqu HashKey(arg2), \T5 -- -- vpclmulqdq $0x00, \T5, \T1, \T3 -- vpxor \T3, \T7, \T7 -- -- vpclmulqdq $0x01, \T5, \T1, \T3 -- vpxor \T3, \T6, \T6 -- -- vpclmulqdq $0x10, \T5, \T1, \T3 -- vpxor \T3, \T6, \T6 -- -- 
vpclmulqdq $0x11, \T5, \T1, \T3 -- vpxor \T3, \T4, \T1 -- -- -- vmovdqu 16*10(arg1), \T5 -- -- i = 11 -- setreg --.rep (\REP-9) -- vaesenc \T5, \XMM1, \XMM1 -- vaesenc \T5, \XMM2, \XMM2 -- vaesenc \T5, \XMM3, \XMM3 -- vaesenc \T5, \XMM4, \XMM4 -- vaesenc \T5, \XMM5, \XMM5 -- vaesenc \T5, \XMM6, \XMM6 -- vaesenc \T5, \XMM7, \XMM7 -- vaesenc \T5, \XMM8, \XMM8 -- -- vmovdqu 16*i(arg1), \T5 -- i = i + 1 -- setreg --.endr -- -- i = 0 -- j = 1 -- setreg --.rep 8 -- vpxor 16*i(arg4, %r11), \T5, \T2 -- .if \ENC_DEC == ENC -- vaesenclast \T2, reg_j, reg_j -- .else -- vaesenclast \T2, reg_j, \T3 -- vmovdqu 16*i(arg4, %r11), reg_j -- vmovdqu \T3, 16*i(arg3, %r11) -- .endif -- i = (i+1) -- j = (j+1) -- setreg --.endr -- ####################################################################### -- -- -- vpslldq $8, \T6, \T3 # shift-L T3 2 DWs -- vpsrldq $8, \T6, \T6 # shift-R T2 2 DWs -- vpxor \T3, \T7, \T7 -- vpxor \T6, \T1, \T1 # accumulate the results in T1:T7 -- -- -- -- ####################################################################### -- #first phase of the reduction -- vmovdqa POLY2(%rip), \T3 -- -- vpclmulqdq $0x01, \T7, \T3, \T2 -- vpslldq $8, \T2, \T2 # shift-L xmm2 2 DWs -- -- vpxor \T2, \T7, \T7 # first phase of the reduction complete -- ####################################################################### -- .if \ENC_DEC == ENC -- vmovdqu \XMM1, 16*0(arg3,%r11) # Write to the Ciphertext buffer -- vmovdqu \XMM2, 16*1(arg3,%r11) # Write to the Ciphertext buffer -- vmovdqu \XMM3, 16*2(arg3,%r11) # Write to the Ciphertext buffer -- vmovdqu \XMM4, 16*3(arg3,%r11) # Write to the Ciphertext buffer -- vmovdqu \XMM5, 16*4(arg3,%r11) # Write to the Ciphertext buffer -- vmovdqu \XMM6, 16*5(arg3,%r11) # Write to the Ciphertext buffer -- vmovdqu \XMM7, 16*6(arg3,%r11) # Write to the Ciphertext buffer -- vmovdqu \XMM8, 16*7(arg3,%r11) # Write to the Ciphertext buffer -- .endif -- -- ####################################################################### -- #second phase of 
the reduction -- vpclmulqdq $0x00, \T7, \T3, \T2 -- vpsrldq $4, \T2, \T2 # shift-R xmm2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) -- -- vpclmulqdq $0x10, \T7, \T3, \T4 -- vpslldq $4, \T4, \T4 # shift-L xmm0 1 DW (Shift-L 1-DW to obtain result with no shifts) -- -- vpxor \T2, \T4, \T4 # second phase of the reduction complete -- ####################################################################### -- vpxor \T4, \T1, \T1 # the result is in T1 -- -- vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap -- vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap -- -- -- vpxor \T1, \XMM1, \XMM1 -- -- -- --.endm -- -- --# GHASH the last 4 ciphertext blocks. 
--.macro GHASH_LAST_8_AVX2 T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 -- -- ## Karatsuba Method -- -- vmovdqu HashKey_8(arg2), \T5 -- -- vpshufd $0b01001110, \XMM1, \T2 -- vpshufd $0b01001110, \T5, \T3 -- vpxor \XMM1, \T2, \T2 -- vpxor \T5, \T3, \T3 -- -- vpclmulqdq $0x11, \T5, \XMM1, \T6 -- vpclmulqdq $0x00, \T5, \XMM1, \T7 -- -- vpclmulqdq $0x00, \T3, \T2, \XMM1 -- -- ###################### -- -- vmovdqu HashKey_7(arg2), \T5 -- vpshufd $0b01001110, \XMM2, \T2 -- vpshufd $0b01001110, \T5, \T3 -- vpxor \XMM2, \T2, \T2 -- vpxor \T5, \T3, \T3 -- -- vpclmulqdq $0x11, \T5, \XMM2, \T4 -- vpxor \T4, \T6, \T6 -- -- vpclmulqdq $0x00, \T5, \XMM2, \T4 -- vpxor \T4, \T7, \T7 -- -- vpclmulqdq $0x00, \T3, \T2, \T2 -- -- vpxor \T2, \XMM1, \XMM1 -- -- ###################### -- -- vmovdqu HashKey_6(arg2), \T5 -- vpshufd $0b01001110, \XMM3, \T2 -- vpshufd $0b01001110, \T5, \T3 -- vpxor \XMM3, \T2, \T2 -- vpxor \T5, \T3, \T3 -- -- vpclmulqdq $0x11, \T5, \XMM3, \T4 -- vpxor \T4, \T6, \T6 -- -- vpclmulqdq $0x00, \T5, \XMM3, \T4 -- vpxor \T4, \T7, \T7 -- -- vpclmulqdq $0x00, \T3, \T2, \T2 -- -- vpxor \T2, \XMM1, \XMM1 -- -- ###################### -- -- vmovdqu HashKey_5(arg2), \T5 -- vpshufd $0b01001110, \XMM4, \T2 -- vpshufd $0b01001110, \T5, \T3 -- vpxor \XMM4, \T2, \T2 -- vpxor \T5, \T3, \T3 -- -- vpclmulqdq $0x11, \T5, \XMM4, \T4 -- vpxor \T4, \T6, \T6 -- -- vpclmulqdq $0x00, \T5, \XMM4, \T4 -- vpxor \T4, \T7, \T7 -- -- vpclmulqdq $0x00, \T3, \T2, \T2 -- -- vpxor \T2, \XMM1, \XMM1 -- -- ###################### -- -- vmovdqu HashKey_4(arg2), \T5 -- vpshufd $0b01001110, \XMM5, \T2 -- vpshufd $0b01001110, \T5, \T3 -- vpxor \XMM5, \T2, \T2 -- vpxor \T5, \T3, \T3 -- -- vpclmulqdq $0x11, \T5, \XMM5, \T4 -- vpxor \T4, \T6, \T6 -- -- vpclmulqdq $0x00, \T5, \XMM5, \T4 -- vpxor \T4, \T7, \T7 -- -- vpclmulqdq $0x00, \T3, \T2, \T2 -- -- vpxor \T2, \XMM1, \XMM1 -- -- ###################### -- -- vmovdqu HashKey_3(arg2), \T5 -- vpshufd $0b01001110, \XMM6, \T2 -- vpshufd 
$0b01001110, \T5, \T3 -- vpxor \XMM6, \T2, \T2 -- vpxor \T5, \T3, \T3 -- -- vpclmulqdq $0x11, \T5, \XMM6, \T4 -- vpxor \T4, \T6, \T6 -- -- vpclmulqdq $0x00, \T5, \XMM6, \T4 -- vpxor \T4, \T7, \T7 -- -- vpclmulqdq $0x00, \T3, \T2, \T2 -- -- vpxor \T2, \XMM1, \XMM1 -- -- ###################### -- -- vmovdqu HashKey_2(arg2), \T5 -- vpshufd $0b01001110, \XMM7, \T2 -- vpshufd $0b01001110, \T5, \T3 -- vpxor \XMM7, \T2, \T2 -- vpxor \T5, \T3, \T3 -- -- vpclmulqdq $0x11, \T5, \XMM7, \T4 -- vpxor \T4, \T6, \T6 -- -- vpclmulqdq $0x00, \T5, \XMM7, \T4 -- vpxor \T4, \T7, \T7 -- -- vpclmulqdq $0x00, \T3, \T2, \T2 -- -- vpxor \T2, \XMM1, \XMM1 -- -- ###################### -- -- vmovdqu HashKey(arg2), \T5 -- vpshufd $0b01001110, \XMM8, \T2 -- vpshufd $0b01001110, \T5, \T3 -- vpxor \XMM8, \T2, \T2 -- vpxor \T5, \T3, \T3 -- -- vpclmulqdq $0x11, \T5, \XMM8, \T4 -- vpxor \T4, \T6, \T6 -- -- vpclmulqdq $0x00, \T5, \XMM8, \T4 -- vpxor \T4, \T7, \T7 -- -- vpclmulqdq $0x00, \T3, \T2, \T2 -- -- vpxor \T2, \XMM1, \XMM1 -- vpxor \T6, \XMM1, \XMM1 -- vpxor \T7, \XMM1, \T2 -- -- -- -- -- vpslldq $8, \T2, \T4 -- vpsrldq $8, \T2, \T2 -- -- vpxor \T4, \T7, \T7 -- vpxor \T2, \T6, \T6 # holds the result of the -- # accumulated carry-less multiplications -- -- ####################################################################### -- #first phase of the reduction -- vmovdqa POLY2(%rip), \T3 -- -- vpclmulqdq $0x01, \T7, \T3, \T2 -- vpslldq $8, \T2, \T2 # shift-L xmm2 2 DWs -- -- vpxor \T2, \T7, \T7 # first phase of the reduction complete -- ####################################################################### -- -- -- #second phase of the reduction -- vpclmulqdq $0x00, \T7, \T3, \T2 -- vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) -- -- vpclmulqdq $0x10, \T7, \T3, \T4 -- vpslldq $4, \T4, \T4 # shift-L T4 1 DW (Shift-L 1-DW to obtain result with no shifts) -- -- vpxor \T2, \T4, \T4 # second phase of the reduction complete -- 
####################################################################### -- vpxor \T4, \T6, \T6 # the result is in T6 --.endm -- -- -- --############################################################# --#void aesni_gcm_init_avx_gen4 --# (gcm_data *my_ctx_data, --# gcm_context_data *data, --# u8 *iv, /* Pre-counter block j0: 4 byte salt --# (from Security Association) concatenated with 8 byte --# Initialisation Vector (from IPSec ESP Payload) --# concatenated with 0x00000001. 16-byte aligned pointer. */ --# u8 *hash_subkey# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */ --# const u8 *aad, /* Additional Authentication Data (AAD)*/ --# u64 aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ --############################################################# --SYM_FUNC_START(aesni_gcm_init_avx_gen4) -- FUNC_SAVE -- INIT GHASH_MUL_AVX2, PRECOMPUTE_AVX2 -- FUNC_RESTORE -- RET --SYM_FUNC_END(aesni_gcm_init_avx_gen4) -- --############################################################################### --#void aesni_gcm_enc_avx_gen4( --# gcm_data *my_ctx_data, /* aligned to 16 Bytes */ --# gcm_context_data *data, --# u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */ --# const u8 *in, /* Plaintext input */ --# u64 plaintext_len) /* Length of data in Bytes for encryption. 
*/ --############################################################################### --SYM_FUNC_START(aesni_gcm_enc_update_avx_gen4) -- FUNC_SAVE -- mov keysize,%eax -- cmp $32, %eax -- je key_256_enc_update4 -- cmp $16, %eax -- je key_128_enc_update4 -- # must be 192 -- GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 11 -- FUNC_RESTORE -- RET --key_128_enc_update4: -- GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 9 -- FUNC_RESTORE -- RET --key_256_enc_update4: -- GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13 -- FUNC_RESTORE -- RET --SYM_FUNC_END(aesni_gcm_enc_update_avx_gen4) -- --############################################################################### --#void aesni_gcm_dec_update_avx_gen4( --# gcm_data *my_ctx_data, /* aligned to 16 Bytes */ --# gcm_context_data *data, --# u8 *out, /* Plaintext output. Decrypt in-place is allowed. */ --# const u8 *in, /* Ciphertext input */ --# u64 plaintext_len) /* Length of data in Bytes for encryption. 
*/ --############################################################################### --SYM_FUNC_START(aesni_gcm_dec_update_avx_gen4) -- FUNC_SAVE -- mov keysize,%eax -- cmp $32, %eax -- je key_256_dec_update4 -- cmp $16, %eax -- je key_128_dec_update4 -- # must be 192 -- GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 11 -- FUNC_RESTORE -- RET --key_128_dec_update4: -- GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 9 -- FUNC_RESTORE -- RET --key_256_dec_update4: -- GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13 -- FUNC_RESTORE -- RET --SYM_FUNC_END(aesni_gcm_dec_update_avx_gen4) -- --############################################################################### --#void aesni_gcm_finalize_avx_gen4( --# gcm_data *my_ctx_data, /* aligned to 16 Bytes */ --# gcm_context_data *data, --# u8 *auth_tag, /* Authenticated Tag output. */ --# u64 auth_tag_len)# /* Authenticated Tag Length in bytes. --# Valid values are 16 (most likely), 12 or 8. 
*/ --############################################################################### --SYM_FUNC_START(aesni_gcm_finalize_avx_gen4) -- FUNC_SAVE -- mov keysize,%eax -- cmp $32, %eax -- je key_256_finalize4 -- cmp $16, %eax -- je key_128_finalize4 -- # must be 192 -- GCM_COMPLETE GHASH_MUL_AVX2, 11, arg3, arg4 -- FUNC_RESTORE -- RET --key_128_finalize4: -- GCM_COMPLETE GHASH_MUL_AVX2, 9, arg3, arg4 -- FUNC_RESTORE -- RET --key_256_finalize4: -- GCM_COMPLETE GHASH_MUL_AVX2, 13, arg3, arg4 -- FUNC_RESTORE -- RET --SYM_FUNC_END(aesni_gcm_finalize_avx_gen4) -diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c -index ef031655b2d3..cd37de5ec404 100644 ---- a/arch/x86/crypto/aesni-intel_glue.c -+++ b/arch/x86/crypto/aesni-intel_glue.c -@@ -1,7 +1,7 @@ - // SPDX-License-Identifier: GPL-2.0-or-later - /* -- * Support for Intel AES-NI instructions. This file contains glue -- * code, the real AES implementation is in intel-aes_asm.S. -+ * Support for AES-NI and VAES instructions. This file contains glue code. -+ * The real AES implementations are in aesni-intel_asm.S and other .S files. - * - * Copyright (C) 2008, Intel Corp. - * Author: Huang Ying -@@ -13,6 +13,8 @@ - * Tadeusz Struk (tadeusz.struk@intel.com) - * Aidan O'Mahony (aidan.o.mahony@intel.com) - * Copyright (c) 2010, Intel Corporation. -+ * -+ * Copyright 2024 Google LLC - */ - - #include -@@ -44,41 +46,11 @@ - #define CRYPTO_AES_CTX_SIZE (sizeof(struct crypto_aes_ctx) + AESNI_ALIGN_EXTRA) - #define XTS_AES_CTX_SIZE (sizeof(struct aesni_xts_ctx) + AESNI_ALIGN_EXTRA) - --/* This data is stored at the end of the crypto_tfm struct. -- * It's a type of per "session" data storage location. -- * This needs to be 16 byte aligned. 
-- */ --struct aesni_rfc4106_gcm_ctx { -- u8 hash_subkey[16] AESNI_ALIGN_ATTR; -- struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR; -- u8 nonce[4]; --}; -- --struct generic_gcmaes_ctx { -- u8 hash_subkey[16] AESNI_ALIGN_ATTR; -- struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR; --}; -- - struct aesni_xts_ctx { - struct crypto_aes_ctx tweak_ctx AESNI_ALIGN_ATTR; - struct crypto_aes_ctx crypt_ctx AESNI_ALIGN_ATTR; - }; - --#define GCM_BLOCK_LEN 16 -- --struct gcm_context_data { -- /* init, update and finalize context data */ -- u8 aad_hash[GCM_BLOCK_LEN]; -- u64 aad_length; -- u64 in_length; -- u8 partial_block_enc_key[GCM_BLOCK_LEN]; -- u8 orig_IV[GCM_BLOCK_LEN]; -- u8 current_counter[GCM_BLOCK_LEN]; -- u64 partial_block_len; -- u64 unused; -- u8 hash_keys[GCM_BLOCK_LEN * 16]; --}; -- - static inline void *aes_align_addr(void *addr) - { - if (crypto_tfm_ctx_alignment() >= AESNI_ALIGN) -@@ -103,9 +75,6 @@ asmlinkage void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, - asmlinkage void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, unsigned int len, u8 *iv); - --#define AVX_GEN2_OPTSIZE 640 --#define AVX_GEN4_OPTSIZE 4096 -- - asmlinkage void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, unsigned int len, u8 *iv); - -@@ -118,23 +87,6 @@ asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, unsigned int len, u8 *iv); - DEFINE_STATIC_CALL(aesni_ctr_enc_tfm, aesni_ctr_enc); - --/* Scatter / Gather routines, with args similar to above */ --asmlinkage void aesni_gcm_init(void *ctx, -- struct gcm_context_data *gdata, -- u8 *iv, -- u8 *hash_subkey, const u8 *aad, -- unsigned long aad_len); --asmlinkage void aesni_gcm_enc_update(void *ctx, -- struct gcm_context_data *gdata, u8 *out, -- const u8 *in, unsigned long plaintext_len); --asmlinkage void aesni_gcm_dec_update(void *ctx, -- struct gcm_context_data *gdata, u8 *out, -- const u8 *in, -- unsigned long ciphertext_len); 
--asmlinkage void aesni_gcm_finalize(void *ctx, -- struct gcm_context_data *gdata, -- u8 *auth_tag, unsigned long auth_tag_len); -- - asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv, - void *keys, u8 *out, unsigned int num_bytes); - asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv, -@@ -154,67 +106,6 @@ asmlinkage void aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv, - asmlinkage void aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv, - const void *keys, u8 *out, unsigned int num_bytes, - unsigned int byte_ctr); -- --/* -- * asmlinkage void aesni_gcm_init_avx_gen2() -- * gcm_data *my_ctx_data, context data -- * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. -- */ --asmlinkage void aesni_gcm_init_avx_gen2(void *my_ctx_data, -- struct gcm_context_data *gdata, -- u8 *iv, -- u8 *hash_subkey, -- const u8 *aad, -- unsigned long aad_len); -- --asmlinkage void aesni_gcm_enc_update_avx_gen2(void *ctx, -- struct gcm_context_data *gdata, u8 *out, -- const u8 *in, unsigned long plaintext_len); --asmlinkage void aesni_gcm_dec_update_avx_gen2(void *ctx, -- struct gcm_context_data *gdata, u8 *out, -- const u8 *in, -- unsigned long ciphertext_len); --asmlinkage void aesni_gcm_finalize_avx_gen2(void *ctx, -- struct gcm_context_data *gdata, -- u8 *auth_tag, unsigned long auth_tag_len); -- --/* -- * asmlinkage void aesni_gcm_init_avx_gen4() -- * gcm_data *my_ctx_data, context data -- * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. 
-- */ --asmlinkage void aesni_gcm_init_avx_gen4(void *my_ctx_data, -- struct gcm_context_data *gdata, -- u8 *iv, -- u8 *hash_subkey, -- const u8 *aad, -- unsigned long aad_len); -- --asmlinkage void aesni_gcm_enc_update_avx_gen4(void *ctx, -- struct gcm_context_data *gdata, u8 *out, -- const u8 *in, unsigned long plaintext_len); --asmlinkage void aesni_gcm_dec_update_avx_gen4(void *ctx, -- struct gcm_context_data *gdata, u8 *out, -- const u8 *in, -- unsigned long ciphertext_len); --asmlinkage void aesni_gcm_finalize_avx_gen4(void *ctx, -- struct gcm_context_data *gdata, -- u8 *auth_tag, unsigned long auth_tag_len); -- --static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx); --static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx2); -- --static inline struct --aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) --{ -- return aes_align_addr(crypto_aead_ctx(tfm)); --} -- --static inline struct --generic_gcmaes_ctx *generic_gcmaes_ctx_get(struct crypto_aead *tfm) --{ -- return aes_align_addr(crypto_aead_ctx(tfm)); --} - #endif - - static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) -@@ -588,280 +479,6 @@ static int xctr_crypt(struct skcipher_request *req) - } - return err; - } -- --static int aes_gcm_derive_hash_subkey(const struct crypto_aes_ctx *aes_key, -- u8 hash_subkey[AES_BLOCK_SIZE]) --{ -- static const u8 zeroes[AES_BLOCK_SIZE]; -- -- aes_encrypt(aes_key, hash_subkey, zeroes); -- return 0; --} -- --static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key, -- unsigned int key_len) --{ -- struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(aead); -- -- if (key_len < 4) -- return -EINVAL; -- -- /*Account for 4 byte nonce at the end.*/ -- key_len -= 4; -- -- memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce)); -- -- return aes_set_key_common(&ctx->aes_key_expanded, key, key_len) ?: -- aes_gcm_derive_hash_subkey(&ctx->aes_key_expanded, -- ctx->hash_subkey); --} -- --/* This is the Integrity Check 
Value (aka the authentication tag) length and can -- * be 8, 12 or 16 bytes long. */ --static int common_rfc4106_set_authsize(struct crypto_aead *aead, -- unsigned int authsize) --{ -- switch (authsize) { -- case 8: -- case 12: -- case 16: -- break; -- default: -- return -EINVAL; -- } -- -- return 0; --} -- --static int generic_gcmaes_set_authsize(struct crypto_aead *tfm, -- unsigned int authsize) --{ -- switch (authsize) { -- case 4: -- case 8: -- case 12: -- case 13: -- case 14: -- case 15: -- case 16: -- break; -- default: -- return -EINVAL; -- } -- -- return 0; --} -- --static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, -- unsigned int assoclen, u8 *hash_subkey, -- u8 *iv, void *aes_ctx, u8 *auth_tag, -- unsigned long auth_tag_len) --{ -- u8 databuf[sizeof(struct gcm_context_data) + (AESNI_ALIGN - 8)] __aligned(8); -- struct gcm_context_data *data = PTR_ALIGN((void *)databuf, AESNI_ALIGN); -- unsigned long left = req->cryptlen; -- struct scatter_walk assoc_sg_walk; -- struct skcipher_walk walk; -- bool do_avx, do_avx2; -- u8 *assocmem = NULL; -- u8 *assoc; -- int err; -- -- if (!enc) -- left -= auth_tag_len; -- -- do_avx = (left >= AVX_GEN2_OPTSIZE); -- do_avx2 = (left >= AVX_GEN4_OPTSIZE); -- -- /* Linearize assoc, if not already linear */ -- if (req->src->length >= assoclen && req->src->length) { -- scatterwalk_start(&assoc_sg_walk, req->src); -- assoc = scatterwalk_map(&assoc_sg_walk); -- } else { -- gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
-- GFP_KERNEL : GFP_ATOMIC; -- -- /* assoc can be any length, so must be on heap */ -- assocmem = kmalloc(assoclen, flags); -- if (unlikely(!assocmem)) -- return -ENOMEM; -- assoc = assocmem; -- -- scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0); -- } -- -- kernel_fpu_begin(); -- if (static_branch_likely(&gcm_use_avx2) && do_avx2) -- aesni_gcm_init_avx_gen4(aes_ctx, data, iv, hash_subkey, assoc, -- assoclen); -- else if (static_branch_likely(&gcm_use_avx) && do_avx) -- aesni_gcm_init_avx_gen2(aes_ctx, data, iv, hash_subkey, assoc, -- assoclen); -- else -- aesni_gcm_init(aes_ctx, data, iv, hash_subkey, assoc, assoclen); -- kernel_fpu_end(); -- -- if (!assocmem) -- scatterwalk_unmap(assoc); -- else -- kfree(assocmem); -- -- err = enc ? skcipher_walk_aead_encrypt(&walk, req, false) -- : skcipher_walk_aead_decrypt(&walk, req, false); -- -- while (walk.nbytes > 0) { -- kernel_fpu_begin(); -- if (static_branch_likely(&gcm_use_avx2) && do_avx2) { -- if (enc) -- aesni_gcm_enc_update_avx_gen4(aes_ctx, data, -- walk.dst.virt.addr, -- walk.src.virt.addr, -- walk.nbytes); -- else -- aesni_gcm_dec_update_avx_gen4(aes_ctx, data, -- walk.dst.virt.addr, -- walk.src.virt.addr, -- walk.nbytes); -- } else if (static_branch_likely(&gcm_use_avx) && do_avx) { -- if (enc) -- aesni_gcm_enc_update_avx_gen2(aes_ctx, data, -- walk.dst.virt.addr, -- walk.src.virt.addr, -- walk.nbytes); -- else -- aesni_gcm_dec_update_avx_gen2(aes_ctx, data, -- walk.dst.virt.addr, -- walk.src.virt.addr, -- walk.nbytes); -- } else if (enc) { -- aesni_gcm_enc_update(aes_ctx, data, walk.dst.virt.addr, -- walk.src.virt.addr, walk.nbytes); -- } else { -- aesni_gcm_dec_update(aes_ctx, data, walk.dst.virt.addr, -- walk.src.virt.addr, walk.nbytes); -- } -- kernel_fpu_end(); -- -- err = skcipher_walk_done(&walk, 0); -- } -- -- if (err) -- return err; -- -- kernel_fpu_begin(); -- if (static_branch_likely(&gcm_use_avx2) && do_avx2) -- aesni_gcm_finalize_avx_gen4(aes_ctx, data, auth_tag, -- auth_tag_len); -- 
else if (static_branch_likely(&gcm_use_avx) && do_avx) -- aesni_gcm_finalize_avx_gen2(aes_ctx, data, auth_tag, -- auth_tag_len); -- else -- aesni_gcm_finalize(aes_ctx, data, auth_tag, auth_tag_len); -- kernel_fpu_end(); -- -- return 0; --} -- --static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen, -- u8 *hash_subkey, u8 *iv, void *aes_ctx) --{ -- struct crypto_aead *tfm = crypto_aead_reqtfm(req); -- unsigned long auth_tag_len = crypto_aead_authsize(tfm); -- u8 auth_tag[16]; -- int err; -- -- err = gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv, aes_ctx, -- auth_tag, auth_tag_len); -- if (err) -- return err; -- -- scatterwalk_map_and_copy(auth_tag, req->dst, -- req->assoclen + req->cryptlen, -- auth_tag_len, 1); -- return 0; --} -- --static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen, -- u8 *hash_subkey, u8 *iv, void *aes_ctx) --{ -- struct crypto_aead *tfm = crypto_aead_reqtfm(req); -- unsigned long auth_tag_len = crypto_aead_authsize(tfm); -- u8 auth_tag_msg[16]; -- u8 auth_tag[16]; -- int err; -- -- err = gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv, aes_ctx, -- auth_tag, auth_tag_len); -- if (err) -- return err; -- -- /* Copy out original auth_tag */ -- scatterwalk_map_and_copy(auth_tag_msg, req->src, -- req->assoclen + req->cryptlen - auth_tag_len, -- auth_tag_len, 0); -- -- /* Compare generated tag with passed in tag. 
*/ -- if (crypto_memneq(auth_tag_msg, auth_tag, auth_tag_len)) { -- memzero_explicit(auth_tag, sizeof(auth_tag)); -- return -EBADMSG; -- } -- return 0; --} -- --static int helper_rfc4106_encrypt(struct aead_request *req) --{ -- struct crypto_aead *tfm = crypto_aead_reqtfm(req); -- struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); -- void *aes_ctx = &(ctx->aes_key_expanded); -- u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); -- u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); -- unsigned int i; -- __be32 counter = cpu_to_be32(1); -- -- /* Assuming we are supporting rfc4106 64-bit extended */ -- /* sequence numbers We need to have the AAD length equal */ -- /* to 16 or 20 bytes */ -- if (unlikely(req->assoclen != 16 && req->assoclen != 20)) -- return -EINVAL; -- -- /* IV below built */ -- for (i = 0; i < 4; i++) -- *(iv+i) = ctx->nonce[i]; -- for (i = 0; i < 8; i++) -- *(iv+4+i) = req->iv[i]; -- *((__be32 *)(iv+12)) = counter; -- -- return gcmaes_encrypt(req, req->assoclen - 8, ctx->hash_subkey, iv, -- aes_ctx); --} -- --static int helper_rfc4106_decrypt(struct aead_request *req) --{ -- __be32 counter = cpu_to_be32(1); -- struct crypto_aead *tfm = crypto_aead_reqtfm(req); -- struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); -- void *aes_ctx = &(ctx->aes_key_expanded); -- u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); -- u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); -- unsigned int i; -- -- if (unlikely(req->assoclen != 16 && req->assoclen != 20)) -- return -EINVAL; -- -- /* Assuming we are supporting rfc4106 64-bit extended */ -- /* sequence numbers We need to have the AAD length */ -- /* equal to 16 or 20 bytes */ -- -- /* IV below built */ -- for (i = 0; i < 4; i++) -- *(iv+i) = ctx->nonce[i]; -- for (i = 0; i < 8; i++) -- *(iv+4+i) = req->iv[i]; -- *((__be32 *)(iv+12)) = counter; -- -- return gcmaes_decrypt(req, req->assoclen - 8, ctx->hash_subkey, iv, -- aes_ctx); --} - #endif - - static int xts_setkey_aesni(struct crypto_skcipher 
*tfm, const u8 *key, -@@ -1216,11 +833,717 @@ DEFINE_XTS_ALG(vaes_avx10_256, "xts-aes-vaes-avx10_256", 700); - DEFINE_XTS_ALG(vaes_avx10_512, "xts-aes-vaes-avx10_512", 800); - #endif - -+/* The common part of the x86_64 AES-GCM key struct */ -+struct aes_gcm_key { -+ /* Expanded AES key and the AES key length in bytes */ -+ struct crypto_aes_ctx aes_key; -+ -+ /* RFC4106 nonce (used only by the rfc4106 algorithms) */ -+ u32 rfc4106_nonce; -+}; -+ -+/* Key struct used by the AES-NI implementations of AES-GCM */ -+struct aes_gcm_key_aesni { -+ /* -+ * Common part of the key. The assembly code requires 16-byte alignment -+ * for the round keys; we get this by them being located at the start of -+ * the struct and the whole struct being 16-byte aligned. -+ */ -+ struct aes_gcm_key base; -+ -+ /* -+ * Powers of the hash key H^8 through H^1. These are 128-bit values. -+ * They all have an extra factor of x^-1 and are byte-reversed. 16-byte -+ * alignment is required by the assembly code. -+ */ -+ u64 h_powers[8][2] __aligned(16); -+ -+ /* -+ * h_powers_xored[i] contains the two 64-bit halves of h_powers[i] XOR'd -+ * together. It's used for Karatsuba multiplication. 16-byte alignment -+ * is required by the assembly code. -+ */ -+ u64 h_powers_xored[8] __aligned(16); -+ -+ /* -+ * H^1 times x^64 (and also the usual extra factor of x^-1). 16-byte -+ * alignment is required by the assembly code. -+ */ -+ u64 h_times_x64[2] __aligned(16); -+}; -+#define AES_GCM_KEY_AESNI(key) \ -+ container_of((key), struct aes_gcm_key_aesni, base) -+#define AES_GCM_KEY_AESNI_SIZE \ -+ (sizeof(struct aes_gcm_key_aesni) + (15 & ~(CRYPTO_MINALIGN - 1))) -+ -+/* Key struct used by the VAES + AVX10 implementations of AES-GCM */ -+struct aes_gcm_key_avx10 { -+ /* -+ * Common part of the key. The assembly code prefers 16-byte alignment -+ * for the round keys; we get this by them being located at the start of -+ * the struct and the whole struct being 64-byte aligned. 
-+ */ -+ struct aes_gcm_key base; -+ -+ /* -+ * Powers of the hash key H^16 through H^1. These are 128-bit values. -+ * They all have an extra factor of x^-1 and are byte-reversed. This -+ * array is aligned to a 64-byte boundary to make it naturally aligned -+ * for 512-bit loads, which can improve performance. (The assembly code -+ * doesn't *need* the alignment; this is just an optimization.) -+ */ -+ u64 h_powers[16][2] __aligned(64); -+ -+ /* Three padding blocks required by the assembly code */ -+ u64 padding[3][2]; -+}; -+#define AES_GCM_KEY_AVX10(key) \ -+ container_of((key), struct aes_gcm_key_avx10, base) -+#define AES_GCM_KEY_AVX10_SIZE \ -+ (sizeof(struct aes_gcm_key_avx10) + (63 & ~(CRYPTO_MINALIGN - 1))) -+ -+/* -+ * These flags are passed to the AES-GCM helper functions to specify the -+ * specific version of AES-GCM (RFC4106 or not), whether it's encryption or -+ * decryption, and which assembly functions should be called. Assembly -+ * functions are selected using flags instead of function pointers to avoid -+ * indirect calls (which are very expensive on x86) regardless of inlining. -+ */ -+#define FLAG_RFC4106 BIT(0) -+#define FLAG_ENC BIT(1) -+#define FLAG_AVX BIT(2) -+#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) -+# define FLAG_AVX10_256 BIT(3) -+# define FLAG_AVX10_512 BIT(4) -+#else -+ /* -+ * This should cause all calls to the AVX10 assembly functions to be -+ * optimized out, avoiding the need to ifdef each call individually. 
-+ */ -+# define FLAG_AVX10_256 0 -+# define FLAG_AVX10_512 0 -+#endif -+ -+static inline struct aes_gcm_key * -+aes_gcm_key_get(struct crypto_aead *tfm, int flags) -+{ -+ if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) -+ return PTR_ALIGN(crypto_aead_ctx(tfm), 64); -+ else -+ return PTR_ALIGN(crypto_aead_ctx(tfm), 16); -+} -+ -+asmlinkage void -+aes_gcm_precompute_aesni(struct aes_gcm_key_aesni *key); -+asmlinkage void -+aes_gcm_precompute_aesni_avx(struct aes_gcm_key_aesni *key); -+asmlinkage void -+aes_gcm_precompute_vaes_avx10_256(struct aes_gcm_key_avx10 *key); -+asmlinkage void -+aes_gcm_precompute_vaes_avx10_512(struct aes_gcm_key_avx10 *key); -+ -+static void aes_gcm_precompute(struct aes_gcm_key *key, int flags) -+{ -+ /* -+ * To make things a bit easier on the assembly side, the AVX10 -+ * implementations use the same key format. Therefore, a single -+ * function using 256-bit vectors would suffice here. However, it's -+ * straightforward to provide a 512-bit one because of how the assembly -+ * code is structured, and it works nicely because the total size of the -+ * key powers is a multiple of 512 bits. So we take advantage of that. -+ * -+ * A similar situation applies to the AES-NI implementations. 
-+ */ -+ if (flags & FLAG_AVX10_512) -+ aes_gcm_precompute_vaes_avx10_512(AES_GCM_KEY_AVX10(key)); -+ else if (flags & FLAG_AVX10_256) -+ aes_gcm_precompute_vaes_avx10_256(AES_GCM_KEY_AVX10(key)); -+ else if (flags & FLAG_AVX) -+ aes_gcm_precompute_aesni_avx(AES_GCM_KEY_AESNI(key)); -+ else -+ aes_gcm_precompute_aesni(AES_GCM_KEY_AESNI(key)); -+} -+ -+asmlinkage void -+aes_gcm_aad_update_aesni(const struct aes_gcm_key_aesni *key, -+ u8 ghash_acc[16], const u8 *aad, int aadlen); -+asmlinkage void -+aes_gcm_aad_update_aesni_avx(const struct aes_gcm_key_aesni *key, -+ u8 ghash_acc[16], const u8 *aad, int aadlen); -+asmlinkage void -+aes_gcm_aad_update_vaes_avx10(const struct aes_gcm_key_avx10 *key, -+ u8 ghash_acc[16], const u8 *aad, int aadlen); -+ -+static void aes_gcm_aad_update(const struct aes_gcm_key *key, u8 ghash_acc[16], -+ const u8 *aad, int aadlen, int flags) -+{ -+ if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) -+ aes_gcm_aad_update_vaes_avx10(AES_GCM_KEY_AVX10(key), ghash_acc, -+ aad, aadlen); -+ else if (flags & FLAG_AVX) -+ aes_gcm_aad_update_aesni_avx(AES_GCM_KEY_AESNI(key), ghash_acc, -+ aad, aadlen); -+ else -+ aes_gcm_aad_update_aesni(AES_GCM_KEY_AESNI(key), ghash_acc, -+ aad, aadlen); -+} -+ -+asmlinkage void -+aes_gcm_enc_update_aesni(const struct aes_gcm_key_aesni *key, -+ const u32 le_ctr[4], u8 ghash_acc[16], -+ const u8 *src, u8 *dst, int datalen); -+asmlinkage void -+aes_gcm_enc_update_aesni_avx(const struct aes_gcm_key_aesni *key, -+ const u32 le_ctr[4], u8 ghash_acc[16], -+ const u8 *src, u8 *dst, int datalen); -+asmlinkage void -+aes_gcm_enc_update_vaes_avx10_256(const struct aes_gcm_key_avx10 *key, -+ const u32 le_ctr[4], u8 ghash_acc[16], -+ const u8 *src, u8 *dst, int datalen); -+asmlinkage void -+aes_gcm_enc_update_vaes_avx10_512(const struct aes_gcm_key_avx10 *key, -+ const u32 le_ctr[4], u8 ghash_acc[16], -+ const u8 *src, u8 *dst, int datalen); -+ -+asmlinkage void -+aes_gcm_dec_update_aesni(const struct aes_gcm_key_aesni *key, -+ 
const u32 le_ctr[4], u8 ghash_acc[16], -+ const u8 *src, u8 *dst, int datalen); -+asmlinkage void -+aes_gcm_dec_update_aesni_avx(const struct aes_gcm_key_aesni *key, -+ const u32 le_ctr[4], u8 ghash_acc[16], -+ const u8 *src, u8 *dst, int datalen); -+asmlinkage void -+aes_gcm_dec_update_vaes_avx10_256(const struct aes_gcm_key_avx10 *key, -+ const u32 le_ctr[4], u8 ghash_acc[16], -+ const u8 *src, u8 *dst, int datalen); -+asmlinkage void -+aes_gcm_dec_update_vaes_avx10_512(const struct aes_gcm_key_avx10 *key, -+ const u32 le_ctr[4], u8 ghash_acc[16], -+ const u8 *src, u8 *dst, int datalen); -+ -+/* __always_inline to optimize out the branches based on @flags */ -+static __always_inline void -+aes_gcm_update(const struct aes_gcm_key *key, -+ const u32 le_ctr[4], u8 ghash_acc[16], -+ const u8 *src, u8 *dst, int datalen, int flags) -+{ -+ if (flags & FLAG_ENC) { -+ if (flags & FLAG_AVX10_512) -+ aes_gcm_enc_update_vaes_avx10_512(AES_GCM_KEY_AVX10(key), -+ le_ctr, ghash_acc, -+ src, dst, datalen); -+ else if (flags & FLAG_AVX10_256) -+ aes_gcm_enc_update_vaes_avx10_256(AES_GCM_KEY_AVX10(key), -+ le_ctr, ghash_acc, -+ src, dst, datalen); -+ else if (flags & FLAG_AVX) -+ aes_gcm_enc_update_aesni_avx(AES_GCM_KEY_AESNI(key), -+ le_ctr, ghash_acc, -+ src, dst, datalen); -+ else -+ aes_gcm_enc_update_aesni(AES_GCM_KEY_AESNI(key), le_ctr, -+ ghash_acc, src, dst, datalen); -+ } else { -+ if (flags & FLAG_AVX10_512) -+ aes_gcm_dec_update_vaes_avx10_512(AES_GCM_KEY_AVX10(key), -+ le_ctr, ghash_acc, -+ src, dst, datalen); -+ else if (flags & FLAG_AVX10_256) -+ aes_gcm_dec_update_vaes_avx10_256(AES_GCM_KEY_AVX10(key), -+ le_ctr, ghash_acc, -+ src, dst, datalen); -+ else if (flags & FLAG_AVX) -+ aes_gcm_dec_update_aesni_avx(AES_GCM_KEY_AESNI(key), -+ le_ctr, ghash_acc, -+ src, dst, datalen); -+ else -+ aes_gcm_dec_update_aesni(AES_GCM_KEY_AESNI(key), -+ le_ctr, ghash_acc, -+ src, dst, datalen); -+ } -+} -+ -+asmlinkage void -+aes_gcm_enc_final_aesni(const struct aes_gcm_key_aesni 
*key, -+ const u32 le_ctr[4], u8 ghash_acc[16], -+ u64 total_aadlen, u64 total_datalen); -+asmlinkage void -+aes_gcm_enc_final_aesni_avx(const struct aes_gcm_key_aesni *key, -+ const u32 le_ctr[4], u8 ghash_acc[16], -+ u64 total_aadlen, u64 total_datalen); -+asmlinkage void -+aes_gcm_enc_final_vaes_avx10(const struct aes_gcm_key_avx10 *key, -+ const u32 le_ctr[4], u8 ghash_acc[16], -+ u64 total_aadlen, u64 total_datalen); -+ -+/* __always_inline to optimize out the branches based on @flags */ -+static __always_inline void -+aes_gcm_enc_final(const struct aes_gcm_key *key, -+ const u32 le_ctr[4], u8 ghash_acc[16], -+ u64 total_aadlen, u64 total_datalen, int flags) -+{ -+ if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) -+ aes_gcm_enc_final_vaes_avx10(AES_GCM_KEY_AVX10(key), -+ le_ctr, ghash_acc, -+ total_aadlen, total_datalen); -+ else if (flags & FLAG_AVX) -+ aes_gcm_enc_final_aesni_avx(AES_GCM_KEY_AESNI(key), -+ le_ctr, ghash_acc, -+ total_aadlen, total_datalen); -+ else -+ aes_gcm_enc_final_aesni(AES_GCM_KEY_AESNI(key), -+ le_ctr, ghash_acc, -+ total_aadlen, total_datalen); -+} -+ -+asmlinkage bool __must_check -+aes_gcm_dec_final_aesni(const struct aes_gcm_key_aesni *key, -+ const u32 le_ctr[4], const u8 ghash_acc[16], -+ u64 total_aadlen, u64 total_datalen, -+ const u8 tag[16], int taglen); -+asmlinkage bool __must_check -+aes_gcm_dec_final_aesni_avx(const struct aes_gcm_key_aesni *key, -+ const u32 le_ctr[4], const u8 ghash_acc[16], -+ u64 total_aadlen, u64 total_datalen, -+ const u8 tag[16], int taglen); -+asmlinkage bool __must_check -+aes_gcm_dec_final_vaes_avx10(const struct aes_gcm_key_avx10 *key, -+ const u32 le_ctr[4], const u8 ghash_acc[16], -+ u64 total_aadlen, u64 total_datalen, -+ const u8 tag[16], int taglen); -+ -+/* __always_inline to optimize out the branches based on @flags */ -+static __always_inline bool __must_check -+aes_gcm_dec_final(const struct aes_gcm_key *key, const u32 le_ctr[4], -+ u8 ghash_acc[16], u64 total_aadlen, u64 total_datalen, 
-+ u8 tag[16], int taglen, int flags) -+{ -+ if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) -+ return aes_gcm_dec_final_vaes_avx10(AES_GCM_KEY_AVX10(key), -+ le_ctr, ghash_acc, -+ total_aadlen, total_datalen, -+ tag, taglen); -+ else if (flags & FLAG_AVX) -+ return aes_gcm_dec_final_aesni_avx(AES_GCM_KEY_AESNI(key), -+ le_ctr, ghash_acc, -+ total_aadlen, total_datalen, -+ tag, taglen); -+ else -+ return aes_gcm_dec_final_aesni(AES_GCM_KEY_AESNI(key), -+ le_ctr, ghash_acc, -+ total_aadlen, total_datalen, -+ tag, taglen); -+} -+ -+/* -+ * This is the Integrity Check Value (aka the authentication tag) length and can -+ * be 8, 12 or 16 bytes long. -+ */ -+static int common_rfc4106_set_authsize(struct crypto_aead *aead, -+ unsigned int authsize) -+{ -+ switch (authsize) { -+ case 8: -+ case 12: -+ case 16: -+ break; -+ default: -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+static int generic_gcmaes_set_authsize(struct crypto_aead *tfm, -+ unsigned int authsize) -+{ -+ switch (authsize) { -+ case 4: -+ case 8: -+ case 12: -+ case 13: -+ case 14: -+ case 15: -+ case 16: -+ break; -+ default: -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+/* -+ * This is the setkey function for the x86_64 implementations of AES-GCM. It -+ * saves the RFC4106 nonce if applicable, expands the AES key, and precomputes -+ * powers of the hash key. -+ * -+ * To comply with the crypto_aead API, this has to be usable in no-SIMD context. -+ * For that reason, this function includes a portable C implementation of the -+ * needed logic. However, the portable C implementation is very slow, taking -+ * about the same time as encrypting 37 KB of data. To be ready for users that -+ * may set a key even somewhat frequently, we therefore also include a SIMD -+ * assembly implementation, expanding the AES key using AES-NI and precomputing -+ * the hash key powers using PCLMULQDQ or VPCLMULQDQ. 
-+ */ -+static int gcm_setkey(struct crypto_aead *tfm, const u8 *raw_key, -+ unsigned int keylen, int flags) -+{ -+ struct aes_gcm_key *key = aes_gcm_key_get(tfm, flags); -+ int err; -+ -+ if (flags & FLAG_RFC4106) { -+ if (keylen < 4) -+ return -EINVAL; -+ keylen -= 4; -+ key->rfc4106_nonce = get_unaligned_be32(raw_key + keylen); -+ } -+ -+ /* The assembly code assumes the following offsets. */ -+ BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, base.aes_key.key_enc) != 0); -+ BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, base.aes_key.key_length) != 480); -+ BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers) != 496); -+ BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers_xored) != 624); -+ BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_times_x64) != 688); -+ BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, base.aes_key.key_enc) != 0); -+ BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, base.aes_key.key_length) != 480); -+ BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, h_powers) != 512); -+ BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, padding) != 768); -+ -+ if (likely(crypto_simd_usable())) { -+ err = aes_check_keylen(keylen); -+ if (err) -+ return err; -+ kernel_fpu_begin(); -+ aesni_set_key(&key->aes_key, raw_key, keylen); -+ aes_gcm_precompute(key, flags); -+ kernel_fpu_end(); -+ } else { -+ static const u8 x_to_the_minus1[16] __aligned(__alignof__(be128)) = { -+ [0] = 0xc2, [15] = 1 -+ }; -+ static const u8 x_to_the_63[16] __aligned(__alignof__(be128)) = { -+ [7] = 1, -+ }; -+ be128 h1 = {}; -+ be128 h; -+ int i; -+ -+ err = aes_expandkey(&key->aes_key, raw_key, keylen); -+ if (err) -+ return err; -+ -+ /* Encrypt the all-zeroes block to get the hash key H^1 */ -+ aes_encrypt(&key->aes_key, (u8 *)&h1, (u8 *)&h1); -+ -+ /* Compute H^1 * x^-1 */ -+ h = h1; -+ gf128mul_lle(&h, (const be128 *)x_to_the_minus1); -+ -+ /* Compute the needed key powers */ -+ if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) { -+ struct aes_gcm_key_avx10 *k = 
AES_GCM_KEY_AVX10(key); -+ -+ for (i = ARRAY_SIZE(k->h_powers) - 1; i >= 0; i--) { -+ k->h_powers[i][0] = be64_to_cpu(h.b); -+ k->h_powers[i][1] = be64_to_cpu(h.a); -+ gf128mul_lle(&h, &h1); -+ } -+ memset(k->padding, 0, sizeof(k->padding)); -+ } else { -+ struct aes_gcm_key_aesni *k = AES_GCM_KEY_AESNI(key); -+ -+ for (i = ARRAY_SIZE(k->h_powers) - 1; i >= 0; i--) { -+ k->h_powers[i][0] = be64_to_cpu(h.b); -+ k->h_powers[i][1] = be64_to_cpu(h.a); -+ k->h_powers_xored[i] = k->h_powers[i][0] ^ -+ k->h_powers[i][1]; -+ gf128mul_lle(&h, &h1); -+ } -+ gf128mul_lle(&h1, (const be128 *)x_to_the_63); -+ k->h_times_x64[0] = be64_to_cpu(h1.b); -+ k->h_times_x64[1] = be64_to_cpu(h1.a); -+ } -+ } -+ return 0; -+} -+ -+/* -+ * Initialize @ghash_acc, then pass all @assoclen bytes of associated data -+ * (a.k.a. additional authenticated data) from @sg_src through the GHASH update -+ * assembly function. kernel_fpu_begin() must have already been called. -+ */ -+static void gcm_process_assoc(const struct aes_gcm_key *key, u8 ghash_acc[16], -+ struct scatterlist *sg_src, unsigned int assoclen, -+ int flags) -+{ -+ struct scatter_walk walk; -+ /* -+ * The assembly function requires that the length of any non-last -+ * segment of associated data be a multiple of 16 bytes, so this -+ * function does the buffering needed to achieve that. 
-+ */ -+ unsigned int pos = 0; -+ u8 buf[16]; -+ -+ memset(ghash_acc, 0, 16); -+ scatterwalk_start(&walk, sg_src); -+ -+ while (assoclen) { -+ unsigned int len_this_page = scatterwalk_clamp(&walk, assoclen); -+ void *mapped = scatterwalk_map(&walk); -+ const void *src = mapped; -+ unsigned int len; -+ -+ assoclen -= len_this_page; -+ scatterwalk_advance(&walk, len_this_page); -+ if (unlikely(pos)) { -+ len = min(len_this_page, 16 - pos); -+ memcpy(&buf[pos], src, len); -+ pos += len; -+ src += len; -+ len_this_page -= len; -+ if (pos < 16) -+ goto next; -+ aes_gcm_aad_update(key, ghash_acc, buf, 16, flags); -+ pos = 0; -+ } -+ len = len_this_page; -+ if (unlikely(assoclen)) /* Not the last segment yet? */ -+ len = round_down(len, 16); -+ aes_gcm_aad_update(key, ghash_acc, src, len, flags); -+ src += len; -+ len_this_page -= len; -+ if (unlikely(len_this_page)) { -+ memcpy(buf, src, len_this_page); -+ pos = len_this_page; -+ } -+next: -+ scatterwalk_unmap(mapped); -+ scatterwalk_pagedone(&walk, 0, assoclen); -+ if (need_resched()) { -+ kernel_fpu_end(); -+ kernel_fpu_begin(); -+ } -+ } -+ if (unlikely(pos)) -+ aes_gcm_aad_update(key, ghash_acc, buf, pos, flags); -+} -+ -+ -+/* __always_inline to optimize out the branches based on @flags */ -+static __always_inline int -+gcm_crypt(struct aead_request *req, int flags) -+{ -+ struct crypto_aead *tfm = crypto_aead_reqtfm(req); -+ const struct aes_gcm_key *key = aes_gcm_key_get(tfm, flags); -+ unsigned int assoclen = req->assoclen; -+ struct skcipher_walk walk; -+ unsigned int nbytes; -+ u8 ghash_acc[16]; /* GHASH accumulator */ -+ u32 le_ctr[4]; /* Counter in little-endian format */ -+ int taglen; -+ int err; -+ -+ /* Initialize the counter and determine the associated data length. 
*/ -+ le_ctr[0] = 2; -+ if (flags & FLAG_RFC4106) { -+ if (unlikely(assoclen != 16 && assoclen != 20)) -+ return -EINVAL; -+ assoclen -= 8; -+ le_ctr[1] = get_unaligned_be32(req->iv + 4); -+ le_ctr[2] = get_unaligned_be32(req->iv + 0); -+ le_ctr[3] = key->rfc4106_nonce; /* already byte-swapped */ -+ } else { -+ le_ctr[1] = get_unaligned_be32(req->iv + 8); -+ le_ctr[2] = get_unaligned_be32(req->iv + 4); -+ le_ctr[3] = get_unaligned_be32(req->iv + 0); -+ } -+ -+ /* Begin walking through the plaintext or ciphertext. */ -+ if (flags & FLAG_ENC) -+ err = skcipher_walk_aead_encrypt(&walk, req, false); -+ else -+ err = skcipher_walk_aead_decrypt(&walk, req, false); -+ -+ /* -+ * Since the AES-GCM assembly code requires that at least three assembly -+ * functions be called to process any message (this is needed to support -+ * incremental updates cleanly), to reduce overhead we try to do all -+ * three calls in the same kernel FPU section if possible. We close the -+ * section and start a new one if there are multiple data segments or if -+ * rescheduling is needed while processing the associated data. -+ */ -+ kernel_fpu_begin(); -+ -+ /* Pass the associated data through GHASH. */ -+ gcm_process_assoc(key, ghash_acc, req->src, assoclen, flags); -+ -+ /* En/decrypt the data and pass the ciphertext through GHASH. */ -+ while ((nbytes = walk.nbytes) != 0) { -+ if (unlikely(nbytes < walk.total)) { -+ /* -+ * Non-last segment. In this case, the assembly -+ * function requires that the length be a multiple of 16 -+ * (AES_BLOCK_SIZE) bytes. The needed buffering of up -+ * to 16 bytes is handled by the skcipher_walk. Here we -+ * just need to round down to a multiple of 16. 
-+ */ -+ nbytes = round_down(nbytes, AES_BLOCK_SIZE); -+ aes_gcm_update(key, le_ctr, ghash_acc, -+ walk.src.virt.addr, walk.dst.virt.addr, -+ nbytes, flags); -+ le_ctr[0] += nbytes / AES_BLOCK_SIZE; -+ kernel_fpu_end(); -+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes); -+ kernel_fpu_begin(); -+ } else { -+ /* Last segment: process all remaining data. */ -+ aes_gcm_update(key, le_ctr, ghash_acc, -+ walk.src.virt.addr, walk.dst.virt.addr, -+ nbytes, flags); -+ err = skcipher_walk_done(&walk, 0); -+ /* -+ * The low word of the counter isn't used by the -+ * finalize, so there's no need to increment it here. -+ */ -+ } -+ } -+ if (err) -+ goto out; -+ -+ /* Finalize */ -+ taglen = crypto_aead_authsize(tfm); -+ if (flags & FLAG_ENC) { -+ /* Finish computing the auth tag. */ -+ aes_gcm_enc_final(key, le_ctr, ghash_acc, assoclen, -+ req->cryptlen, flags); -+ -+ /* Store the computed auth tag in the dst scatterlist. */ -+ scatterwalk_map_and_copy(ghash_acc, req->dst, req->assoclen + -+ req->cryptlen, taglen, 1); -+ } else { -+ unsigned int datalen = req->cryptlen - taglen; -+ u8 tag[16]; -+ -+ /* Get the transmitted auth tag from the src scatterlist. */ -+ scatterwalk_map_and_copy(tag, req->src, req->assoclen + datalen, -+ taglen, 0); -+ /* -+ * Finish computing the auth tag and compare it to the -+ * transmitted one. The assembly function does the actual tag -+ * comparison. Here, just check the boolean result. 
-+ */ -+ if (!aes_gcm_dec_final(key, le_ctr, ghash_acc, assoclen, -+ datalen, tag, taglen, flags)) -+ err = -EBADMSG; -+ } -+out: -+ kernel_fpu_end(); -+ return err; -+} -+ -+#define DEFINE_GCM_ALGS(suffix, flags, generic_driver_name, rfc_driver_name, \ -+ ctxsize, priority) \ -+ \ -+static int gcm_setkey_##suffix(struct crypto_aead *tfm, const u8 *raw_key, \ -+ unsigned int keylen) \ -+{ \ -+ return gcm_setkey(tfm, raw_key, keylen, (flags)); \ -+} \ -+ \ -+static int gcm_encrypt_##suffix(struct aead_request *req) \ -+{ \ -+ return gcm_crypt(req, (flags) | FLAG_ENC); \ -+} \ -+ \ -+static int gcm_decrypt_##suffix(struct aead_request *req) \ -+{ \ -+ return gcm_crypt(req, (flags)); \ -+} \ -+ \ -+static int rfc4106_setkey_##suffix(struct crypto_aead *tfm, const u8 *raw_key, \ -+ unsigned int keylen) \ -+{ \ -+ return gcm_setkey(tfm, raw_key, keylen, (flags) | FLAG_RFC4106); \ -+} \ -+ \ -+static int rfc4106_encrypt_##suffix(struct aead_request *req) \ -+{ \ -+ return gcm_crypt(req, (flags) | FLAG_RFC4106 | FLAG_ENC); \ -+} \ -+ \ -+static int rfc4106_decrypt_##suffix(struct aead_request *req) \ -+{ \ -+ return gcm_crypt(req, (flags) | FLAG_RFC4106); \ -+} \ -+ \ -+static struct aead_alg aes_gcm_algs_##suffix[] = { { \ -+ .setkey = gcm_setkey_##suffix, \ -+ .setauthsize = generic_gcmaes_set_authsize, \ -+ .encrypt = gcm_encrypt_##suffix, \ -+ .decrypt = gcm_decrypt_##suffix, \ -+ .ivsize = GCM_AES_IV_SIZE, \ -+ .chunksize = AES_BLOCK_SIZE, \ -+ .maxauthsize = 16, \ -+ .base = { \ -+ .cra_name = "__gcm(aes)", \ -+ .cra_driver_name = "__" generic_driver_name, \ -+ .cra_priority = (priority), \ -+ .cra_flags = CRYPTO_ALG_INTERNAL, \ -+ .cra_blocksize = 1, \ -+ .cra_ctxsize = (ctxsize), \ -+ .cra_module = THIS_MODULE, \ -+ }, \ -+}, { \ -+ .setkey = rfc4106_setkey_##suffix, \ -+ .setauthsize = common_rfc4106_set_authsize, \ -+ .encrypt = rfc4106_encrypt_##suffix, \ -+ .decrypt = rfc4106_decrypt_##suffix, \ -+ .ivsize = GCM_RFC4106_IV_SIZE, \ -+ .chunksize = 
AES_BLOCK_SIZE, \ -+ .maxauthsize = 16, \ -+ .base = { \ -+ .cra_name = "__rfc4106(gcm(aes))", \ -+ .cra_driver_name = "__" rfc_driver_name, \ -+ .cra_priority = (priority), \ -+ .cra_flags = CRYPTO_ALG_INTERNAL, \ -+ .cra_blocksize = 1, \ -+ .cra_ctxsize = (ctxsize), \ -+ .cra_module = THIS_MODULE, \ -+ }, \ -+} }; \ -+ \ -+static struct simd_aead_alg *aes_gcm_simdalgs_##suffix[2] \ -+ -+/* aes_gcm_algs_aesni */ -+DEFINE_GCM_ALGS(aesni, /* no flags */ 0, -+ "generic-gcm-aesni", "rfc4106-gcm-aesni", -+ AES_GCM_KEY_AESNI_SIZE, 400); -+ -+/* aes_gcm_algs_aesni_avx */ -+DEFINE_GCM_ALGS(aesni_avx, FLAG_AVX, -+ "generic-gcm-aesni-avx", "rfc4106-gcm-aesni-avx", -+ AES_GCM_KEY_AESNI_SIZE, 500); -+ -+#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) -+/* aes_gcm_algs_vaes_avx10_256 */ -+DEFINE_GCM_ALGS(vaes_avx10_256, FLAG_AVX10_256, -+ "generic-gcm-vaes-avx10_256", "rfc4106-gcm-vaes-avx10_256", -+ AES_GCM_KEY_AVX10_SIZE, 700); -+ -+/* aes_gcm_algs_vaes_avx10_512 */ -+DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512, -+ "generic-gcm-vaes-avx10_512", "rfc4106-gcm-vaes-avx10_512", -+ AES_GCM_KEY_AVX10_SIZE, 800); -+#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ -+ - /* - * This is a list of CPU models that are known to suffer from downclocking when -- * zmm registers (512-bit vectors) are used. On these CPUs, the AES-XTS -- * implementation with zmm registers won't be used by default. An -- * implementation with ymm registers (256-bit vectors) will be used instead. -+ * zmm registers (512-bit vectors) are used. On these CPUs, the AES mode -+ * implementations with zmm registers won't be used by default. Implementations -+ * with ymm registers (256-bit vectors) will be used by default instead. 
- */ - static const struct x86_cpu_id zmm_exclusion_list[] = { - X86_MATCH_VFM(INTEL_SKYLAKE_X, 0), -@@ -1236,7 +1559,7 @@ static const struct x86_cpu_id zmm_exclusion_list[] = { - {}, - }; - --static int __init register_xts_algs(void) -+static int __init register_avx_algs(void) - { - int err; - -@@ -1246,6 +1569,11 @@ static int __init register_xts_algs(void) - &aes_xts_simdalg_aesni_avx); - if (err) - return err; -+ err = simd_register_aeads_compat(aes_gcm_algs_aesni_avx, -+ ARRAY_SIZE(aes_gcm_algs_aesni_avx), -+ aes_gcm_simdalgs_aesni_avx); -+ if (err) -+ return err; - #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) - if (!boot_cpu_has(X86_FEATURE_AVX2) || - !boot_cpu_has(X86_FEATURE_VAES) || -@@ -1269,23 +1597,42 @@ static int __init register_xts_algs(void) - &aes_xts_simdalg_vaes_avx10_256); - if (err) - return err; -+ err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_256, -+ ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256), -+ aes_gcm_simdalgs_vaes_avx10_256); -+ if (err) -+ return err; -+ -+ if (x86_match_cpu(zmm_exclusion_list)) { -+ int i; - -- if (x86_match_cpu(zmm_exclusion_list)) - aes_xts_alg_vaes_avx10_512.base.cra_priority = 1; -+ for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512); i++) -+ aes_gcm_algs_vaes_avx10_512[i].base.cra_priority = 1; -+ } - - err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_512, 1, - &aes_xts_simdalg_vaes_avx10_512); - if (err) - return err; -+ err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_512, -+ ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512), -+ aes_gcm_simdalgs_vaes_avx10_512); -+ if (err) -+ return err; - #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ - return 0; - } - --static void unregister_xts_algs(void) -+static void unregister_avx_algs(void) - { - if (aes_xts_simdalg_aesni_avx) - simd_unregister_skciphers(&aes_xts_alg_aesni_avx, 1, - &aes_xts_simdalg_aesni_avx); -+ if (aes_gcm_simdalgs_aesni_avx[0]) -+ simd_unregister_aeads(aes_gcm_algs_aesni_avx, -+ 
ARRAY_SIZE(aes_gcm_algs_aesni_avx), -+ aes_gcm_simdalgs_aesni_avx); - #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) - if (aes_xts_simdalg_vaes_avx2) - simd_unregister_skciphers(&aes_xts_alg_vaes_avx2, 1, -@@ -1293,106 +1640,33 @@ static void unregister_xts_algs(void) - if (aes_xts_simdalg_vaes_avx10_256) - simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_256, 1, - &aes_xts_simdalg_vaes_avx10_256); -+ if (aes_gcm_simdalgs_vaes_avx10_256[0]) -+ simd_unregister_aeads(aes_gcm_algs_vaes_avx10_256, -+ ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256), -+ aes_gcm_simdalgs_vaes_avx10_256); - if (aes_xts_simdalg_vaes_avx10_512) - simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_512, 1, - &aes_xts_simdalg_vaes_avx10_512); -+ if (aes_gcm_simdalgs_vaes_avx10_512[0]) -+ simd_unregister_aeads(aes_gcm_algs_vaes_avx10_512, -+ ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512), -+ aes_gcm_simdalgs_vaes_avx10_512); - #endif - } - #else /* CONFIG_X86_64 */ --static int __init register_xts_algs(void) -+static struct aead_alg aes_gcm_algs_aesni[0]; -+static struct simd_aead_alg *aes_gcm_simdalgs_aesni[0]; -+ -+static int __init register_avx_algs(void) - { - return 0; - } - --static void unregister_xts_algs(void) -+static void unregister_avx_algs(void) - { - } - #endif /* !CONFIG_X86_64 */ - --#ifdef CONFIG_X86_64 --static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key, -- unsigned int key_len) --{ -- struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(aead); -- -- return aes_set_key_common(&ctx->aes_key_expanded, key, key_len) ?: -- aes_gcm_derive_hash_subkey(&ctx->aes_key_expanded, -- ctx->hash_subkey); --} -- --static int generic_gcmaes_encrypt(struct aead_request *req) --{ -- struct crypto_aead *tfm = crypto_aead_reqtfm(req); -- struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm); -- void *aes_ctx = &(ctx->aes_key_expanded); -- u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); -- u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); -- __be32 counter = 
cpu_to_be32(1); -- -- memcpy(iv, req->iv, 12); -- *((__be32 *)(iv+12)) = counter; -- -- return gcmaes_encrypt(req, req->assoclen, ctx->hash_subkey, iv, -- aes_ctx); --} -- --static int generic_gcmaes_decrypt(struct aead_request *req) --{ -- __be32 counter = cpu_to_be32(1); -- struct crypto_aead *tfm = crypto_aead_reqtfm(req); -- struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm); -- void *aes_ctx = &(ctx->aes_key_expanded); -- u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); -- u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); -- -- memcpy(iv, req->iv, 12); -- *((__be32 *)(iv+12)) = counter; -- -- return gcmaes_decrypt(req, req->assoclen, ctx->hash_subkey, iv, -- aes_ctx); --} -- --static struct aead_alg aesni_aeads[] = { { -- .setkey = common_rfc4106_set_key, -- .setauthsize = common_rfc4106_set_authsize, -- .encrypt = helper_rfc4106_encrypt, -- .decrypt = helper_rfc4106_decrypt, -- .ivsize = GCM_RFC4106_IV_SIZE, -- .maxauthsize = 16, -- .base = { -- .cra_name = "__rfc4106(gcm(aes))", -- .cra_driver_name = "__rfc4106-gcm-aesni", -- .cra_priority = 400, -- .cra_flags = CRYPTO_ALG_INTERNAL, -- .cra_blocksize = 1, -- .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx), -- .cra_alignmask = 0, -- .cra_module = THIS_MODULE, -- }, --}, { -- .setkey = generic_gcmaes_set_key, -- .setauthsize = generic_gcmaes_set_authsize, -- .encrypt = generic_gcmaes_encrypt, -- .decrypt = generic_gcmaes_decrypt, -- .ivsize = GCM_AES_IV_SIZE, -- .maxauthsize = 16, -- .base = { -- .cra_name = "__gcm(aes)", -- .cra_driver_name = "__generic-gcm-aesni", -- .cra_priority = 400, -- .cra_flags = CRYPTO_ALG_INTERNAL, -- .cra_blocksize = 1, -- .cra_ctxsize = sizeof(struct generic_gcmaes_ctx), -- .cra_alignmask = 0, -- .cra_module = THIS_MODULE, -- }, --} }; --#else --static struct aead_alg aesni_aeads[0]; --#endif -- --static struct simd_aead_alg *aesni_simd_aeads[ARRAY_SIZE(aesni_aeads)]; -- - static const struct x86_cpu_id aesni_cpu_id[] = { - X86_MATCH_FEATURE(X86_FEATURE_AES, NULL), - {} -@@ 
-1406,17 +1680,6 @@ static int __init aesni_init(void) - if (!x86_match_cpu(aesni_cpu_id)) - return -ENODEV; - #ifdef CONFIG_X86_64 -- if (boot_cpu_has(X86_FEATURE_AVX2)) { -- pr_info("AVX2 version of gcm_enc/dec engaged.\n"); -- static_branch_enable(&gcm_use_avx); -- static_branch_enable(&gcm_use_avx2); -- } else -- if (boot_cpu_has(X86_FEATURE_AVX)) { -- pr_info("AVX version of gcm_enc/dec engaged.\n"); -- static_branch_enable(&gcm_use_avx); -- } else { -- pr_info("SSE version of gcm_enc/dec engaged.\n"); -- } - if (boot_cpu_has(X86_FEATURE_AVX)) { - /* optimize performance of ctr mode encryption transform */ - static_call_update(aesni_ctr_enc_tfm, aesni_ctr_enc_avx_tfm); -@@ -1434,8 +1697,9 @@ static int __init aesni_init(void) - if (err) - goto unregister_cipher; - -- err = simd_register_aeads_compat(aesni_aeads, ARRAY_SIZE(aesni_aeads), -- aesni_simd_aeads); -+ err = simd_register_aeads_compat(aes_gcm_algs_aesni, -+ ARRAY_SIZE(aes_gcm_algs_aesni), -+ aes_gcm_simdalgs_aesni); - if (err) - goto unregister_skciphers; - -@@ -1447,22 +1711,22 @@ static int __init aesni_init(void) - goto unregister_aeads; - #endif /* CONFIG_X86_64 */ - -- err = register_xts_algs(); -+ err = register_avx_algs(); - if (err) -- goto unregister_xts; -+ goto unregister_avx; - - return 0; - --unregister_xts: -- unregister_xts_algs(); -+unregister_avx: -+ unregister_avx_algs(); - #ifdef CONFIG_X86_64 - if (aesni_simd_xctr) - simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr); - unregister_aeads: - #endif /* CONFIG_X86_64 */ -- simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads), -- aesni_simd_aeads); -- -+ simd_unregister_aeads(aes_gcm_algs_aesni, -+ ARRAY_SIZE(aes_gcm_algs_aesni), -+ aes_gcm_simdalgs_aesni); - unregister_skciphers: - simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers), - aesni_simd_skciphers); -@@ -1473,8 +1737,9 @@ static int __init aesni_init(void) - - static void __exit aesni_exit(void) - { -- simd_unregister_aeads(aesni_aeads, 
ARRAY_SIZE(aesni_aeads), -- aesni_simd_aeads); -+ simd_unregister_aeads(aes_gcm_algs_aesni, -+ ARRAY_SIZE(aes_gcm_algs_aesni), -+ aes_gcm_simdalgs_aesni); - simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers), - aesni_simd_skciphers); - crypto_unregister_alg(&aesni_cipher_alg); -@@ -1482,7 +1747,7 @@ static void __exit aesni_exit(void) - if (boot_cpu_has(X86_FEATURE_AVX)) - simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr); - #endif /* CONFIG_X86_64 */ -- unregister_xts_algs(); -+ unregister_avx_algs(); - } - - late_initcall(aesni_init); --- -2.46.0.rc1 - -From 3a6187f4ef69fa4f0bf82ee5138e23bd83b85691 Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Mon, 15 Jul 2024 13:25:57 +0200 -Subject: [PATCH 06/11] fixes - -Signed-off-by: Peter Jung ---- - arch/Kconfig | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/arch/Kconfig b/arch/Kconfig -index 975dd22a2dbd..de69b8f5b5be 100644 ---- a/arch/Kconfig -+++ b/arch/Kconfig -@@ -1050,7 +1050,7 @@ config ARCH_MMAP_RND_BITS - int "Number of bits to use for ASLR of mmap base address" if EXPERT - range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX - default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT -- default ARCH_MMAP_RND_BITS_MIN -+ default ARCH_MMAP_RND_BITS_MAX - depends on HAVE_ARCH_MMAP_RND_BITS - help - This value can be used to select the number of bits to use to -@@ -1084,7 +1084,7 @@ config ARCH_MMAP_RND_COMPAT_BITS - int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT - range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX - default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT -- default ARCH_MMAP_RND_COMPAT_BITS_MIN -+ default ARCH_MMAP_RND_COMPAT_BITS_MAX - depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS - help - This value can be used to select the number of bits to use to --- -2.46.0.rc1 - -From 33ec19c577f867fff299c3b0ed6d84f14cdc23ad Mon Sep 17 00:00:00 2001 -From: 
Peter Jung -Date: Mon, 15 Jul 2024 13:26:09 +0200 -Subject: [PATCH 07/11] ksm - -Signed-off-by: Peter Jung ---- - arch/alpha/kernel/syscalls/syscall.tbl | 3 + - arch/arm/tools/syscall.tbl | 3 + - arch/arm64/include/asm/unistd.h | 2 +- - arch/arm64/include/asm/unistd32.h | 6 + - arch/m68k/kernel/syscalls/syscall.tbl | 3 + - arch/microblaze/kernel/syscalls/syscall.tbl | 3 + - arch/mips/kernel/syscalls/syscall_n32.tbl | 3 + - arch/mips/kernel/syscalls/syscall_n64.tbl | 3 + - arch/mips/kernel/syscalls/syscall_o32.tbl | 3 + - arch/parisc/kernel/syscalls/syscall.tbl | 3 + - arch/powerpc/kernel/syscalls/syscall.tbl | 3 + - arch/s390/kernel/syscalls/syscall.tbl | 3 + - arch/sh/kernel/syscalls/syscall.tbl | 3 + - arch/sparc/kernel/syscalls/syscall.tbl | 3 + - arch/x86/entry/syscalls/syscall_32.tbl | 3 + - arch/x86/entry/syscalls/syscall_64.tbl | 3 + - arch/xtensa/kernel/syscalls/syscall.tbl | 3 + - include/linux/syscalls.h | 3 + - include/uapi/asm-generic/unistd.h | 11 +- - kernel/sys.c | 147 ++++++++++++++++++++ - kernel/sys_ni.c | 3 + - 21 files changed, 215 insertions(+), 2 deletions(-) - -diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl -index 74720667fe09..e6a11f3c0a2e 100644 ---- a/arch/alpha/kernel/syscalls/syscall.tbl -+++ b/arch/alpha/kernel/syscalls/syscall.tbl -@@ -502,3 +502,6 @@ - 570 common lsm_set_self_attr sys_lsm_set_self_attr - 571 common lsm_list_modules sys_lsm_list_modules - 572 common mseal sys_mseal -+573 common process_ksm_enable sys_process_ksm_enable -+574 common process_ksm_disable sys_process_ksm_disable -+575 common process_ksm_status sys_process_ksm_status -diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl -index 2ed7d229c8f9..3f59e9c5c1ff 100644 ---- a/arch/arm/tools/syscall.tbl -+++ b/arch/arm/tools/syscall.tbl -@@ -476,3 +476,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common 
process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status -diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h -index 1346579f802f..f3a77719eb05 100644 ---- a/arch/arm64/include/asm/unistd.h -+++ b/arch/arm64/include/asm/unistd.h -@@ -39,7 +39,7 @@ - #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) - #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) - --#define __NR_compat_syscalls 463 -+#define __NR_compat_syscalls 466 - #endif - - #define __ARCH_WANT_SYS_CLONE -diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h -index 1386e8e751f2..ccdc523fa4bd 100644 ---- a/arch/arm64/include/asm/unistd32.h -+++ b/arch/arm64/include/asm/unistd32.h -@@ -931,6 +931,12 @@ __SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr) - __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) - #define __NR_mseal 462 - __SYSCALL(__NR_mseal, sys_mseal) -+#define __NR_process_ksm_enable 463 -+__SYSCALL(__NR_process_ksm_enable, sys_process_ksm_enable) -+#define __NR_process_ksm_disable 464 -+__SYSCALL(__NR_process_ksm_disable, sys_process_ksm_disable) -+#define __NR_process_ksm_status 465 -+__SYSCALL(__NR_process_ksm_status, sys_process_ksm_status) - - /* - * Please add new compat syscalls above this comment and update -diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl -index 22a3cbd4c602..12d2c7594bf0 100644 ---- a/arch/m68k/kernel/syscalls/syscall.tbl -+++ b/arch/m68k/kernel/syscalls/syscall.tbl -@@ -462,3 +462,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status -diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl 
b/arch/microblaze/kernel/syscalls/syscall.tbl -index 2b81a6bd78b2..e2a93c856eed 100644 ---- a/arch/microblaze/kernel/syscalls/syscall.tbl -+++ b/arch/microblaze/kernel/syscalls/syscall.tbl -@@ -468,3 +468,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status -diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl -index 953f5b7dc723..b921fbf56fa6 100644 ---- a/arch/mips/kernel/syscalls/syscall_n32.tbl -+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl -@@ -401,3 +401,6 @@ - 460 n32 lsm_set_self_attr sys_lsm_set_self_attr - 461 n32 lsm_list_modules sys_lsm_list_modules - 462 n32 mseal sys_mseal -+463 n32 process_ksm_enable sys_process_ksm_enable -+464 n32 process_ksm_disable sys_process_ksm_disable -+465 n32 process_ksm_status sys_process_ksm_status -diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl -index 1464c6be6eb3..8d7f9ddd66f4 100644 ---- a/arch/mips/kernel/syscalls/syscall_n64.tbl -+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl -@@ -377,3 +377,6 @@ - 460 n64 lsm_set_self_attr sys_lsm_set_self_attr - 461 n64 lsm_list_modules sys_lsm_list_modules - 462 n64 mseal sys_mseal -+463 n64 process_ksm_enable sys_process_ksm_enable -+464 n64 process_ksm_disable sys_process_ksm_disable -+465 n64 process_ksm_status sys_process_ksm_status -diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl -index 2439a2491cff..9d6142739954 100644 ---- a/arch/mips/kernel/syscalls/syscall_o32.tbl -+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl -@@ -450,3 +450,6 @@ - 460 o32 lsm_set_self_attr sys_lsm_set_self_attr - 461 o32 lsm_list_modules sys_lsm_list_modules - 462 o32 mseal sys_mseal -+463 o32 
process_ksm_enable sys_process_ksm_enable -+464 o32 process_ksm_disable sys_process_ksm_disable -+465 o32 process_ksm_status sys_process_ksm_status -diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl -index 66dc406b12e4..9d46476fd908 100644 ---- a/arch/parisc/kernel/syscalls/syscall.tbl -+++ b/arch/parisc/kernel/syscalls/syscall.tbl -@@ -461,3 +461,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status -diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl -index ebae8415dfbb..16f71bc2f6f0 100644 ---- a/arch/powerpc/kernel/syscalls/syscall.tbl -+++ b/arch/powerpc/kernel/syscalls/syscall.tbl -@@ -553,3 +553,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status -diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl -index 01071182763e..7394bad8178e 100644 ---- a/arch/s390/kernel/syscalls/syscall.tbl -+++ b/arch/s390/kernel/syscalls/syscall.tbl -@@ -465,3 +465,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status sys_process_ksm_status -diff --git a/arch/sh/kernel/syscalls/syscall.tbl 
b/arch/sh/kernel/syscalls/syscall.tbl -index c55fd7696d40..b9fc31221b87 100644 ---- a/arch/sh/kernel/syscalls/syscall.tbl -+++ b/arch/sh/kernel/syscalls/syscall.tbl -@@ -466,3 +466,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status -diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl -index cfdfb3707c16..0d79fd772854 100644 ---- a/arch/sparc/kernel/syscalls/syscall.tbl -+++ b/arch/sparc/kernel/syscalls/syscall.tbl -@@ -508,3 +508,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status -diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl -index d6ebcab1d8b2..ae5d147f05f2 100644 ---- a/arch/x86/entry/syscalls/syscall_32.tbl -+++ b/arch/x86/entry/syscalls/syscall_32.tbl -@@ -467,3 +467,6 @@ - 460 i386 lsm_set_self_attr sys_lsm_set_self_attr - 461 i386 lsm_list_modules sys_lsm_list_modules - 462 i386 mseal sys_mseal -+463 i386 process_ksm_enable sys_process_ksm_enable -+464 i386 process_ksm_disable sys_process_ksm_disable -+465 i386 process_ksm_status sys_process_ksm_status -diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl -index a396f6e6ab5b..472c23b39a70 100644 ---- a/arch/x86/entry/syscalls/syscall_64.tbl -+++ b/arch/x86/entry/syscalls/syscall_64.tbl -@@ -384,6 +384,9 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable 
sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status - - # - # Due to a historical design error, certain syscalls are numbered differently -diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl -index 67083fc1b2f5..c1aecee4ad9b 100644 ---- a/arch/xtensa/kernel/syscalls/syscall.tbl -+++ b/arch/xtensa/kernel/syscalls/syscall.tbl -@@ -433,3 +433,6 @@ - 460 common lsm_set_self_attr sys_lsm_set_self_attr - 461 common lsm_list_modules sys_lsm_list_modules - 462 common mseal sys_mseal -+463 common process_ksm_enable sys_process_ksm_enable -+464 common process_ksm_disable sys_process_ksm_disable -+465 common process_ksm_status sys_process_ksm_status -diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h -index fff820c3e93e..ab7d77ddc112 100644 ---- a/include/linux/syscalls.h -+++ b/include/linux/syscalls.h -@@ -818,6 +818,9 @@ asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior); - asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec, - size_t vlen, int behavior, unsigned int flags); - asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags); -+asmlinkage long sys_process_ksm_enable(int pidfd, unsigned int flags); -+asmlinkage long sys_process_ksm_disable(int pidfd, unsigned int flags); -+asmlinkage long sys_process_ksm_status(int pidfd, unsigned int flags); - asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, - unsigned long prot, unsigned long pgoff, - unsigned long flags); -diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h -index d4cc26932ff4..d191548f6326 100644 ---- a/include/uapi/asm-generic/unistd.h -+++ b/include/uapi/asm-generic/unistd.h -@@ -845,8 +845,17 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) - #define __NR_mseal 462 - __SYSCALL(__NR_mseal, sys_mseal) - -+#define __NR_process_ksm_enable 463 
-+__SYSCALL(__NR_process_ksm_enable, sys_process_ksm_enable) -+ -+#define __NR_process_ksm_disable 464 -+__SYSCALL(__NR_process_ksm_disable, sys_process_ksm_disable) -+ -+#define __NR_process_ksm_status 465 -+__SYSCALL(__NR_process_ksm_status, sys_process_ksm_status) -+ - #undef __NR_syscalls --#define __NR_syscalls 463 -+#define __NR_syscalls 466 - - /* - * 32 bit systems traditionally used different -diff --git a/kernel/sys.c b/kernel/sys.c -index 3a2df1bd9f64..86c6dd9d8c84 100644 ---- a/kernel/sys.c -+++ b/kernel/sys.c -@@ -2789,6 +2789,153 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, - return error; - } - -+#ifdef CONFIG_KSM -+enum pkc_action { -+ PKSM_ENABLE = 0, -+ PKSM_DISABLE, -+ PKSM_STATUS, -+}; -+ -+static long do_process_ksm_control(int pidfd, enum pkc_action action) -+{ -+ long ret; -+ struct pid *pid; -+ struct task_struct *task; -+ struct mm_struct *mm; -+ unsigned int f_flags; -+ -+ pid = pidfd_get_pid(pidfd, &f_flags); -+ if (IS_ERR(pid)) { -+ ret = PTR_ERR(pid); -+ goto out; -+ } -+ -+ task = get_pid_task(pid, PIDTYPE_PID); -+ if (!task) { -+ ret = -ESRCH; -+ goto put_pid; -+ } -+ -+ /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */ -+ mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); -+ if (IS_ERR_OR_NULL(mm)) { -+ ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH; -+ goto release_task; -+ } -+ -+ /* Require CAP_SYS_NICE for influencing process performance. 
*/ -+ if (!capable(CAP_SYS_NICE)) { -+ ret = -EPERM; -+ goto release_mm; -+ } -+ -+ if (mmap_write_lock_killable(mm)) { -+ ret = -EINTR; -+ goto release_mm; -+ } -+ -+ switch (action) { -+ case PKSM_ENABLE: -+ ret = ksm_enable_merge_any(mm); -+ break; -+ case PKSM_DISABLE: -+ ret = ksm_disable_merge_any(mm); -+ break; -+ case PKSM_STATUS: -+ ret = !!test_bit(MMF_VM_MERGE_ANY, &mm->flags); -+ break; -+ } -+ -+ mmap_write_unlock(mm); -+ -+release_mm: -+ mmput(mm); -+release_task: -+ put_task_struct(task); -+put_pid: -+ put_pid(pid); -+out: -+ return ret; -+} -+#endif /* CONFIG_KSM */ -+ -+SYSCALL_DEFINE2(process_ksm_enable, int, pidfd, unsigned int, flags) -+{ -+#ifdef CONFIG_KSM -+ if (flags != 0) -+ return -EINVAL; -+ -+ return do_process_ksm_control(pidfd, PKSM_ENABLE); -+#else /* CONFIG_KSM */ -+ return -ENOSYS; -+#endif /* CONFIG_KSM */ -+} -+ -+SYSCALL_DEFINE2(process_ksm_disable, int, pidfd, unsigned int, flags) -+{ -+#ifdef CONFIG_KSM -+ if (flags != 0) -+ return -EINVAL; -+ -+ return do_process_ksm_control(pidfd, PKSM_DISABLE); -+#else /* CONFIG_KSM */ -+ return -ENOSYS; -+#endif /* CONFIG_KSM */ -+} -+ -+SYSCALL_DEFINE2(process_ksm_status, int, pidfd, unsigned int, flags) -+{ -+#ifdef CONFIG_KSM -+ if (flags != 0) -+ return -EINVAL; -+ -+ return do_process_ksm_control(pidfd, PKSM_STATUS); -+#else /* CONFIG_KSM */ -+ return -ENOSYS; -+#endif /* CONFIG_KSM */ -+} -+ -+#ifdef CONFIG_KSM -+static ssize_t process_ksm_enable_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%u\n", __NR_process_ksm_enable); -+} -+static struct kobj_attribute process_ksm_enable_attr = __ATTR_RO(process_ksm_enable); -+ -+static ssize_t process_ksm_disable_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%u\n", __NR_process_ksm_disable); -+} -+static struct kobj_attribute process_ksm_disable_attr = __ATTR_RO(process_ksm_disable); -+ -+static ssize_t process_ksm_status_show(struct kobject 
*kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%u\n", __NR_process_ksm_status); -+} -+static struct kobj_attribute process_ksm_status_attr = __ATTR_RO(process_ksm_status); -+ -+static struct attribute *process_ksm_sysfs_attrs[] = { -+ &process_ksm_enable_attr.attr, -+ &process_ksm_disable_attr.attr, -+ &process_ksm_status_attr.attr, -+ NULL, -+}; -+ -+static const struct attribute_group process_ksm_sysfs_attr_group = { -+ .attrs = process_ksm_sysfs_attrs, -+ .name = "process_ksm", -+}; -+ -+static int __init process_ksm_sysfs_init(void) -+{ -+ return sysfs_create_group(kernel_kobj, &process_ksm_sysfs_attr_group); -+} -+subsys_initcall(process_ksm_sysfs_init); -+#endif /* CONFIG_KSM */ -+ - SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, - struct getcpu_cache __user *, unused) - { -diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c -index b696b85ac63e..cf7f3d841b1e 100644 ---- a/kernel/sys_ni.c -+++ b/kernel/sys_ni.c -@@ -188,6 +188,9 @@ COND_SYSCALL(mincore); - COND_SYSCALL(madvise); - COND_SYSCALL(process_madvise); - COND_SYSCALL(process_mrelease); -+COND_SYSCALL(process_ksm_enable); -+COND_SYSCALL(process_ksm_disable); -+COND_SYSCALL(process_ksm_status); - COND_SYSCALL(remap_file_pages); - COND_SYSCALL(mbind); - COND_SYSCALL(get_mempolicy); --- -2.46.0.rc1 - -From d0a6d18c3ce077b9b944a383d001bc4a8b907006 Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Mon, 15 Jul 2024 13:26:39 +0200 -Subject: [PATCH 08/11] ntsync - -Signed-off-by: Peter Jung ---- - Documentation/userspace-api/index.rst | 1 + - Documentation/userspace-api/ntsync.rst | 398 +++++ - MAINTAINERS | 9 + - drivers/misc/Kconfig | 1 - - drivers/misc/ntsync.c | 989 +++++++++++- - include/uapi/linux/ntsync.h | 39 + - tools/testing/selftests/Makefile | 1 + - .../selftests/drivers/ntsync/.gitignore | 1 + - .../testing/selftests/drivers/ntsync/Makefile | 7 + - tools/testing/selftests/drivers/ntsync/config | 1 + - 
.../testing/selftests/drivers/ntsync/ntsync.c | 1407 +++++++++++++++++ - 11 files changed, 2850 insertions(+), 4 deletions(-) - create mode 100644 Documentation/userspace-api/ntsync.rst - create mode 100644 tools/testing/selftests/drivers/ntsync/.gitignore - create mode 100644 tools/testing/selftests/drivers/ntsync/Makefile - create mode 100644 tools/testing/selftests/drivers/ntsync/config - create mode 100644 tools/testing/selftests/drivers/ntsync/ntsync.c - -diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst -index 8a251d71fa6e..02bea81fb4bf 100644 ---- a/Documentation/userspace-api/index.rst -+++ b/Documentation/userspace-api/index.rst -@@ -64,6 +64,7 @@ Everything else - vduse - futex2 - perf_ring_buffer -+ ntsync - - .. only:: subproject and html - -diff --git a/Documentation/userspace-api/ntsync.rst b/Documentation/userspace-api/ntsync.rst -new file mode 100644 -index 000000000000..767844637a7d ---- /dev/null -+++ b/Documentation/userspace-api/ntsync.rst -@@ -0,0 +1,398 @@ -+=================================== -+NT synchronization primitive driver -+=================================== -+ -+This page documents the user-space API for the ntsync driver. -+ -+ntsync is a support driver for emulation of NT synchronization -+primitives by user-space NT emulators. It exists because implementation -+in user-space, using existing tools, cannot match Windows performance -+while offering accurate semantics. It is implemented entirely in -+software, and does not drive any hardware device. -+ -+This interface is meant as a compatibility tool only, and should not -+be used for general synchronization. Instead use generic, versatile -+interfaces such as futex(2) and poll(2). -+ -+Synchronization primitives -+========================== -+ -+The ntsync driver exposes three types of synchronization primitives: -+semaphores, mutexes, and events. 
-+ -+A semaphore holds a single volatile 32-bit counter, and a static 32-bit -+integer denoting the maximum value. It is considered signaled (that is, -+can be acquired without contention, or will wake up a waiting thread) -+when the counter is nonzero. The counter is decremented by one when a -+wait is satisfied. Both the initial and maximum count are established -+when the semaphore is created. -+ -+A mutex holds a volatile 32-bit recursion count, and a volatile 32-bit -+identifier denoting its owner. A mutex is considered signaled when its -+owner is zero (indicating that it is not owned). The recursion count is -+incremented when a wait is satisfied, and ownership is set to the given -+identifier. -+ -+A mutex also holds an internal flag denoting whether its previous owner -+has died; such a mutex is said to be abandoned. Owner death is not -+tracked automatically based on thread death, but rather must be -+communicated using ``NTSYNC_IOC_MUTEX_KILL``. An abandoned mutex is -+inherently considered unowned. -+ -+Except for the "unowned" semantics of zero, the actual value of the -+owner identifier is not interpreted by the ntsync driver at all. The -+intended use is to store a thread identifier; however, the ntsync -+driver does not actually validate that a calling thread provides -+consistent or unique identifiers. -+ -+An event is similar to a semaphore with a maximum count of one. It holds -+a volatile boolean state denoting whether it is signaled or not. There -+are two types of events, auto-reset and manual-reset. An auto-reset -+event is designaled when a wait is satisfied; a manual-reset event is -+not. The event type is specified when the event is created. -+ -+Unless specified otherwise, all operations on an object are atomic and -+totally ordered with respect to other operations on the same object. -+ -+Objects are represented by files. When all file descriptors to an -+object are closed, that object is deleted. 
-+ -+Char device -+=========== -+ -+The ntsync driver creates a single char device /dev/ntsync. Each file -+description opened on the device represents a unique instance intended -+to back an individual NT virtual machine. Objects created by one ntsync -+instance may only be used with other objects created by the same -+instance. -+ -+ioctl reference -+=============== -+ -+All operations on the device are done through ioctls. There are four -+structures used in ioctl calls:: -+ -+ struct ntsync_sem_args { -+ __u32 sem; -+ __u32 count; -+ __u32 max; -+ }; -+ -+ struct ntsync_mutex_args { -+ __u32 mutex; -+ __u32 owner; -+ __u32 count; -+ }; -+ -+ struct ntsync_event_args { -+ __u32 event; -+ __u32 signaled; -+ __u32 manual; -+ }; -+ -+ struct ntsync_wait_args { -+ __u64 timeout; -+ __u64 objs; -+ __u32 count; -+ __u32 owner; -+ __u32 index; -+ __u32 alert; -+ __u32 flags; -+ __u32 pad; -+ }; -+ -+Depending on the ioctl, members of the structure may be used as input, -+output, or not at all. All ioctls return 0 on success. -+ -+The ioctls on the device file are as follows: -+ -+.. c:macro:: NTSYNC_IOC_CREATE_SEM -+ -+ Create a semaphore object. Takes a pointer to struct -+ :c:type:`ntsync_sem_args`, which is used as follows: -+ -+ .. list-table:: -+ -+ * - ``sem`` -+ - On output, contains a file descriptor to the created semaphore. -+ * - ``count`` -+ - Initial count of the semaphore. -+ * - ``max`` -+ - Maximum count of the semaphore. -+ -+ Fails with ``EINVAL`` if ``count`` is greater than ``max``. -+ -+.. c:macro:: NTSYNC_IOC_CREATE_MUTEX -+ -+ Create a mutex object. Takes a pointer to struct -+ :c:type:`ntsync_mutex_args`, which is used as follows: -+ -+ .. list-table:: -+ -+ * - ``mutex`` -+ - On output, contains a file descriptor to the created mutex. -+ * - ``count`` -+ - Initial recursion count of the mutex. -+ * - ``owner`` -+ - Initial owner of the mutex. 
-+ -+ If ``owner`` is nonzero and ``count`` is zero, or if ``owner`` is -+ zero and ``count`` is nonzero, the function fails with ``EINVAL``. -+ -+.. c:macro:: NTSYNC_IOC_CREATE_EVENT -+ -+ Create an event object. Takes a pointer to struct -+ :c:type:`ntsync_event_args`, which is used as follows: -+ -+ .. list-table:: -+ -+ * - ``event`` -+ - On output, contains a file descriptor to the created event. -+ * - ``signaled`` -+ - If nonzero, the event is initially signaled, otherwise -+ nonsignaled. -+ * - ``manual`` -+ - If nonzero, the event is a manual-reset event, otherwise -+ auto-reset. -+ -+The ioctls on the individual objects are as follows: -+ -+.. c:macro:: NTSYNC_IOC_SEM_POST -+ -+ Post to a semaphore object. Takes a pointer to a 32-bit integer, -+ which on input holds the count to be added to the semaphore, and on -+ output contains its previous count. -+ -+ If adding to the semaphore's current count would raise the latter -+ past the semaphore's maximum count, the ioctl fails with -+ ``EOVERFLOW`` and the semaphore is not affected. If raising the -+ semaphore's count causes it to become signaled, eligible threads -+ waiting on this semaphore will be woken and the semaphore's count -+ decremented appropriately. -+ -+.. c:macro:: NTSYNC_IOC_MUTEX_UNLOCK -+ -+ Release a mutex object. Takes a pointer to struct -+ :c:type:`ntsync_mutex_args`, which is used as follows: -+ -+ .. list-table:: -+ -+ * - ``mutex`` -+ - Ignored. -+ * - ``owner`` -+ - Specifies the owner trying to release this mutex. -+ * - ``count`` -+ - On output, contains the previous recursion count. -+ -+ If ``owner`` is zero, the ioctl fails with ``EINVAL``. If ``owner`` -+ is not the current owner of the mutex, the ioctl fails with -+ ``EPERM``. -+ -+ The mutex's count will be decremented by one. If decrementing the -+ mutex's count causes it to become zero, the mutex is marked as -+ unowned and signaled, and eligible threads waiting on it will be -+ woken as appropriate. -+ -+.. 
c:macro:: NTSYNC_IOC_SET_EVENT -+ -+ Signal an event object. Takes a pointer to a 32-bit integer, which on -+ output contains the previous state of the event. -+ -+ Eligible threads will be woken, and auto-reset events will be -+ designaled appropriately. -+ -+.. c:macro:: NTSYNC_IOC_RESET_EVENT -+ -+ Designal an event object. Takes a pointer to a 32-bit integer, which -+ on output contains the previous state of the event. -+ -+.. c:macro:: NTSYNC_IOC_PULSE_EVENT -+ -+ Wake threads waiting on an event object while leaving it in an -+ unsignaled state. Takes a pointer to a 32-bit integer, which on -+ output contains the previous state of the event. -+ -+ A pulse operation can be thought of as a set followed by a reset, -+ performed as a single atomic operation. If two threads are waiting on -+ an auto-reset event which is pulsed, only one will be woken. If two -+ threads are waiting a manual-reset event which is pulsed, both will -+ be woken. However, in both cases, the event will be unsignaled -+ afterwards, and a simultaneous read operation will always report the -+ event as unsignaled. -+ -+.. c:macro:: NTSYNC_IOC_READ_SEM -+ -+ Read the current state of a semaphore object. Takes a pointer to -+ struct :c:type:`ntsync_sem_args`, which is used as follows: -+ -+ .. list-table:: -+ -+ * - ``sem`` -+ - Ignored. -+ * - ``count`` -+ - On output, contains the current count of the semaphore. -+ * - ``max`` -+ - On output, contains the maximum count of the semaphore. -+ -+.. c:macro:: NTSYNC_IOC_READ_MUTEX -+ -+ Read the current state of a mutex object. Takes a pointer to struct -+ :c:type:`ntsync_mutex_args`, which is used as follows: -+ -+ .. list-table:: -+ -+ * - ``mutex`` -+ - Ignored. -+ * - ``owner`` -+ - On output, contains the current owner of the mutex, or zero -+ if the mutex is not currently owned. -+ * - ``count`` -+ - On output, contains the current recursion count of the mutex. 
-+ -+ If the mutex is marked as abandoned, the function fails with -+ ``EOWNERDEAD``. In this case, ``count`` and ``owner`` are set to -+ zero. -+ -+.. c:macro:: NTSYNC_IOC_READ_EVENT -+ -+ Read the current state of an event object. Takes a pointer to struct -+ :c:type:`ntsync_event_args`, which is used as follows: -+ -+ .. list-table:: -+ -+ * - ``event`` -+ - Ignored. -+ * - ``signaled`` -+ - On output, contains the current state of the event. -+ * - ``manual`` -+ - On output, contains 1 if the event is a manual-reset event, -+ and 0 otherwise. -+ -+.. c:macro:: NTSYNC_IOC_KILL_OWNER -+ -+ Mark a mutex as unowned and abandoned if it is owned by the given -+ owner. Takes an input-only pointer to a 32-bit integer denoting the -+ owner. If the owner is zero, the ioctl fails with ``EINVAL``. If the -+ owner does not own the mutex, the function fails with ``EPERM``. -+ -+ Eligible threads waiting on the mutex will be woken as appropriate -+ (and such waits will fail with ``EOWNERDEAD``, as described below). -+ -+.. c:macro:: NTSYNC_IOC_WAIT_ANY -+ -+ Poll on any of a list of objects, atomically acquiring at most one. -+ Takes a pointer to struct :c:type:`ntsync_wait_args`, which is -+ used as follows: -+ -+ .. list-table:: -+ -+ * - ``timeout`` -+ - Absolute timeout in nanoseconds. If ``NTSYNC_WAIT_REALTIME`` -+ is set, the timeout is measured against the REALTIME clock; -+ otherwise it is measured against the MONOTONIC clock. If the -+ timeout is equal to or earlier than the current time, the -+ function returns immediately without sleeping. If ``timeout`` -+ is U64_MAX, the function will sleep until an object is -+ signaled, and will not fail with ``ETIMEDOUT``. -+ * - ``objs`` -+ - Pointer to an array of ``count`` file descriptors -+ (specified as an integer so that the structure has the same -+ size regardless of architecture). If any object is -+ invalid, the function fails with ``EINVAL``. -+ * - ``count`` -+ - Number of objects specified in the ``objs`` array. 
-+ If greater than ``NTSYNC_MAX_WAIT_COUNT``, the function fails -+ with ``EINVAL``. -+ * - ``owner`` -+ - Mutex owner identifier. If any object in ``objs`` is a mutex, -+ the ioctl will attempt to acquire that mutex on behalf of -+ ``owner``. If ``owner`` is zero, the ioctl fails with -+ ``EINVAL``. -+ * - ``index`` -+ - On success, contains the index (into ``objs``) of the object -+ which was signaled. If ``alert`` was signaled instead, -+ this contains ``count``. -+ * - ``alert`` -+ - Optional event object file descriptor. If nonzero, this -+ specifies an "alert" event object which, if signaled, will -+ terminate the wait. If nonzero, the identifier must point to a -+ valid event. -+ * - ``flags`` -+ - Zero or more flags. Currently the only flag is -+ ``NTSYNC_WAIT_REALTIME``, which causes the timeout to be -+ measured against the REALTIME clock instead of MONOTONIC. -+ * - ``pad`` -+ - Unused, must be set to zero. -+ -+ This function attempts to acquire one of the given objects. If unable -+ to do so, it sleeps until an object becomes signaled, subsequently -+ acquiring it, or the timeout expires. In the latter case the ioctl -+ fails with ``ETIMEDOUT``. The function only acquires one object, even -+ if multiple objects are signaled. -+ -+ A semaphore is considered to be signaled if its count is nonzero, and -+ is acquired by decrementing its count by one. A mutex is considered -+ to be signaled if it is unowned or if its owner matches the ``owner`` -+ argument, and is acquired by incrementing its recursion count by one -+ and setting its owner to the ``owner`` argument. An auto-reset event -+ is acquired by designaling it; a manual-reset event is not affected -+ by acquisition. -+ -+ Acquisition is atomic and totally ordered with respect to other -+ operations on the same object. If two wait operations (with different -+ ``owner`` identifiers) are queued on the same mutex, only one is -+ signaled. 
If two wait operations are queued on the same semaphore, -+ and a value of one is posted to it, only one is signaled. -+ -+ If an abandoned mutex is acquired, the ioctl fails with -+ ``EOWNERDEAD``. Although this is a failure return, the function may -+ otherwise be considered successful. The mutex is marked as owned by -+ the given owner (with a recursion count of 1) and as no longer -+ abandoned, and ``index`` is still set to the index of the mutex. -+ -+ The ``alert`` argument is an "extra" event which can terminate the -+ wait, independently of all other objects. -+ -+ It is valid to pass the same object more than once, including by -+ passing the same event in the ``objs`` array and in ``alert``. If a -+ wakeup occurs due to that object being signaled, ``index`` is set to -+ the lowest index corresponding to that object. -+ -+ The function may fail with ``EINTR`` if a signal is received. -+ -+.. c:macro:: NTSYNC_IOC_WAIT_ALL -+ -+ Poll on a list of objects, atomically acquiring all of them. Takes a -+ pointer to struct :c:type:`ntsync_wait_args`, which is used -+ identically to ``NTSYNC_IOC_WAIT_ANY``, except that ``index`` is -+ always filled with zero on success if not woken via alert. -+ -+ This function attempts to simultaneously acquire all of the given -+ objects. If unable to do so, it sleeps until all objects become -+ simultaneously signaled, subsequently acquiring them, or the timeout -+ expires. In the latter case the ioctl fails with ``ETIMEDOUT`` and no -+ objects are modified. -+ -+ Objects may become signaled and subsequently designaled (through -+ acquisition by other threads) while this thread is sleeping. Only -+ once all objects are simultaneously signaled does the ioctl acquire -+ them and return. The entire acquisition is atomic and totally ordered -+ with respect to other operations on any of the given objects. -+ -+ If an abandoned mutex is acquired, the ioctl fails with -+ ``EOWNERDEAD``. 
Similarly to ``NTSYNC_IOC_WAIT_ANY``, all objects are -+ nevertheless marked as acquired. Note that if multiple mutex objects -+ are specified, there is no way to know which were marked as -+ abandoned. -+ -+ As with "any" waits, the ``alert`` argument is an "extra" event which -+ can terminate the wait. Critically, however, an "all" wait will -+ succeed if all members in ``objs`` are signaled, *or* if ``alert`` is -+ signaled. In the latter case ``index`` will be set to ``count``. As -+ with "any" waits, if both conditions are filled, the former takes -+ priority, and objects in ``objs`` will be acquired. -+ -+ Unlike ``NTSYNC_IOC_WAIT_ANY``, it is not valid to pass the same -+ object more than once, nor is it valid to pass the same object in -+ ``objs`` and in ``alert``. If this is attempted, the function fails -+ with ``EINVAL``. -diff --git a/MAINTAINERS b/MAINTAINERS -index 958e935449e5..b25b2a731512 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -15976,6 +15976,15 @@ T: git https://github.com/Paragon-Software-Group/linux-ntfs3.git - F: Documentation/filesystems/ntfs3.rst - F: fs/ntfs3/ - -+NTSYNC SYNCHRONIZATION PRIMITIVE DRIVER -+M: Elizabeth Figura -+L: wine-devel@winehq.org -+S: Supported -+F: Documentation/userspace-api/ntsync.rst -+F: drivers/misc/ntsync.c -+F: include/uapi/linux/ntsync.h -+F: tools/testing/selftests/drivers/ntsync/ -+ - NUBUS SUBSYSTEM - M: Finn Thain - L: linux-m68k@lists.linux-m68k.org -diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig -index faf983680040..2907b5c23368 100644 ---- a/drivers/misc/Kconfig -+++ b/drivers/misc/Kconfig -@@ -507,7 +507,6 @@ config OPEN_DICE - - config NTSYNC - tristate "NT synchronization primitive emulation" -- depends on BROKEN - help - This module provides kernel support for emulation of Windows NT - synchronization primitives. It is not a hardware driver. 
-diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c -index 3c2f743c58b0..87a24798a5c7 100644 ---- a/drivers/misc/ntsync.c -+++ b/drivers/misc/ntsync.c -@@ -6,11 +6,17 @@ - */ - - #include -+#include - #include - #include -+#include -+#include - #include - #include -+#include - #include -+#include -+#include - #include - #include - #include -@@ -19,6 +25,8 @@ - - enum ntsync_type { - NTSYNC_TYPE_SEM, -+ NTSYNC_TYPE_MUTEX, -+ NTSYNC_TYPE_EVENT, - }; - - /* -@@ -30,10 +38,13 @@ enum ntsync_type { - * - * Both rely on struct file for reference counting. Individual - * ntsync_obj objects take a reference to the device when created. -+ * Wait operations take a reference to each object being waited on for -+ * the duration of the wait. - */ - - struct ntsync_obj { - spinlock_t lock; -+ int dev_locked; - - enum ntsync_type type; - -@@ -46,13 +57,335 @@ struct ntsync_obj { - __u32 count; - __u32 max; - } sem; -+ struct { -+ __u32 count; -+ pid_t owner; -+ bool ownerdead; -+ } mutex; -+ struct { -+ bool manual; -+ bool signaled; -+ } event; - } u; -+ -+ /* -+ * any_waiters is protected by the object lock, but all_waiters is -+ * protected by the device wait_all_lock. -+ */ -+ struct list_head any_waiters; -+ struct list_head all_waiters; -+ -+ /* -+ * Hint describing how many tasks are queued on this object in a -+ * wait-all operation. -+ * -+ * Any time we do a wake, we may need to wake "all" waiters as well as -+ * "any" waiters. In order to atomically wake "all" waiters, we must -+ * lock all of the objects, and that means grabbing the wait_all_lock -+ * below (and, due to lock ordering rules, before locking this object). -+ * However, wait-all is a rare operation, and grabbing the wait-all -+ * lock for every wake would create unnecessary contention. -+ * Therefore we first check whether all_hint is zero, and, if it is, -+ * we skip trying to wake "all" waiters. 
-+ * -+ * Since wait requests must originate from user-space threads, we're -+ * limited here by PID_MAX_LIMIT, so there's no risk of overflow. -+ */ -+ atomic_t all_hint; -+}; -+ -+struct ntsync_q_entry { -+ struct list_head node; -+ struct ntsync_q *q; -+ struct ntsync_obj *obj; -+ __u32 index; -+}; -+ -+struct ntsync_q { -+ struct task_struct *task; -+ __u32 owner; -+ -+ /* -+ * Protected via atomic_try_cmpxchg(). Only the thread that wins the -+ * compare-and-swap may actually change object states and wake this -+ * task. -+ */ -+ atomic_t signaled; -+ -+ bool all; -+ bool ownerdead; -+ __u32 count; -+ struct ntsync_q_entry entries[]; - }; - - struct ntsync_device { -+ /* -+ * Wait-all operations must atomically grab all objects, and be totally -+ * ordered with respect to each other and wait-any operations. -+ * If one thread is trying to acquire several objects, another thread -+ * cannot touch the object at the same time. -+ * -+ * This device-wide lock is used to serialize wait-for-all -+ * operations, and operations on an object that is involved in a -+ * wait-for-all. -+ */ -+ struct mutex wait_all_lock; -+ - struct file *file; - }; - -+/* -+ * Single objects are locked using obj->lock. -+ * -+ * Multiple objects are 'locked' while holding dev->wait_all_lock. -+ * In this case however, individual objects are not locked by holding -+ * obj->lock, but by setting obj->dev_locked. -+ * -+ * This means that in order to lock a single object, the sequence is slightly -+ * more complicated than usual. Specifically it needs to check obj->dev_locked -+ * after acquiring obj->lock, if set, it needs to drop the lock and acquire -+ * dev->wait_all_lock in order to serialize against the multi-object operation. 
-+ */ -+ -+static void dev_lock_obj(struct ntsync_device *dev, struct ntsync_obj *obj) -+{ -+ lockdep_assert_held(&dev->wait_all_lock); -+ lockdep_assert(obj->dev == dev); -+ spin_lock(&obj->lock); -+ /* -+ * By setting obj->dev_locked inside obj->lock, it is ensured that -+ * anyone holding obj->lock must see the value. -+ */ -+ obj->dev_locked = 1; -+ spin_unlock(&obj->lock); -+} -+ -+static void dev_unlock_obj(struct ntsync_device *dev, struct ntsync_obj *obj) -+{ -+ lockdep_assert_held(&dev->wait_all_lock); -+ lockdep_assert(obj->dev == dev); -+ spin_lock(&obj->lock); -+ obj->dev_locked = 0; -+ spin_unlock(&obj->lock); -+} -+ -+static void obj_lock(struct ntsync_obj *obj) -+{ -+ struct ntsync_device *dev = obj->dev; -+ -+ for (;;) { -+ spin_lock(&obj->lock); -+ if (likely(!obj->dev_locked)) -+ break; -+ -+ spin_unlock(&obj->lock); -+ mutex_lock(&dev->wait_all_lock); -+ spin_lock(&obj->lock); -+ /* -+ * obj->dev_locked should be set and released under the same -+ * wait_all_lock section, since we now own this lock, it should -+ * be clear. 
-+ */ -+ lockdep_assert(!obj->dev_locked); -+ spin_unlock(&obj->lock); -+ mutex_unlock(&dev->wait_all_lock); -+ } -+} -+ -+static void obj_unlock(struct ntsync_obj *obj) -+{ -+ spin_unlock(&obj->lock); -+} -+ -+static bool ntsync_lock_obj(struct ntsync_device *dev, struct ntsync_obj *obj) -+{ -+ bool all; -+ -+ obj_lock(obj); -+ all = atomic_read(&obj->all_hint); -+ if (unlikely(all)) { -+ obj_unlock(obj); -+ mutex_lock(&dev->wait_all_lock); -+ dev_lock_obj(dev, obj); -+ } -+ -+ return all; -+} -+ -+static void ntsync_unlock_obj(struct ntsync_device *dev, struct ntsync_obj *obj, bool all) -+{ -+ if (all) { -+ dev_unlock_obj(dev, obj); -+ mutex_unlock(&dev->wait_all_lock); -+ } else { -+ obj_unlock(obj); -+ } -+} -+ -+#define ntsync_assert_held(obj) \ -+ lockdep_assert((lockdep_is_held(&(obj)->lock) != LOCK_STATE_NOT_HELD) || \ -+ ((lockdep_is_held(&(obj)->dev->wait_all_lock) != LOCK_STATE_NOT_HELD) && \ -+ (obj)->dev_locked)) -+ -+static bool is_signaled(struct ntsync_obj *obj, __u32 owner) -+{ -+ ntsync_assert_held(obj); -+ -+ switch (obj->type) { -+ case NTSYNC_TYPE_SEM: -+ return !!obj->u.sem.count; -+ case NTSYNC_TYPE_MUTEX: -+ if (obj->u.mutex.owner && obj->u.mutex.owner != owner) -+ return false; -+ return obj->u.mutex.count < UINT_MAX; -+ case NTSYNC_TYPE_EVENT: -+ return obj->u.event.signaled; -+ } -+ -+ WARN(1, "bad object type %#x\n", obj->type); -+ return false; -+} -+ -+/* -+ * "locked_obj" is an optional pointer to an object which is already locked and -+ * should not be locked again. This is necessary so that changing an object's -+ * state and waking it can be a single atomic operation. 
-+ */ -+static void try_wake_all(struct ntsync_device *dev, struct ntsync_q *q, -+ struct ntsync_obj *locked_obj) -+{ -+ __u32 count = q->count; -+ bool can_wake = true; -+ int signaled = -1; -+ __u32 i; -+ -+ lockdep_assert_held(&dev->wait_all_lock); -+ if (locked_obj) -+ lockdep_assert(locked_obj->dev_locked); -+ -+ for (i = 0; i < count; i++) { -+ if (q->entries[i].obj != locked_obj) -+ dev_lock_obj(dev, q->entries[i].obj); -+ } -+ -+ for (i = 0; i < count; i++) { -+ if (!is_signaled(q->entries[i].obj, q->owner)) { -+ can_wake = false; -+ break; -+ } -+ } -+ -+ if (can_wake && atomic_try_cmpxchg(&q->signaled, &signaled, 0)) { -+ for (i = 0; i < count; i++) { -+ struct ntsync_obj *obj = q->entries[i].obj; -+ -+ switch (obj->type) { -+ case NTSYNC_TYPE_SEM: -+ obj->u.sem.count--; -+ break; -+ case NTSYNC_TYPE_MUTEX: -+ if (obj->u.mutex.ownerdead) -+ q->ownerdead = true; -+ obj->u.mutex.ownerdead = false; -+ obj->u.mutex.count++; -+ obj->u.mutex.owner = q->owner; -+ break; -+ case NTSYNC_TYPE_EVENT: -+ if (!obj->u.event.manual) -+ obj->u.event.signaled = false; -+ break; -+ } -+ } -+ wake_up_process(q->task); -+ } -+ -+ for (i = 0; i < count; i++) { -+ if (q->entries[i].obj != locked_obj) -+ dev_unlock_obj(dev, q->entries[i].obj); -+ } -+} -+ -+static void try_wake_all_obj(struct ntsync_device *dev, struct ntsync_obj *obj) -+{ -+ struct ntsync_q_entry *entry; -+ -+ lockdep_assert_held(&dev->wait_all_lock); -+ lockdep_assert(obj->dev_locked); -+ -+ list_for_each_entry(entry, &obj->all_waiters, node) -+ try_wake_all(dev, entry->q, obj); -+} -+ -+static void try_wake_any_sem(struct ntsync_obj *sem) -+{ -+ struct ntsync_q_entry *entry; -+ -+ ntsync_assert_held(sem); -+ lockdep_assert(sem->type == NTSYNC_TYPE_SEM); -+ -+ list_for_each_entry(entry, &sem->any_waiters, node) { -+ struct ntsync_q *q = entry->q; -+ int signaled = -1; -+ -+ if (!sem->u.sem.count) -+ break; -+ -+ if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) { -+ sem->u.sem.count--; -+ 
wake_up_process(q->task); -+ } -+ } -+} -+ -+static void try_wake_any_mutex(struct ntsync_obj *mutex) -+{ -+ struct ntsync_q_entry *entry; -+ -+ ntsync_assert_held(mutex); -+ lockdep_assert(mutex->type == NTSYNC_TYPE_MUTEX); -+ -+ list_for_each_entry(entry, &mutex->any_waiters, node) { -+ struct ntsync_q *q = entry->q; -+ int signaled = -1; -+ -+ if (mutex->u.mutex.count == UINT_MAX) -+ break; -+ if (mutex->u.mutex.owner && mutex->u.mutex.owner != q->owner) -+ continue; -+ -+ if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) { -+ if (mutex->u.mutex.ownerdead) -+ q->ownerdead = true; -+ mutex->u.mutex.ownerdead = false; -+ mutex->u.mutex.count++; -+ mutex->u.mutex.owner = q->owner; -+ wake_up_process(q->task); -+ } -+ } -+} -+ -+static void try_wake_any_event(struct ntsync_obj *event) -+{ -+ struct ntsync_q_entry *entry; -+ -+ ntsync_assert_held(event); -+ lockdep_assert(event->type == NTSYNC_TYPE_EVENT); -+ -+ list_for_each_entry(entry, &event->any_waiters, node) { -+ struct ntsync_q *q = entry->q; -+ int signaled = -1; -+ -+ if (!event->u.event.signaled) -+ break; -+ -+ if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) { -+ if (!event->u.event.manual) -+ event->u.event.signaled = false; -+ wake_up_process(q->task); -+ } -+ } -+} -+ - /* - * Actually change the semaphore state, returning -EOVERFLOW if it is made - * invalid. 
-@@ -61,7 +394,7 @@ static int post_sem_state(struct ntsync_obj *sem, __u32 count) - { - __u32 sum; - -- lockdep_assert_held(&sem->lock); -+ ntsync_assert_held(sem); - - if (check_add_overflow(sem->u.sem.count, count, &sum) || - sum > sem->u.sem.max) -@@ -73,9 +406,11 @@ static int post_sem_state(struct ntsync_obj *sem, __u32 count) - - static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp) - { -+ struct ntsync_device *dev = sem->dev; - __u32 __user *user_args = argp; - __u32 prev_count; - __u32 args; -+ bool all; - int ret; - - if (copy_from_user(&args, argp, sizeof(args))) -@@ -84,12 +419,17 @@ static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp) - if (sem->type != NTSYNC_TYPE_SEM) - return -EINVAL; - -- spin_lock(&sem->lock); -+ all = ntsync_lock_obj(dev, sem); - - prev_count = sem->u.sem.count; - ret = post_sem_state(sem, args); -+ if (!ret) { -+ if (all) -+ try_wake_all_obj(dev, sem); -+ try_wake_any_sem(sem); -+ } - -- spin_unlock(&sem->lock); -+ ntsync_unlock_obj(dev, sem, all); - - if (!ret && put_user(prev_count, user_args)) - ret = -EFAULT; -@@ -97,6 +437,226 @@ static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp) - return ret; - } - -+/* -+ * Actually change the mutex state, returning -EPERM if not the owner. 
-+ */ -+static int unlock_mutex_state(struct ntsync_obj *mutex, -+ const struct ntsync_mutex_args *args) -+{ -+ ntsync_assert_held(mutex); -+ -+ if (mutex->u.mutex.owner != args->owner) -+ return -EPERM; -+ -+ if (!--mutex->u.mutex.count) -+ mutex->u.mutex.owner = 0; -+ return 0; -+} -+ -+static int ntsync_mutex_unlock(struct ntsync_obj *mutex, void __user *argp) -+{ -+ struct ntsync_mutex_args __user *user_args = argp; -+ struct ntsync_device *dev = mutex->dev; -+ struct ntsync_mutex_args args; -+ __u32 prev_count; -+ bool all; -+ int ret; -+ -+ if (copy_from_user(&args, argp, sizeof(args))) -+ return -EFAULT; -+ if (!args.owner) -+ return -EINVAL; -+ -+ if (mutex->type != NTSYNC_TYPE_MUTEX) -+ return -EINVAL; -+ -+ all = ntsync_lock_obj(dev, mutex); -+ -+ prev_count = mutex->u.mutex.count; -+ ret = unlock_mutex_state(mutex, &args); -+ if (!ret) { -+ if (all) -+ try_wake_all_obj(dev, mutex); -+ try_wake_any_mutex(mutex); -+ } -+ -+ ntsync_unlock_obj(dev, mutex, all); -+ -+ if (!ret && put_user(prev_count, &user_args->count)) -+ ret = -EFAULT; -+ -+ return ret; -+} -+ -+/* -+ * Actually change the mutex state to mark its owner as dead, -+ * returning -EPERM if not the owner. 
-+ */ -+static int kill_mutex_state(struct ntsync_obj *mutex, __u32 owner) -+{ -+ ntsync_assert_held(mutex); -+ -+ if (mutex->u.mutex.owner != owner) -+ return -EPERM; -+ -+ mutex->u.mutex.ownerdead = true; -+ mutex->u.mutex.owner = 0; -+ mutex->u.mutex.count = 0; -+ return 0; -+} -+ -+static int ntsync_mutex_kill(struct ntsync_obj *mutex, void __user *argp) -+{ -+ struct ntsync_device *dev = mutex->dev; -+ __u32 owner; -+ bool all; -+ int ret; -+ -+ if (get_user(owner, (__u32 __user *)argp)) -+ return -EFAULT; -+ if (!owner) -+ return -EINVAL; -+ -+ if (mutex->type != NTSYNC_TYPE_MUTEX) -+ return -EINVAL; -+ -+ all = ntsync_lock_obj(dev, mutex); -+ -+ ret = kill_mutex_state(mutex, owner); -+ if (!ret) { -+ if (all) -+ try_wake_all_obj(dev, mutex); -+ try_wake_any_mutex(mutex); -+ } -+ -+ ntsync_unlock_obj(dev, mutex, all); -+ -+ return ret; -+} -+ -+static int ntsync_event_set(struct ntsync_obj *event, void __user *argp, bool pulse) -+{ -+ struct ntsync_device *dev = event->dev; -+ __u32 prev_state; -+ bool all; -+ -+ if (event->type != NTSYNC_TYPE_EVENT) -+ return -EINVAL; -+ -+ all = ntsync_lock_obj(dev, event); -+ -+ prev_state = event->u.event.signaled; -+ event->u.event.signaled = true; -+ if (all) -+ try_wake_all_obj(dev, event); -+ try_wake_any_event(event); -+ if (pulse) -+ event->u.event.signaled = false; -+ -+ ntsync_unlock_obj(dev, event, all); -+ -+ if (put_user(prev_state, (__u32 __user *)argp)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+static int ntsync_event_reset(struct ntsync_obj *event, void __user *argp) -+{ -+ struct ntsync_device *dev = event->dev; -+ __u32 prev_state; -+ bool all; -+ -+ if (event->type != NTSYNC_TYPE_EVENT) -+ return -EINVAL; -+ -+ all = ntsync_lock_obj(dev, event); -+ -+ prev_state = event->u.event.signaled; -+ event->u.event.signaled = false; -+ -+ ntsync_unlock_obj(dev, event, all); -+ -+ if (put_user(prev_state, (__u32 __user *)argp)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+static int ntsync_sem_read(struct 
ntsync_obj *sem, void __user *argp) -+{ -+ struct ntsync_sem_args __user *user_args = argp; -+ struct ntsync_device *dev = sem->dev; -+ struct ntsync_sem_args args; -+ bool all; -+ -+ if (sem->type != NTSYNC_TYPE_SEM) -+ return -EINVAL; -+ -+ args.sem = 0; -+ -+ all = ntsync_lock_obj(dev, sem); -+ -+ args.count = sem->u.sem.count; -+ args.max = sem->u.sem.max; -+ -+ ntsync_unlock_obj(dev, sem, all); -+ -+ if (copy_to_user(user_args, &args, sizeof(args))) -+ return -EFAULT; -+ return 0; -+} -+ -+static int ntsync_mutex_read(struct ntsync_obj *mutex, void __user *argp) -+{ -+ struct ntsync_mutex_args __user *user_args = argp; -+ struct ntsync_device *dev = mutex->dev; -+ struct ntsync_mutex_args args; -+ bool all; -+ int ret; -+ -+ if (mutex->type != NTSYNC_TYPE_MUTEX) -+ return -EINVAL; -+ -+ args.mutex = 0; -+ -+ all = ntsync_lock_obj(dev, mutex); -+ -+ args.count = mutex->u.mutex.count; -+ args.owner = mutex->u.mutex.owner; -+ ret = mutex->u.mutex.ownerdead ? -EOWNERDEAD : 0; -+ -+ ntsync_unlock_obj(dev, mutex, all); -+ -+ if (copy_to_user(user_args, &args, sizeof(args))) -+ return -EFAULT; -+ return ret; -+} -+ -+static int ntsync_event_read(struct ntsync_obj *event, void __user *argp) -+{ -+ struct ntsync_event_args __user *user_args = argp; -+ struct ntsync_device *dev = event->dev; -+ struct ntsync_event_args args; -+ bool all; -+ -+ if (event->type != NTSYNC_TYPE_EVENT) -+ return -EINVAL; -+ -+ args.event = 0; -+ -+ all = ntsync_lock_obj(dev, event); -+ -+ args.manual = event->u.event.manual; -+ args.signaled = event->u.event.signaled; -+ -+ ntsync_unlock_obj(dev, event, all); -+ -+ if (copy_to_user(user_args, &args, sizeof(args))) -+ return -EFAULT; -+ return 0; -+} -+ - static int ntsync_obj_release(struct inode *inode, struct file *file) - { - struct ntsync_obj *obj = file->private_data; -@@ -116,6 +676,22 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd, - switch (cmd) { - case NTSYNC_IOC_SEM_POST: - return ntsync_sem_post(obj, argp); 
-+ case NTSYNC_IOC_SEM_READ: -+ return ntsync_sem_read(obj, argp); -+ case NTSYNC_IOC_MUTEX_UNLOCK: -+ return ntsync_mutex_unlock(obj, argp); -+ case NTSYNC_IOC_MUTEX_KILL: -+ return ntsync_mutex_kill(obj, argp); -+ case NTSYNC_IOC_MUTEX_READ: -+ return ntsync_mutex_read(obj, argp); -+ case NTSYNC_IOC_EVENT_SET: -+ return ntsync_event_set(obj, argp, false); -+ case NTSYNC_IOC_EVENT_RESET: -+ return ntsync_event_reset(obj, argp); -+ case NTSYNC_IOC_EVENT_PULSE: -+ return ntsync_event_set(obj, argp, true); -+ case NTSYNC_IOC_EVENT_READ: -+ return ntsync_event_read(obj, argp); - default: - return -ENOIOCTLCMD; - } -@@ -141,6 +717,9 @@ static struct ntsync_obj *ntsync_alloc_obj(struct ntsync_device *dev, - obj->dev = dev; - get_file(dev->file); - spin_lock_init(&obj->lock); -+ INIT_LIST_HEAD(&obj->any_waiters); -+ INIT_LIST_HEAD(&obj->all_waiters); -+ atomic_set(&obj->all_hint, 0); - - return obj; - } -@@ -191,6 +770,400 @@ static int ntsync_create_sem(struct ntsync_device *dev, void __user *argp) - return put_user(fd, &user_args->sem); - } - -+static int ntsync_create_mutex(struct ntsync_device *dev, void __user *argp) -+{ -+ struct ntsync_mutex_args __user *user_args = argp; -+ struct ntsync_mutex_args args; -+ struct ntsync_obj *mutex; -+ int fd; -+ -+ if (copy_from_user(&args, argp, sizeof(args))) -+ return -EFAULT; -+ -+ if (!args.owner != !args.count) -+ return -EINVAL; -+ -+ mutex = ntsync_alloc_obj(dev, NTSYNC_TYPE_MUTEX); -+ if (!mutex) -+ return -ENOMEM; -+ mutex->u.mutex.count = args.count; -+ mutex->u.mutex.owner = args.owner; -+ fd = ntsync_obj_get_fd(mutex); -+ if (fd < 0) { -+ kfree(mutex); -+ return fd; -+ } -+ -+ return put_user(fd, &user_args->mutex); -+} -+ -+static int ntsync_create_event(struct ntsync_device *dev, void __user *argp) -+{ -+ struct ntsync_event_args __user *user_args = argp; -+ struct ntsync_event_args args; -+ struct ntsync_obj *event; -+ int fd; -+ -+ if (copy_from_user(&args, argp, sizeof(args))) -+ return -EFAULT; -+ -+ event = 
ntsync_alloc_obj(dev, NTSYNC_TYPE_EVENT); -+ if (!event) -+ return -ENOMEM; -+ event->u.event.manual = args.manual; -+ event->u.event.signaled = args.signaled; -+ fd = ntsync_obj_get_fd(event); -+ if (fd < 0) { -+ kfree(event); -+ return fd; -+ } -+ -+ return put_user(fd, &user_args->event); -+} -+ -+static struct ntsync_obj *get_obj(struct ntsync_device *dev, int fd) -+{ -+ struct file *file = fget(fd); -+ struct ntsync_obj *obj; -+ -+ if (!file) -+ return NULL; -+ -+ if (file->f_op != &ntsync_obj_fops) { -+ fput(file); -+ return NULL; -+ } -+ -+ obj = file->private_data; -+ if (obj->dev != dev) { -+ fput(file); -+ return NULL; -+ } -+ -+ return obj; -+} -+ -+static void put_obj(struct ntsync_obj *obj) -+{ -+ fput(obj->file); -+} -+ -+static int ntsync_schedule(const struct ntsync_q *q, const struct ntsync_wait_args *args) -+{ -+ ktime_t timeout = ns_to_ktime(args->timeout); -+ clockid_t clock = CLOCK_MONOTONIC; -+ ktime_t *timeout_ptr; -+ int ret = 0; -+ -+ timeout_ptr = (args->timeout == U64_MAX ? NULL : &timeout); -+ -+ if (args->flags & NTSYNC_WAIT_REALTIME) -+ clock = CLOCK_REALTIME; -+ -+ do { -+ if (signal_pending(current)) { -+ ret = -ERESTARTSYS; -+ break; -+ } -+ -+ set_current_state(TASK_INTERRUPTIBLE); -+ if (atomic_read(&q->signaled) != -1) { -+ ret = 0; -+ break; -+ } -+ ret = schedule_hrtimeout_range_clock(timeout_ptr, 0, HRTIMER_MODE_ABS, clock); -+ } while (ret < 0); -+ __set_current_state(TASK_RUNNING); -+ -+ return ret; -+} -+ -+/* -+ * Allocate and initialize the ntsync_q structure, but do not queue us yet. 
-+ */ -+static int setup_wait(struct ntsync_device *dev, -+ const struct ntsync_wait_args *args, bool all, -+ struct ntsync_q **ret_q) -+{ -+ int fds[NTSYNC_MAX_WAIT_COUNT + 1]; -+ const __u32 count = args->count; -+ struct ntsync_q *q; -+ __u32 total_count; -+ __u32 i, j; -+ -+ if (args->pad || (args->flags & ~NTSYNC_WAIT_REALTIME)) -+ return -EINVAL; -+ -+ if (args->count > NTSYNC_MAX_WAIT_COUNT) -+ return -EINVAL; -+ -+ total_count = count; -+ if (args->alert) -+ total_count++; -+ -+ if (copy_from_user(fds, u64_to_user_ptr(args->objs), -+ array_size(count, sizeof(*fds)))) -+ return -EFAULT; -+ if (args->alert) -+ fds[count] = args->alert; -+ -+ q = kmalloc(struct_size(q, entries, total_count), GFP_KERNEL); -+ if (!q) -+ return -ENOMEM; -+ q->task = current; -+ q->owner = args->owner; -+ atomic_set(&q->signaled, -1); -+ q->all = all; -+ q->ownerdead = false; -+ q->count = count; -+ -+ for (i = 0; i < total_count; i++) { -+ struct ntsync_q_entry *entry = &q->entries[i]; -+ struct ntsync_obj *obj = get_obj(dev, fds[i]); -+ -+ if (!obj) -+ goto err; -+ -+ if (all) { -+ /* Check that the objects are all distinct. 
*/ -+ for (j = 0; j < i; j++) { -+ if (obj == q->entries[j].obj) { -+ put_obj(obj); -+ goto err; -+ } -+ } -+ } -+ -+ entry->obj = obj; -+ entry->q = q; -+ entry->index = i; -+ } -+ -+ *ret_q = q; -+ return 0; -+ -+err: -+ for (j = 0; j < i; j++) -+ put_obj(q->entries[j].obj); -+ kfree(q); -+ return -EINVAL; -+} -+ -+static void try_wake_any_obj(struct ntsync_obj *obj) -+{ -+ switch (obj->type) { -+ case NTSYNC_TYPE_SEM: -+ try_wake_any_sem(obj); -+ break; -+ case NTSYNC_TYPE_MUTEX: -+ try_wake_any_mutex(obj); -+ break; -+ case NTSYNC_TYPE_EVENT: -+ try_wake_any_event(obj); -+ break; -+ } -+} -+ -+static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp) -+{ -+ struct ntsync_wait_args args; -+ __u32 i, total_count; -+ struct ntsync_q *q; -+ int signaled; -+ bool all; -+ int ret; -+ -+ if (copy_from_user(&args, argp, sizeof(args))) -+ return -EFAULT; -+ -+ ret = setup_wait(dev, &args, false, &q); -+ if (ret < 0) -+ return ret; -+ -+ total_count = args.count; -+ if (args.alert) -+ total_count++; -+ -+ /* queue ourselves */ -+ -+ for (i = 0; i < total_count; i++) { -+ struct ntsync_q_entry *entry = &q->entries[i]; -+ struct ntsync_obj *obj = entry->obj; -+ -+ all = ntsync_lock_obj(dev, obj); -+ list_add_tail(&entry->node, &obj->any_waiters); -+ ntsync_unlock_obj(dev, obj, all); -+ } -+ -+ /* -+ * Check if we are already signaled. -+ * -+ * Note that the API requires that normal objects are checked before -+ * the alert event. Hence we queue the alert event last, and check -+ * objects in order. 
-+ */ -+ -+ for (i = 0; i < total_count; i++) { -+ struct ntsync_obj *obj = q->entries[i].obj; -+ -+ if (atomic_read(&q->signaled) != -1) -+ break; -+ -+ all = ntsync_lock_obj(dev, obj); -+ try_wake_any_obj(obj); -+ ntsync_unlock_obj(dev, obj, all); -+ } -+ -+ /* sleep */ -+ -+ ret = ntsync_schedule(q, &args); -+ -+ /* and finally, unqueue */ -+ -+ for (i = 0; i < total_count; i++) { -+ struct ntsync_q_entry *entry = &q->entries[i]; -+ struct ntsync_obj *obj = entry->obj; -+ -+ all = ntsync_lock_obj(dev, obj); -+ list_del(&entry->node); -+ ntsync_unlock_obj(dev, obj, all); -+ -+ put_obj(obj); -+ } -+ -+ signaled = atomic_read(&q->signaled); -+ if (signaled != -1) { -+ struct ntsync_wait_args __user *user_args = argp; -+ -+ /* even if we caught a signal, we need to communicate success */ -+ ret = q->ownerdead ? -EOWNERDEAD : 0; -+ -+ if (put_user(signaled, &user_args->index)) -+ ret = -EFAULT; -+ } else if (!ret) { -+ ret = -ETIMEDOUT; -+ } -+ -+ kfree(q); -+ return ret; -+} -+ -+static int ntsync_wait_all(struct ntsync_device *dev, void __user *argp) -+{ -+ struct ntsync_wait_args args; -+ struct ntsync_q *q; -+ int signaled; -+ __u32 i; -+ int ret; -+ -+ if (copy_from_user(&args, argp, sizeof(args))) -+ return -EFAULT; -+ -+ ret = setup_wait(dev, &args, true, &q); -+ if (ret < 0) -+ return ret; -+ -+ /* queue ourselves */ -+ -+ mutex_lock(&dev->wait_all_lock); -+ -+ for (i = 0; i < args.count; i++) { -+ struct ntsync_q_entry *entry = &q->entries[i]; -+ struct ntsync_obj *obj = entry->obj; -+ -+ atomic_inc(&obj->all_hint); -+ -+ /* -+ * obj->all_waiters is protected by dev->wait_all_lock rather -+ * than obj->lock, so there is no need to acquire obj->lock -+ * here. 
-+ */ -+ list_add_tail(&entry->node, &obj->all_waiters); -+ } -+ if (args.alert) { -+ struct ntsync_q_entry *entry = &q->entries[args.count]; -+ struct ntsync_obj *obj = entry->obj; -+ -+ dev_lock_obj(dev, obj); -+ list_add_tail(&entry->node, &obj->any_waiters); -+ dev_unlock_obj(dev, obj); -+ } -+ -+ /* check if we are already signaled */ -+ -+ try_wake_all(dev, q, NULL); -+ -+ mutex_unlock(&dev->wait_all_lock); -+ -+ /* -+ * Check if the alert event is signaled, making sure to do so only -+ * after checking if the other objects are signaled. -+ */ -+ -+ if (args.alert) { -+ struct ntsync_obj *obj = q->entries[args.count].obj; -+ -+ if (atomic_read(&q->signaled) == -1) { -+ bool all = ntsync_lock_obj(dev, obj); -+ try_wake_any_obj(obj); -+ ntsync_unlock_obj(dev, obj, all); -+ } -+ } -+ -+ /* sleep */ -+ -+ ret = ntsync_schedule(q, &args); -+ -+ /* and finally, unqueue */ -+ -+ mutex_lock(&dev->wait_all_lock); -+ -+ for (i = 0; i < args.count; i++) { -+ struct ntsync_q_entry *entry = &q->entries[i]; -+ struct ntsync_obj *obj = entry->obj; -+ -+ /* -+ * obj->all_waiters is protected by dev->wait_all_lock rather -+ * than obj->lock, so there is no need to acquire it here. -+ */ -+ list_del(&entry->node); -+ -+ atomic_dec(&obj->all_hint); -+ -+ put_obj(obj); -+ } -+ -+ mutex_unlock(&dev->wait_all_lock); -+ -+ if (args.alert) { -+ struct ntsync_q_entry *entry = &q->entries[args.count]; -+ struct ntsync_obj *obj = entry->obj; -+ bool all; -+ -+ all = ntsync_lock_obj(dev, obj); -+ list_del(&entry->node); -+ ntsync_unlock_obj(dev, obj, all); -+ -+ put_obj(obj); -+ } -+ -+ signaled = atomic_read(&q->signaled); -+ if (signaled != -1) { -+ struct ntsync_wait_args __user *user_args = argp; -+ -+ /* even if we caught a signal, we need to communicate success */ -+ ret = q->ownerdead ? 
-EOWNERDEAD : 0; -+ -+ if (put_user(signaled, &user_args->index)) -+ ret = -EFAULT; -+ } else if (!ret) { -+ ret = -ETIMEDOUT; -+ } -+ -+ kfree(q); -+ return ret; -+} -+ - static int ntsync_char_open(struct inode *inode, struct file *file) - { - struct ntsync_device *dev; -@@ -199,6 +1172,8 @@ static int ntsync_char_open(struct inode *inode, struct file *file) - if (!dev) - return -ENOMEM; - -+ mutex_init(&dev->wait_all_lock); -+ - file->private_data = dev; - dev->file = file; - return nonseekable_open(inode, file); -@@ -220,8 +1195,16 @@ static long ntsync_char_ioctl(struct file *file, unsigned int cmd, - void __user *argp = (void __user *)parm; - - switch (cmd) { -+ case NTSYNC_IOC_CREATE_EVENT: -+ return ntsync_create_event(dev, argp); -+ case NTSYNC_IOC_CREATE_MUTEX: -+ return ntsync_create_mutex(dev, argp); - case NTSYNC_IOC_CREATE_SEM: - return ntsync_create_sem(dev, argp); -+ case NTSYNC_IOC_WAIT_ALL: -+ return ntsync_wait_all(dev, argp); -+ case NTSYNC_IOC_WAIT_ANY: -+ return ntsync_wait_any(dev, argp); - default: - return -ENOIOCTLCMD; - } -diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h -index dcfa38fdc93c..4a8095a3fc34 100644 ---- a/include/uapi/linux/ntsync.h -+++ b/include/uapi/linux/ntsync.h -@@ -16,8 +16,47 @@ struct ntsync_sem_args { - __u32 max; - }; - -+struct ntsync_mutex_args { -+ __u32 mutex; -+ __u32 owner; -+ __u32 count; -+}; -+ -+struct ntsync_event_args { -+ __u32 event; -+ __u32 manual; -+ __u32 signaled; -+}; -+ -+#define NTSYNC_WAIT_REALTIME 0x1 -+ -+struct ntsync_wait_args { -+ __u64 timeout; -+ __u64 objs; -+ __u32 count; -+ __u32 index; -+ __u32 flags; -+ __u32 owner; -+ __u32 alert; -+ __u32 pad; -+}; -+ -+#define NTSYNC_MAX_WAIT_COUNT 64 -+ - #define NTSYNC_IOC_CREATE_SEM _IOWR('N', 0x80, struct ntsync_sem_args) -+#define NTSYNC_IOC_WAIT_ANY _IOWR('N', 0x82, struct ntsync_wait_args) -+#define NTSYNC_IOC_WAIT_ALL _IOWR('N', 0x83, struct ntsync_wait_args) -+#define NTSYNC_IOC_CREATE_MUTEX _IOWR('N', 0x84, 
struct ntsync_sem_args) -+#define NTSYNC_IOC_CREATE_EVENT _IOWR('N', 0x87, struct ntsync_event_args) - - #define NTSYNC_IOC_SEM_POST _IOWR('N', 0x81, __u32) -+#define NTSYNC_IOC_MUTEX_UNLOCK _IOWR('N', 0x85, struct ntsync_mutex_args) -+#define NTSYNC_IOC_MUTEX_KILL _IOW ('N', 0x86, __u32) -+#define NTSYNC_IOC_EVENT_SET _IOR ('N', 0x88, __u32) -+#define NTSYNC_IOC_EVENT_RESET _IOR ('N', 0x89, __u32) -+#define NTSYNC_IOC_EVENT_PULSE _IOR ('N', 0x8a, __u32) -+#define NTSYNC_IOC_SEM_READ _IOR ('N', 0x8b, struct ntsync_sem_args) -+#define NTSYNC_IOC_MUTEX_READ _IOR ('N', 0x8c, struct ntsync_mutex_args) -+#define NTSYNC_IOC_EVENT_READ _IOR ('N', 0x8d, struct ntsync_event_args) - - #endif -diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile -index 9039f3709aff..d5aeaa8fe3ca 100644 ---- a/tools/testing/selftests/Makefile -+++ b/tools/testing/selftests/Makefile -@@ -16,6 +16,7 @@ TARGETS += damon - TARGETS += devices - TARGETS += dmabuf-heaps - TARGETS += drivers/dma-buf -+TARGETS += drivers/ntsync - TARGETS += drivers/s390x/uvdevice - TARGETS += drivers/net - TARGETS += drivers/net/bonding -diff --git a/tools/testing/selftests/drivers/ntsync/.gitignore b/tools/testing/selftests/drivers/ntsync/.gitignore -new file mode 100644 -index 000000000000..848573a3d3ea ---- /dev/null -+++ b/tools/testing/selftests/drivers/ntsync/.gitignore -@@ -0,0 +1 @@ -+ntsync -diff --git a/tools/testing/selftests/drivers/ntsync/Makefile b/tools/testing/selftests/drivers/ntsync/Makefile -new file mode 100644 -index 000000000000..dbf2b055c0b2 ---- /dev/null -+++ b/tools/testing/selftests/drivers/ntsync/Makefile -@@ -0,0 +1,7 @@ -+# SPDX-LICENSE-IDENTIFIER: GPL-2.0-only -+TEST_GEN_PROGS := ntsync -+ -+CFLAGS += $(KHDR_INCLUDES) -+LDLIBS += -lpthread -+ -+include ../../lib.mk -diff --git a/tools/testing/selftests/drivers/ntsync/config b/tools/testing/selftests/drivers/ntsync/config -new file mode 100644 -index 000000000000..60539c826d06 ---- /dev/null -+++ 
b/tools/testing/selftests/drivers/ntsync/config -@@ -0,0 +1 @@ -+CONFIG_WINESYNC=y -diff --git a/tools/testing/selftests/drivers/ntsync/ntsync.c b/tools/testing/selftests/drivers/ntsync/ntsync.c -new file mode 100644 -index 000000000000..5fa2c9a0768c ---- /dev/null -+++ b/tools/testing/selftests/drivers/ntsync/ntsync.c -@@ -0,0 +1,1407 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Various unit tests for the "ntsync" synchronization primitive driver. -+ * -+ * Copyright (C) 2021-2022 Elizabeth Figura -+ */ -+ -+#define _GNU_SOURCE -+#include -+#include -+#include -+#include -+#include -+#include -+#include "../../kselftest_harness.h" -+ -+static int read_sem_state(int sem, __u32 *count, __u32 *max) -+{ -+ struct ntsync_sem_args args; -+ int ret; -+ -+ memset(&args, 0xcc, sizeof(args)); -+ ret = ioctl(sem, NTSYNC_IOC_SEM_READ, &args); -+ *count = args.count; -+ *max = args.max; -+ return ret; -+} -+ -+#define check_sem_state(sem, count, max) \ -+ ({ \ -+ __u32 __count, __max; \ -+ int ret = read_sem_state((sem), &__count, &__max); \ -+ EXPECT_EQ(0, ret); \ -+ EXPECT_EQ((count), __count); \ -+ EXPECT_EQ((max), __max); \ -+ }) -+ -+static int post_sem(int sem, __u32 *count) -+{ -+ return ioctl(sem, NTSYNC_IOC_SEM_POST, count); -+} -+ -+static int read_mutex_state(int mutex, __u32 *count, __u32 *owner) -+{ -+ struct ntsync_mutex_args args; -+ int ret; -+ -+ memset(&args, 0xcc, sizeof(args)); -+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &args); -+ *count = args.count; -+ *owner = args.owner; -+ return ret; -+} -+ -+#define check_mutex_state(mutex, count, owner) \ -+ ({ \ -+ __u32 __count, __owner; \ -+ int ret = read_mutex_state((mutex), &__count, &__owner); \ -+ EXPECT_EQ(0, ret); \ -+ EXPECT_EQ((count), __count); \ -+ EXPECT_EQ((owner), __owner); \ -+ }) -+ -+static int unlock_mutex(int mutex, __u32 owner, __u32 *count) -+{ -+ struct ntsync_mutex_args args; -+ int ret; -+ -+ args.owner = owner; -+ args.count = 0xdeadbeef; -+ ret = ioctl(mutex, 
NTSYNC_IOC_MUTEX_UNLOCK, &args); -+ *count = args.count; -+ return ret; -+} -+ -+static int read_event_state(int event, __u32 *signaled, __u32 *manual) -+{ -+ struct ntsync_event_args args; -+ int ret; -+ -+ memset(&args, 0xcc, sizeof(args)); -+ ret = ioctl(event, NTSYNC_IOC_EVENT_READ, &args); -+ *signaled = args.signaled; -+ *manual = args.manual; -+ return ret; -+} -+ -+#define check_event_state(event, signaled, manual) \ -+ ({ \ -+ __u32 __signaled, __manual; \ -+ int ret = read_event_state((event), &__signaled, &__manual); \ -+ EXPECT_EQ(0, ret); \ -+ EXPECT_EQ((signaled), __signaled); \ -+ EXPECT_EQ((manual), __manual); \ -+ }) -+ -+static int wait_objs(int fd, unsigned long request, __u32 count, -+ const int *objs, __u32 owner, int alert, __u32 *index) -+{ -+ struct ntsync_wait_args args = {0}; -+ struct timespec timeout; -+ int ret; -+ -+ clock_gettime(CLOCK_MONOTONIC, &timeout); -+ -+ args.timeout = timeout.tv_sec * 1000000000 + timeout.tv_nsec; -+ args.count = count; -+ args.objs = (uintptr_t)objs; -+ args.owner = owner; -+ args.index = 0xdeadbeef; -+ args.alert = alert; -+ ret = ioctl(fd, request, &args); -+ *index = args.index; -+ return ret; -+} -+ -+static int wait_any(int fd, __u32 count, const int *objs, __u32 owner, __u32 *index) -+{ -+ return wait_objs(fd, NTSYNC_IOC_WAIT_ANY, count, objs, owner, 0, index); -+} -+ -+static int wait_all(int fd, __u32 count, const int *objs, __u32 owner, __u32 *index) -+{ -+ return wait_objs(fd, NTSYNC_IOC_WAIT_ALL, count, objs, owner, 0, index); -+} -+ -+static int wait_any_alert(int fd, __u32 count, const int *objs, -+ __u32 owner, int alert, __u32 *index) -+{ -+ return wait_objs(fd, NTSYNC_IOC_WAIT_ANY, -+ count, objs, owner, alert, index); -+} -+ -+static int wait_all_alert(int fd, __u32 count, const int *objs, -+ __u32 owner, int alert, __u32 *index) -+{ -+ return wait_objs(fd, NTSYNC_IOC_WAIT_ALL, -+ count, objs, owner, alert, index); -+} -+ -+TEST(semaphore_state) -+{ -+ struct ntsync_sem_args sem_args; -+ 
struct timespec timeout; -+ __u32 count, index; -+ int fd, ret, sem; -+ -+ clock_gettime(CLOCK_MONOTONIC, &timeout); -+ -+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); -+ ASSERT_LE(0, fd); -+ -+ sem_args.count = 3; -+ sem_args.max = 2; -+ sem_args.sem = 0xdeadbeef; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EINVAL, errno); -+ -+ sem_args.count = 2; -+ sem_args.max = 2; -+ sem_args.sem = 0xdeadbeef; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); -+ EXPECT_EQ(0, ret); -+ EXPECT_NE(0xdeadbeef, sem_args.sem); -+ sem = sem_args.sem; -+ check_sem_state(sem, 2, 2); -+ -+ count = 0; -+ ret = post_sem(sem, &count); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(2, count); -+ check_sem_state(sem, 2, 2); -+ -+ count = 1; -+ ret = post_sem(sem, &count); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EOVERFLOW, errno); -+ check_sem_state(sem, 2, 2); -+ -+ ret = wait_any(fd, 1, &sem, 123, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ check_sem_state(sem, 1, 2); -+ -+ ret = wait_any(fd, 1, &sem, 123, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ check_sem_state(sem, 0, 2); -+ -+ ret = wait_any(fd, 1, &sem, 123, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(ETIMEDOUT, errno); -+ -+ count = 3; -+ ret = post_sem(sem, &count); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EOVERFLOW, errno); -+ check_sem_state(sem, 0, 2); -+ -+ count = 2; -+ ret = post_sem(sem, &count); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, count); -+ check_sem_state(sem, 2, 2); -+ -+ ret = wait_any(fd, 1, &sem, 123, &index); -+ EXPECT_EQ(0, ret); -+ ret = wait_any(fd, 1, &sem, 123, &index); -+ EXPECT_EQ(0, ret); -+ -+ count = 1; -+ ret = post_sem(sem, &count); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, count); -+ check_sem_state(sem, 1, 2); -+ -+ count = ~0u; -+ ret = post_sem(sem, &count); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EOVERFLOW, errno); -+ check_sem_state(sem, 1, 2); -+ -+ close(sem); -+ -+ close(fd); -+} -+ -+TEST(mutex_state) -+{ -+ struct ntsync_mutex_args 
mutex_args; -+ __u32 owner, count, index; -+ struct timespec timeout; -+ int fd, ret, mutex; -+ -+ clock_gettime(CLOCK_MONOTONIC, &timeout); -+ -+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); -+ ASSERT_LE(0, fd); -+ -+ mutex_args.owner = 123; -+ mutex_args.count = 0; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EINVAL, errno); -+ -+ mutex_args.owner = 0; -+ mutex_args.count = 2; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EINVAL, errno); -+ -+ mutex_args.owner = 123; -+ mutex_args.count = 2; -+ mutex_args.mutex = 0xdeadbeef; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); -+ EXPECT_EQ(0, ret); -+ EXPECT_NE(0xdeadbeef, mutex_args.mutex); -+ mutex = mutex_args.mutex; -+ check_mutex_state(mutex, 2, 123); -+ -+ ret = unlock_mutex(mutex, 0, &count); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EINVAL, errno); -+ -+ ret = unlock_mutex(mutex, 456, &count); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EPERM, errno); -+ check_mutex_state(mutex, 2, 123); -+ -+ ret = unlock_mutex(mutex, 123, &count); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(2, count); -+ check_mutex_state(mutex, 1, 123); -+ -+ ret = unlock_mutex(mutex, 123, &count); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(1, count); -+ check_mutex_state(mutex, 0, 0); -+ -+ ret = unlock_mutex(mutex, 123, &count); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EPERM, errno); -+ -+ ret = wait_any(fd, 1, &mutex, 456, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ check_mutex_state(mutex, 1, 456); -+ -+ ret = wait_any(fd, 1, &mutex, 456, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ check_mutex_state(mutex, 2, 456); -+ -+ ret = unlock_mutex(mutex, 456, &count); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(2, count); -+ check_mutex_state(mutex, 1, 456); -+ -+ ret = wait_any(fd, 1, &mutex, 123, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(ETIMEDOUT, errno); -+ -+ owner = 0; -+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner); -+ EXPECT_EQ(-1, ret); 
-+ EXPECT_EQ(EINVAL, errno); -+ -+ owner = 123; -+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EPERM, errno); -+ check_mutex_state(mutex, 1, 456); -+ -+ owner = 456; -+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner); -+ EXPECT_EQ(0, ret); -+ -+ memset(&mutex_args, 0xcc, sizeof(mutex_args)); -+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EOWNERDEAD, errno); -+ EXPECT_EQ(0, mutex_args.count); -+ EXPECT_EQ(0, mutex_args.owner); -+ -+ memset(&mutex_args, 0xcc, sizeof(mutex_args)); -+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EOWNERDEAD, errno); -+ EXPECT_EQ(0, mutex_args.count); -+ EXPECT_EQ(0, mutex_args.owner); -+ -+ ret = wait_any(fd, 1, &mutex, 123, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EOWNERDEAD, errno); -+ EXPECT_EQ(0, index); -+ check_mutex_state(mutex, 1, 123); -+ -+ owner = 123; -+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner); -+ EXPECT_EQ(0, ret); -+ -+ memset(&mutex_args, 0xcc, sizeof(mutex_args)); -+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EOWNERDEAD, errno); -+ EXPECT_EQ(0, mutex_args.count); -+ EXPECT_EQ(0, mutex_args.owner); -+ -+ ret = wait_any(fd, 1, &mutex, 123, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EOWNERDEAD, errno); -+ EXPECT_EQ(0, index); -+ check_mutex_state(mutex, 1, 123); -+ -+ close(mutex); -+ -+ mutex_args.owner = 0; -+ mutex_args.count = 0; -+ mutex_args.mutex = 0xdeadbeef; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); -+ EXPECT_EQ(0, ret); -+ EXPECT_NE(0xdeadbeef, mutex_args.mutex); -+ mutex = mutex_args.mutex; -+ check_mutex_state(mutex, 0, 0); -+ -+ ret = wait_any(fd, 1, &mutex, 123, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ check_mutex_state(mutex, 1, 123); -+ -+ close(mutex); -+ -+ mutex_args.owner = 123; -+ mutex_args.count = ~0u; -+ mutex_args.mutex = 0xdeadbeef; -+ ret = ioctl(fd, 
NTSYNC_IOC_CREATE_MUTEX, &mutex_args); -+ EXPECT_EQ(0, ret); -+ EXPECT_NE(0xdeadbeef, mutex_args.mutex); -+ mutex = mutex_args.mutex; -+ check_mutex_state(mutex, ~0u, 123); -+ -+ ret = wait_any(fd, 1, &mutex, 123, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(ETIMEDOUT, errno); -+ -+ close(mutex); -+ -+ close(fd); -+} -+ -+TEST(manual_event_state) -+{ -+ struct ntsync_event_args event_args; -+ __u32 index, signaled; -+ int fd, event, ret; -+ -+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); -+ ASSERT_LE(0, fd); -+ -+ event_args.manual = 1; -+ event_args.signaled = 0; -+ event_args.event = 0xdeadbeef; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); -+ EXPECT_EQ(0, ret); -+ EXPECT_NE(0xdeadbeef, event_args.event); -+ event = event_args.event; -+ check_event_state(event, 0, 1); -+ -+ signaled = 0xdeadbeef; -+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, signaled); -+ check_event_state(event, 1, 1); -+ -+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(1, signaled); -+ check_event_state(event, 1, 1); -+ -+ ret = wait_any(fd, 1, &event, 123, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ check_event_state(event, 1, 1); -+ -+ signaled = 0xdeadbeef; -+ ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(1, signaled); -+ check_event_state(event, 0, 1); -+ -+ ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, signaled); -+ check_event_state(event, 0, 1); -+ -+ ret = wait_any(fd, 1, &event, 123, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(ETIMEDOUT, errno); -+ -+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, signaled); -+ -+ ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(1, signaled); -+ check_event_state(event, 0, 1); -+ -+ ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled); -+ EXPECT_EQ(0, ret); -+ 
EXPECT_EQ(0, signaled); -+ check_event_state(event, 0, 1); -+ -+ close(event); -+ -+ close(fd); -+} -+ -+TEST(auto_event_state) -+{ -+ struct ntsync_event_args event_args; -+ __u32 index, signaled; -+ int fd, event, ret; -+ -+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); -+ ASSERT_LE(0, fd); -+ -+ event_args.manual = 0; -+ event_args.signaled = 1; -+ event_args.event = 0xdeadbeef; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); -+ EXPECT_EQ(0, ret); -+ EXPECT_NE(0xdeadbeef, event_args.event); -+ event = event_args.event; -+ -+ check_event_state(event, 1, 0); -+ -+ signaled = 0xdeadbeef; -+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(1, signaled); -+ check_event_state(event, 1, 0); -+ -+ ret = wait_any(fd, 1, &event, 123, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ check_event_state(event, 0, 0); -+ -+ signaled = 0xdeadbeef; -+ ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, signaled); -+ check_event_state(event, 0, 0); -+ -+ ret = wait_any(fd, 1, &event, 123, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(ETIMEDOUT, errno); -+ -+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, signaled); -+ -+ ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(1, signaled); -+ check_event_state(event, 0, 0); -+ -+ ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, signaled); -+ check_event_state(event, 0, 0); -+ -+ close(event); -+ -+ close(fd); -+} -+ -+TEST(test_wait_any) -+{ -+ int objs[NTSYNC_MAX_WAIT_COUNT + 1], fd, ret; -+ struct ntsync_mutex_args mutex_args = {0}; -+ struct ntsync_sem_args sem_args = {0}; -+ __u32 owner, index, count, i; -+ struct timespec timeout; -+ -+ clock_gettime(CLOCK_MONOTONIC, &timeout); -+ -+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); -+ ASSERT_LE(0, fd); -+ -+ sem_args.count = 2; -+ sem_args.max = 3; -+ 
sem_args.sem = 0xdeadbeef; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); -+ EXPECT_EQ(0, ret); -+ EXPECT_NE(0xdeadbeef, sem_args.sem); -+ -+ mutex_args.owner = 0; -+ mutex_args.count = 0; -+ mutex_args.mutex = 0xdeadbeef; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); -+ EXPECT_EQ(0, ret); -+ EXPECT_NE(0xdeadbeef, mutex_args.mutex); -+ -+ objs[0] = sem_args.sem; -+ objs[1] = mutex_args.mutex; -+ -+ ret = wait_any(fd, 2, objs, 123, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ check_sem_state(sem_args.sem, 1, 3); -+ check_mutex_state(mutex_args.mutex, 0, 0); -+ -+ ret = wait_any(fd, 2, objs, 123, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ check_sem_state(sem_args.sem, 0, 3); -+ check_mutex_state(mutex_args.mutex, 0, 0); -+ -+ ret = wait_any(fd, 2, objs, 123, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(1, index); -+ check_sem_state(sem_args.sem, 0, 3); -+ check_mutex_state(mutex_args.mutex, 1, 123); -+ -+ count = 1; -+ ret = post_sem(sem_args.sem, &count); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, count); -+ -+ ret = wait_any(fd, 2, objs, 123, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ check_sem_state(sem_args.sem, 0, 3); -+ check_mutex_state(mutex_args.mutex, 1, 123); -+ -+ ret = wait_any(fd, 2, objs, 123, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(1, index); -+ check_sem_state(sem_args.sem, 0, 3); -+ check_mutex_state(mutex_args.mutex, 2, 123); -+ -+ ret = wait_any(fd, 2, objs, 456, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(ETIMEDOUT, errno); -+ -+ owner = 123; -+ ret = ioctl(mutex_args.mutex, NTSYNC_IOC_MUTEX_KILL, &owner); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_any(fd, 2, objs, 456, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EOWNERDEAD, errno); -+ EXPECT_EQ(1, index); -+ -+ ret = wait_any(fd, 2, objs, 456, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(1, index); -+ -+ /* test waiting on the same object twice */ -+ count = 2; -+ ret = post_sem(sem_args.sem, &count); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, 
count); -+ -+ objs[0] = objs[1] = sem_args.sem; -+ ret = wait_any(fd, 2, objs, 456, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ check_sem_state(sem_args.sem, 1, 3); -+ -+ ret = wait_any(fd, 0, NULL, 456, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(ETIMEDOUT, errno); -+ -+ for (i = 0; i < NTSYNC_MAX_WAIT_COUNT + 1; ++i) -+ objs[i] = sem_args.sem; -+ -+ ret = wait_any(fd, NTSYNC_MAX_WAIT_COUNT, objs, 123, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ -+ ret = wait_any(fd, NTSYNC_MAX_WAIT_COUNT + 1, objs, 123, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EINVAL, errno); -+ -+ ret = wait_any(fd, -1, objs, 123, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EINVAL, errno); -+ -+ close(sem_args.sem); -+ close(mutex_args.mutex); -+ -+ close(fd); -+} -+ -+TEST(test_wait_all) -+{ -+ struct ntsync_event_args event_args = {0}; -+ struct ntsync_mutex_args mutex_args = {0}; -+ struct ntsync_sem_args sem_args = {0}; -+ __u32 owner, index, count; -+ int objs[2], fd, ret; -+ -+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); -+ ASSERT_LE(0, fd); -+ -+ sem_args.count = 2; -+ sem_args.max = 3; -+ sem_args.sem = 0xdeadbeef; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); -+ EXPECT_EQ(0, ret); -+ EXPECT_NE(0xdeadbeef, sem_args.sem); -+ -+ mutex_args.owner = 0; -+ mutex_args.count = 0; -+ mutex_args.mutex = 0xdeadbeef; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); -+ EXPECT_EQ(0, ret); -+ EXPECT_NE(0xdeadbeef, mutex_args.mutex); -+ -+ event_args.manual = true; -+ event_args.signaled = true; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); -+ EXPECT_EQ(0, ret); -+ -+ objs[0] = sem_args.sem; -+ objs[1] = mutex_args.mutex; -+ -+ ret = wait_all(fd, 2, objs, 123, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ check_sem_state(sem_args.sem, 1, 3); -+ check_mutex_state(mutex_args.mutex, 1, 123); -+ -+ ret = wait_all(fd, 2, objs, 456, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(ETIMEDOUT, errno); -+ check_sem_state(sem_args.sem, 1, 
3); -+ check_mutex_state(mutex_args.mutex, 1, 123); -+ -+ ret = wait_all(fd, 2, objs, 123, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ check_sem_state(sem_args.sem, 0, 3); -+ check_mutex_state(mutex_args.mutex, 2, 123); -+ -+ ret = wait_all(fd, 2, objs, 123, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(ETIMEDOUT, errno); -+ check_sem_state(sem_args.sem, 0, 3); -+ check_mutex_state(mutex_args.mutex, 2, 123); -+ -+ count = 3; -+ ret = post_sem(sem_args.sem, &count); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, count); -+ -+ ret = wait_all(fd, 2, objs, 123, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ check_sem_state(sem_args.sem, 2, 3); -+ check_mutex_state(mutex_args.mutex, 3, 123); -+ -+ owner = 123; -+ ret = ioctl(mutex_args.mutex, NTSYNC_IOC_MUTEX_KILL, &owner); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_all(fd, 2, objs, 123, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EOWNERDEAD, errno); -+ check_sem_state(sem_args.sem, 1, 3); -+ check_mutex_state(mutex_args.mutex, 1, 123); -+ -+ objs[0] = sem_args.sem; -+ objs[1] = event_args.event; -+ ret = wait_all(fd, 2, objs, 123, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ check_sem_state(sem_args.sem, 0, 3); -+ check_event_state(event_args.event, 1, 1); -+ -+ /* test waiting on the same object twice */ -+ objs[0] = objs[1] = sem_args.sem; -+ ret = wait_all(fd, 2, objs, 123, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EINVAL, errno); -+ -+ close(sem_args.sem); -+ close(mutex_args.mutex); -+ close(event_args.event); -+ -+ close(fd); -+} -+ -+struct wake_args { -+ int fd; -+ int obj; -+}; -+ -+struct wait_args { -+ int fd; -+ unsigned long request; -+ struct ntsync_wait_args *args; -+ int ret; -+ int err; -+}; -+ -+static void *wait_thread(void *arg) -+{ -+ struct wait_args *args = arg; -+ -+ args->ret = ioctl(args->fd, args->request, args->args); -+ args->err = errno; -+ return NULL; -+} -+ -+static __u64 get_abs_timeout(unsigned int ms) -+{ -+ struct timespec timeout; -+ 
clock_gettime(CLOCK_MONOTONIC, &timeout); -+ return (timeout.tv_sec * 1000000000) + timeout.tv_nsec + (ms * 1000000); -+} -+ -+static int wait_for_thread(pthread_t thread, unsigned int ms) -+{ -+ struct timespec timeout; -+ -+ clock_gettime(CLOCK_REALTIME, &timeout); -+ timeout.tv_nsec += ms * 1000000; -+ timeout.tv_sec += (timeout.tv_nsec / 1000000000); -+ timeout.tv_nsec %= 1000000000; -+ return pthread_timedjoin_np(thread, NULL, &timeout); -+} -+ -+TEST(wake_any) -+{ -+ struct ntsync_event_args event_args = {0}; -+ struct ntsync_mutex_args mutex_args = {0}; -+ struct ntsync_wait_args wait_args = {0}; -+ struct ntsync_sem_args sem_args = {0}; -+ struct wait_args thread_args; -+ __u32 count, index, signaled; -+ int objs[2], fd, ret; -+ pthread_t thread; -+ -+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); -+ ASSERT_LE(0, fd); -+ -+ sem_args.count = 0; -+ sem_args.max = 3; -+ sem_args.sem = 0xdeadbeef; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); -+ EXPECT_EQ(0, ret); -+ EXPECT_NE(0xdeadbeef, sem_args.sem); -+ -+ mutex_args.owner = 123; -+ mutex_args.count = 1; -+ mutex_args.mutex = 0xdeadbeef; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); -+ EXPECT_EQ(0, ret); -+ EXPECT_NE(0xdeadbeef, mutex_args.mutex); -+ -+ objs[0] = sem_args.sem; -+ objs[1] = mutex_args.mutex; -+ -+ /* test waking the semaphore */ -+ -+ wait_args.timeout = get_abs_timeout(1000); -+ wait_args.objs = (uintptr_t)objs; -+ wait_args.count = 2; -+ wait_args.owner = 456; -+ wait_args.index = 0xdeadbeef; -+ thread_args.fd = fd; -+ thread_args.args = &wait_args; -+ thread_args.request = NTSYNC_IOC_WAIT_ANY; -+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(ETIMEDOUT, ret); -+ -+ count = 1; -+ ret = post_sem(sem_args.sem, &count); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, count); -+ check_sem_state(sem_args.sem, 0, 3); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(0, ret); -+ 
EXPECT_EQ(0, thread_args.ret); -+ EXPECT_EQ(0, wait_args.index); -+ -+ /* test waking the mutex */ -+ -+ /* first grab it again for owner 123 */ -+ ret = wait_any(fd, 1, &mutex_args.mutex, 123, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ -+ wait_args.timeout = get_abs_timeout(1000); -+ wait_args.owner = 456; -+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(ETIMEDOUT, ret); -+ -+ ret = unlock_mutex(mutex_args.mutex, 123, &count); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(2, count); -+ -+ ret = pthread_tryjoin_np(thread, NULL); -+ EXPECT_EQ(EBUSY, ret); -+ -+ ret = unlock_mutex(mutex_args.mutex, 123, &count); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(1, mutex_args.count); -+ check_mutex_state(mutex_args.mutex, 1, 456); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, thread_args.ret); -+ EXPECT_EQ(1, wait_args.index); -+ -+ /* test waking events */ -+ -+ event_args.manual = false; -+ event_args.signaled = false; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); -+ EXPECT_EQ(0, ret); -+ -+ objs[1] = event_args.event; -+ wait_args.timeout = get_abs_timeout(1000); -+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(ETIMEDOUT, ret); -+ -+ ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_SET, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, signaled); -+ check_event_state(event_args.event, 0, 0); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, thread_args.ret); -+ EXPECT_EQ(1, wait_args.index); -+ -+ wait_args.timeout = get_abs_timeout(1000); -+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(ETIMEDOUT, ret); -+ -+ ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_PULSE, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, signaled); 
-+ check_event_state(event_args.event, 0, 0); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, thread_args.ret); -+ EXPECT_EQ(1, wait_args.index); -+ -+ close(event_args.event); -+ -+ event_args.manual = true; -+ event_args.signaled = false; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); -+ EXPECT_EQ(0, ret); -+ -+ objs[1] = event_args.event; -+ wait_args.timeout = get_abs_timeout(1000); -+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(ETIMEDOUT, ret); -+ -+ ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_SET, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, signaled); -+ check_event_state(event_args.event, 1, 1); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, thread_args.ret); -+ EXPECT_EQ(1, wait_args.index); -+ -+ ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_RESET, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(1, signaled); -+ -+ wait_args.timeout = get_abs_timeout(1000); -+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(ETIMEDOUT, ret); -+ -+ ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_PULSE, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, signaled); -+ check_event_state(event_args.event, 0, 1); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, thread_args.ret); -+ EXPECT_EQ(1, wait_args.index); -+ -+ close(event_args.event); -+ -+ /* delete an object while it's being waited on */ -+ -+ wait_args.timeout = get_abs_timeout(200); -+ wait_args.owner = 123; -+ objs[1] = mutex_args.mutex; -+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(ETIMEDOUT, ret); -+ -+ close(sem_args.sem); -+ close(mutex_args.mutex); -+ -+ ret = wait_for_thread(thread, 200); -+ EXPECT_EQ(0, ret); -+ 
EXPECT_EQ(-1, thread_args.ret); -+ EXPECT_EQ(ETIMEDOUT, thread_args.err); -+ -+ close(fd); -+} -+ -+TEST(wake_all) -+{ -+ struct ntsync_event_args manual_event_args = {0}; -+ struct ntsync_event_args auto_event_args = {0}; -+ struct ntsync_mutex_args mutex_args = {0}; -+ struct ntsync_wait_args wait_args = {0}; -+ struct ntsync_sem_args sem_args = {0}; -+ struct wait_args thread_args; -+ __u32 count, index, signaled; -+ int objs[4], fd, ret; -+ pthread_t thread; -+ -+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); -+ ASSERT_LE(0, fd); -+ -+ sem_args.count = 0; -+ sem_args.max = 3; -+ sem_args.sem = 0xdeadbeef; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); -+ EXPECT_EQ(0, ret); -+ EXPECT_NE(0xdeadbeef, sem_args.sem); -+ -+ mutex_args.owner = 123; -+ mutex_args.count = 1; -+ mutex_args.mutex = 0xdeadbeef; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); -+ EXPECT_EQ(0, ret); -+ EXPECT_NE(0xdeadbeef, mutex_args.mutex); -+ -+ manual_event_args.manual = true; -+ manual_event_args.signaled = true; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &manual_event_args); -+ EXPECT_EQ(0, ret); -+ -+ auto_event_args.manual = false; -+ auto_event_args.signaled = true; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &auto_event_args); -+ EXPECT_EQ(0, ret); -+ -+ objs[0] = sem_args.sem; -+ objs[1] = mutex_args.mutex; -+ objs[2] = manual_event_args.event; -+ objs[3] = auto_event_args.event; -+ -+ wait_args.timeout = get_abs_timeout(1000); -+ wait_args.objs = (uintptr_t)objs; -+ wait_args.count = 4; -+ wait_args.owner = 456; -+ thread_args.fd = fd; -+ thread_args.args = &wait_args; -+ thread_args.request = NTSYNC_IOC_WAIT_ALL; -+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(ETIMEDOUT, ret); -+ -+ count = 1; -+ ret = post_sem(sem_args.sem, &count); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, count); -+ -+ ret = pthread_tryjoin_np(thread, NULL); -+ EXPECT_EQ(EBUSY, ret); -+ -+ 
check_sem_state(sem_args.sem, 1, 3); -+ -+ ret = wait_any(fd, 1, &sem_args.sem, 123, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ -+ ret = unlock_mutex(mutex_args.mutex, 123, &count); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(1, count); -+ -+ ret = pthread_tryjoin_np(thread, NULL); -+ EXPECT_EQ(EBUSY, ret); -+ -+ check_mutex_state(mutex_args.mutex, 0, 0); -+ -+ ret = ioctl(manual_event_args.event, NTSYNC_IOC_EVENT_RESET, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(1, signaled); -+ -+ count = 2; -+ ret = post_sem(sem_args.sem, &count); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, count); -+ check_sem_state(sem_args.sem, 2, 3); -+ -+ ret = ioctl(auto_event_args.event, NTSYNC_IOC_EVENT_RESET, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(1, signaled); -+ -+ ret = ioctl(manual_event_args.event, NTSYNC_IOC_EVENT_SET, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, signaled); -+ -+ ret = ioctl(auto_event_args.event, NTSYNC_IOC_EVENT_SET, &signaled); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, signaled); -+ -+ check_sem_state(sem_args.sem, 1, 3); -+ check_mutex_state(mutex_args.mutex, 1, 456); -+ check_event_state(manual_event_args.event, 1, 1); -+ check_event_state(auto_event_args.event, 0, 0); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, thread_args.ret); -+ -+ /* delete an object while it's being waited on */ -+ -+ wait_args.timeout = get_abs_timeout(200); -+ wait_args.owner = 123; -+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(ETIMEDOUT, ret); -+ -+ close(sem_args.sem); -+ close(mutex_args.mutex); -+ close(manual_event_args.event); -+ close(auto_event_args.event); -+ -+ ret = wait_for_thread(thread, 200); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(-1, thread_args.ret); -+ EXPECT_EQ(ETIMEDOUT, thread_args.err); -+ -+ close(fd); -+} -+ -+TEST(alert_any) -+{ -+ struct ntsync_event_args event_args = {0}; -+ struct ntsync_wait_args wait_args = {0}; -+ 
struct ntsync_sem_args sem_args = {0}; -+ __u32 index, count, signaled; -+ struct wait_args thread_args; -+ int objs[2], fd, ret; -+ pthread_t thread; -+ -+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); -+ ASSERT_LE(0, fd); -+ -+ sem_args.count = 0; -+ sem_args.max = 2; -+ sem_args.sem = 0xdeadbeef; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); -+ EXPECT_EQ(0, ret); -+ EXPECT_NE(0xdeadbeef, sem_args.sem); -+ objs[0] = sem_args.sem; -+ -+ sem_args.count = 1; -+ sem_args.max = 2; -+ sem_args.sem = 0xdeadbeef; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); -+ EXPECT_EQ(0, ret); -+ EXPECT_NE(0xdeadbeef, sem_args.sem); -+ objs[1] = sem_args.sem; -+ -+ event_args.manual = true; -+ event_args.signaled = true; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_any_alert(fd, 0, NULL, 123, event_args.event, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ -+ ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_RESET, &signaled); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_any_alert(fd, 0, NULL, 123, event_args.event, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(ETIMEDOUT, errno); -+ -+ ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_SET, &signaled); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(1, index); -+ -+ ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(2, index); -+ -+ /* test wakeup via alert */ -+ -+ ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_RESET, &signaled); -+ EXPECT_EQ(0, ret); -+ -+ wait_args.timeout = get_abs_timeout(1000); -+ wait_args.objs = (uintptr_t)objs; -+ wait_args.count = 2; -+ wait_args.owner = 123; -+ wait_args.index = 0xdeadbeef; -+ wait_args.alert = event_args.event; -+ thread_args.fd = fd; -+ thread_args.args = &wait_args; -+ thread_args.request = NTSYNC_IOC_WAIT_ANY; -+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args); -+ EXPECT_EQ(0, 
ret); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(ETIMEDOUT, ret); -+ -+ ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_SET, &signaled); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, thread_args.ret); -+ EXPECT_EQ(2, wait_args.index); -+ -+ close(event_args.event); -+ -+ /* test with an auto-reset event */ -+ -+ event_args.manual = false; -+ event_args.signaled = true; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); -+ EXPECT_EQ(0, ret); -+ -+ count = 1; -+ ret = post_sem(objs[0], &count); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ -+ ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(2, index); -+ -+ ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(ETIMEDOUT, errno); -+ -+ close(event_args.event); -+ -+ close(objs[0]); -+ close(objs[1]); -+ -+ close(fd); -+} -+ -+TEST(alert_all) -+{ -+ struct ntsync_event_args event_args = {0}; -+ struct ntsync_wait_args wait_args = {0}; -+ struct ntsync_sem_args sem_args = {0}; -+ struct wait_args thread_args; -+ __u32 index, count, signaled; -+ int objs[2], fd, ret; -+ pthread_t thread; -+ -+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); -+ ASSERT_LE(0, fd); -+ -+ sem_args.count = 2; -+ sem_args.max = 2; -+ sem_args.sem = 0xdeadbeef; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); -+ EXPECT_EQ(0, ret); -+ EXPECT_NE(0xdeadbeef, sem_args.sem); -+ objs[0] = sem_args.sem; -+ -+ sem_args.count = 1; -+ sem_args.max = 2; -+ sem_args.sem = 0xdeadbeef; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); -+ EXPECT_EQ(0, ret); -+ EXPECT_NE(0xdeadbeef, sem_args.sem); -+ objs[1] = sem_args.sem; -+ -+ event_args.manual = true; -+ event_args.signaled = true; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); -+ EXPECT_EQ(0, ret); -+ -+ ret = 
wait_all_alert(fd, 2, objs, 123, event_args.event, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ -+ ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(2, index); -+ -+ /* test wakeup via alert */ -+ -+ ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_RESET, &signaled); -+ EXPECT_EQ(0, ret); -+ -+ wait_args.timeout = get_abs_timeout(1000); -+ wait_args.objs = (uintptr_t)objs; -+ wait_args.count = 2; -+ wait_args.owner = 123; -+ wait_args.index = 0xdeadbeef; -+ wait_args.alert = event_args.event; -+ thread_args.fd = fd; -+ thread_args.args = &wait_args; -+ thread_args.request = NTSYNC_IOC_WAIT_ALL; -+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(ETIMEDOUT, ret); -+ -+ ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_SET, &signaled); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_for_thread(thread, 100); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, thread_args.ret); -+ EXPECT_EQ(2, wait_args.index); -+ -+ close(event_args.event); -+ -+ /* test with an auto-reset event */ -+ -+ event_args.manual = false; -+ event_args.signaled = true; -+ ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); -+ EXPECT_EQ(0, ret); -+ -+ count = 2; -+ ret = post_sem(objs[1], &count); -+ EXPECT_EQ(0, ret); -+ -+ ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(0, index); -+ -+ ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index); -+ EXPECT_EQ(0, ret); -+ EXPECT_EQ(2, index); -+ -+ ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(ETIMEDOUT, errno); -+ -+ close(event_args.event); -+ -+ close(objs[0]); -+ close(objs[1]); -+ -+ close(fd); -+} -+ -+#define STRESS_LOOPS 10000 -+#define STRESS_THREADS 4 -+ -+static unsigned int stress_counter; -+static int stress_device, stress_start_event, stress_mutex; -+ -+static void *stress_thread(void 
*arg) -+{ -+ struct ntsync_wait_args wait_args = {0}; -+ __u32 index, count, i; -+ int ret; -+ -+ wait_args.timeout = UINT64_MAX; -+ wait_args.count = 1; -+ wait_args.objs = (uintptr_t)&stress_start_event; -+ wait_args.owner = gettid(); -+ wait_args.index = 0xdeadbeef; -+ -+ ioctl(stress_device, NTSYNC_IOC_WAIT_ANY, &wait_args); -+ -+ wait_args.objs = (uintptr_t)&stress_mutex; -+ -+ for (i = 0; i < STRESS_LOOPS; ++i) { -+ ioctl(stress_device, NTSYNC_IOC_WAIT_ANY, &wait_args); -+ -+ ++stress_counter; -+ -+ unlock_mutex(stress_mutex, wait_args.owner, &count); -+ } -+ -+ return NULL; -+} -+ -+TEST(stress_wait) -+{ -+ struct ntsync_event_args event_args; -+ struct ntsync_mutex_args mutex_args; -+ pthread_t threads[STRESS_THREADS]; -+ __u32 signaled, i; -+ int ret; -+ -+ stress_device = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); -+ ASSERT_LE(0, stress_device); -+ -+ mutex_args.owner = 0; -+ mutex_args.count = 0; -+ ret = ioctl(stress_device, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); -+ EXPECT_EQ(0, ret); -+ stress_mutex = mutex_args.mutex; -+ -+ event_args.manual = 1; -+ event_args.signaled = 0; -+ ret = ioctl(stress_device, NTSYNC_IOC_CREATE_EVENT, &event_args); -+ EXPECT_EQ(0, ret); -+ stress_start_event = event_args.event; -+ -+ for (i = 0; i < STRESS_THREADS; ++i) -+ pthread_create(&threads[i], NULL, stress_thread, NULL); -+ -+ ret = ioctl(stress_start_event, NTSYNC_IOC_EVENT_SET, &signaled); -+ EXPECT_EQ(0, ret); -+ -+ for (i = 0; i < STRESS_THREADS; ++i) { -+ ret = pthread_join(threads[i], NULL); -+ EXPECT_EQ(0, ret); -+ } -+ -+ EXPECT_EQ(STRESS_LOOPS * STRESS_THREADS, stress_counter); -+ -+ close(stress_start_event); -+ close(stress_mutex); -+ close(stress_device); -+} -+ -+TEST_HARNESS_MAIN --- -2.46.0.rc1 - -From 628f7cedd3a6dbd0c2b09bc027cc62e889ccdd57 Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Mon, 15 Jul 2024 13:26:57 +0200 -Subject: [PATCH 09/11] perf-per-core - -Signed-off-by: Peter Jung ---- - Documentation/arch/x86/topology.rst | 4 + - 
arch/x86/events/rapl.c | 418 ++++++++++++++++++-------- - arch/x86/include/asm/processor.h | 1 + - arch/x86/include/asm/topology.h | 1 + - arch/x86/kernel/cpu/debugfs.c | 1 + - arch/x86/kernel/cpu/topology_common.c | 1 + - 6 files changed, 305 insertions(+), 121 deletions(-) - -diff --git a/Documentation/arch/x86/topology.rst b/Documentation/arch/x86/topology.rst -index 7352ab89a55a..c12837e61bda 100644 ---- a/Documentation/arch/x86/topology.rst -+++ b/Documentation/arch/x86/topology.rst -@@ -135,6 +135,10 @@ Thread-related topology information in the kernel: - The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo - "core_id." - -+ - topology_logical_core_id(); -+ -+ The logical core ID to which a thread belongs. -+ - - - System topology examples -diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c -index 0c5e7a7c43ac..cd808b699ccc 100644 ---- a/arch/x86/events/rapl.c -+++ b/arch/x86/events/rapl.c -@@ -39,6 +39,10 @@ - * event: rapl_energy_psys - * perf code: 0x5 - * -+ * per_core counter: consumption of a single physical core -+ * event: rapl_energy_per_core (power_per_core PMU) -+ * perf code: 0x1 -+ * - * We manage those counters as free running (read-only). They may be - * use simultaneously by other tools, such as turbostat. 
- * -@@ -70,18 +74,25 @@ MODULE_LICENSE("GPL"); - /* - * RAPL energy status counters - */ --enum perf_rapl_events { -+enum perf_rapl_pkg_events { - PERF_RAPL_PP0 = 0, /* all cores */ - PERF_RAPL_PKG, /* entire package */ - PERF_RAPL_RAM, /* DRAM */ - PERF_RAPL_PP1, /* gpu */ - PERF_RAPL_PSYS, /* psys */ - -- PERF_RAPL_MAX, -- NR_RAPL_DOMAINS = PERF_RAPL_MAX, -+ PERF_RAPL_PKG_EVENTS_MAX, -+ NR_RAPL_PKG_DOMAINS = PERF_RAPL_PKG_EVENTS_MAX, -+}; -+ -+enum perf_rapl_core_events { -+ PERF_RAPL_PER_CORE = 0, /* per-core */ -+ -+ PERF_RAPL_CORE_EVENTS_MAX, -+ NR_RAPL_CORE_DOMAINS = PERF_RAPL_CORE_EVENTS_MAX, - }; - --static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { -+static const char *const rapl_pkg_domain_names[NR_RAPL_PKG_DOMAINS] __initconst = { - "pp0-core", - "package", - "dram", -@@ -89,6 +100,10 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { - "psys", - }; - -+static const char *const rapl_core_domain_names[NR_RAPL_CORE_DOMAINS] __initconst = { -+ "per-core", -+}; -+ - /* - * event code: LSB 8 bits, passed in attr->config - * any other bit is reserved -@@ -103,6 +118,10 @@ static struct perf_pmu_events_attr event_attr_##v = { \ - .event_str = str, \ - }; - -+#define rapl_pmu_is_pkg_scope() \ -+ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || \ -+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) -+ - struct rapl_pmu { - raw_spinlock_t lock; - int n_active; -@@ -115,8 +134,9 @@ struct rapl_pmu { - - struct rapl_pmus { - struct pmu pmu; -+ cpumask_t cpumask; - unsigned int nr_rapl_pmu; -- struct rapl_pmu *pmus[] __counted_by(nr_rapl_pmu); -+ struct rapl_pmu *rapl_pmu[] __counted_by(nr_rapl_pmu); - }; - - enum rapl_unit_quirk { -@@ -126,29 +146,45 @@ enum rapl_unit_quirk { - }; - - struct rapl_model { -- struct perf_msr *rapl_msrs; -- unsigned long events; -+ struct perf_msr *rapl_pkg_msrs; -+ struct perf_msr *rapl_core_msrs; -+ unsigned long pkg_events; -+ unsigned long core_events; - unsigned int msr_power_unit; 
- enum rapl_unit_quirk unit_quirk; - }; - - /* 1/2^hw_unit Joule */ --static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; --static struct rapl_pmus *rapl_pmus; --static cpumask_t rapl_cpu_mask; --static unsigned int rapl_cntr_mask; -+static int rapl_hw_unit[NR_RAPL_PKG_DOMAINS] __read_mostly; -+static struct rapl_pmus *rapl_pmus_pkg; -+static struct rapl_pmus *rapl_pmus_core; -+static unsigned int rapl_pkg_cntr_mask; -+static unsigned int rapl_core_cntr_mask; - static u64 rapl_timer_ms; --static struct perf_msr *rapl_msrs; -+static struct rapl_model *rapl_model; -+ -+static inline unsigned int get_rapl_pmu_idx(int cpu) -+{ -+ return rapl_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) : -+ topology_logical_die_id(cpu); -+} -+ -+static inline const struct cpumask *get_rapl_pmu_cpumask(int cpu) -+{ -+ return rapl_pmu_is_pkg_scope() ? topology_core_cpumask(cpu) : -+ topology_die_cpumask(cpu); -+} - - static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) - { -- unsigned int rapl_pmu_idx = topology_logical_die_id(cpu); -+ unsigned int rapl_pmu_idx = get_rapl_pmu_idx(cpu); - - /* - * The unsigned check also catches the '-1' return value for non - * existent mappings in the topology map. - */ -- return rapl_pmu_idx < rapl_pmus->nr_rapl_pmu ? rapl_pmus->pmus[rapl_pmu_idx] : NULL; -+ return rapl_pmu_idx < rapl_pmus_pkg->nr_rapl_pmu ? 
-+ rapl_pmus_pkg->rapl_pmu[rapl_pmu_idx] : NULL; - } - - static inline u64 rapl_read_counter(struct perf_event *event) -@@ -160,7 +196,7 @@ static inline u64 rapl_read_counter(struct perf_event *event) - - static inline u64 rapl_scale(u64 v, int cfg) - { -- if (cfg > NR_RAPL_DOMAINS) { -+ if (cfg > NR_RAPL_PKG_DOMAINS) { - pr_warn("Invalid domain %d, failed to scale data\n", cfg); - return v; - } -@@ -212,34 +248,34 @@ static void rapl_start_hrtimer(struct rapl_pmu *pmu) - - static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) - { -- struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer); -+ struct rapl_pmu *rapl_pmu = container_of(hrtimer, struct rapl_pmu, hrtimer); - struct perf_event *event; - unsigned long flags; - -- if (!pmu->n_active) -+ if (!rapl_pmu->n_active) - return HRTIMER_NORESTART; - -- raw_spin_lock_irqsave(&pmu->lock, flags); -+ raw_spin_lock_irqsave(&rapl_pmu->lock, flags); - -- list_for_each_entry(event, &pmu->active_list, active_entry) -+ list_for_each_entry(event, &rapl_pmu->active_list, active_entry) - rapl_event_update(event); - -- raw_spin_unlock_irqrestore(&pmu->lock, flags); -+ raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags); - -- hrtimer_forward_now(hrtimer, pmu->timer_interval); -+ hrtimer_forward_now(hrtimer, rapl_pmu->timer_interval); - - return HRTIMER_RESTART; - } - --static void rapl_hrtimer_init(struct rapl_pmu *pmu) -+static void rapl_hrtimer_init(struct rapl_pmu *rapl_pmu) - { -- struct hrtimer *hr = &pmu->hrtimer; -+ struct hrtimer *hr = &rapl_pmu->hrtimer; - - hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hr->function = rapl_hrtimer_handle; - } - --static void __rapl_pmu_event_start(struct rapl_pmu *pmu, -+static void __rapl_pmu_event_start(struct rapl_pmu *rapl_pmu, - struct perf_event *event) - { - if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) -@@ -247,39 +283,39 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu, - - event->hw.state = 0; - -- 
list_add_tail(&event->active_entry, &pmu->active_list); -+ list_add_tail(&event->active_entry, &rapl_pmu->active_list); - - local64_set(&event->hw.prev_count, rapl_read_counter(event)); - -- pmu->n_active++; -- if (pmu->n_active == 1) -- rapl_start_hrtimer(pmu); -+ rapl_pmu->n_active++; -+ if (rapl_pmu->n_active == 1) -+ rapl_start_hrtimer(rapl_pmu); - } - - static void rapl_pmu_event_start(struct perf_event *event, int mode) - { -- struct rapl_pmu *pmu = event->pmu_private; -+ struct rapl_pmu *rapl_pmu = event->pmu_private; - unsigned long flags; - -- raw_spin_lock_irqsave(&pmu->lock, flags); -- __rapl_pmu_event_start(pmu, event); -- raw_spin_unlock_irqrestore(&pmu->lock, flags); -+ raw_spin_lock_irqsave(&rapl_pmu->lock, flags); -+ __rapl_pmu_event_start(rapl_pmu, event); -+ raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags); - } - - static void rapl_pmu_event_stop(struct perf_event *event, int mode) - { -- struct rapl_pmu *pmu = event->pmu_private; -+ struct rapl_pmu *rapl_pmu = event->pmu_private; - struct hw_perf_event *hwc = &event->hw; - unsigned long flags; - -- raw_spin_lock_irqsave(&pmu->lock, flags); -+ raw_spin_lock_irqsave(&rapl_pmu->lock, flags); - - /* mark event as deactivated and stopped */ - if (!(hwc->state & PERF_HES_STOPPED)) { -- WARN_ON_ONCE(pmu->n_active <= 0); -- pmu->n_active--; -- if (pmu->n_active == 0) -- hrtimer_cancel(&pmu->hrtimer); -+ WARN_ON_ONCE(rapl_pmu->n_active <= 0); -+ rapl_pmu->n_active--; -+ if (rapl_pmu->n_active == 0) -+ hrtimer_cancel(&rapl_pmu->hrtimer); - - list_del(&event->active_entry); - -@@ -297,23 +333,23 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) - hwc->state |= PERF_HES_UPTODATE; - } - -- raw_spin_unlock_irqrestore(&pmu->lock, flags); -+ raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags); - } - - static int rapl_pmu_event_add(struct perf_event *event, int mode) - { -- struct rapl_pmu *pmu = event->pmu_private; -+ struct rapl_pmu *rapl_pmu = event->pmu_private; - struct hw_perf_event 
*hwc = &event->hw; - unsigned long flags; - -- raw_spin_lock_irqsave(&pmu->lock, flags); -+ raw_spin_lock_irqsave(&rapl_pmu->lock, flags); - - hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; - - if (mode & PERF_EF_START) -- __rapl_pmu_event_start(pmu, event); -+ __rapl_pmu_event_start(rapl_pmu, event); - -- raw_spin_unlock_irqrestore(&pmu->lock, flags); -+ raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags); - - return 0; - } -@@ -327,10 +363,14 @@ static int rapl_pmu_event_init(struct perf_event *event) - { - u64 cfg = event->attr.config & RAPL_EVENT_MASK; - int bit, ret = 0; -- struct rapl_pmu *pmu; -+ struct rapl_pmu *rapl_pmu; -+ struct rapl_pmus *curr_rapl_pmus; - - /* only look at RAPL events */ -- if (event->attr.type != rapl_pmus->pmu.type) -+ if (event->attr.type == rapl_pmus_pkg->pmu.type || -+ (rapl_pmus_core && event->attr.type == rapl_pmus_core->pmu.type)) -+ curr_rapl_pmus = container_of(event->pmu, struct rapl_pmus, pmu); -+ else - return -ENOENT; - - /* check only supported bits are set */ -@@ -340,16 +380,18 @@ static int rapl_pmu_event_init(struct perf_event *event) - if (event->cpu < 0) - return -EINVAL; - -- event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; -+ if (curr_rapl_pmus == rapl_pmus_pkg) -+ event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; - -- if (!cfg || cfg >= NR_RAPL_DOMAINS + 1) -+ if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1) - return -EINVAL; - -- cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1); -+ cfg = array_index_nospec((long)cfg, NR_RAPL_PKG_DOMAINS + 1); - bit = cfg - 1; - - /* check event supported */ -- if (!(rapl_cntr_mask & (1 << bit))) -+ if (!(rapl_pkg_cntr_mask & (1 << bit)) && -+ !(rapl_core_cntr_mask & (1 << bit))) - return -EINVAL; - - /* unsupported modes and filters */ -@@ -357,12 +399,18 @@ static int rapl_pmu_event_init(struct perf_event *event) - return -EINVAL; - - /* must be done before validate_group */ -- pmu = cpu_to_rapl_pmu(event->cpu); -- if (!pmu) -+ if (curr_rapl_pmus == rapl_pmus_core) { -+ 
rapl_pmu = curr_rapl_pmus->rapl_pmu[topology_logical_core_id(event->cpu)]; -+ event->hw.event_base = rapl_model->rapl_core_msrs[bit].msr; -+ } else { -+ rapl_pmu = curr_rapl_pmus->rapl_pmu[get_rapl_pmu_idx(event->cpu)]; -+ event->hw.event_base = rapl_model->rapl_pkg_msrs[bit].msr; -+ } -+ -+ if (!rapl_pmu) - return -EINVAL; -- event->cpu = pmu->cpu; -- event->pmu_private = pmu; -- event->hw.event_base = rapl_msrs[bit].msr; -+ event->cpu = rapl_pmu->cpu; -+ event->pmu_private = rapl_pmu; - event->hw.config = cfg; - event->hw.idx = bit; - -@@ -377,7 +425,7 @@ static void rapl_pmu_event_read(struct perf_event *event) - static ssize_t rapl_get_attr_cpumask(struct device *dev, - struct device_attribute *attr, char *buf) - { -- return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask); -+ return cpumap_print_to_pagebuf(true, buf, &rapl_pmus_pkg->cpumask); - } - - static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL); -@@ -391,17 +439,38 @@ static struct attribute_group rapl_pmu_attr_group = { - .attrs = rapl_pmu_attrs, - }; - -+static ssize_t rapl_get_attr_per_core_cpumask(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ return cpumap_print_to_pagebuf(true, buf, &rapl_pmus_core->cpumask); -+} -+ -+static struct device_attribute dev_attr_per_core_cpumask = __ATTR(cpumask, 0444, -+ rapl_get_attr_per_core_cpumask, -+ NULL); -+ -+static struct attribute *rapl_pmu_per_core_attrs[] = { -+ &dev_attr_per_core_cpumask.attr, -+ NULL, -+}; -+ -+static struct attribute_group rapl_pmu_per_core_attr_group = { -+ .attrs = rapl_pmu_per_core_attrs, -+}; -+ - RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); - RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); - RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); - RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04"); - RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05"); -+RAPL_EVENT_ATTR_STR(energy-per-core, rapl_per_core, "event=0x01"); - - 
RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules"); - RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules"); - RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules"); - RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules"); - RAPL_EVENT_ATTR_STR(energy-psys.unit, rapl_psys_unit, "Joules"); -+RAPL_EVENT_ATTR_STR(energy-per-core.unit, rapl_per_core_unit, "Joules"); - - /* - * we compute in 0.23 nJ increments regardless of MSR -@@ -411,6 +480,7 @@ RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890 - RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10"); - RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10"); - RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10"); -+RAPL_EVENT_ATTR_STR(energy-per-core.scale, rapl_per_core_scale, "2.3283064365386962890625e-10"); - - /* - * There are no default events, but we need to create -@@ -444,6 +514,13 @@ static const struct attribute_group *rapl_attr_groups[] = { - NULL, - }; - -+static const struct attribute_group *rapl_per_core_attr_groups[] = { -+ &rapl_pmu_per_core_attr_group, -+ &rapl_pmu_format_group, -+ &rapl_pmu_events_group, -+ NULL, -+}; -+ - static struct attribute *rapl_events_cores[] = { - EVENT_PTR(rapl_cores), - EVENT_PTR(rapl_cores_unit), -@@ -504,6 +581,18 @@ static struct attribute_group rapl_events_psys_group = { - .attrs = rapl_events_psys, - }; - -+static struct attribute *rapl_events_per_core[] = { -+ EVENT_PTR(rapl_per_core), -+ EVENT_PTR(rapl_per_core_unit), -+ EVENT_PTR(rapl_per_core_scale), -+ NULL, -+}; -+ -+static struct attribute_group rapl_events_per_core_group = { -+ .name = "events", -+ .attrs = rapl_events_per_core, -+}; -+ - static bool test_msr(int idx, void *data) - { - return test_bit(idx, (unsigned long *) data); -@@ -529,11 +618,11 @@ static struct perf_msr intel_rapl_spr_msrs[] = { - }; - - /* -- * Force to PERF_RAPL_MAX size 
due to: -- * - perf_msr_probe(PERF_RAPL_MAX) -+ * Force to PERF_RAPL_PKG_EVENTS_MAX size due to: -+ * - perf_msr_probe(PERF_RAPL_PKG_EVENTS_MAX) - * - want to use same event codes across both architectures - */ --static struct perf_msr amd_rapl_msrs[] = { -+static struct perf_msr amd_rapl_pkg_msrs[] = { - [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, NULL, false, 0 }, - [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK }, - [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, NULL, false, 0 }, -@@ -541,72 +630,104 @@ static struct perf_msr amd_rapl_msrs[] = { - [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 }, - }; - --static int rapl_cpu_offline(unsigned int cpu) -+static struct perf_msr amd_rapl_core_msrs[] = { -+ [PERF_RAPL_PER_CORE] = { MSR_AMD_CORE_ENERGY_STATUS, &rapl_events_per_core_group, -+ test_msr, false, RAPL_MSR_MASK }, -+}; -+ -+static int __rapl_cpu_offline(struct rapl_pmus *rapl_pmus, unsigned int rapl_pmu_idx, -+ const struct cpumask *event_cpumask, unsigned int cpu) - { -- struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); -+ struct rapl_pmu *rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx]; - int target; - - /* Check if exiting cpu is used for collecting rapl events */ -- if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask)) -+ if (!cpumask_test_and_clear_cpu(cpu, &rapl_pmus->cpumask)) - return 0; - -- pmu->cpu = -1; -+ rapl_pmu->cpu = -1; - /* Find a new cpu to collect rapl events */ -- target = cpumask_any_but(topology_die_cpumask(cpu), cpu); -+ target = cpumask_any_but(event_cpumask, cpu); - - /* Migrate rapl events to the new target */ - if (target < nr_cpu_ids) { -- cpumask_set_cpu(target, &rapl_cpu_mask); -- pmu->cpu = target; -- perf_pmu_migrate_context(pmu->pmu, cpu, target); -+ cpumask_set_cpu(target, &rapl_pmus->cpumask); -+ rapl_pmu->cpu = target; -+ perf_pmu_migrate_context(rapl_pmu->pmu, cpu, target); - } - return 0; - } - --static int rapl_cpu_online(unsigned int cpu) -+static 
int rapl_cpu_offline(unsigned int cpu) -+{ -+ int ret = __rapl_cpu_offline(rapl_pmus_pkg, get_rapl_pmu_idx(cpu), -+ get_rapl_pmu_cpumask(cpu), cpu); -+ -+ if (ret == 0 && rapl_model->core_events) -+ ret = __rapl_cpu_offline(rapl_pmus_core, topology_logical_core_id(cpu), -+ topology_sibling_cpumask(cpu), cpu); -+ -+ return ret; -+} -+ -+static int __rapl_cpu_online(struct rapl_pmus *rapl_pmus, unsigned int rapl_pmu_idx, -+ const struct cpumask *event_cpumask, unsigned int cpu) - { -- struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); -+ struct rapl_pmu *rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx]; - int target; - -- if (!pmu) { -- pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); -- if (!pmu) -+ if (!rapl_pmu) { -+ rapl_pmu = kzalloc_node(sizeof(*rapl_pmu), GFP_KERNEL, cpu_to_node(cpu)); -+ if (!rapl_pmu) - return -ENOMEM; - -- raw_spin_lock_init(&pmu->lock); -- INIT_LIST_HEAD(&pmu->active_list); -- pmu->pmu = &rapl_pmus->pmu; -- pmu->timer_interval = ms_to_ktime(rapl_timer_ms); -- rapl_hrtimer_init(pmu); -+ raw_spin_lock_init(&rapl_pmu->lock); -+ INIT_LIST_HEAD(&rapl_pmu->active_list); -+ rapl_pmu->pmu = &rapl_pmus->pmu; -+ rapl_pmu->timer_interval = ms_to_ktime(rapl_timer_ms); -+ rapl_hrtimer_init(rapl_pmu); - -- rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu; -+ rapl_pmus->rapl_pmu[rapl_pmu_idx] = rapl_pmu; - } - - /* - * Check if there is an online cpu in the package which collects rapl - * events already. 
- */ -- target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu)); -+ target = cpumask_any_and(&rapl_pmus->cpumask, event_cpumask); - if (target < nr_cpu_ids) - return 0; - -- cpumask_set_cpu(cpu, &rapl_cpu_mask); -- pmu->cpu = cpu; -+ cpumask_set_cpu(cpu, &rapl_pmus->cpumask); -+ rapl_pmu->cpu = cpu; - return 0; - } - --static int rapl_check_hw_unit(struct rapl_model *rm) -+static int rapl_cpu_online(unsigned int cpu) -+{ -+ int ret = __rapl_cpu_online(rapl_pmus_pkg, get_rapl_pmu_idx(cpu), -+ get_rapl_pmu_cpumask(cpu), cpu); -+ -+ if (ret == 0 && rapl_model->core_events) -+ ret = __rapl_cpu_online(rapl_pmus_core, topology_logical_core_id(cpu), -+ topology_sibling_cpumask(cpu), cpu); -+ -+ return ret; -+} -+ -+ -+static int rapl_check_hw_unit(void) - { - u64 msr_rapl_power_unit_bits; - int i; - - /* protect rdmsrl() to handle virtualization */ -- if (rdmsrl_safe(rm->msr_power_unit, &msr_rapl_power_unit_bits)) -+ if (rdmsrl_safe(rapl_model->msr_power_unit, &msr_rapl_power_unit_bits)) - return -1; -- for (i = 0; i < NR_RAPL_DOMAINS; i++) -+ for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) - rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; - -- switch (rm->unit_quirk) { -+ switch (rapl_model->unit_quirk) { - /* - * DRAM domain on HSW server and KNL has fixed energy unit which can be - * different than the unit from power unit MSR. 
See -@@ -645,22 +766,29 @@ static void __init rapl_advertise(void) - int i; - - pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n", -- hweight32(rapl_cntr_mask), rapl_timer_ms); -+ hweight32(rapl_pkg_cntr_mask) + hweight32(rapl_core_cntr_mask), rapl_timer_ms); - -- for (i = 0; i < NR_RAPL_DOMAINS; i++) { -- if (rapl_cntr_mask & (1 << i)) { -+ for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) { -+ if (rapl_pkg_cntr_mask & (1 << i)) { - pr_info("hw unit of domain %s 2^-%d Joules\n", -- rapl_domain_names[i], rapl_hw_unit[i]); -+ rapl_pkg_domain_names[i], rapl_hw_unit[i]); -+ } -+ } -+ -+ for (i = 0; i < NR_RAPL_CORE_DOMAINS; i++) { -+ if (rapl_core_cntr_mask & (1 << i)) { -+ pr_info("hw unit of domain %s 2^-%d Joules\n", -+ rapl_core_domain_names[i], rapl_hw_unit[i]); - } - } - } - --static void cleanup_rapl_pmus(void) -+static void cleanup_rapl_pmus(struct rapl_pmus *rapl_pmus) - { - int i; - - for (i = 0; i < rapl_pmus->nr_rapl_pmu; i++) -- kfree(rapl_pmus->pmus[i]); -+ kfree(rapl_pmus->rapl_pmu[i]); - kfree(rapl_pmus); - } - -@@ -673,11 +801,17 @@ static const struct attribute_group *rapl_attr_update[] = { - NULL, - }; - --static int __init init_rapl_pmus(void) -+static const struct attribute_group *rapl_per_core_attr_update[] = { -+ &rapl_events_per_core_group, -+}; -+ -+static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int nr_rapl_pmu, -+ const struct attribute_group **rapl_attr_groups, -+ const struct attribute_group **rapl_attr_update) - { -- int nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package(); -+ struct rapl_pmus *rapl_pmus; - -- rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL); -+ rapl_pmus = kzalloc(struct_size(rapl_pmus, rapl_pmu, nr_rapl_pmu), GFP_KERNEL); - if (!rapl_pmus) - return -ENOMEM; - -@@ -693,75 +827,80 @@ static int __init init_rapl_pmus(void) - rapl_pmus->pmu.read = rapl_pmu_event_read; - rapl_pmus->pmu.module = THIS_MODULE; - rapl_pmus->pmu.capabilities = 
PERF_PMU_CAP_NO_EXCLUDE; -+ -+ *rapl_pmus_ptr = rapl_pmus; -+ - return 0; - } - - static struct rapl_model model_snb = { -- .events = BIT(PERF_RAPL_PP0) | -+ .pkg_events = BIT(PERF_RAPL_PP0) | - BIT(PERF_RAPL_PKG) | - BIT(PERF_RAPL_PP1), - .msr_power_unit = MSR_RAPL_POWER_UNIT, -- .rapl_msrs = intel_rapl_msrs, -+ .rapl_pkg_msrs = intel_rapl_msrs, - }; - - static struct rapl_model model_snbep = { -- .events = BIT(PERF_RAPL_PP0) | -+ .pkg_events = BIT(PERF_RAPL_PP0) | - BIT(PERF_RAPL_PKG) | - BIT(PERF_RAPL_RAM), - .msr_power_unit = MSR_RAPL_POWER_UNIT, -- .rapl_msrs = intel_rapl_msrs, -+ .rapl_pkg_msrs = intel_rapl_msrs, - }; - - static struct rapl_model model_hsw = { -- .events = BIT(PERF_RAPL_PP0) | -+ .pkg_events = BIT(PERF_RAPL_PP0) | - BIT(PERF_RAPL_PKG) | - BIT(PERF_RAPL_RAM) | - BIT(PERF_RAPL_PP1), - .msr_power_unit = MSR_RAPL_POWER_UNIT, -- .rapl_msrs = intel_rapl_msrs, -+ .rapl_pkg_msrs = intel_rapl_msrs, - }; - - static struct rapl_model model_hsx = { -- .events = BIT(PERF_RAPL_PP0) | -+ .pkg_events = BIT(PERF_RAPL_PP0) | - BIT(PERF_RAPL_PKG) | - BIT(PERF_RAPL_RAM), - .unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW, - .msr_power_unit = MSR_RAPL_POWER_UNIT, -- .rapl_msrs = intel_rapl_msrs, -+ .rapl_pkg_msrs = intel_rapl_msrs, - }; - - static struct rapl_model model_knl = { -- .events = BIT(PERF_RAPL_PKG) | -+ .pkg_events = BIT(PERF_RAPL_PKG) | - BIT(PERF_RAPL_RAM), - .unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW, - .msr_power_unit = MSR_RAPL_POWER_UNIT, -- .rapl_msrs = intel_rapl_msrs, -+ .rapl_pkg_msrs = intel_rapl_msrs, - }; - - static struct rapl_model model_skl = { -- .events = BIT(PERF_RAPL_PP0) | -+ .pkg_events = BIT(PERF_RAPL_PP0) | - BIT(PERF_RAPL_PKG) | - BIT(PERF_RAPL_RAM) | - BIT(PERF_RAPL_PP1) | - BIT(PERF_RAPL_PSYS), - .msr_power_unit = MSR_RAPL_POWER_UNIT, -- .rapl_msrs = intel_rapl_msrs, -+ .rapl_pkg_msrs = intel_rapl_msrs, - }; - - static struct rapl_model model_spr = { -- .events = BIT(PERF_RAPL_PP0) | -+ .pkg_events = BIT(PERF_RAPL_PP0) | - 
BIT(PERF_RAPL_PKG) | - BIT(PERF_RAPL_RAM) | - BIT(PERF_RAPL_PSYS), - .unit_quirk = RAPL_UNIT_QUIRK_INTEL_SPR, - .msr_power_unit = MSR_RAPL_POWER_UNIT, -- .rapl_msrs = intel_rapl_spr_msrs, -+ .rapl_pkg_msrs = intel_rapl_spr_msrs, - }; - - static struct rapl_model model_amd_hygon = { -- .events = BIT(PERF_RAPL_PKG), -+ .pkg_events = BIT(PERF_RAPL_PKG), -+ .core_events = BIT(PERF_RAPL_PER_CORE), - .msr_power_unit = MSR_AMD_RAPL_POWER_UNIT, -- .rapl_msrs = amd_rapl_msrs, -+ .rapl_pkg_msrs = amd_rapl_pkg_msrs, -+ .rapl_core_msrs = amd_rapl_core_msrs, - }; - - static const struct x86_cpu_id rapl_model_match[] __initconst = { -@@ -817,28 +956,47 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); - static int __init rapl_pmu_init(void) - { - const struct x86_cpu_id *id; -- struct rapl_model *rm; - int ret; -+ int nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package(); -+ int nr_cores = topology_max_packages() * topology_num_cores_per_package(); -+ -+ if (rapl_pmu_is_pkg_scope()) -+ nr_rapl_pmu = topology_max_packages(); - - id = x86_match_cpu(rapl_model_match); - if (!id) - return -ENODEV; - -- rm = (struct rapl_model *) id->driver_data; -- -- rapl_msrs = rm->rapl_msrs; -+ rapl_model = (struct rapl_model *) id->driver_data; - -- rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX, -- false, (void *) &rm->events); -+ rapl_pkg_cntr_mask = perf_msr_probe(rapl_model->rapl_pkg_msrs, PERF_RAPL_PKG_EVENTS_MAX, -+ false, (void *) &rapl_model->pkg_events); - -- ret = rapl_check_hw_unit(rm); -+ ret = rapl_check_hw_unit(); - if (ret) - return ret; - -- ret = init_rapl_pmus(); -+ ret = init_rapl_pmus(&rapl_pmus_pkg, nr_rapl_pmu, rapl_attr_groups, rapl_attr_update); - if (ret) - return ret; - -+ if (rapl_model->core_events) { -+ rapl_core_cntr_mask = perf_msr_probe(rapl_model->rapl_core_msrs, -+ PERF_RAPL_CORE_EVENTS_MAX, false, -+ (void *) &rapl_model->core_events); -+ -+ ret = init_rapl_pmus(&rapl_pmus_core, nr_cores, -+ rapl_per_core_attr_groups, 
rapl_per_core_attr_update); -+ if (ret) { -+ /* -+ * If initialization of per_core PMU fails, reset per_core -+ * flag, and continue with power PMU initialization. -+ */ -+ pr_warn("Per-core PMU initialization failed (%d)\n", ret); -+ rapl_model->core_events = 0UL; -+ } -+ } -+ - /* - * Install callbacks. Core will call them for each online cpu. - */ -@@ -848,10 +1006,24 @@ static int __init rapl_pmu_init(void) - if (ret) - goto out; - -- ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); -+ ret = perf_pmu_register(&rapl_pmus_pkg->pmu, "power", -1); - if (ret) - goto out1; - -+ if (rapl_model->core_events) { -+ ret = perf_pmu_register(&rapl_pmus_core->pmu, "power_per_core", -1); -+ if (ret) { -+ /* -+ * If registration of per_core PMU fails, cleanup per_core PMU -+ * variables, reset the per_core flag and keep the -+ * power PMU untouched. -+ */ -+ pr_warn("Per-core PMU registration failed (%d)\n", ret); -+ cleanup_rapl_pmus(rapl_pmus_core); -+ rapl_model->core_events = 0UL; -+ } -+ } -+ - rapl_advertise(); - return 0; - -@@ -859,7 +1031,7 @@ static int __init rapl_pmu_init(void) - cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); - out: - pr_warn("Initialization failed (%d), disabled\n", ret); -- cleanup_rapl_pmus(); -+ cleanup_rapl_pmus(rapl_pmus_pkg); - return ret; - } - module_init(rapl_pmu_init); -@@ -867,7 +1039,11 @@ module_init(rapl_pmu_init); - static void __exit intel_rapl_exit(void) - { - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE); -- perf_pmu_unregister(&rapl_pmus->pmu); -- cleanup_rapl_pmus(); -+ perf_pmu_unregister(&rapl_pmus_pkg->pmu); -+ cleanup_rapl_pmus(rapl_pmus_pkg); -+ if (rapl_model->core_events) { -+ perf_pmu_unregister(&rapl_pmus_core->pmu); -+ cleanup_rapl_pmus(rapl_pmus_core); -+ } - } - module_exit(intel_rapl_exit); -diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h -index cb4f6c513c48..1ffe4260bef6 100644 ---- a/arch/x86/include/asm/processor.h -+++ b/arch/x86/include/asm/processor.h -@@ 
-98,6 +98,7 @@ struct cpuinfo_topology { - // Logical ID mappings - u32 logical_pkg_id; - u32 logical_die_id; -+ u32 logical_core_id; - - // AMD Node ID and Nodes per Package info - u32 amd_node_id; -diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h -index abe3a8f22cbd..2a6dbf965d92 100644 ---- a/arch/x86/include/asm/topology.h -+++ b/arch/x86/include/asm/topology.h -@@ -137,6 +137,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); - #define topology_logical_package_id(cpu) (cpu_data(cpu).topo.logical_pkg_id) - #define topology_physical_package_id(cpu) (cpu_data(cpu).topo.pkg_id) - #define topology_logical_die_id(cpu) (cpu_data(cpu).topo.logical_die_id) -+#define topology_logical_core_id(cpu) (cpu_data(cpu).topo.logical_core_id) - #define topology_die_id(cpu) (cpu_data(cpu).topo.die_id) - #define topology_core_id(cpu) (cpu_data(cpu).topo.core_id) - #define topology_ppin(cpu) (cpu_data(cpu).ppin) -diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c -index 3baf3e435834..b1eb6d7828db 100644 ---- a/arch/x86/kernel/cpu/debugfs.c -+++ b/arch/x86/kernel/cpu/debugfs.c -@@ -24,6 +24,7 @@ static int cpu_debug_show(struct seq_file *m, void *p) - seq_printf(m, "core_id: %u\n", c->topo.core_id); - seq_printf(m, "logical_pkg_id: %u\n", c->topo.logical_pkg_id); - seq_printf(m, "logical_die_id: %u\n", c->topo.logical_die_id); -+ seq_printf(m, "logical_core_id: %u\n", c->topo.logical_core_id); - seq_printf(m, "llc_id: %u\n", c->topo.llc_id); - seq_printf(m, "l2c_id: %u\n", c->topo.l2c_id); - seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id); -diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c -index 9a6069e7133c..23722aa21e2f 100644 ---- a/arch/x86/kernel/cpu/topology_common.c -+++ b/arch/x86/kernel/cpu/topology_common.c -@@ -151,6 +151,7 @@ static void topo_set_ids(struct topo_scan *tscan, bool early) - if (!early) { - c->topo.logical_pkg_id = 
topology_get_logical_id(apicid, TOPO_PKG_DOMAIN); - c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN); -+ c->topo.logical_core_id = topology_get_logical_id(apicid, TOPO_CORE_DOMAIN); - } - - /* Package relative core ID */ --- -2.46.0.rc1 - -From c9314e79325672ebbcf4955ec4b995fd52f07e4c Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Mon, 15 Jul 2024 13:27:08 +0200 -Subject: [PATCH 10/11] t2 - -Signed-off-by: Peter Jung ---- - .../ABI/testing/sysfs-driver-hid-appletb-kbd | 13 + - .../admin-guide/kernel-parameters.txt | 2 + - Documentation/core-api/printk-formats.rst | 32 + - Documentation/leds/well-known-leds.txt | 8 + - MAINTAINERS | 18 + - drivers/acpi/video_detect.c | 16 + - .../firmware/efi/libstub/efi-stub-helper.c | 3 + - drivers/firmware/efi/libstub/efistub.h | 14 + - drivers/firmware/efi/libstub/x86-stub.c | 27 + - drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 + - drivers/gpu/drm/drm_format_helper.c | 54 + - drivers/gpu/drm/i915/display/intel_ddi.c | 4 + - drivers/gpu/drm/i915/display/intel_fbdev.c | 6 +- - drivers/gpu/drm/i915/display/intel_quirks.c | 15 + - drivers/gpu/drm/i915/display/intel_quirks.h | 1 + - .../gpu/drm/tests/drm_format_helper_test.c | 81 ++ - drivers/gpu/drm/tiny/Kconfig | 12 + - drivers/gpu/drm/tiny/Makefile | 1 + - drivers/gpu/drm/tiny/appletbdrm.c | 624 +++++++++ - drivers/gpu/vga/vga_switcheroo.c | 7 +- - drivers/hid/Kconfig | 35 + - drivers/hid/Makefile | 3 + - drivers/hid/hid-apple-magic-backlight.c | 120 ++ - drivers/hid/hid-appletb-bl.c | 193 +++ - drivers/hid/hid-appletb-kbd.c | 289 +++++ - drivers/hid/hid-core.c | 25 + - drivers/hid/hid-google-hammer.c | 27 +- - drivers/hid/hid-multitouch.c | 60 +- - drivers/hid/hid-quirks.c | 8 +- - drivers/hwmon/applesmc.c | 1138 ++++++++++++----- - drivers/input/mouse/bcm5974.c | 138 ++ - drivers/pci/vgaarb.c | 1 + - drivers/platform/x86/apple-gmux.c | 18 + - drivers/staging/Kconfig | 2 + - drivers/staging/Makefile | 1 + - drivers/staging/apple-bce/Kconfig | 18 + - 
drivers/staging/apple-bce/Makefile | 28 + - drivers/staging/apple-bce/apple_bce.c | 444 +++++++ - drivers/staging/apple-bce/apple_bce.h | 38 + - drivers/staging/apple-bce/audio/audio.c | 711 ++++++++++ - drivers/staging/apple-bce/audio/audio.h | 125 ++ - drivers/staging/apple-bce/audio/description.h | 42 + - drivers/staging/apple-bce/audio/pcm.c | 308 +++++ - drivers/staging/apple-bce/audio/pcm.h | 16 + - drivers/staging/apple-bce/audio/protocol.c | 347 +++++ - drivers/staging/apple-bce/audio/protocol.h | 147 +++ - .../staging/apple-bce/audio/protocol_bce.c | 226 ++++ - .../staging/apple-bce/audio/protocol_bce.h | 72 ++ - drivers/staging/apple-bce/mailbox.c | 151 +++ - drivers/staging/apple-bce/mailbox.h | 53 + - drivers/staging/apple-bce/queue.c | 390 ++++++ - drivers/staging/apple-bce/queue.h | 177 +++ - drivers/staging/apple-bce/queue_dma.c | 220 ++++ - drivers/staging/apple-bce/queue_dma.h | 50 + - drivers/staging/apple-bce/vhci/command.h | 204 +++ - drivers/staging/apple-bce/vhci/queue.c | 268 ++++ - drivers/staging/apple-bce/vhci/queue.h | 76 ++ - drivers/staging/apple-bce/vhci/transfer.c | 661 ++++++++++ - drivers/staging/apple-bce/vhci/transfer.h | 73 ++ - drivers/staging/apple-bce/vhci/vhci.c | 759 +++++++++++ - drivers/staging/apple-bce/vhci/vhci.h | 52 + - drivers/usb/core/driver.c | 14 + - drivers/usb/storage/uas.c | 5 +- - include/drm/drm_format_helper.h | 3 + - include/linux/efi.h | 1 + - include/linux/hid.h | 2 + - include/linux/usb.h | 3 + - lib/test_printf.c | 20 +- - lib/vsprintf.c | 36 +- - scripts/checkpatch.pl | 2 +- - 70 files changed, 8377 insertions(+), 364 deletions(-) - create mode 100644 Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd - create mode 100644 drivers/gpu/drm/tiny/appletbdrm.c - create mode 100644 drivers/hid/hid-apple-magic-backlight.c - create mode 100644 drivers/hid/hid-appletb-bl.c - create mode 100644 drivers/hid/hid-appletb-kbd.c - create mode 100644 drivers/staging/apple-bce/Kconfig - create mode 100644 
drivers/staging/apple-bce/Makefile - create mode 100644 drivers/staging/apple-bce/apple_bce.c - create mode 100644 drivers/staging/apple-bce/apple_bce.h - create mode 100644 drivers/staging/apple-bce/audio/audio.c - create mode 100644 drivers/staging/apple-bce/audio/audio.h - create mode 100644 drivers/staging/apple-bce/audio/description.h - create mode 100644 drivers/staging/apple-bce/audio/pcm.c - create mode 100644 drivers/staging/apple-bce/audio/pcm.h - create mode 100644 drivers/staging/apple-bce/audio/protocol.c - create mode 100644 drivers/staging/apple-bce/audio/protocol.h - create mode 100644 drivers/staging/apple-bce/audio/protocol_bce.c - create mode 100644 drivers/staging/apple-bce/audio/protocol_bce.h - create mode 100644 drivers/staging/apple-bce/mailbox.c - create mode 100644 drivers/staging/apple-bce/mailbox.h - create mode 100644 drivers/staging/apple-bce/queue.c - create mode 100644 drivers/staging/apple-bce/queue.h - create mode 100644 drivers/staging/apple-bce/queue_dma.c - create mode 100644 drivers/staging/apple-bce/queue_dma.h - create mode 100644 drivers/staging/apple-bce/vhci/command.h - create mode 100644 drivers/staging/apple-bce/vhci/queue.c - create mode 100644 drivers/staging/apple-bce/vhci/queue.h - create mode 100644 drivers/staging/apple-bce/vhci/transfer.c - create mode 100644 drivers/staging/apple-bce/vhci/transfer.h - create mode 100644 drivers/staging/apple-bce/vhci/vhci.c - create mode 100644 drivers/staging/apple-bce/vhci/vhci.h - -diff --git a/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd -new file mode 100644 -index 000000000000..2a19584d091e ---- /dev/null -+++ b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd -@@ -0,0 +1,13 @@ -+What: /sys/bus/hid/drivers/hid-appletb-kbd//mode -+Date: September, 2023 -+KernelVersion: 6.5 -+Contact: linux-input@vger.kernel.org -+Description: -+ The set of keys displayed on the Touch Bar. 
-+ Valid values are: -+ == ================= -+ 0 Escape key only -+ 1 Function keys -+ 2 Media/brightness keys -+ 3 None -+ == ================= -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index 07ac4c81a7dd..c083c476013f 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -415,6 +415,8 @@ - useful so that a dump capture kernel won't be - shot down by NMI - -+ apple_set_os [KNL] Report that macOS is being booted to the firmware -+ - autoconf= [IPV6] - See Documentation/networking/ipv6.rst. - -diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst -index 4451ef501936..c726a846f752 100644 ---- a/Documentation/core-api/printk-formats.rst -+++ b/Documentation/core-api/printk-formats.rst -@@ -632,6 +632,38 @@ Examples:: - %p4cc Y10 little-endian (0x20303159) - %p4cc NV12 big-endian (0xb231564e) - -+Generic FourCC code -+------------------- -+ -+:: -+ %p4c[hnbl] gP00 (0x67503030) -+ -+Print a generic FourCC code, as both ASCII characters and its numerical -+value as hexadecimal. -+ -+The additional ``h``, ``r``, ``b``, and ``l`` specifiers are used to specify -+host, reversed, big or little endian order data respectively. Host endian -+order means the data is interpreted as a 32-bit integer and the most -+significant byte is printed first; that is, the character code as printed -+matches the byte order stored in memory on big-endian systems, and is reversed -+on little-endian systems. -+ -+Passed by reference. 
-+ -+Examples for a little-endian machine, given &(u32)0x67503030:: -+ -+ %p4ch gP00 (0x67503030) -+ %p4cl gP00 (0x67503030) -+ %p4cb 00Pg (0x30305067) -+ %p4cr 00Pg (0x30305067) -+ -+Examples for a big-endian machine, given &(u32)0x67503030:: -+ -+ %p4ch gP00 (0x67503030) -+ %p4cl 00Pg (0x30305067) -+ %p4cb gP00 (0x67503030) -+ %p4cr 00Pg (0x30305067) -+ - Rust - ---- - -diff --git a/Documentation/leds/well-known-leds.txt b/Documentation/leds/well-known-leds.txt -index 67b44704801f..34e472b363d7 100644 ---- a/Documentation/leds/well-known-leds.txt -+++ b/Documentation/leds/well-known-leds.txt -@@ -44,6 +44,14 @@ Legacy: "lp5523:kb{1,2,3,4,5,6}" (Nokia N900) - - Frontlight/backlight of main keyboard. - -+Good: ":*:kbd_backlight" -+Good: "input*:*:kbd_backlight" -+Legacy: "*:*:kbd_backlight" -+ -+Many drivers have the vendor or product name as the first field of the led name, -+this makes names inconsistent and is redundant as that information is already in -+sysfs. -+ - Legacy: "button-backlight" (Motorola Droid 4) - - Some phones have touch buttons below screen; it is different from main -diff --git a/MAINTAINERS b/MAINTAINERS -index b25b2a731512..94540127a563 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -6728,6 +6728,12 @@ S: Supported - T: git https://gitlab.freedesktop.org/drm/misc/kernel.git - F: drivers/gpu/drm/sun4i/sun8i* - -+DRM DRIVER FOR APPLE TOUCH BARS -+M: Kerem Karabay -+L: dri-devel@lists.freedesktop.org -+S: Maintained -+F: drivers/gpu/drm/tiny/appletbdrm.c -+ - DRM DRIVER FOR ARM PL111 CLCD - S: Orphan - T: git https://gitlab.freedesktop.org/drm/misc/kernel.git -@@ -9733,6 +9739,18 @@ F: include/linux/pm.h - F: include/linux/suspend.h - F: kernel/power/ - -+HID APPLE TOUCH BAR DRIVERS -+M: Kerem Karabay -+L: linux-input@vger.kernel.org -+S: Maintained -+F: drivers/hid/hid-appletb-* -+ -+HID APPLE MAGIC BACKLIGHT DRIVER -+M: Orlando Chamberlain -+L: linux-input@vger.kernel.org -+S: Maintained -+F: drivers/hid/apple-magic-backlight.c -+ - HID 
CORE LAYER - M: Jiri Kosina - M: Benjamin Tissoires -diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c -index 2cc3821b2b16..c11cbe5b6eaa 100644 ---- a/drivers/acpi/video_detect.c -+++ b/drivers/acpi/video_detect.c -@@ -539,6 +539,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = { - DMI_MATCH(DMI_PRODUCT_NAME, "iMac12,2"), - }, - }, -+ { -+ .callback = video_detect_force_native, -+ /* Apple MacBook Air 9,1 */ -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), -+ DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir9,1"), -+ }, -+ }, - { - /* https://bugzilla.redhat.com/show_bug.cgi?id=1217249 */ - .callback = video_detect_force_native, -@@ -548,6 +556,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = { - DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro12,1"), - }, - }, -+ { -+ .callback = video_detect_force_native, -+ /* Apple MacBook Pro 16,2 */ -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), -+ DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro16,2"), -+ }, -+ }, - { - .callback = video_detect_force_native, - /* Dell Inspiron N4010 */ -diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c -index de659f6a815f..f00a419a29be 100644 ---- a/drivers/firmware/efi/libstub/efi-stub-helper.c -+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c -@@ -20,6 +20,7 @@ - bool efi_nochunk; - bool efi_nokaslr = !IS_ENABLED(CONFIG_RANDOMIZE_BASE); - bool efi_novamap; -+bool efi_apple_set_os; - - static bool efi_noinitrd; - static bool efi_nosoftreserve; -@@ -76,6 +77,8 @@ efi_status_t efi_parse_options(char const *cmdline) - efi_loglevel = CONSOLE_LOGLEVEL_QUIET; - } else if (!strcmp(param, "noinitrd")) { - efi_noinitrd = true; -+ } else if (!strcmp(param, "apple_set_os")) { -+ efi_apple_set_os = true; - } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) { - efi_no5lvl = true; - } else if (IS_ENABLED(CONFIG_ARCH_HAS_MEM_ENCRYPT) && -diff --git 
a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h -index 27abb4ce0291..89750d043ed8 100644 ---- a/drivers/firmware/efi/libstub/efistub.h -+++ b/drivers/firmware/efi/libstub/efistub.h -@@ -39,6 +39,7 @@ extern bool efi_nokaslr; - extern int efi_loglevel; - extern int efi_mem_encrypt; - extern bool efi_novamap; -+extern bool efi_apple_set_os; - extern const efi_system_table_t *efi_system_table; - - typedef union efi_dxe_services_table efi_dxe_services_table_t; -@@ -825,6 +826,19 @@ union apple_properties_protocol { - } mixed_mode; - }; - -+typedef struct apple_set_os_protocol apple_set_os_protocol_t; -+ -+struct apple_set_os_protocol { -+ u64 version; -+ efi_status_t (__efiapi *set_os_version) (const char *); -+ efi_status_t (__efiapi *set_os_vendor) (const char *); -+ struct { -+ u32 version; -+ u32 set_os_version; -+ u32 set_os_vendor; -+ } mixed_mode; -+}; -+ - typedef u32 efi_tcg2_event_log_format; - - #define INITRD_EVENT_TAG_ID 0x8F3B22ECU -diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c -index 1983fd3bf392..49a89a844df7 100644 ---- a/drivers/firmware/efi/libstub/x86-stub.c -+++ b/drivers/firmware/efi/libstub/x86-stub.c -@@ -225,6 +225,30 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) - } - } - -+static void apple_set_os(void) -+{ -+ efi_guid_t guid = APPLE_SET_OS_PROTOCOL_GUID; -+ apple_set_os_protocol_t *set_os; -+ efi_status_t status; -+ -+ status = efi_bs_call(locate_protocol, &guid, NULL, (void **)&set_os); -+ if (status != EFI_SUCCESS) -+ return; -+ -+ if (efi_table_attr(set_os, version) >= 2) { -+ status = efi_fn_call(set_os, set_os_vendor, "Apple Inc."); -+ if (status != EFI_SUCCESS) -+ efi_err("Failed to set OS vendor via apple_set_os\n"); -+ } -+ -+ /* The version being set doesn't seem to matter */ -+ if (efi_table_attr(set_os, version) > 0) { -+ status = efi_fn_call(set_os, set_os_version, "Mac OS X 10.9"); -+ if (status != EFI_SUCCESS) 
-+ efi_err("Failed to set OS version via apple_set_os\n"); -+ } -+} -+ - efi_status_t efi_adjust_memory_range_protection(unsigned long start, - unsigned long size) - { -@@ -338,6 +362,9 @@ static void setup_quirks(struct boot_params *boot_params) - if (IS_ENABLED(CONFIG_APPLE_PROPERTIES) && - !memcmp(efistub_fw_vendor(), apple, sizeof(apple))) - retrieve_apple_device_properties(boot_params); -+ -+ if (efi_apple_set_os) -+ apple_set_os(); - } - - /* -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -index bb0b636d0d75..a05ed98da785 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -@@ -2211,6 +2211,9 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, - int ret, retry = 0, i; - bool supports_atomic = false; - -+ if (vga_switcheroo_client_probe_defer(pdev)) -+ return -EPROBE_DEFER; -+ - /* skip devices which are owned by radeon */ - for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) { - if (amdgpu_unsupported_pciidlist[i] == pdev->device) -diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c -index b1be458ed4dd..28c0e76a1e88 100644 ---- a/drivers/gpu/drm/drm_format_helper.c -+++ b/drivers/gpu/drm/drm_format_helper.c -@@ -702,6 +702,57 @@ void drm_fb_xrgb8888_to_rgb888(struct iosys_map *dst, const unsigned int *dst_pi - } - EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb888); - -+static void drm_fb_xrgb8888_to_bgr888_line(void *dbuf, const void *sbuf, unsigned int pixels) -+{ -+ u8 *dbuf8 = dbuf; -+ const __le32 *sbuf32 = sbuf; -+ unsigned int x; -+ u32 pix; -+ -+ for (x = 0; x < pixels; x++) { -+ pix = le32_to_cpu(sbuf32[x]); -+ /* write red-green-blue to output in little endianness */ -+ *dbuf8++ = (pix & 0x00FF0000) >> 16; -+ *dbuf8++ = (pix & 0x0000FF00) >> 8; -+ *dbuf8++ = (pix & 0x000000FF) >> 0; -+ } -+} -+ -+/** -+ * drm_fb_xrgb8888_to_bgr888 - Convert XRGB8888 to BGR888 clip buffer -+ * @dst: Array of BGR888 destination buffers 
-+ * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines -+ * within @dst; can be NULL if scanlines are stored next to each other. -+ * @src: Array of XRGB8888 source buffers -+ * @fb: DRM framebuffer -+ * @clip: Clip rectangle area to copy -+ * @state: Transform and conversion state -+ * -+ * This function copies parts of a framebuffer to display memory and converts the -+ * color format during the process. Destination and framebuffer formats must match. The -+ * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at -+ * least as many entries as there are planes in @fb's format. Each entry stores the -+ * value for the format's respective color plane at the same index. -+ * -+ * This function does not apply clipping on @dst (i.e. the destination is at the -+ * top-left corner). -+ * -+ * Drivers can use this function for BGR888 devices that don't natively -+ * support XRGB8888. -+ */ -+void drm_fb_xrgb8888_to_bgr888(struct iosys_map *dst, const unsigned int *dst_pitch, -+ const struct iosys_map *src, const struct drm_framebuffer *fb, -+ const struct drm_rect *clip, struct drm_format_conv_state *state) -+{ -+ static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { -+ 3, -+ }; -+ -+ drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, -+ drm_fb_xrgb8888_to_bgr888_line); -+} -+EXPORT_SYMBOL(drm_fb_xrgb8888_to_bgr888); -+ - static void drm_fb_xrgb8888_to_argb8888_line(void *dbuf, const void *sbuf, unsigned int pixels) - { - __le32 *dbuf32 = dbuf; -@@ -1035,6 +1086,9 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d - } else if (dst_format == DRM_FORMAT_RGB888) { - drm_fb_xrgb8888_to_rgb888(dst, dst_pitch, src, fb, clip, state); - return 0; -+ } else if (dst_format == DRM_FORMAT_BGR888) { -+ drm_fb_xrgb8888_to_bgr888(dst, dst_pitch, src, fb, clip, state); -+ return 0; - } else if (dst_format == DRM_FORMAT_ARGB8888) { - drm_fb_xrgb8888_to_argb8888(dst, dst_pitch, 
src, fb, clip, state); - return 0; -diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c -index 6bff169fa8d4..8d80ae00b838 100644 ---- a/drivers/gpu/drm/i915/display/intel_ddi.c -+++ b/drivers/gpu/drm/i915/display/intel_ddi.c -@@ -4648,6 +4648,7 @@ intel_ddi_init_hdmi_connector(struct intel_digital_port *dig_port) - - static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dig_port) - { -+ struct intel_display *display = to_intel_display(dig_port); - struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - - if (dig_port->base.port != PORT_A) -@@ -4656,6 +4657,9 @@ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dig_port) - if (dig_port->saved_port_bits & DDI_A_4_LANES) - return false; - -+ if (intel_has_quirk(display, QUIRK_DDI_A_FORCE_4_LANES)) -+ return true; -+ - /* Broxton/Geminilake: Bspec says that DDI_A_4_LANES is the only - * supported configuration - */ -diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c -index bda702c2cab8..1647e141ae78 100644 ---- a/drivers/gpu/drm/i915/display/intel_fbdev.c -+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c -@@ -196,10 +196,10 @@ static int intelfb_create(struct drm_fb_helper *helper, - return ret; - - if (intel_fb && -- (sizes->fb_width > intel_fb->base.width || -- sizes->fb_height > intel_fb->base.height)) { -+ (sizes->fb_width != intel_fb->base.width || -+ sizes->fb_height != intel_fb->base.height)) { - drm_dbg_kms(&dev_priv->drm, -- "BIOS fb too small (%dx%d), we require (%dx%d)," -+ "BIOS fb not valid (%dx%d), we require (%dx%d)," - " releasing it\n", - intel_fb->base.width, intel_fb->base.height, - sizes->fb_width, sizes->fb_height); -diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c -index 14d5fefc9c5b..727639b8f6a6 100644 ---- a/drivers/gpu/drm/i915/display/intel_quirks.c -+++ b/drivers/gpu/drm/i915/display/intel_quirks.c -@@ 
-59,6 +59,18 @@ static void quirk_increase_ddi_disabled_time(struct intel_display *display) - drm_info(display->drm, "Applying Increase DDI Disabled quirk\n"); - } - -+/* -+ * In some cases, the firmware might not set the lane count to 4 (for example, -+ * when booting in some dual GPU Macs with the dGPU as the default GPU), this -+ * quirk is used to force it as otherwise it might not be possible to compute a -+ * valid link configuration. -+ */ -+static void quirk_ddi_a_force_4_lanes(struct intel_display *display) -+{ -+ intel_set_quirk(display, QUIRK_DDI_A_FORCE_4_LANES); -+ drm_info(display->drm, "Applying DDI A Forced 4 Lanes quirk\n"); -+} -+ - static void quirk_no_pps_backlight_power_hook(struct intel_display *display) - { - intel_set_quirk(display, QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK); -@@ -201,6 +213,9 @@ static struct intel_quirk intel_quirks[] = { - { 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time }, - /* HP Notebook - 14-r206nv */ - { 0x0f31, 0x103c, 0x220f, quirk_invert_brightness }, -+ -+ /* Apple MacBookPro15,1 */ -+ { 0x3e9b, 0x106b, 0x0176, quirk_ddi_a_force_4_lanes }, - }; - - void intel_init_quirks(struct intel_display *display) -diff --git a/drivers/gpu/drm/i915/display/intel_quirks.h b/drivers/gpu/drm/i915/display/intel_quirks.h -index 151c8f4ae576..46e7feba88f4 100644 ---- a/drivers/gpu/drm/i915/display/intel_quirks.h -+++ b/drivers/gpu/drm/i915/display/intel_quirks.h -@@ -17,6 +17,7 @@ enum intel_quirk_id { - QUIRK_INVERT_BRIGHTNESS, - QUIRK_LVDS_SSC_DISABLE, - QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK, -+ QUIRK_DDI_A_FORCE_4_LANES, - }; - - void intel_init_quirks(struct intel_display *display); -diff --git a/drivers/gpu/drm/tests/drm_format_helper_test.c b/drivers/gpu/drm/tests/drm_format_helper_test.c -index 08992636ec05..35cd3405d045 100644 ---- a/drivers/gpu/drm/tests/drm_format_helper_test.c -+++ b/drivers/gpu/drm/tests/drm_format_helper_test.c -@@ -60,6 +60,11 @@ struct convert_to_rgb888_result { - const u8 expected[TEST_BUF_SIZE]; - }; 
- -+struct convert_to_bgr888_result { -+ unsigned int dst_pitch; -+ const u8 expected[TEST_BUF_SIZE]; -+}; -+ - struct convert_to_argb8888_result { - unsigned int dst_pitch; - const u32 expected[TEST_BUF_SIZE]; -@@ -107,6 +112,7 @@ struct convert_xrgb8888_case { - struct convert_to_argb1555_result argb1555_result; - struct convert_to_rgba5551_result rgba5551_result; - struct convert_to_rgb888_result rgb888_result; -+ struct convert_to_bgr888_result bgr888_result; - struct convert_to_argb8888_result argb8888_result; - struct convert_to_xrgb2101010_result xrgb2101010_result; - struct convert_to_argb2101010_result argb2101010_result; -@@ -151,6 +157,10 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = { - .dst_pitch = TEST_USE_DEFAULT_PITCH, - .expected = { 0x00, 0x00, 0xFF }, - }, -+ .bgr888_result = { -+ .dst_pitch = TEST_USE_DEFAULT_PITCH, -+ .expected = { 0xFF, 0x00, 0x00 }, -+ }, - .argb8888_result = { - .dst_pitch = TEST_USE_DEFAULT_PITCH, - .expected = { 0xFFFF0000 }, -@@ -217,6 +227,10 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = { - .dst_pitch = TEST_USE_DEFAULT_PITCH, - .expected = { 0x00, 0x00, 0xFF }, - }, -+ .bgr888_result = { -+ .dst_pitch = TEST_USE_DEFAULT_PITCH, -+ .expected = { 0xFF, 0x00, 0x00 }, -+ }, - .argb8888_result = { - .dst_pitch = TEST_USE_DEFAULT_PITCH, - .expected = { 0xFFFF0000 }, -@@ -330,6 +344,15 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = { - 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, - }, - }, -+ .bgr888_result = { -+ .dst_pitch = TEST_USE_DEFAULT_PITCH, -+ .expected = { -+ 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, -+ 0xFF, 0x00, 0x00, 0x00, 0xFF, 0x00, -+ 0x00, 0x00, 0xFF, 0xFF, 0x00, 0xFF, -+ 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0xFF, -+ }, -+ }, - .argb8888_result = { - .dst_pitch = TEST_USE_DEFAULT_PITCH, - .expected = { -@@ -468,6 +491,17 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - }, - }, -+ .bgr888_result = { -+ .dst_pitch = 
15, -+ .expected = { -+ 0x0E, 0x44, 0x9C, 0x11, 0x4D, 0x05, 0xA8, 0xF3, 0x03, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x6C, 0xF0, 0x73, 0x0E, 0x44, 0x9C, 0x11, 0x4D, 0x05, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0xA8, 0x03, 0x03, 0x6C, 0xF0, 0x73, 0x0E, 0x44, 0x9C, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ }, -+ }, - .argb8888_result = { - .dst_pitch = 20, - .expected = { -@@ -914,6 +948,52 @@ static void drm_test_fb_xrgb8888_to_rgb888(struct kunit *test) - KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); - } - -+static void drm_test_fb_xrgb8888_to_bgr888(struct kunit *test) -+{ -+ const struct convert_xrgb8888_case *params = test->param_value; -+ const struct convert_to_bgr888_result *result = ¶ms->bgr888_result; -+ size_t dst_size; -+ u8 *buf = NULL; -+ __le32 *xrgb8888 = NULL; -+ struct iosys_map dst, src; -+ -+ struct drm_framebuffer fb = { -+ .format = drm_format_info(DRM_FORMAT_XRGB8888), -+ .pitches = { params->pitch, 0, 0 }, -+ }; -+ -+ dst_size = conversion_buf_size(DRM_FORMAT_BGR888, result->dst_pitch, -+ ¶ms->clip, 0); -+ KUNIT_ASSERT_GT(test, dst_size, 0); -+ -+ buf = kunit_kzalloc(test, dst_size, GFP_KERNEL); -+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf); -+ iosys_map_set_vaddr(&dst, buf); -+ -+ xrgb8888 = cpubuf_to_le32(test, params->xrgb8888, TEST_BUF_SIZE); -+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xrgb8888); -+ iosys_map_set_vaddr(&src, xrgb8888); -+ -+ /* -+ * BGR888 expected results are already in little-endian -+ * order, so there's no need to convert the test output. 
-+ */ -+ drm_fb_xrgb8888_to_bgr888(&dst, &result->dst_pitch, &src, &fb, ¶ms->clip, -+ &fmtcnv_state); -+ KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); -+ -+ buf = dst.vaddr; /* restore original value of buf */ -+ memset(buf, 0, dst_size); -+ -+ int blit_result = 0; -+ -+ blit_result = drm_fb_blit(&dst, &result->dst_pitch, DRM_FORMAT_BGR888, &src, &fb, ¶ms->clip, -+ &fmtcnv_state); -+ -+ KUNIT_EXPECT_FALSE(test, blit_result); -+ KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); -+} -+ - static void drm_test_fb_xrgb8888_to_argb8888(struct kunit *test) - { - const struct convert_xrgb8888_case *params = test->param_value; -@@ -1851,6 +1931,7 @@ static struct kunit_case drm_format_helper_test_cases[] = { - KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_argb1555, convert_xrgb8888_gen_params), - KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_rgba5551, convert_xrgb8888_gen_params), - KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_rgb888, convert_xrgb8888_gen_params), -+ KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_bgr888, convert_xrgb8888_gen_params), - KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_argb8888, convert_xrgb8888_gen_params), - KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_xrgb2101010, convert_xrgb8888_gen_params), - KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_argb2101010, convert_xrgb8888_gen_params), -diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig -index f6889f649bc1..559a97bce12c 100644 ---- a/drivers/gpu/drm/tiny/Kconfig -+++ b/drivers/gpu/drm/tiny/Kconfig -@@ -1,5 +1,17 @@ - # SPDX-License-Identifier: GPL-2.0-only - -+config DRM_APPLETBDRM -+ tristate "DRM support for Apple Touch Bars" -+ depends on DRM && USB && MMU -+ select DRM_KMS_HELPER -+ select DRM_GEM_SHMEM_HELPER -+ help -+ Say Y here if you want support for the display of Touch Bars on x86 -+ MacBook Pros. -+ -+ To compile this driver as a module, choose M here: the -+ module will be called appletbdrm. 
-+ - config DRM_ARCPGU - tristate "ARC PGU" - depends on DRM && OF -diff --git a/drivers/gpu/drm/tiny/Makefile b/drivers/gpu/drm/tiny/Makefile -index 76dde89a044b..9a1b412e764a 100644 ---- a/drivers/gpu/drm/tiny/Makefile -+++ b/drivers/gpu/drm/tiny/Makefile -@@ -1,5 +1,6 @@ - # SPDX-License-Identifier: GPL-2.0-only - -+obj-$(CONFIG_DRM_APPLETBDRM) += appletbdrm.o - obj-$(CONFIG_DRM_ARCPGU) += arcpgu.o - obj-$(CONFIG_DRM_BOCHS) += bochs.o - obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus.o -diff --git a/drivers/gpu/drm/tiny/appletbdrm.c b/drivers/gpu/drm/tiny/appletbdrm.c -new file mode 100644 -index 000000000000..b9440ce0064e ---- /dev/null -+++ b/drivers/gpu/drm/tiny/appletbdrm.c -@@ -0,0 +1,624 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Apple Touch Bar DRM Driver -+ * -+ * Copyright (c) 2023 Kerem Karabay -+ */ -+ -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define _APPLETBDRM_FOURCC(s) (((s)[0] << 24) | ((s)[1] << 16) | ((s)[2] << 8) | (s)[3]) -+#define APPLETBDRM_FOURCC(s) _APPLETBDRM_FOURCC(#s) -+ -+#define APPLETBDRM_PIXEL_FORMAT APPLETBDRM_FOURCC(RGBA) /* The actual format is BGR888 */ -+#define APPLETBDRM_BITS_PER_PIXEL 24 -+ -+#define APPLETBDRM_MSG_CLEAR_DISPLAY APPLETBDRM_FOURCC(CLRD) -+#define APPLETBDRM_MSG_GET_INFORMATION APPLETBDRM_FOURCC(GINF) -+#define APPLETBDRM_MSG_UPDATE_COMPLETE APPLETBDRM_FOURCC(UDCL) -+#define APPLETBDRM_MSG_SIGNAL_READINESS APPLETBDRM_FOURCC(REDY) -+ -+#define APPLETBDRM_BULK_MSG_TIMEOUT 1000 -+ -+#define drm_to_adev(_drm) container_of(_drm, struct appletbdrm_device, drm) -+#define adev_to_udev(adev) interface_to_usbdev(to_usb_interface(adev->dev)) -+ -+struct appletbdrm_device { -+ struct device *dev; -+ -+ u8 in_ep; -+ u8 out_ep; -+ -+ u32 width; -+ u32 height; -+ -+ struct drm_device drm; -+ struct drm_display_mode mode; -+ struct drm_connector connector; -+ struct 
drm_simple_display_pipe pipe; -+ -+ bool readiness_signal_received; -+}; -+ -+struct appletbdrm_request_header { -+ __le16 unk_00; -+ __le16 unk_02; -+ __le32 unk_04; -+ __le32 unk_08; -+ __le32 size; -+} __packed; -+ -+struct appletbdrm_response_header { -+ u8 unk_00[16]; -+ u32 msg; -+} __packed; -+ -+struct appletbdrm_simple_request { -+ struct appletbdrm_request_header header; -+ u32 msg; -+ u8 unk_14[8]; -+ __le32 size; -+} __packed; -+ -+struct appletbdrm_information { -+ struct appletbdrm_response_header header; -+ u8 unk_14[12]; -+ __le32 width; -+ __le32 height; -+ u8 bits_per_pixel; -+ __le32 bytes_per_row; -+ __le32 orientation; -+ __le32 bitmap_info; -+ u32 pixel_format; -+ __le32 width_inches; /* floating point */ -+ __le32 height_inches; /* floating point */ -+} __packed; -+ -+struct appletbdrm_frame { -+ __le16 begin_x; -+ __le16 begin_y; -+ __le16 width; -+ __le16 height; -+ __le32 buf_size; -+ u8 buf[]; -+} __packed; -+ -+struct appletbdrm_fb_request_footer { -+ u8 unk_00[12]; -+ __le32 unk_0c; -+ u8 unk_10[12]; -+ __le32 unk_1c; -+ __le64 timestamp; -+ u8 unk_28[12]; -+ __le32 unk_34; -+ u8 unk_38[20]; -+ __le32 unk_4c; -+} __packed; -+ -+struct appletbdrm_fb_request { -+ struct appletbdrm_request_header header; -+ __le16 unk_10; -+ u8 msg_id; -+ u8 unk_13[29]; -+ /* -+ * Contents of `data`: -+ * - struct appletbdrm_frame frames[]; -+ * - struct appletbdrm_fb_request_footer footer; -+ * - padding to make the total size a multiple of 16 -+ */ -+ u8 data[]; -+} __packed; -+ -+struct appletbdrm_fb_request_response { -+ struct appletbdrm_response_header header; -+ u8 unk_14[12]; -+ __le64 timestamp; -+} __packed; -+ -+static int appletbdrm_send_request(struct appletbdrm_device *adev, -+ struct appletbdrm_request_header *request, size_t size) -+{ -+ struct usb_device *udev = adev_to_udev(adev); -+ struct drm_device *drm = &adev->drm; -+ int ret, actual_size; -+ -+ ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, adev->out_ep), -+ request, size, 
&actual_size, APPLETBDRM_BULK_MSG_TIMEOUT); -+ if (ret) { -+ drm_err(drm, "Failed to send message (%pe)\n", ERR_PTR(ret)); -+ return ret; -+ } -+ -+ if (actual_size != size) { -+ drm_err(drm, "Actual size (%d) doesn't match expected size (%lu)\n", -+ actual_size, size); -+ return -EIO; -+ } -+ -+ return ret; -+} -+ -+static int appletbdrm_read_response(struct appletbdrm_device *adev, -+ struct appletbdrm_response_header *response, -+ size_t size, u32 expected_response) -+{ -+ struct usb_device *udev = adev_to_udev(adev); -+ struct drm_device *drm = &adev->drm; -+ int ret, actual_size; -+ -+retry: -+ ret = usb_bulk_msg(udev, usb_rcvbulkpipe(udev, adev->in_ep), -+ response, size, &actual_size, APPLETBDRM_BULK_MSG_TIMEOUT); -+ if (ret) { -+ drm_err(drm, "Failed to read response (%pe)\n", ERR_PTR(ret)); -+ return ret; -+ } -+ -+ /* -+ * The device responds to the first request sent in a particular -+ * timeframe after the USB device configuration is set with a readiness -+ * signal, in which case the response should be read again -+ */ -+ if (response->msg == APPLETBDRM_MSG_SIGNAL_READINESS) { -+ if (!adev->readiness_signal_received) { -+ adev->readiness_signal_received = true; -+ goto retry; -+ } -+ -+ drm_err(drm, "Encountered unexpected readiness signal\n"); -+ return -EIO; -+ } -+ -+ if (actual_size != size) { -+ drm_err(drm, "Actual size (%d) doesn't match expected size (%lu)\n", -+ actual_size, size); -+ return -EIO; -+ } -+ -+ if (response->msg != expected_response) { -+ drm_err(drm, "Unexpected response from device (expected %p4ch found %p4ch)\n", -+ &expected_response, &response->msg); -+ return -EIO; -+ } -+ -+ return 0; -+} -+ -+static int appletbdrm_send_msg(struct appletbdrm_device *adev, u32 msg) -+{ -+ struct appletbdrm_simple_request *request; -+ int ret; -+ -+ request = kzalloc(sizeof(*request), GFP_KERNEL); -+ if (!request) -+ return -ENOMEM; -+ -+ request->header.unk_00 = cpu_to_le16(2); -+ request->header.unk_02 = cpu_to_le16(0x1512); -+ 
request->header.size = cpu_to_le32(sizeof(*request) - sizeof(request->header)); -+ request->msg = msg; -+ request->size = request->header.size; -+ -+ ret = appletbdrm_send_request(adev, &request->header, sizeof(*request)); -+ -+ kfree(request); -+ -+ return ret; -+} -+ -+static int appletbdrm_clear_display(struct appletbdrm_device *adev) -+{ -+ return appletbdrm_send_msg(adev, APPLETBDRM_MSG_CLEAR_DISPLAY); -+} -+ -+static int appletbdrm_signal_readiness(struct appletbdrm_device *adev) -+{ -+ return appletbdrm_send_msg(adev, APPLETBDRM_MSG_SIGNAL_READINESS); -+} -+ -+static int appletbdrm_get_information(struct appletbdrm_device *adev) -+{ -+ struct appletbdrm_information *info; -+ struct drm_device *drm = &adev->drm; -+ u8 bits_per_pixel; -+ u32 pixel_format; -+ int ret; -+ -+ info = kzalloc(sizeof(*info), GFP_KERNEL); -+ if (!info) -+ return -ENOMEM; -+ -+ ret = appletbdrm_send_msg(adev, APPLETBDRM_MSG_GET_INFORMATION); -+ if (ret) -+ return ret; -+ -+ ret = appletbdrm_read_response(adev, &info->header, sizeof(*info), -+ APPLETBDRM_MSG_GET_INFORMATION); -+ if (ret) -+ goto free_info; -+ -+ bits_per_pixel = info->bits_per_pixel; -+ pixel_format = get_unaligned(&info->pixel_format); -+ -+ adev->width = get_unaligned_le32(&info->width); -+ adev->height = get_unaligned_le32(&info->height); -+ -+ if (bits_per_pixel != APPLETBDRM_BITS_PER_PIXEL) { -+ drm_err(drm, "Encountered unexpected bits per pixel value (%d)\n", bits_per_pixel); -+ ret = -EINVAL; -+ goto free_info; -+ } -+ -+ if (pixel_format != APPLETBDRM_PIXEL_FORMAT) { -+ drm_err(drm, "Encountered unknown pixel format (%p4ch)\n", &pixel_format); -+ ret = -EINVAL; -+ goto free_info; -+ } -+ -+free_info: -+ kfree(info); -+ -+ return ret; -+} -+ -+static u32 rect_size(struct drm_rect *rect) -+{ -+ return drm_rect_width(rect) * drm_rect_height(rect) * (APPLETBDRM_BITS_PER_PIXEL / 8); -+} -+ -+static int appletbdrm_flush_damage(struct appletbdrm_device *adev, -+ struct drm_plane_state *old_state, -+ struct 
drm_plane_state *state) -+{ -+ struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(state); -+ struct appletbdrm_fb_request_response *response; -+ struct appletbdrm_fb_request_footer *footer; -+ struct drm_atomic_helper_damage_iter iter; -+ struct drm_framebuffer *fb = state->fb; -+ struct appletbdrm_fb_request *request; -+ struct drm_device *drm = &adev->drm; -+ struct appletbdrm_frame *frame; -+ u64 timestamp = ktime_get_ns(); -+ struct drm_rect damage; -+ size_t frames_size = 0; -+ size_t request_size; -+ int ret; -+ -+ drm_atomic_helper_damage_iter_init(&iter, old_state, state); -+ drm_atomic_for_each_plane_damage(&iter, &damage) { -+ frames_size += struct_size(frame, buf, rect_size(&damage)); -+ } -+ -+ if (!frames_size) -+ return 0; -+ -+ request_size = ALIGN(sizeof(*request) + frames_size + sizeof(*footer), 16); -+ -+ request = kzalloc(request_size, GFP_KERNEL); -+ if (!request) -+ return -ENOMEM; -+ -+ response = kzalloc(sizeof(*response), GFP_KERNEL); -+ if (!response) { -+ ret = -ENOMEM; -+ goto free_request; -+ } -+ -+ ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); -+ if (ret) { -+ drm_err(drm, "Failed to start CPU framebuffer access (%pe)\n", ERR_PTR(ret)); -+ goto free_response; -+ } -+ -+ request->header.unk_00 = cpu_to_le16(2); -+ request->header.unk_02 = cpu_to_le16(0x12); -+ request->header.unk_04 = cpu_to_le32(9); -+ request->header.size = cpu_to_le32(request_size - sizeof(request->header)); -+ request->unk_10 = cpu_to_le16(1); -+ request->msg_id = timestamp & 0xff; -+ -+ frame = (struct appletbdrm_frame *)request->data; -+ -+ drm_atomic_helper_damage_iter_init(&iter, old_state, state); -+ drm_atomic_for_each_plane_damage(&iter, &damage) { -+ struct iosys_map dst = IOSYS_MAP_INIT_VADDR(frame->buf); -+ u32 buf_size = rect_size(&damage); -+ -+ /* -+ * The coordinates need to be translated to the coordinate -+ * system the device expects, see the comment in -+ * appletbdrm_setup_mode_config -+ */ -+ frame->begin_x = 
cpu_to_le16(damage.y1); -+ frame->begin_y = cpu_to_le16(adev->height - damage.x2); -+ frame->width = cpu_to_le16(drm_rect_height(&damage)); -+ frame->height = cpu_to_le16(drm_rect_width(&damage)); -+ frame->buf_size = cpu_to_le32(buf_size); -+ -+ ret = drm_fb_blit(&dst, NULL, DRM_FORMAT_BGR888, -+ &shadow_plane_state->data[0], fb, &damage, &shadow_plane_state->fmtcnv_state); -+ if (ret) { -+ drm_err(drm, "Failed to copy damage clip (%pe)\n", ERR_PTR(ret)); -+ goto end_fb_cpu_access; -+ } -+ -+ frame = (void *)frame + struct_size(frame, buf, buf_size); -+ } -+ -+ footer = (struct appletbdrm_fb_request_footer *)&request->data[frames_size]; -+ -+ footer->unk_0c = cpu_to_le32(0xfffe); -+ footer->unk_1c = cpu_to_le32(0x80001); -+ footer->unk_34 = cpu_to_le32(0x80002); -+ footer->unk_4c = cpu_to_le32(0xffff); -+ footer->timestamp = cpu_to_le64(timestamp); -+ -+ ret = appletbdrm_send_request(adev, &request->header, request_size); -+ if (ret) -+ goto end_fb_cpu_access; -+ -+ ret = appletbdrm_read_response(adev, &response->header, sizeof(*response), -+ APPLETBDRM_MSG_UPDATE_COMPLETE); -+ if (ret) -+ goto end_fb_cpu_access; -+ -+ if (response->timestamp != footer->timestamp) { -+ drm_err(drm, "Response timestamp (%llu) doesn't match request timestamp (%llu)\n", -+ le64_to_cpu(response->timestamp), timestamp); -+ goto end_fb_cpu_access; -+ } -+ -+end_fb_cpu_access: -+ drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); -+free_response: -+ kfree(response); -+free_request: -+ kfree(request); -+ -+ return ret; -+} -+ -+static int appletbdrm_connector_helper_get_modes(struct drm_connector *connector) -+{ -+ struct appletbdrm_device *adev = drm_to_adev(connector->dev); -+ -+ return drm_connector_helper_get_modes_fixed(connector, &adev->mode); -+} -+ -+static enum drm_mode_status appletbdrm_pipe_mode_valid(struct drm_simple_display_pipe *pipe, -+ const struct drm_display_mode *mode) -+{ -+ struct drm_crtc *crtc = &pipe->crtc; -+ struct appletbdrm_device *adev = drm_to_adev(crtc->dev); 
-+ -+ return drm_crtc_helper_mode_valid_fixed(crtc, mode, &adev->mode); -+} -+ -+static void appletbdrm_pipe_disable(struct drm_simple_display_pipe *pipe) -+{ -+ struct appletbdrm_device *adev = drm_to_adev(pipe->crtc.dev); -+ int idx; -+ -+ if (!drm_dev_enter(&adev->drm, &idx)) -+ return; -+ -+ appletbdrm_clear_display(adev); -+ -+ drm_dev_exit(idx); -+} -+ -+static void appletbdrm_pipe_update(struct drm_simple_display_pipe *pipe, -+ struct drm_plane_state *old_state) -+{ -+ struct drm_crtc *crtc = &pipe->crtc; -+ struct appletbdrm_device *adev = drm_to_adev(crtc->dev); -+ int idx; -+ -+ if (!crtc->state->active || !drm_dev_enter(&adev->drm, &idx)) -+ return; -+ -+ appletbdrm_flush_damage(adev, old_state, pipe->plane.state); -+ -+ drm_dev_exit(idx); -+} -+ -+static const u32 appletbdrm_formats[] = { -+ DRM_FORMAT_BGR888, -+ DRM_FORMAT_XRGB8888, /* emulated */ -+}; -+ -+static const struct drm_mode_config_funcs appletbdrm_mode_config_funcs = { -+ .fb_create = drm_gem_fb_create_with_dirty, -+ .atomic_check = drm_atomic_helper_check, -+ .atomic_commit = drm_atomic_helper_commit, -+}; -+ -+static const struct drm_connector_funcs appletbdrm_connector_funcs = { -+ .reset = drm_atomic_helper_connector_reset, -+ .destroy = drm_connector_cleanup, -+ .fill_modes = drm_helper_probe_single_connector_modes, -+ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -+ .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, -+}; -+ -+static const struct drm_connector_helper_funcs appletbdrm_connector_helper_funcs = { -+ .get_modes = appletbdrm_connector_helper_get_modes, -+}; -+ -+static const struct drm_simple_display_pipe_funcs appletbdrm_pipe_funcs = { -+ DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS, -+ .update = appletbdrm_pipe_update, -+ .disable = appletbdrm_pipe_disable, -+ .mode_valid = appletbdrm_pipe_mode_valid, -+}; -+ -+DEFINE_DRM_GEM_FOPS(appletbdrm_drm_fops); -+ -+static const struct drm_driver appletbdrm_drm_driver = { -+ 
DRM_GEM_SHMEM_DRIVER_OPS, -+ .name = "appletbdrm", -+ .desc = "Apple Touch Bar DRM Driver", -+ .date = "20230910", -+ .major = 1, -+ .minor = 0, -+ .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, -+ .fops = &appletbdrm_drm_fops, -+}; -+ -+static int appletbdrm_setup_mode_config(struct appletbdrm_device *adev) -+{ -+ struct drm_connector *connector = &adev->connector; -+ struct drm_device *drm = &adev->drm; -+ struct device *dev = adev->dev; -+ int ret; -+ -+ ret = drmm_mode_config_init(drm); -+ if (ret) -+ return dev_err_probe(dev, ret, "Failed to initialize mode configuration\n"); -+ -+ /* -+ * The coordinate system used by the device is different from the -+ * coordinate system of the framebuffer in that the x and y axes are -+ * swapped, and that the y axis is inverted; so what the device reports -+ * as the height is actually the width of the framebuffer and vice -+ * versa -+ */ -+ drm->mode_config.min_width = 0; -+ drm->mode_config.min_height = 0; -+ drm->mode_config.max_width = max(adev->height, DRM_SHADOW_PLANE_MAX_WIDTH); -+ drm->mode_config.max_height = max(adev->width, DRM_SHADOW_PLANE_MAX_HEIGHT); -+ drm->mode_config.preferred_depth = APPLETBDRM_BITS_PER_PIXEL; -+ drm->mode_config.funcs = &appletbdrm_mode_config_funcs; -+ -+ adev->mode = (struct drm_display_mode) { -+ DRM_MODE_INIT(60, adev->height, adev->width, -+ DRM_MODE_RES_MM(adev->height, 218), -+ DRM_MODE_RES_MM(adev->width, 218)) -+ }; -+ -+ ret = drm_connector_init(drm, connector, -+ &appletbdrm_connector_funcs, DRM_MODE_CONNECTOR_USB); -+ if (ret) -+ return dev_err_probe(dev, ret, "Failed to initialize connector\n"); -+ -+ drm_connector_helper_add(connector, &appletbdrm_connector_helper_funcs); -+ -+ ret = drm_connector_set_panel_orientation(connector, -+ DRM_MODE_PANEL_ORIENTATION_RIGHT_UP); -+ if (ret) -+ return dev_err_probe(dev, ret, "Failed to set panel orientation\n"); -+ -+ connector->display_info.non_desktop = true; -+ ret = 
drm_object_property_set_value(&connector->base, -+ drm->mode_config.non_desktop_property, true); -+ if (ret) -+ return dev_err_probe(dev, ret, "Failed to set non-desktop property\n"); -+ -+ ret = drm_simple_display_pipe_init(drm, &adev->pipe, &appletbdrm_pipe_funcs, -+ appletbdrm_formats, ARRAY_SIZE(appletbdrm_formats), -+ NULL, &adev->connector); -+ if (ret) -+ return dev_err_probe(dev, ret, "Failed to initialize simple display pipe\n"); -+ -+ drm_plane_enable_fb_damage_clips(&adev->pipe.plane); -+ -+ drm_mode_config_reset(drm); -+ -+ ret = drm_dev_register(drm, 0); -+ if (ret) -+ return dev_err_probe(dev, ret, "Failed to register DRM device\n"); -+ -+ return 0; -+} -+ -+static int appletbdrm_probe(struct usb_interface *intf, -+ const struct usb_device_id *id) -+{ -+ struct usb_endpoint_descriptor *bulk_in, *bulk_out; -+ struct device *dev = &intf->dev; -+ struct appletbdrm_device *adev; -+ int ret; -+ -+ ret = usb_find_common_endpoints(intf->cur_altsetting, &bulk_in, &bulk_out, NULL, NULL); -+ if (ret) -+ return dev_err_probe(dev, ret, "Failed to find bulk endpoints\n"); -+ -+ adev = devm_drm_dev_alloc(dev, &appletbdrm_drm_driver, struct appletbdrm_device, drm); -+ if (IS_ERR(adev)) -+ return PTR_ERR(adev); -+ -+ adev->dev = dev; -+ adev->in_ep = bulk_in->bEndpointAddress; -+ adev->out_ep = bulk_out->bEndpointAddress; -+ -+ usb_set_intfdata(intf, adev); -+ -+ ret = appletbdrm_get_information(adev); -+ if (ret) -+ return dev_err_probe(dev, ret, "Failed to get display information\n"); -+ -+ ret = appletbdrm_signal_readiness(adev); -+ if (ret) -+ return dev_err_probe(dev, ret, "Failed to signal readiness\n"); -+ -+ ret = appletbdrm_clear_display(adev); -+ if (ret) -+ return dev_err_probe(dev, ret, "Failed to clear display\n"); -+ -+ return appletbdrm_setup_mode_config(adev); -+} -+ -+static void appletbdrm_disconnect(struct usb_interface *intf) -+{ -+ struct appletbdrm_device *adev = usb_get_intfdata(intf); -+ struct drm_device *drm = &adev->drm; -+ -+ 
drm_dev_unplug(drm); -+ drm_atomic_helper_shutdown(drm); -+} -+ -+static void appletbdrm_shutdown(struct usb_interface *intf) -+{ -+ struct appletbdrm_device *adev = usb_get_intfdata(intf); -+ -+ /* -+ * The framebuffer needs to be cleared on shutdown since its content -+ * persists across boots -+ */ -+ drm_atomic_helper_shutdown(&adev->drm); -+} -+ -+static const struct usb_device_id appletbdrm_usb_id_table[] = { -+ { USB_DEVICE_INTERFACE_CLASS(0x05ac, 0x8302, USB_CLASS_AUDIO_VIDEO) }, -+ {} -+}; -+MODULE_DEVICE_TABLE(usb, appletbdrm_usb_id_table); -+ -+static struct usb_driver appletbdrm_usb_driver = { -+ .name = "appletbdrm", -+ .probe = appletbdrm_probe, -+ .disconnect = appletbdrm_disconnect, -+ .shutdown = appletbdrm_shutdown, -+ .id_table = appletbdrm_usb_id_table, -+}; -+module_usb_driver(appletbdrm_usb_driver); -+ -+MODULE_AUTHOR("Kerem Karabay "); -+MODULE_DESCRIPTION("Apple Touch Bar DRM Driver"); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c -index 365e6ddbe90f..cf357cd3389d 100644 ---- a/drivers/gpu/vga/vga_switcheroo.c -+++ b/drivers/gpu/vga/vga_switcheroo.c -@@ -438,12 +438,7 @@ find_active_client(struct list_head *head) - bool vga_switcheroo_client_probe_defer(struct pci_dev *pdev) - { - if ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) { -- /* -- * apple-gmux is needed on pre-retina MacBook Pro -- * to probe the panel if pdev is the inactive GPU. -- */ -- if (apple_gmux_present() && pdev != vga_default_device() && -- !vgasr_priv.handler_flags) -+ if (apple_gmux_present() && !vgasr_priv.handler_flags) - return true; - } - -diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig -index 08446c89eff6..f26e36dffe51 100644 ---- a/drivers/hid/Kconfig -+++ b/drivers/hid/Kconfig -@@ -148,6 +148,40 @@ config HID_APPLEIR - - Say Y here if you want support for Apple infrared remote control. 
- -+config HID_APPLETB_BL -+ tristate "Apple Touch Bar Backlight" -+ depends on BACKLIGHT_CLASS_DEVICE -+ help -+ Say Y here if you want support for the backlight of Touch Bars on x86 -+ MacBook Pros. -+ -+ To compile this driver as a module, choose M here: the -+ module will be called hid-appletb-bl. -+ -+config HID_APPLETB_KBD -+ tristate "Apple Touch Bar Keyboard Mode" -+ depends on USB_HID -+ help -+ Say Y here if you want support for the keyboard mode (escape, -+ function, media and brightness keys) of Touch Bars on x86 MacBook -+ Pros. -+ -+ To compile this driver as a module, choose M here: the -+ module will be called hid-appletb-kbd. -+ -+config HID_APPLE_MAGIC_BACKLIGHT -+ tristate "Apple Magic Keyboard Backlight" -+ depends on USB_HID -+ depends on LEDS_CLASS -+ depends on NEW_LEDS -+ help -+ Say Y here if you want support for the keyboard backlight on Macs with -+ the magic keyboard (MacBookPro16,x and MacBookAir9,1). Note that this -+ driver is not for external magic keyboards. -+ -+ To compile this driver as a module, choose M here: the -+ module will be called hid-apple-magic-backlight. 
-+ - config HID_ASUS - tristate "Asus" - depends on USB_HID -@@ -723,6 +757,7 @@ config HID_MULTITOUCH - Say Y here if you have one of the following devices: - - 3M PCT touch screens - - ActionStar dual touch panels -+ - Touch Bars on x86 MacBook Pros - - Atmel panels - - Cando dual touch panels - - Chunghwa panels -diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile -index ce71b53ea6c5..685b7c8416a8 100644 ---- a/drivers/hid/Makefile -+++ b/drivers/hid/Makefile -@@ -29,6 +29,9 @@ obj-$(CONFIG_HID_ALPS) += hid-alps.o - obj-$(CONFIG_HID_ACRUX) += hid-axff.o - obj-$(CONFIG_HID_APPLE) += hid-apple.o - obj-$(CONFIG_HID_APPLEIR) += hid-appleir.o -+obj-$(CONFIG_HID_APPLETB_BL) += hid-appletb-bl.o -+obj-$(CONFIG_HID_APPLETB_KBD) += hid-appletb-kbd.o -+obj-$(CONFIG_HID_APPLE_MAGIC_BACKLIGHT) += hid-apple-magic-backlight.o - obj-$(CONFIG_HID_CREATIVE_SB0540) += hid-creative-sb0540.o - obj-$(CONFIG_HID_ASUS) += hid-asus.o - obj-$(CONFIG_HID_AUREAL) += hid-aureal.o -diff --git a/drivers/hid/hid-apple-magic-backlight.c b/drivers/hid/hid-apple-magic-backlight.c -new file mode 100644 -index 000000000000..f0fc02ff3b2d ---- /dev/null -+++ b/drivers/hid/hid-apple-magic-backlight.c -@@ -0,0 +1,120 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Apple Magic Keyboard Backlight Driver -+ * -+ * For Intel Macs with internal Magic Keyboard (MacBookPro16,1-4 and MacBookAir9,1) -+ * -+ * Copyright (c) 2022 Kerem Karabay -+ * Copyright (c) 2023 Orlando Chamberlain -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "hid-ids.h" -+ -+#define HID_USAGE_MAGIC_BL 0xff00000f -+ -+#define APPLE_MAGIC_REPORT_ID_POWER 3 -+#define APPLE_MAGIC_REPORT_ID_BRIGHTNESS 1 -+ -+struct apple_magic_backlight { -+ struct led_classdev cdev; -+ struct hid_report *brightness; -+ struct hid_report *power; -+}; -+ -+static void apple_magic_backlight_report_set(struct hid_report *rep, s32 value, u8 rate) -+{ -+ rep->field[0]->value[0] = value; -+ rep->field[1]->value[0] = 0x5e; /* 
Mimic Windows */ -+ rep->field[1]->value[0] |= rate << 8; -+ -+ hid_hw_request(rep->device, rep, HID_REQ_SET_REPORT); -+} -+ -+static void apple_magic_backlight_set(struct apple_magic_backlight *backlight, -+ int brightness, char rate) -+{ -+ apple_magic_backlight_report_set(backlight->power, brightness ? 1 : 0, rate); -+ if (brightness) -+ apple_magic_backlight_report_set(backlight->brightness, brightness, rate); -+} -+ -+static int apple_magic_backlight_led_set(struct led_classdev *led_cdev, -+ enum led_brightness brightness) -+{ -+ struct apple_magic_backlight *backlight = container_of(led_cdev, -+ struct apple_magic_backlight, cdev); -+ -+ apple_magic_backlight_set(backlight, brightness, 1); -+ return 0; -+} -+ -+static int apple_magic_backlight_probe(struct hid_device *hdev, -+ const struct hid_device_id *id) -+{ -+ struct apple_magic_backlight *backlight; -+ int rc; -+ -+ rc = hid_parse(hdev); -+ if (rc) -+ return rc; -+ -+ /* -+ * Ensure this usb endpoint is for the keyboard backlight, not touchbar -+ * backlight. 
-+ */ -+ if (hdev->collection[0].usage != HID_USAGE_MAGIC_BL) -+ return -ENODEV; -+ -+ backlight = devm_kzalloc(&hdev->dev, sizeof(*backlight), GFP_KERNEL); -+ if (!backlight) -+ return -ENOMEM; -+ -+ rc = hid_hw_start(hdev, HID_CONNECT_DEFAULT); -+ if (rc) -+ return rc; -+ -+ backlight->brightness = hid_register_report(hdev, HID_FEATURE_REPORT, -+ APPLE_MAGIC_REPORT_ID_BRIGHTNESS, 0); -+ backlight->power = hid_register_report(hdev, HID_FEATURE_REPORT, -+ APPLE_MAGIC_REPORT_ID_POWER, 0); -+ -+ if (!backlight->brightness || !backlight->power) { -+ rc = -ENODEV; -+ goto hw_stop; -+ } -+ -+ backlight->cdev.name = ":white:" LED_FUNCTION_KBD_BACKLIGHT; -+ backlight->cdev.max_brightness = backlight->brightness->field[0]->logical_maximum; -+ backlight->cdev.brightness_set_blocking = apple_magic_backlight_led_set; -+ -+ apple_magic_backlight_set(backlight, 0, 0); -+ -+ return devm_led_classdev_register(&hdev->dev, &backlight->cdev); -+ -+hw_stop: -+ hid_hw_stop(hdev); -+ return rc; -+} -+ -+static const struct hid_device_id apple_magic_backlight_hid_ids[] = { -+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) }, -+ { } -+}; -+MODULE_DEVICE_TABLE(hid, apple_magic_backlight_hid_ids); -+ -+static struct hid_driver apple_magic_backlight_hid_driver = { -+ .name = "hid-apple-magic-backlight", -+ .id_table = apple_magic_backlight_hid_ids, -+ .probe = apple_magic_backlight_probe, -+}; -+module_hid_driver(apple_magic_backlight_hid_driver); -+ -+MODULE_DESCRIPTION("MacBook Magic Keyboard Backlight"); -+MODULE_AUTHOR("Orlando Chamberlain "); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/hid/hid-appletb-bl.c b/drivers/hid/hid-appletb-bl.c -new file mode 100644 -index 000000000000..0c5e4b776851 ---- /dev/null -+++ b/drivers/hid/hid-appletb-bl.c -@@ -0,0 +1,193 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Apple Touch Bar Backlight Driver -+ * -+ * Copyright (c) 2017-2018 Ronald Tschalär -+ * Copyright (c) 2022-2023 Kerem Karabay -+ */ -+ -+#define 
pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+#include -+ -+#include "hid-ids.h" -+ -+#define APPLETB_BL_ON 1 -+#define APPLETB_BL_DIM 3 -+#define APPLETB_BL_OFF 4 -+ -+#define HID_UP_APPLEVENDOR_TB_BL 0xff120000 -+ -+#define HID_VD_APPLE_TB_BRIGHTNESS 0xff120001 -+#define HID_USAGE_AUX1 0xff120020 -+#define HID_USAGE_BRIGHTNESS 0xff120021 -+ -+struct appletb_bl { -+ struct hid_field *aux1_field, *brightness_field; -+ struct backlight_device *bdev; -+ -+ bool full_on; -+}; -+ -+const u8 appletb_bl_brightness_map[] = { -+ APPLETB_BL_OFF, -+ APPLETB_BL_DIM, -+ APPLETB_BL_ON -+}; -+ -+static int appletb_bl_set_brightness(struct appletb_bl *bl, u8 brightness) -+{ -+ struct hid_report *report = bl->brightness_field->report; -+ struct hid_device *hdev = report->device; -+ int ret; -+ -+ ret = hid_set_field(bl->aux1_field, 0, 1); -+ if (ret) { -+ hid_err(hdev, "Failed to set auxiliary field (%pe)\n", ERR_PTR(ret)); -+ return ret; -+ } -+ -+ ret = hid_set_field(bl->brightness_field, 0, brightness); -+ if (ret) { -+ hid_err(hdev, "Failed to set brightness field (%pe)\n", ERR_PTR(ret)); -+ return ret; -+ } -+ -+ if (!bl->full_on) { -+ ret = hid_hw_power(hdev, PM_HINT_FULLON); -+ if (ret < 0) { -+ hid_err(hdev, "Device didn't power on (%pe)\n", ERR_PTR(ret)); -+ return ret; -+ } -+ -+ bl->full_on = true; -+ } -+ -+ hid_hw_request(hdev, report, HID_REQ_SET_REPORT); -+ -+ if (brightness == APPLETB_BL_OFF) { -+ hid_hw_power(hdev, PM_HINT_NORMAL); -+ bl->full_on = false; -+ } -+ -+ return 0; -+} -+ -+static int appletb_bl_update_status(struct backlight_device *bdev) -+{ -+ struct appletb_bl *bl = bl_get_data(bdev); -+ u16 brightness; -+ -+ if (bdev->props.state & BL_CORE_SUSPENDED) -+ brightness = 0; -+ else -+ brightness = backlight_get_brightness(bdev); -+ -+ return appletb_bl_set_brightness(bl, appletb_bl_brightness_map[brightness]); -+} -+ -+static const struct backlight_ops appletb_bl_backlight_ops = { -+ .options = BL_CORE_SUSPENDRESUME, -+ .update_status = 
appletb_bl_update_status, -+}; -+ -+static int appletb_bl_probe(struct hid_device *hdev, const struct hid_device_id *id) -+{ -+ struct hid_field *aux1_field, *brightness_field; -+ struct backlight_properties bl_props = { 0 }; -+ struct device *dev = &hdev->dev; -+ struct appletb_bl *bl; -+ int ret; -+ -+ ret = hid_parse(hdev); -+ if (ret) -+ return dev_err_probe(dev, ret, "HID parse failed\n"); -+ -+ aux1_field = hid_find_field(hdev, HID_FEATURE_REPORT, -+ HID_VD_APPLE_TB_BRIGHTNESS, HID_USAGE_AUX1); -+ -+ brightness_field = hid_find_field(hdev, HID_FEATURE_REPORT, -+ HID_VD_APPLE_TB_BRIGHTNESS, HID_USAGE_BRIGHTNESS); -+ -+ if (!aux1_field || !brightness_field) -+ return -ENODEV; -+ -+ if (aux1_field->report != brightness_field->report) -+ return dev_err_probe(dev, -ENODEV, "Encountered unexpected report structure\n"); -+ -+ bl = devm_kzalloc(dev, sizeof(*bl), GFP_KERNEL); -+ if (!bl) -+ return -ENOMEM; -+ -+ ret = hid_hw_start(hdev, HID_CONNECT_DRIVER); -+ if (ret) -+ return dev_err_probe(dev, ret, "HID hardware start failed\n"); -+ -+ ret = hid_hw_open(hdev); -+ if (ret) { -+ dev_err_probe(dev, ret, "HID hardware open failed\n"); -+ goto stop_hw; -+ } -+ -+ bl->aux1_field = aux1_field; -+ bl->brightness_field = brightness_field; -+ -+ ret = appletb_bl_set_brightness(bl, APPLETB_BL_OFF); -+ if (ret) { -+ dev_err_probe(dev, ret, "Failed to set touch bar brightness to off\n"); -+ goto close_hw; -+ } -+ -+ bl_props.type = BACKLIGHT_RAW; -+ bl_props.max_brightness = ARRAY_SIZE(appletb_bl_brightness_map) - 1; -+ -+ bl->bdev = devm_backlight_device_register(dev, "appletb_backlight", dev, bl, -+ &appletb_bl_backlight_ops, &bl_props); -+ if (IS_ERR(bl->bdev)) { -+ ret = PTR_ERR(bl->bdev); -+ dev_err_probe(dev, ret, "Failed to register backlight device\n"); -+ goto close_hw; -+ } -+ -+ hid_set_drvdata(hdev, bl); -+ -+ return 0; -+ -+close_hw: -+ hid_hw_close(hdev); -+stop_hw: -+ hid_hw_stop(hdev); -+ -+ return ret; -+} -+ -+static void appletb_bl_remove(struct hid_device 
*hdev) -+{ -+ struct appletb_bl *bl = hid_get_drvdata(hdev); -+ -+ appletb_bl_set_brightness(bl, APPLETB_BL_OFF); -+ -+ hid_hw_close(hdev); -+ hid_hw_stop(hdev); -+} -+ -+static const struct hid_device_id appletb_bl_hid_ids[] = { -+ /* MacBook Pro's 2018, 2019, with T2 chip: iBridge DFR Brightness */ -+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) }, -+ { } -+}; -+MODULE_DEVICE_TABLE(hid, appletb_bl_hid_ids); -+ -+static struct hid_driver appletb_bl_hid_driver = { -+ .name = "hid-appletb-bl", -+ .id_table = appletb_bl_hid_ids, -+ .probe = appletb_bl_probe, -+ .remove = appletb_bl_remove, -+}; -+module_hid_driver(appletb_bl_hid_driver); -+ -+MODULE_AUTHOR("Ronald Tschalär"); -+MODULE_AUTHOR("Kerem Karabay "); -+MODULE_DESCRIPTION("MacBookPro Touch Bar Backlight Driver"); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/hid/hid-appletb-kbd.c b/drivers/hid/hid-appletb-kbd.c -new file mode 100644 -index 000000000000..bc004c40805f ---- /dev/null -+++ b/drivers/hid/hid-appletb-kbd.c -@@ -0,0 +1,289 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Apple Touch Bar Keyboard Mode Driver -+ * -+ * Copyright (c) 2017-2018 Ronald Tschalär -+ * Copyright (c) 2022-2023 Kerem Karabay -+ */ -+ -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "hid-ids.h" -+ -+#define APPLETB_KBD_MODE_ESC 0 -+#define APPLETB_KBD_MODE_FN 1 -+#define APPLETB_KBD_MODE_SPCL 2 -+#define APPLETB_KBD_MODE_OFF 3 -+#define APPLETB_KBD_MODE_MAX APPLETB_KBD_MODE_OFF -+ -+#define HID_USAGE_MODE 0x00ff0004 -+ -+struct appletb_kbd { -+ struct hid_field *mode_field; -+ -+ u8 saved_mode; -+ u8 current_mode; -+}; -+ -+static const struct key_entry appletb_kbd_keymap[] = { -+ { KE_KEY, KEY_ESC, { KEY_ESC } }, -+ { KE_KEY, KEY_F1, { KEY_BRIGHTNESSDOWN } }, -+ { KE_KEY, KEY_F2, { KEY_BRIGHTNESSUP } }, -+ { KE_KEY, KEY_F3, { KEY_RESERVED } }, -+ { KE_KEY, KEY_F4, { KEY_RESERVED } }, -+ { 
KE_KEY, KEY_F5, { KEY_KBDILLUMDOWN } }, -+ { KE_KEY, KEY_F6, { KEY_KBDILLUMUP } }, -+ { KE_KEY, KEY_F7, { KEY_PREVIOUSSONG } }, -+ { KE_KEY, KEY_F8, { KEY_PLAYPAUSE } }, -+ { KE_KEY, KEY_F9, { KEY_NEXTSONG } }, -+ { KE_KEY, KEY_F10, { KEY_MUTE } }, -+ { KE_KEY, KEY_F11, { KEY_VOLUMEDOWN } }, -+ { KE_KEY, KEY_F12, { KEY_VOLUMEUP } }, -+ { KE_END, 0 } -+}; -+ -+static int appletb_kbd_set_mode(struct appletb_kbd *kbd, u8 mode) -+{ -+ struct hid_report *report = kbd->mode_field->report; -+ struct hid_device *hdev = report->device; -+ int ret; -+ -+ ret = hid_hw_power(hdev, PM_HINT_FULLON); -+ if (ret) { -+ hid_err(hdev, "Device didn't resume (%pe)\n", ERR_PTR(ret)); -+ return ret; -+ } -+ -+ ret = hid_set_field(kbd->mode_field, 0, mode); -+ if (ret) { -+ hid_err(hdev, "Failed to set mode field to %u (%pe)\n", mode, ERR_PTR(ret)); -+ goto power_normal; -+ } -+ -+ hid_hw_request(hdev, report, HID_REQ_SET_REPORT); -+ -+ kbd->current_mode = mode; -+ -+power_normal: -+ hid_hw_power(hdev, PM_HINT_NORMAL); -+ -+ return ret; -+} -+ -+static ssize_t mode_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ struct appletb_kbd *kbd = dev_get_drvdata(dev); -+ -+ return sysfs_emit(buf, "%d\n", kbd->current_mode); -+} -+ -+static ssize_t mode_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t size) -+{ -+ struct appletb_kbd *kbd = dev_get_drvdata(dev); -+ u8 mode; -+ int ret; -+ -+ ret = kstrtou8(buf, 0, &mode); -+ if (ret) -+ return ret; -+ -+ if (mode > APPLETB_KBD_MODE_MAX) -+ return -EINVAL; -+ -+ ret = appletb_kbd_set_mode(kbd, mode); -+ -+ return ret < 0 ? ret : size; -+} -+static DEVICE_ATTR_RW(mode); -+ -+struct attribute *appletb_kbd_attrs[] = { -+ &dev_attr_mode.attr, -+ NULL -+}; -+ATTRIBUTE_GROUPS(appletb_kbd); -+ -+static int appletb_tb_key_to_slot(unsigned int code) -+{ -+ switch (code) { -+ case KEY_ESC: -+ return 0; -+ case KEY_F1 ... KEY_F10: -+ return code - KEY_F1 + 1; -+ case KEY_F11 ... 
KEY_F12: -+ return code - KEY_F11 + 11; -+ -+ default: -+ return -EINVAL; -+ } -+} -+ -+static int appletb_kbd_hid_event(struct hid_device *hdev, struct hid_field *field, -+ struct hid_usage *usage, __s32 value) -+{ -+ struct appletb_kbd *kbd = hid_get_drvdata(hdev); -+ struct key_entry *translation; -+ struct input_dev *input; -+ int slot; -+ -+ if ((usage->hid & HID_USAGE_PAGE) != HID_UP_KEYBOARD || usage->type != EV_KEY) -+ return 0; -+ -+ input = field->hidinput->input; -+ -+ /* -+ * Skip non-touch-bar keys. -+ * -+ * Either the touch bar itself or usbhid generate a slew of key-down -+ * events for all the meta keys. None of which we're at all interested -+ * in. -+ */ -+ slot = appletb_tb_key_to_slot(usage->code); -+ if (slot < 0) -+ return 0; -+ -+ translation = sparse_keymap_entry_from_scancode(input, usage->code); -+ -+ if (translation && kbd->current_mode == APPLETB_KBD_MODE_SPCL) { -+ input_event(input, usage->type, translation->keycode, value); -+ -+ return 1; -+ } -+ -+ return kbd->current_mode == APPLETB_KBD_MODE_OFF; -+} -+ -+static int appletb_kbd_input_configured(struct hid_device *hdev, struct hid_input *hidinput) -+{ -+ struct input_dev *input = hidinput->input; -+ -+ /* -+ * Clear various input capabilities that are blindly set by the hid -+ * driver (usbkbd.c) -+ */ -+ memset(input->evbit, 0, sizeof(input->evbit)); -+ memset(input->keybit, 0, sizeof(input->keybit)); -+ memset(input->ledbit, 0, sizeof(input->ledbit)); -+ -+ __set_bit(EV_REP, input->evbit); -+ -+ return sparse_keymap_setup(input, appletb_kbd_keymap, NULL); -+} -+ -+static int appletb_kbd_probe(struct hid_device *hdev, const struct hid_device_id *id) -+{ -+ struct appletb_kbd *kbd; -+ struct device *dev = &hdev->dev; -+ struct hid_field *mode_field; -+ int ret; -+ -+ ret = hid_parse(hdev); -+ if (ret) -+ return dev_err_probe(dev, ret, "HID parse failed\n"); -+ -+ mode_field = hid_find_field(hdev, HID_OUTPUT_REPORT, -+ HID_GD_KEYBOARD, HID_USAGE_MODE); -+ if (!mode_field) -+ return 
-ENODEV; -+ -+ kbd = devm_kzalloc(dev, sizeof(*kbd), GFP_KERNEL); -+ if (!kbd) -+ return -ENOMEM; -+ -+ kbd->mode_field = mode_field; -+ -+ ret = hid_hw_start(hdev, HID_CONNECT_HIDINPUT); -+ if (ret) -+ return dev_err_probe(dev, ret, "HID hw start failed\n"); -+ -+ ret = hid_hw_open(hdev); -+ if (ret) { -+ dev_err_probe(dev, ret, "HID hw open failed\n"); -+ goto stop_hw; -+ } -+ -+ ret = appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF); -+ if (ret) { -+ dev_err_probe(dev, ret, "Failed to set touchbar mode\n"); -+ goto close_hw; -+ } -+ -+ hid_set_drvdata(hdev, kbd); -+ -+ return 0; -+ -+close_hw: -+ hid_hw_close(hdev); -+stop_hw: -+ hid_hw_stop(hdev); -+ return ret; -+} -+ -+static void appletb_kbd_remove(struct hid_device *hdev) -+{ -+ struct appletb_kbd *kbd = hid_get_drvdata(hdev); -+ -+ appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF); -+ -+ hid_hw_close(hdev); -+ hid_hw_stop(hdev); -+} -+ -+#ifdef CONFIG_PM -+static int appletb_kbd_suspend(struct hid_device *hdev, pm_message_t msg) -+{ -+ struct appletb_kbd *kbd = hid_get_drvdata(hdev); -+ -+ kbd->saved_mode = kbd->current_mode; -+ appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF); -+ -+ return 0; -+} -+ -+static int appletb_kbd_reset_resume(struct hid_device *hdev) -+{ -+ struct appletb_kbd *kbd = hid_get_drvdata(hdev); -+ -+ appletb_kbd_set_mode(kbd, kbd->saved_mode); -+ -+ return 0; -+} -+#endif -+ -+static const struct hid_device_id appletb_kbd_hid_ids[] = { -+ /* MacBook Pro's 2018, 2019, with T2 chip: iBridge Display */ -+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) }, -+ { } -+}; -+MODULE_DEVICE_TABLE(hid, appletb_kbd_hid_ids); -+ -+static struct hid_driver appletb_kbd_hid_driver = { -+ .name = "hid-appletb-kbd", -+ .id_table = appletb_kbd_hid_ids, -+ .probe = appletb_kbd_probe, -+ .remove = appletb_kbd_remove, -+ .event = appletb_kbd_hid_event, -+ .input_configured = appletb_kbd_input_configured, -+#ifdef CONFIG_PM -+ .suspend = appletb_kbd_suspend, -+ .reset_resume = 
appletb_kbd_reset_resume, -+#endif -+ .driver.dev_groups = appletb_kbd_groups, -+}; -+module_hid_driver(appletb_kbd_hid_driver); -+ -+MODULE_AUTHOR("Ronald Tschalär"); -+MODULE_AUTHOR("Kerem Karabay "); -+MODULE_DESCRIPTION("MacBookPro Touch Bar Keyboard Mode Driver"); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c -index 74efda212c55..f4379efdbf30 100644 ---- a/drivers/hid/hid-core.c -+++ b/drivers/hid/hid-core.c -@@ -1912,6 +1912,31 @@ int hid_set_field(struct hid_field *field, unsigned offset, __s32 value) - } - EXPORT_SYMBOL_GPL(hid_set_field); - -+struct hid_field *hid_find_field(struct hid_device *hdev, unsigned int report_type, -+ unsigned int application, unsigned int usage) -+{ -+ struct list_head *report_list = &hdev->report_enum[report_type].report_list; -+ struct hid_report *report; -+ int i, j; -+ -+ list_for_each_entry(report, report_list, list) { -+ if (report->application != application) -+ continue; -+ -+ for (i = 0; i < report->maxfield; i++) { -+ struct hid_field *field = report->field[i]; -+ -+ for (j = 0; j < field->maxusage; j++) { -+ if (field->usage[j].hid == usage) -+ return field; -+ } -+ } -+ } -+ -+ return NULL; -+} -+EXPORT_SYMBOL_GPL(hid_find_field); -+ - static struct hid_report *hid_get_report(struct hid_report_enum *report_enum, - const u8 *data) - { -diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c -index 25331695ae32..3380694ba18c 100644 ---- a/drivers/hid/hid-google-hammer.c -+++ b/drivers/hid/hid-google-hammer.c -@@ -418,38 +418,15 @@ static int hammer_event(struct hid_device *hid, struct hid_field *field, - return 0; - } - --static bool hammer_has_usage(struct hid_device *hdev, unsigned int report_type, -- unsigned application, unsigned usage) --{ -- struct hid_report_enum *re = &hdev->report_enum[report_type]; -- struct hid_report *report; -- int i, j; -- -- list_for_each_entry(report, &re->report_list, list) { -- if (report->application != application) -- 
continue; -- -- for (i = 0; i < report->maxfield; i++) { -- struct hid_field *field = report->field[i]; -- -- for (j = 0; j < field->maxusage; j++) -- if (field->usage[j].hid == usage) -- return true; -- } -- } -- -- return false; --} -- - static bool hammer_has_folded_event(struct hid_device *hdev) - { -- return hammer_has_usage(hdev, HID_INPUT_REPORT, -+ return !!hid_find_field(hdev, HID_INPUT_REPORT, - HID_GD_KEYBOARD, HID_USAGE_KBD_FOLDED); - } - - static bool hammer_has_backlight_control(struct hid_device *hdev) - { -- return hammer_has_usage(hdev, HID_OUTPUT_REPORT, -+ return !!hid_find_field(hdev, HID_OUTPUT_REPORT, - HID_GD_KEYBOARD, HID_AD_BRIGHTNESS); - } - -diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c -index 56fc78841f24..0fed955364c3 100644 ---- a/drivers/hid/hid-multitouch.c -+++ b/drivers/hid/hid-multitouch.c -@@ -72,6 +72,7 @@ MODULE_LICENSE("GPL"); - #define MT_QUIRK_FORCE_MULTI_INPUT BIT(20) - #define MT_QUIRK_DISABLE_WAKEUP BIT(21) - #define MT_QUIRK_ORIENTATION_INVERT BIT(22) -+#define MT_QUIRK_TOUCH_IS_TIPSTATE BIT(23) - - #define MT_INPUTMODE_TOUCHSCREEN 0x02 - #define MT_INPUTMODE_TOUCHPAD 0x03 -@@ -145,6 +146,7 @@ struct mt_class { - __s32 sn_height; /* Signal/noise ratio for height events */ - __s32 sn_pressure; /* Signal/noise ratio for pressure events */ - __u8 maxcontacts; -+ bool is_direct; /* true for touchscreens */ - bool is_indirect; /* true for touchpads */ - bool export_all_inputs; /* do not ignore mouse, keyboards, etc... 
*/ - }; -@@ -212,6 +214,7 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app); - #define MT_CLS_GOOGLE 0x0111 - #define MT_CLS_RAZER_BLADE_STEALTH 0x0112 - #define MT_CLS_SMART_TECH 0x0113 -+#define MT_CLS_APPLE_TOUCHBAR 0x0114 - - #define MT_DEFAULT_MAXCONTACT 10 - #define MT_MAX_MAXCONTACT 250 -@@ -396,6 +399,13 @@ static const struct mt_class mt_classes[] = { - MT_QUIRK_CONTACT_CNT_ACCURATE | - MT_QUIRK_SEPARATE_APP_REPORT, - }, -+ { .name = MT_CLS_APPLE_TOUCHBAR, -+ .quirks = MT_QUIRK_HOVERING | -+ MT_QUIRK_TOUCH_IS_TIPSTATE | -+ MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE, -+ .is_direct = true, -+ .maxcontacts = 11, -+ }, - { } - }; - -@@ -489,9 +499,6 @@ static void mt_feature_mapping(struct hid_device *hdev, - if (!td->maxcontacts && - field->logical_maximum <= MT_MAX_MAXCONTACT) - td->maxcontacts = field->logical_maximum; -- if (td->mtclass.maxcontacts) -- /* check if the maxcontacts is given by the class */ -- td->maxcontacts = td->mtclass.maxcontacts; - - break; - case HID_DG_BUTTONTYPE: -@@ -565,13 +572,13 @@ static struct mt_application *mt_allocate_application(struct mt_device *td, - mt_application->application = application; - INIT_LIST_HEAD(&mt_application->mt_usages); - -- if (application == HID_DG_TOUCHSCREEN) -+ if (application == HID_DG_TOUCHSCREEN && !td->mtclass.is_indirect) - mt_application->mt_flags |= INPUT_MT_DIRECT; - - /* - * Model touchscreens providing buttons as touchpads. 
- */ -- if (application == HID_DG_TOUCHPAD) { -+ if (application == HID_DG_TOUCHPAD && !td->mtclass.is_direct) { - mt_application->mt_flags |= INPUT_MT_POINTER; - td->inputmode_value = MT_INPUTMODE_TOUCHPAD; - } -@@ -635,7 +642,9 @@ static struct mt_report_data *mt_allocate_report_data(struct mt_device *td, - - if (field->logical == HID_DG_FINGER || td->hdev->group != HID_GROUP_MULTITOUCH_WIN_8) { - for (n = 0; n < field->report_count; n++) { -- if (field->usage[n].hid == HID_DG_CONTACTID) { -+ unsigned int hid = field->usage[n].hid; -+ -+ if (hid == HID_DG_CONTACTID || hid == HID_DG_TRANSDUCER_INDEX) { - rdata->is_mt_collection = true; - break; - } -@@ -807,6 +816,15 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, - - MT_STORE_FIELD(confidence_state); - return 1; -+ case HID_DG_TOUCH: -+ /* -+ * Legacy devices use TIPSWITCH and not TOUCH. -+ * Let's just ignore this field unless the quirk is set. -+ */ -+ if (!(cls->quirks & MT_QUIRK_TOUCH_IS_TIPSTATE)) -+ return -1; -+ -+ fallthrough; - case HID_DG_TIPSWITCH: - if (field->application != HID_GD_SYSTEM_MULTIAXIS) - input_set_capability(hi->input, -@@ -814,6 +832,7 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, - MT_STORE_FIELD(tip_state); - return 1; - case HID_DG_CONTACTID: -+ case HID_DG_TRANSDUCER_INDEX: - MT_STORE_FIELD(contactid); - app->touches_by_report++; - return 1; -@@ -869,10 +888,6 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, - case HID_DG_CONTACTMAX: - /* contact max are global to the report */ - return -1; -- case HID_DG_TOUCH: -- /* Legacy devices use TIPSWITCH and not TOUCH. -- * Let's just ignore this field. 
*/ -- return -1; - } - /* let hid-input decide for the others */ - return 0; -@@ -1300,6 +1315,10 @@ static int mt_touch_input_configured(struct hid_device *hdev, - struct input_dev *input = hi->input; - int ret; - -+ /* check if the maxcontacts is given by the class */ -+ if (cls->maxcontacts) -+ td->maxcontacts = cls->maxcontacts; -+ - if (!td->maxcontacts) - td->maxcontacts = MT_DEFAULT_MAXCONTACT; - -@@ -1307,6 +1326,9 @@ static int mt_touch_input_configured(struct hid_device *hdev, - if (td->serial_maybe) - mt_post_parse_default_settings(td, app); - -+ if (cls->is_direct) -+ app->mt_flags |= INPUT_MT_DIRECT; -+ - if (cls->is_indirect) - app->mt_flags |= INPUT_MT_POINTER; - -@@ -1733,6 +1755,15 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) - } - } - -+ ret = hid_parse(hdev); -+ if (ret != 0) -+ return ret; -+ -+ if (mtclass->name == MT_CLS_APPLE_TOUCHBAR && -+ !hid_find_field(hdev, HID_INPUT_REPORT, -+ HID_DG_TOUCHPAD, HID_DG_TRANSDUCER_INDEX)) -+ return -ENODEV; -+ - td = devm_kzalloc(&hdev->dev, sizeof(struct mt_device), GFP_KERNEL); - if (!td) { - dev_err(&hdev->dev, "cannot allocate multitouch data\n"); -@@ -1780,10 +1811,6 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) - - timer_setup(&td->release_timer, mt_expired_timeout, 0); - -- ret = hid_parse(hdev); -- if (ret != 0) -- return ret; -- - if (mtclass->quirks & MT_QUIRK_FIX_CONST_CONTACT_ID) - mt_fix_const_fields(hdev, HID_DG_CONTACTID); - -@@ -2235,6 +2262,11 @@ static const struct hid_device_id mt_devices[] = { - MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, - USB_DEVICE_ID_XIROKU_CSR2) }, - -+ /* Apple Touch Bars */ -+ { .driver_data = MT_CLS_APPLE_TOUCHBAR, -+ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, -+ USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) }, -+ - /* Google MT devices */ - { .driver_data = MT_CLS_GOOGLE, - HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE, -diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c -index 
e0bbf0c6345d..7c576d6540fe 100644 ---- a/drivers/hid/hid-quirks.c -+++ b/drivers/hid/hid-quirks.c -@@ -328,8 +328,6 @@ static const struct hid_device_id hid_have_special_driver[] = { - { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) }, - { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021) }, - { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021) }, -- { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) }, -- { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) }, - #endif - #if IS_ENABLED(CONFIG_HID_APPLEIR) - { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL) }, -@@ -338,6 +336,12 @@ static const struct hid_device_id hid_have_special_driver[] = { - { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4) }, - { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL5) }, - #endif -+#if IS_ENABLED(CONFIG_HID_APPLETB_BL) -+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) }, -+#endif -+#if IS_ENABLED(CONFIG_HID_APPLETB_KBD) -+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) }, -+#endif - #if IS_ENABLED(CONFIG_HID_ASUS) - { HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_KEYBOARD) }, - { HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD) }, -diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c -index fc6d6a9053ce..698f44794453 100644 ---- a/drivers/hwmon/applesmc.c -+++ b/drivers/hwmon/applesmc.c -@@ -6,6 +6,7 @@ - * - * Copyright (C) 2007 Nicolas Boichat - * Copyright (C) 2010 Henrik Rydberg -+ * Copyright (C) 2019 Paul Pawlowski - * - * Based on hdaps.c driver: - * Copyright (C) 2005 Robert Love -@@ -18,7 +19,7 @@ - #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - - #include --#include -+#include - #include - #include - #include -@@ -35,12 +36,24 @@ - #include - - /* data port used 
by Apple SMC */ --#define APPLESMC_DATA_PORT 0x300 -+#define APPLESMC_DATA_PORT 0 - /* command/status port used by Apple SMC */ --#define APPLESMC_CMD_PORT 0x304 -+#define APPLESMC_CMD_PORT 4 - - #define APPLESMC_NR_PORTS 32 /* 0x300-0x31f */ - -+#define APPLESMC_IOMEM_KEY_DATA 0 -+#define APPLESMC_IOMEM_KEY_STATUS 0x4005 -+#define APPLESMC_IOMEM_KEY_NAME 0x78 -+#define APPLESMC_IOMEM_KEY_DATA_LEN 0x7D -+#define APPLESMC_IOMEM_KEY_SMC_ID 0x7E -+#define APPLESMC_IOMEM_KEY_CMD 0x7F -+#define APPLESMC_IOMEM_MIN_SIZE 0x4006 -+ -+#define APPLESMC_IOMEM_KEY_TYPE_CODE 0 -+#define APPLESMC_IOMEM_KEY_TYPE_DATA_LEN 5 -+#define APPLESMC_IOMEM_KEY_TYPE_FLAGS 6 -+ - #define APPLESMC_MAX_DATA_LENGTH 32 - - /* Apple SMC status bits */ -@@ -74,6 +87,7 @@ - #define FAN_ID_FMT "F%dID" /* r-o char[16] */ - - #define TEMP_SENSOR_TYPE "sp78" -+#define FLOAT_TYPE "flt " - - /* List of keys used to read/write fan speeds */ - static const char *const fan_speed_fmt[] = { -@@ -83,6 +97,7 @@ static const char *const fan_speed_fmt[] = { - "F%dSf", /* safe speed - not all models */ - "F%dTg", /* target speed (manual: rw) */ - }; -+#define FAN_MANUAL_FMT "F%dMd" - - #define INIT_TIMEOUT_MSECS 5000 /* wait up to 5s for device init ... */ - #define INIT_WAIT_MSECS 50 /* ... 
in 50ms increments */ -@@ -119,7 +134,7 @@ struct applesmc_entry { - }; - - /* Register lookup and registers common to all SMCs */ --static struct applesmc_registers { -+struct applesmc_registers { - struct mutex mutex; /* register read/write mutex */ - unsigned int key_count; /* number of SMC registers */ - unsigned int fan_count; /* number of fans */ -@@ -133,26 +148,38 @@ static struct applesmc_registers { - bool init_complete; /* true when fully initialized */ - struct applesmc_entry *cache; /* cached key entries */ - const char **index; /* temperature key index */ --} smcreg = { -- .mutex = __MUTEX_INITIALIZER(smcreg.mutex), - }; - --static const int debug; --static struct platform_device *pdev; --static s16 rest_x; --static s16 rest_y; --static u8 backlight_state[2]; -+struct applesmc_device { -+ struct acpi_device *dev; -+ struct device *ldev; -+ struct applesmc_registers reg; - --static struct device *hwmon_dev; --static struct input_dev *applesmc_idev; -+ bool port_base_set, iomem_base_set; -+ u16 port_base; -+ u8 *__iomem iomem_base; -+ u32 iomem_base_addr, iomem_base_size; - --/* -- * Last index written to key_at_index sysfs file, and value to use for all other -- * key_at_index_* sysfs files. -- */ --static unsigned int key_at_index; -+ s16 rest_x; -+ s16 rest_y; -+ -+ u8 backlight_state[2]; -+ -+ struct device *hwmon_dev; -+ struct input_dev *idev; -+ -+ /* -+ * Last index written to key_at_index sysfs file, and value to use for all other -+ * key_at_index_* sysfs files. -+ */ -+ unsigned int key_at_index; - --static struct workqueue_struct *applesmc_led_wq; -+ struct workqueue_struct *backlight_wq; -+ struct work_struct backlight_work; -+ struct led_classdev backlight_dev; -+}; -+ -+static const int debug; - - /* - * Wait for specific status bits with a mask on the SMC. -@@ -162,7 +189,7 @@ static struct workqueue_struct *applesmc_led_wq; - * run out past 500ms. 
- */ - --static int wait_status(u8 val, u8 mask) -+static int port_wait_status(struct applesmc_device *smc, u8 val, u8 mask) - { - u8 status; - int us; -@@ -170,7 +197,7 @@ static int wait_status(u8 val, u8 mask) - - us = APPLESMC_MIN_WAIT; - for (i = 0; i < 24 ; i++) { -- status = inb(APPLESMC_CMD_PORT); -+ status = inb(smc->port_base + APPLESMC_CMD_PORT); - if ((status & mask) == val) - return 0; - usleep_range(us, us * 2); -@@ -180,13 +207,13 @@ static int wait_status(u8 val, u8 mask) - return -EIO; - } - --/* send_byte - Write to SMC data port. Callers must hold applesmc_lock. */ -+/* port_send_byte - Write to SMC data port. Callers must hold applesmc_lock. */ - --static int send_byte(u8 cmd, u16 port) -+static int port_send_byte(struct applesmc_device *smc, u8 cmd, u16 port) - { - int status; - -- status = wait_status(0, SMC_STATUS_IB_CLOSED); -+ status = port_wait_status(smc, 0, SMC_STATUS_IB_CLOSED); - if (status) - return status; - /* -@@ -195,24 +222,25 @@ static int send_byte(u8 cmd, u16 port) - * this extra read may not happen if status returns both - * simultaneously and this would appear to be required. - */ -- status = wait_status(SMC_STATUS_BUSY, SMC_STATUS_BUSY); -+ status = port_wait_status(smc, SMC_STATUS_BUSY, SMC_STATUS_BUSY); - if (status) - return status; - -- outb(cmd, port); -+ outb(cmd, smc->port_base + port); - return 0; - } - --/* send_command - Write a command to the SMC. Callers must hold applesmc_lock. */ -+/* port_send_command - Write a command to the SMC. Callers must hold applesmc_lock. 
*/ - --static int send_command(u8 cmd) -+static int port_send_command(struct applesmc_device *smc, u8 cmd) - { - int ret; - -- ret = wait_status(0, SMC_STATUS_IB_CLOSED); -+ ret = port_wait_status(smc, 0, SMC_STATUS_IB_CLOSED); - if (ret) - return ret; -- outb(cmd, APPLESMC_CMD_PORT); -+ -+ outb(cmd, smc->port_base + APPLESMC_CMD_PORT); - return 0; - } - -@@ -222,110 +250,304 @@ static int send_command(u8 cmd) - * If busy is stuck high after the command then the SMC is jammed. - */ - --static int smc_sane(void) -+static int port_smc_sane(struct applesmc_device *smc) - { - int ret; - -- ret = wait_status(0, SMC_STATUS_BUSY); -+ ret = port_wait_status(smc, 0, SMC_STATUS_BUSY); - if (!ret) - return ret; -- ret = send_command(APPLESMC_READ_CMD); -+ ret = port_send_command(smc, APPLESMC_READ_CMD); - if (ret) - return ret; -- return wait_status(0, SMC_STATUS_BUSY); -+ return port_wait_status(smc, 0, SMC_STATUS_BUSY); - } - --static int send_argument(const char *key) -+static int port_send_argument(struct applesmc_device *smc, const char *key) - { - int i; - - for (i = 0; i < 4; i++) -- if (send_byte(key[i], APPLESMC_DATA_PORT)) -+ if (port_send_byte(smc, key[i], APPLESMC_DATA_PORT)) - return -EIO; - return 0; - } - --static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) -+static int port_read_smc(struct applesmc_device *smc, u8 cmd, const char *key, -+ u8 *buffer, u8 len) - { - u8 status, data = 0; - int i; - int ret; - -- ret = smc_sane(); -+ ret = port_smc_sane(smc); - if (ret) - return ret; - -- if (send_command(cmd) || send_argument(key)) { -+ if (port_send_command(smc, cmd) || port_send_argument(smc, key)) { - pr_warn("%.4s: read arg fail\n", key); - return -EIO; - } - - /* This has no effect on newer (2012) SMCs */ -- if (send_byte(len, APPLESMC_DATA_PORT)) { -+ if (port_send_byte(smc, len, APPLESMC_DATA_PORT)) { - pr_warn("%.4s: read len fail\n", key); - return -EIO; - } - - for (i = 0; i < len; i++) { -- if (wait_status(SMC_STATUS_AWAITING_DATA | 
SMC_STATUS_BUSY, -+ if (port_wait_status(smc, -+ SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY, - SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY)) { - pr_warn("%.4s: read data[%d] fail\n", key, i); - return -EIO; - } -- buffer[i] = inb(APPLESMC_DATA_PORT); -+ buffer[i] = inb(smc->port_base + APPLESMC_DATA_PORT); - } - - /* Read the data port until bit0 is cleared */ - for (i = 0; i < 16; i++) { - udelay(APPLESMC_MIN_WAIT); -- status = inb(APPLESMC_CMD_PORT); -+ status = inb(smc->port_base + APPLESMC_CMD_PORT); - if (!(status & SMC_STATUS_AWAITING_DATA)) - break; -- data = inb(APPLESMC_DATA_PORT); -+ data = inb(smc->port_base + APPLESMC_DATA_PORT); - } - if (i) - pr_warn("flushed %d bytes, last value is: %d\n", i, data); - -- return wait_status(0, SMC_STATUS_BUSY); -+ return port_wait_status(smc, 0, SMC_STATUS_BUSY); - } - --static int write_smc(u8 cmd, const char *key, const u8 *buffer, u8 len) -+static int port_write_smc(struct applesmc_device *smc, u8 cmd, const char *key, -+ const u8 *buffer, u8 len) - { - int i; - int ret; - -- ret = smc_sane(); -+ ret = port_smc_sane(smc); - if (ret) - return ret; - -- if (send_command(cmd) || send_argument(key)) { -+ if (port_send_command(smc, cmd) || port_send_argument(smc, key)) { - pr_warn("%s: write arg fail\n", key); - return -EIO; - } - -- if (send_byte(len, APPLESMC_DATA_PORT)) { -+ if (port_send_byte(smc, len, APPLESMC_DATA_PORT)) { - pr_warn("%.4s: write len fail\n", key); - return -EIO; - } - - for (i = 0; i < len; i++) { -- if (send_byte(buffer[i], APPLESMC_DATA_PORT)) { -+ if (port_send_byte(smc, buffer[i], APPLESMC_DATA_PORT)) { - pr_warn("%s: write data fail\n", key); - return -EIO; - } - } - -- return wait_status(0, SMC_STATUS_BUSY); -+ return port_wait_status(smc, 0, SMC_STATUS_BUSY); - } - --static int read_register_count(unsigned int *count) -+static int port_get_smc_key_info(struct applesmc_device *smc, -+ const char *key, struct applesmc_entry *info) - { -- __be32 be; - int ret; -+ u8 raw[6]; - -- ret = 
read_smc(APPLESMC_READ_CMD, KEY_COUNT_KEY, (u8 *)&be, 4); -+ ret = port_read_smc(smc, APPLESMC_GET_KEY_TYPE_CMD, key, raw, 6); - if (ret) - return ret; -+ info->len = raw[0]; -+ memcpy(info->type, &raw[1], 4); -+ info->flags = raw[5]; -+ return 0; -+} -+ -+ -+/* -+ * MMIO based communication. -+ * TODO: Use updated mechanism for cmd timeout/retry -+ */ -+ -+static void iomem_clear_status(struct applesmc_device *smc) -+{ -+ if (ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS)) -+ iowrite8(0, smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS); -+} -+ -+static int iomem_wait_read(struct applesmc_device *smc) -+{ -+ u8 status; -+ int us; -+ int i; -+ -+ us = APPLESMC_MIN_WAIT; -+ for (i = 0; i < 24 ; i++) { -+ status = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS); -+ if (status & 0x20) -+ return 0; -+ usleep_range(us, us * 2); -+ if (i > 9) -+ us <<= 1; -+ } -+ -+ dev_warn(smc->ldev, "%s... timeout\n", __func__); -+ return -EIO; -+} -+ -+static int iomem_read_smc(struct applesmc_device *smc, u8 cmd, const char *key, -+ u8 *buffer, u8 len) -+{ -+ u8 err, remote_len; -+ u32 key_int = *((u32 *) key); -+ -+ iomem_clear_status(smc); -+ iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME); -+ iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID); -+ iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD); -+ -+ if (iomem_wait_read(smc)) -+ return -EIO; -+ -+ err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD); -+ if (err != 0) { -+ dev_warn(smc->ldev, "read_smc_mmio(%x %8x/%.4s) failed: %u\n", -+ cmd, key_int, key, err); -+ return -EIO; -+ } -+ -+ if (cmd == APPLESMC_READ_CMD) { -+ remote_len = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_DATA_LEN); -+ if (remote_len != len) { -+ dev_warn(smc->ldev, -+ "read_smc_mmio(%x %8x/%.4s) failed: buffer length mismatch (remote = %u, requested = %u)\n", -+ cmd, key_int, key, remote_len, len); -+ return -EINVAL; -+ } -+ } else { -+ remote_len = len; -+ } -+ -+ memcpy_fromio(buffer, smc->iomem_base + 
APPLESMC_IOMEM_KEY_DATA, -+ remote_len); -+ -+ dev_dbg(smc->ldev, "read_smc_mmio(%x %8x/%.4s): buflen=%u reslen=%u\n", -+ cmd, key_int, key, len, remote_len); -+ print_hex_dump_bytes("read_smc_mmio(): ", DUMP_PREFIX_NONE, buffer, remote_len); -+ return 0; -+} -+ -+static int iomem_get_smc_key_type(struct applesmc_device *smc, const char *key, -+ struct applesmc_entry *e) -+{ -+ u8 err; -+ u8 cmd = APPLESMC_GET_KEY_TYPE_CMD; -+ u32 key_int = *((u32 *) key); -+ -+ iomem_clear_status(smc); -+ iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME); -+ iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID); -+ iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD); -+ -+ if (iomem_wait_read(smc)) -+ return -EIO; -+ -+ err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD); -+ if (err != 0) { -+ dev_warn(smc->ldev, "get_smc_key_type_mmio(%.4s) failed: %u\n", key, err); -+ return -EIO; -+ } -+ -+ e->len = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_TYPE_DATA_LEN); -+ *((uint32_t *) e->type) = ioread32( -+ smc->iomem_base + APPLESMC_IOMEM_KEY_TYPE_CODE); -+ e->flags = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_TYPE_FLAGS); -+ -+ dev_dbg(smc->ldev, "get_smc_key_type_mmio(%.4s): len=%u type=%.4s flags=%x\n", -+ key, e->len, e->type, e->flags); -+ return 0; -+} -+ -+static int iomem_write_smc(struct applesmc_device *smc, u8 cmd, const char *key, -+ const u8 *buffer, u8 len) -+{ -+ u8 err; -+ u32 key_int = *((u32 *) key); -+ -+ iomem_clear_status(smc); -+ iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME); -+ memcpy_toio(smc->iomem_base + APPLESMC_IOMEM_KEY_DATA, buffer, len); -+ iowrite32(len, smc->iomem_base + APPLESMC_IOMEM_KEY_DATA_LEN); -+ iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID); -+ iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD); -+ -+ if (iomem_wait_read(smc)) -+ return -EIO; -+ -+ err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD); -+ if (err != 0) { -+ dev_warn(smc->ldev, "write_smc_mmio(%x %.4s) failed: 
%u\n", cmd, key, err); -+ print_hex_dump_bytes("write_smc_mmio(): ", DUMP_PREFIX_NONE, buffer, len); -+ return -EIO; -+ } -+ -+ dev_dbg(smc->ldev, "write_smc_mmio(%x %.4s): buflen=%u\n", cmd, key, len); -+ print_hex_dump_bytes("write_smc_mmio(): ", DUMP_PREFIX_NONE, buffer, len); -+ return 0; -+} -+ -+ -+static int read_smc(struct applesmc_device *smc, const char *key, -+ u8 *buffer, u8 len) -+{ -+ if (smc->iomem_base_set) -+ return iomem_read_smc(smc, APPLESMC_READ_CMD, key, buffer, len); -+ else -+ return port_read_smc(smc, APPLESMC_READ_CMD, key, buffer, len); -+} -+ -+static int write_smc(struct applesmc_device *smc, const char *key, -+ const u8 *buffer, u8 len) -+{ -+ if (smc->iomem_base_set) -+ return iomem_write_smc(smc, APPLESMC_WRITE_CMD, key, buffer, len); -+ else -+ return port_write_smc(smc, APPLESMC_WRITE_CMD, key, buffer, len); -+} -+ -+static int get_smc_key_by_index(struct applesmc_device *smc, -+ unsigned int index, char *key) -+{ -+ __be32 be; -+ -+ be = cpu_to_be32(index); -+ if (smc->iomem_base_set) -+ return iomem_read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD, -+ (const char *) &be, (u8 *) key, 4); -+ else -+ return port_read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD, -+ (const char *) &be, (u8 *) key, 4); -+} -+ -+static int get_smc_key_info(struct applesmc_device *smc, const char *key, -+ struct applesmc_entry *info) -+{ -+ if (smc->iomem_base_set) -+ return iomem_get_smc_key_type(smc, key, info); -+ else -+ return port_get_smc_key_info(smc, key, info); -+} -+ -+static int read_register_count(struct applesmc_device *smc, -+ unsigned int *count) -+{ -+ __be32 be; -+ int ret; -+ -+ ret = read_smc(smc, KEY_COUNT_KEY, (u8 *)&be, 4); -+ if (ret < 0) -+ return ret; - - *count = be32_to_cpu(be); - return 0; -@@ -338,76 +560,73 @@ static int read_register_count(unsigned int *count) - * All functions below are concurrency safe - callers should NOT hold lock. 
- */ - --static int applesmc_read_entry(const struct applesmc_entry *entry, -- u8 *buf, u8 len) -+static int applesmc_read_entry(struct applesmc_device *smc, -+ const struct applesmc_entry *entry, u8 *buf, u8 len) - { - int ret; - - if (entry->len != len) - return -EINVAL; -- mutex_lock(&smcreg.mutex); -- ret = read_smc(APPLESMC_READ_CMD, entry->key, buf, len); -- mutex_unlock(&smcreg.mutex); -+ mutex_lock(&smc->reg.mutex); -+ ret = read_smc(smc, entry->key, buf, len); -+ mutex_unlock(&smc->reg.mutex); - - return ret; - } - --static int applesmc_write_entry(const struct applesmc_entry *entry, -- const u8 *buf, u8 len) -+static int applesmc_write_entry(struct applesmc_device *smc, -+ const struct applesmc_entry *entry, const u8 *buf, u8 len) - { - int ret; - - if (entry->len != len) - return -EINVAL; -- mutex_lock(&smcreg.mutex); -- ret = write_smc(APPLESMC_WRITE_CMD, entry->key, buf, len); -- mutex_unlock(&smcreg.mutex); -+ mutex_lock(&smc->reg.mutex); -+ ret = write_smc(smc, entry->key, buf, len); -+ mutex_unlock(&smc->reg.mutex); - return ret; - } - --static const struct applesmc_entry *applesmc_get_entry_by_index(int index) -+static const struct applesmc_entry *applesmc_get_entry_by_index( -+ struct applesmc_device *smc, int index) - { -- struct applesmc_entry *cache = &smcreg.cache[index]; -- u8 key[4], info[6]; -- __be32 be; -+ struct applesmc_entry *cache = &smc->reg.cache[index]; -+ char key[4]; - int ret = 0; - - if (cache->valid) - return cache; - -- mutex_lock(&smcreg.mutex); -+ mutex_lock(&smc->reg.mutex); - - if (cache->valid) - goto out; -- be = cpu_to_be32(index); -- ret = read_smc(APPLESMC_GET_KEY_BY_INDEX_CMD, (u8 *)&be, key, 4); -+ ret = get_smc_key_by_index(smc, index, key); - if (ret) - goto out; -- ret = read_smc(APPLESMC_GET_KEY_TYPE_CMD, key, info, 6); -+ memcpy(cache->key, key, 4); -+ -+ ret = get_smc_key_info(smc, key, cache); - if (ret) - goto out; -- -- memcpy(cache->key, key, 4); -- cache->len = info[0]; -- memcpy(cache->type, &info[1], 
4); -- cache->flags = info[5]; - cache->valid = true; - - out: -- mutex_unlock(&smcreg.mutex); -+ mutex_unlock(&smc->reg.mutex); - if (ret) - return ERR_PTR(ret); - return cache; - } - --static int applesmc_get_lower_bound(unsigned int *lo, const char *key) -+static int applesmc_get_lower_bound(struct applesmc_device *smc, -+ unsigned int *lo, const char *key) - { -- int begin = 0, end = smcreg.key_count; -+ int begin = 0, end = smc->reg.key_count; - const struct applesmc_entry *entry; - - while (begin != end) { - int middle = begin + (end - begin) / 2; -- entry = applesmc_get_entry_by_index(middle); -+ entry = applesmc_get_entry_by_index(smc, middle); - if (IS_ERR(entry)) { - *lo = 0; - return PTR_ERR(entry); -@@ -422,16 +641,17 @@ static int applesmc_get_lower_bound(unsigned int *lo, const char *key) - return 0; - } - --static int applesmc_get_upper_bound(unsigned int *hi, const char *key) -+static int applesmc_get_upper_bound(struct applesmc_device *smc, -+ unsigned int *hi, const char *key) - { -- int begin = 0, end = smcreg.key_count; -+ int begin = 0, end = smc->reg.key_count; - const struct applesmc_entry *entry; - - while (begin != end) { - int middle = begin + (end - begin) / 2; -- entry = applesmc_get_entry_by_index(middle); -+ entry = applesmc_get_entry_by_index(smc, middle); - if (IS_ERR(entry)) { -- *hi = smcreg.key_count; -+ *hi = smc->reg.key_count; - return PTR_ERR(entry); - } - if (strcmp(key, entry->key) < 0) -@@ -444,50 +664,54 @@ static int applesmc_get_upper_bound(unsigned int *hi, const char *key) - return 0; - } - --static const struct applesmc_entry *applesmc_get_entry_by_key(const char *key) -+static const struct applesmc_entry *applesmc_get_entry_by_key( -+ struct applesmc_device *smc, const char *key) - { - int begin, end; - int ret; - -- ret = applesmc_get_lower_bound(&begin, key); -+ ret = applesmc_get_lower_bound(smc, &begin, key); - if (ret) - return ERR_PTR(ret); -- ret = applesmc_get_upper_bound(&end, key); -+ ret = 
applesmc_get_upper_bound(smc, &end, key); - if (ret) - return ERR_PTR(ret); - if (end - begin != 1) - return ERR_PTR(-EINVAL); - -- return applesmc_get_entry_by_index(begin); -+ return applesmc_get_entry_by_index(smc, begin); - } - --static int applesmc_read_key(const char *key, u8 *buffer, u8 len) -+static int applesmc_read_key(struct applesmc_device *smc, -+ const char *key, u8 *buffer, u8 len) - { - const struct applesmc_entry *entry; - -- entry = applesmc_get_entry_by_key(key); -+ entry = applesmc_get_entry_by_key(smc, key); - if (IS_ERR(entry)) - return PTR_ERR(entry); - -- return applesmc_read_entry(entry, buffer, len); -+ return applesmc_read_entry(smc, entry, buffer, len); - } - --static int applesmc_write_key(const char *key, const u8 *buffer, u8 len) -+static int applesmc_write_key(struct applesmc_device *smc, -+ const char *key, const u8 *buffer, u8 len) - { - const struct applesmc_entry *entry; - -- entry = applesmc_get_entry_by_key(key); -+ entry = applesmc_get_entry_by_key(smc, key); - if (IS_ERR(entry)) - return PTR_ERR(entry); - -- return applesmc_write_entry(entry, buffer, len); -+ return applesmc_write_entry(smc, entry, buffer, len); - } - --static int applesmc_has_key(const char *key, bool *value) -+static int applesmc_has_key(struct applesmc_device *smc, -+ const char *key, bool *value) - { - const struct applesmc_entry *entry; - -- entry = applesmc_get_entry_by_key(key); -+ entry = applesmc_get_entry_by_key(smc, key); - if (IS_ERR(entry) && PTR_ERR(entry) != -EINVAL) - return PTR_ERR(entry); - -@@ -498,12 +722,13 @@ static int applesmc_has_key(const char *key, bool *value) - /* - * applesmc_read_s16 - Read 16-bit signed big endian register - */ --static int applesmc_read_s16(const char *key, s16 *value) -+static int applesmc_read_s16(struct applesmc_device *smc, -+ const char *key, s16 *value) - { - u8 buffer[2]; - int ret; - -- ret = applesmc_read_key(key, buffer, 2); -+ ret = applesmc_read_key(smc, key, buffer, 2); - if (ret) - return ret; - 
-@@ -511,31 +736,68 @@ static int applesmc_read_s16(const char *key, s16 *value) - return 0; - } - -+/** -+ * applesmc_float_to_u32 - Retrieve the integral part of a float. -+ * This is needed because Apple made fans use float values in the T2. -+ * The fractional point is not significantly useful though, and the integral -+ * part can be easily extracted. -+ */ -+static inline u32 applesmc_float_to_u32(u32 d) -+{ -+ u8 sign = (u8) ((d >> 31) & 1); -+ s32 exp = (s32) ((d >> 23) & 0xff) - 0x7f; -+ u32 fr = d & ((1u << 23) - 1); -+ -+ if (sign || exp < 0) -+ return 0; -+ -+ return (u32) ((1u << exp) + (fr >> (23 - exp))); -+} -+ -+/** -+ * applesmc_u32_to_float - Convert an u32 into a float. -+ * See applesmc_float_to_u32 for a rationale. -+ */ -+static inline u32 applesmc_u32_to_float(u32 d) -+{ -+ u32 dc = d, bc = 0, exp; -+ -+ if (!d) -+ return 0; -+ -+ while (dc >>= 1) -+ ++bc; -+ exp = 0x7f + bc; -+ -+ return (u32) ((exp << 23) | -+ ((d << (23 - (exp - 0x7f))) & ((1u << 23) - 1))); -+} - /* - * applesmc_device_init - initialize the accelerometer. Can sleep. 
- */ --static void applesmc_device_init(void) -+static void applesmc_device_init(struct applesmc_device *smc) - { - int total; - u8 buffer[2]; - -- if (!smcreg.has_accelerometer) -+ if (!smc->reg.has_accelerometer) - return; - - for (total = INIT_TIMEOUT_MSECS; total > 0; total -= INIT_WAIT_MSECS) { -- if (!applesmc_read_key(MOTION_SENSOR_KEY, buffer, 2) && -+ if (!applesmc_read_key(smc, MOTION_SENSOR_KEY, buffer, 2) && - (buffer[0] != 0x00 || buffer[1] != 0x00)) - return; - buffer[0] = 0xe0; - buffer[1] = 0x00; -- applesmc_write_key(MOTION_SENSOR_KEY, buffer, 2); -+ applesmc_write_key(smc, MOTION_SENSOR_KEY, buffer, 2); - msleep(INIT_WAIT_MSECS); - } - - pr_warn("failed to init the device\n"); - } - --static int applesmc_init_index(struct applesmc_registers *s) -+static int applesmc_init_index(struct applesmc_device *smc, -+ struct applesmc_registers *s) - { - const struct applesmc_entry *entry; - unsigned int i; -@@ -548,7 +810,7 @@ static int applesmc_init_index(struct applesmc_registers *s) - return -ENOMEM; - - for (i = s->temp_begin; i < s->temp_end; i++) { -- entry = applesmc_get_entry_by_index(i); -+ entry = applesmc_get_entry_by_index(smc, i); - if (IS_ERR(entry)) - continue; - if (strcmp(entry->type, TEMP_SENSOR_TYPE)) -@@ -562,9 +824,9 @@ static int applesmc_init_index(struct applesmc_registers *s) - /* - * applesmc_init_smcreg_try - Try to initialize register cache. Idempotent. 
- */ --static int applesmc_init_smcreg_try(void) -+static int applesmc_init_smcreg_try(struct applesmc_device *smc) - { -- struct applesmc_registers *s = &smcreg; -+ struct applesmc_registers *s = &smc->reg; - bool left_light_sensor = false, right_light_sensor = false; - unsigned int count; - u8 tmp[1]; -@@ -573,7 +835,7 @@ static int applesmc_init_smcreg_try(void) - if (s->init_complete) - return 0; - -- ret = read_register_count(&count); -+ ret = read_register_count(smc, &count); - if (ret) - return ret; - -@@ -590,35 +852,35 @@ static int applesmc_init_smcreg_try(void) - if (!s->cache) - return -ENOMEM; - -- ret = applesmc_read_key(FANS_COUNT, tmp, 1); -+ ret = applesmc_read_key(smc, FANS_COUNT, tmp, 1); - if (ret) - return ret; - s->fan_count = tmp[0]; - if (s->fan_count > 10) - s->fan_count = 10; - -- ret = applesmc_get_lower_bound(&s->temp_begin, "T"); -+ ret = applesmc_get_lower_bound(smc, &s->temp_begin, "T"); - if (ret) - return ret; -- ret = applesmc_get_lower_bound(&s->temp_end, "U"); -+ ret = applesmc_get_lower_bound(smc, &s->temp_end, "U"); - if (ret) - return ret; - s->temp_count = s->temp_end - s->temp_begin; - -- ret = applesmc_init_index(s); -+ ret = applesmc_init_index(smc, s); - if (ret) - return ret; - -- ret = applesmc_has_key(LIGHT_SENSOR_LEFT_KEY, &left_light_sensor); -+ ret = applesmc_has_key(smc, LIGHT_SENSOR_LEFT_KEY, &left_light_sensor); - if (ret) - return ret; -- ret = applesmc_has_key(LIGHT_SENSOR_RIGHT_KEY, &right_light_sensor); -+ ret = applesmc_has_key(smc, LIGHT_SENSOR_RIGHT_KEY, &right_light_sensor); - if (ret) - return ret; -- ret = applesmc_has_key(MOTION_SENSOR_KEY, &s->has_accelerometer); -+ ret = applesmc_has_key(smc, MOTION_SENSOR_KEY, &s->has_accelerometer); - if (ret) - return ret; -- ret = applesmc_has_key(BACKLIGHT_KEY, &s->has_key_backlight); -+ ret = applesmc_has_key(smc, BACKLIGHT_KEY, &s->has_key_backlight); - if (ret) - return ret; - -@@ -634,13 +896,13 @@ static int applesmc_init_smcreg_try(void) - return 0; - } - 
--static void applesmc_destroy_smcreg(void) -+static void applesmc_destroy_smcreg(struct applesmc_device *smc) - { -- kfree(smcreg.index); -- smcreg.index = NULL; -- kfree(smcreg.cache); -- smcreg.cache = NULL; -- smcreg.init_complete = false; -+ kfree(smc->reg.index); -+ smc->reg.index = NULL; -+ kfree(smc->reg.cache); -+ smc->reg.cache = NULL; -+ smc->reg.init_complete = false; - } - - /* -@@ -649,12 +911,12 @@ static void applesmc_destroy_smcreg(void) - * Retries until initialization is successful, or the operation times out. - * - */ --static int applesmc_init_smcreg(void) -+static int applesmc_init_smcreg(struct applesmc_device *smc) - { - int ms, ret; - - for (ms = 0; ms < INIT_TIMEOUT_MSECS; ms += INIT_WAIT_MSECS) { -- ret = applesmc_init_smcreg_try(); -+ ret = applesmc_init_smcreg_try(smc); - if (!ret) { - if (ms) - pr_info("init_smcreg() took %d ms\n", ms); -@@ -663,50 +925,223 @@ static int applesmc_init_smcreg(void) - msleep(INIT_WAIT_MSECS); - } - -- applesmc_destroy_smcreg(); -+ applesmc_destroy_smcreg(smc); - - return ret; - } - - /* Device model stuff */ --static int applesmc_probe(struct platform_device *dev) -+ -+static int applesmc_init_resources(struct applesmc_device *smc); -+static void applesmc_free_resources(struct applesmc_device *smc); -+static int applesmc_create_modules(struct applesmc_device *smc); -+static void applesmc_destroy_modules(struct applesmc_device *smc); -+ -+static int applesmc_add(struct acpi_device *dev) - { -+ struct applesmc_device *smc; - int ret; - -- ret = applesmc_init_smcreg(); -+ smc = kzalloc(sizeof(struct applesmc_device), GFP_KERNEL); -+ if (!smc) -+ return -ENOMEM; -+ smc->dev = dev; -+ smc->ldev = &dev->dev; -+ mutex_init(&smc->reg.mutex); -+ -+ dev_set_drvdata(&dev->dev, smc); -+ -+ ret = applesmc_init_resources(smc); - if (ret) -- return ret; -+ goto out_mem; -+ -+ ret = applesmc_init_smcreg(smc); -+ if (ret) -+ goto out_res; -+ -+ applesmc_device_init(smc); -+ -+ ret = applesmc_create_modules(smc); -+ if 
(ret) -+ goto out_reg; -+ -+ return 0; -+ -+out_reg: -+ applesmc_destroy_smcreg(smc); -+out_res: -+ applesmc_free_resources(smc); -+out_mem: -+ dev_set_drvdata(&dev->dev, NULL); -+ mutex_destroy(&smc->reg.mutex); -+ kfree(smc); -+ -+ return ret; -+} -+ -+static void applesmc_remove(struct acpi_device *dev) -+{ -+ struct applesmc_device *smc = dev_get_drvdata(&dev->dev); -+ -+ applesmc_destroy_modules(smc); -+ applesmc_destroy_smcreg(smc); -+ applesmc_free_resources(smc); - -- applesmc_device_init(); -+ mutex_destroy(&smc->reg.mutex); -+ kfree(smc); -+ -+ return; -+} -+ -+static acpi_status applesmc_walk_resources(struct acpi_resource *res, -+ void *data) -+{ -+ struct applesmc_device *smc = data; -+ -+ switch (res->type) { -+ case ACPI_RESOURCE_TYPE_IO: -+ if (!smc->port_base_set) { -+ if (res->data.io.address_length < APPLESMC_NR_PORTS) -+ return AE_ERROR; -+ smc->port_base = res->data.io.minimum; -+ smc->port_base_set = true; -+ } -+ return AE_OK; -+ -+ case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: -+ if (!smc->iomem_base_set) { -+ if (res->data.fixed_memory32.address_length < -+ APPLESMC_IOMEM_MIN_SIZE) { -+ dev_warn(smc->ldev, "found iomem but it's too small: %u\n", -+ res->data.fixed_memory32.address_length); -+ return AE_OK; -+ } -+ smc->iomem_base_addr = res->data.fixed_memory32.address; -+ smc->iomem_base_size = res->data.fixed_memory32.address_length; -+ smc->iomem_base_set = true; -+ } -+ return AE_OK; -+ -+ case ACPI_RESOURCE_TYPE_END_TAG: -+ if (smc->port_base_set) -+ return AE_OK; -+ else -+ return AE_NOT_FOUND; -+ -+ default: -+ return AE_OK; -+ } -+} -+ -+static int applesmc_try_enable_iomem(struct applesmc_device *smc); -+ -+static int applesmc_init_resources(struct applesmc_device *smc) -+{ -+ int ret; -+ -+ ret = acpi_walk_resources(smc->dev->handle, METHOD_NAME__CRS, -+ applesmc_walk_resources, smc); -+ if (ACPI_FAILURE(ret)) -+ return -ENXIO; -+ -+ if (!request_region(smc->port_base, APPLESMC_NR_PORTS, "applesmc")) -+ return -ENXIO; -+ -+ if 
(smc->iomem_base_set) { -+ if (applesmc_try_enable_iomem(smc)) -+ smc->iomem_base_set = false; -+ } -+ -+ return 0; -+} -+ -+static int applesmc_try_enable_iomem(struct applesmc_device *smc) -+{ -+ u8 test_val, ldkn_version; -+ -+ dev_dbg(smc->ldev, "Trying to enable iomem based communication\n"); -+ smc->iomem_base = ioremap(smc->iomem_base_addr, smc->iomem_base_size); -+ if (!smc->iomem_base) -+ goto out; -+ -+ /* Apple's driver does this check for some reason */ -+ test_val = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS); -+ if (test_val == 0xff) { -+ dev_warn(smc->ldev, -+ "iomem enable failed: initial status is 0xff (is %x)\n", -+ test_val); -+ goto out_iomem; -+ } -+ -+ if (read_smc(smc, "LDKN", &ldkn_version, 1)) { -+ dev_warn(smc->ldev, "iomem enable failed: ldkn read failed\n"); -+ goto out_iomem; -+ } -+ -+ if (ldkn_version < 2) { -+ dev_warn(smc->ldev, -+ "iomem enable failed: ldkn version %u is less than minimum (2)\n", -+ ldkn_version); -+ goto out_iomem; -+ } - - return 0; -+ -+out_iomem: -+ iounmap(smc->iomem_base); -+ -+out: -+ return -ENXIO; -+} -+ -+static void applesmc_free_resources(struct applesmc_device *smc) -+{ -+ if (smc->iomem_base_set) -+ iounmap(smc->iomem_base); -+ release_region(smc->port_base, APPLESMC_NR_PORTS); - } - - /* Synchronize device with memorized backlight state */ - static int applesmc_pm_resume(struct device *dev) - { -- if (smcreg.has_key_backlight) -- applesmc_write_key(BACKLIGHT_KEY, backlight_state, 2); -+ struct applesmc_device *smc = dev_get_drvdata(dev); -+ -+ if (smc->reg.has_key_backlight) -+ applesmc_write_key(smc, BACKLIGHT_KEY, smc->backlight_state, 2); -+ - return 0; - } - - /* Reinitialize device on resume from hibernation */ - static int applesmc_pm_restore(struct device *dev) - { -- applesmc_device_init(); -+ struct applesmc_device *smc = dev_get_drvdata(dev); -+ -+ applesmc_device_init(smc); -+ - return applesmc_pm_resume(dev); - } - -+static const struct acpi_device_id applesmc_ids[] = { -+ 
{"APP0001", 0}, -+ {"", 0}, -+}; -+ - static const struct dev_pm_ops applesmc_pm_ops = { - .resume = applesmc_pm_resume, - .restore = applesmc_pm_restore, - }; - --static struct platform_driver applesmc_driver = { -- .probe = applesmc_probe, -- .driver = { -- .name = "applesmc", -- .pm = &applesmc_pm_ops, -+static struct acpi_driver applesmc_driver = { -+ .name = "applesmc", -+ .class = "applesmc", -+ .ids = applesmc_ids, -+ .ops = { -+ .add = applesmc_add, -+ .remove = applesmc_remove -+ }, -+ .drv = { -+ .pm = &applesmc_pm_ops - }, - }; - -@@ -714,25 +1149,26 @@ static struct platform_driver applesmc_driver = { - * applesmc_calibrate - Set our "resting" values. Callers must - * hold applesmc_lock. - */ --static void applesmc_calibrate(void) -+static void applesmc_calibrate(struct applesmc_device *smc) - { -- applesmc_read_s16(MOTION_SENSOR_X_KEY, &rest_x); -- applesmc_read_s16(MOTION_SENSOR_Y_KEY, &rest_y); -- rest_x = -rest_x; -+ applesmc_read_s16(smc, MOTION_SENSOR_X_KEY, &smc->rest_x); -+ applesmc_read_s16(smc, MOTION_SENSOR_Y_KEY, &smc->rest_y); -+ smc->rest_x = -smc->rest_x; - } - - static void applesmc_idev_poll(struct input_dev *idev) - { -+ struct applesmc_device *smc = dev_get_drvdata(&idev->dev); - s16 x, y; - -- if (applesmc_read_s16(MOTION_SENSOR_X_KEY, &x)) -+ if (applesmc_read_s16(smc, MOTION_SENSOR_X_KEY, &x)) - return; -- if (applesmc_read_s16(MOTION_SENSOR_Y_KEY, &y)) -+ if (applesmc_read_s16(smc, MOTION_SENSOR_Y_KEY, &y)) - return; - - x = -x; -- input_report_abs(idev, ABS_X, x - rest_x); -- input_report_abs(idev, ABS_Y, y - rest_y); -+ input_report_abs(idev, ABS_X, x - smc->rest_x); -+ input_report_abs(idev, ABS_Y, y - smc->rest_y); - input_sync(idev); - } - -@@ -747,16 +1183,17 @@ static ssize_t applesmc_name_show(struct device *dev, - static ssize_t applesmc_position_show(struct device *dev, - struct device_attribute *attr, char *buf) - { -+ struct applesmc_device *smc = dev_get_drvdata(dev); - int ret; - s16 x, y, z; - -- ret = 
applesmc_read_s16(MOTION_SENSOR_X_KEY, &x); -+ ret = applesmc_read_s16(smc, MOTION_SENSOR_X_KEY, &x); - if (ret) - goto out; -- ret = applesmc_read_s16(MOTION_SENSOR_Y_KEY, &y); -+ ret = applesmc_read_s16(smc, MOTION_SENSOR_Y_KEY, &y); - if (ret) - goto out; -- ret = applesmc_read_s16(MOTION_SENSOR_Z_KEY, &z); -+ ret = applesmc_read_s16(smc, MOTION_SENSOR_Z_KEY, &z); - if (ret) - goto out; - -@@ -770,6 +1207,7 @@ static ssize_t applesmc_position_show(struct device *dev, - static ssize_t applesmc_light_show(struct device *dev, - struct device_attribute *attr, char *sysfsbuf) - { -+ struct applesmc_device *smc = dev_get_drvdata(dev); - const struct applesmc_entry *entry; - static int data_length; - int ret; -@@ -777,7 +1215,7 @@ static ssize_t applesmc_light_show(struct device *dev, - u8 buffer[10]; - - if (!data_length) { -- entry = applesmc_get_entry_by_key(LIGHT_SENSOR_LEFT_KEY); -+ entry = applesmc_get_entry_by_key(smc, LIGHT_SENSOR_LEFT_KEY); - if (IS_ERR(entry)) - return PTR_ERR(entry); - if (entry->len > 10) -@@ -786,7 +1224,7 @@ static ssize_t applesmc_light_show(struct device *dev, - pr_info("light sensor data length set to %d\n", data_length); - } - -- ret = applesmc_read_key(LIGHT_SENSOR_LEFT_KEY, buffer, data_length); -+ ret = applesmc_read_key(smc, LIGHT_SENSOR_LEFT_KEY, buffer, data_length); - if (ret) - goto out; - /* newer macbooks report a single 10-bit bigendian value */ -@@ -796,7 +1234,7 @@ static ssize_t applesmc_light_show(struct device *dev, - } - left = buffer[2]; - -- ret = applesmc_read_key(LIGHT_SENSOR_RIGHT_KEY, buffer, data_length); -+ ret = applesmc_read_key(smc, LIGHT_SENSOR_RIGHT_KEY, buffer, data_length); - if (ret) - goto out; - right = buffer[2]; -@@ -812,7 +1250,8 @@ static ssize_t applesmc_light_show(struct device *dev, - static ssize_t applesmc_show_sensor_label(struct device *dev, - struct device_attribute *devattr, char *sysfsbuf) - { -- const char *key = smcreg.index[to_index(devattr)]; -+ struct applesmc_device *smc = 
dev_get_drvdata(dev); -+ const char *key = smc->reg.index[to_index(devattr)]; - - return sysfs_emit(sysfsbuf, "%s\n", key); - } -@@ -821,12 +1260,13 @@ static ssize_t applesmc_show_sensor_label(struct device *dev, - static ssize_t applesmc_show_temperature(struct device *dev, - struct device_attribute *devattr, char *sysfsbuf) - { -- const char *key = smcreg.index[to_index(devattr)]; -+ struct applesmc_device *smc = dev_get_drvdata(dev); -+ const char *key = smc->reg.index[to_index(devattr)]; - int ret; - s16 value; - int temp; - -- ret = applesmc_read_s16(key, &value); -+ ret = applesmc_read_s16(smc, key, &value); - if (ret) - return ret; - -@@ -838,6 +1278,8 @@ static ssize_t applesmc_show_temperature(struct device *dev, - static ssize_t applesmc_show_fan_speed(struct device *dev, - struct device_attribute *attr, char *sysfsbuf) - { -+ struct applesmc_device *smc = dev_get_drvdata(dev); -+ const struct applesmc_entry *entry; - int ret; - unsigned int speed = 0; - char newkey[5]; -@@ -846,11 +1288,21 @@ static ssize_t applesmc_show_fan_speed(struct device *dev, - scnprintf(newkey, sizeof(newkey), fan_speed_fmt[to_option(attr)], - to_index(attr)); - -- ret = applesmc_read_key(newkey, buffer, 2); -+ entry = applesmc_get_entry_by_key(smc, newkey); -+ if (IS_ERR(entry)) -+ return PTR_ERR(entry); -+ -+ if (!strcmp(entry->type, FLOAT_TYPE)) { -+ ret = applesmc_read_entry(smc, entry, (u8 *) &speed, 4); -+ speed = applesmc_float_to_u32(speed); -+ } else { -+ ret = applesmc_read_entry(smc, entry, buffer, 2); -+ speed = ((buffer[0] << 8 | buffer[1]) >> 2); -+ } -+ - if (ret) - return ret; - -- speed = ((buffer[0] << 8 | buffer[1]) >> 2); - return sysfs_emit(sysfsbuf, "%u\n", speed); - } - -@@ -858,6 +1310,8 @@ static ssize_t applesmc_store_fan_speed(struct device *dev, - struct device_attribute *attr, - const char *sysfsbuf, size_t count) - { -+ struct applesmc_device *smc = dev_get_drvdata(dev); -+ const struct applesmc_entry *entry; - int ret; - unsigned long speed; - 
char newkey[5]; -@@ -869,9 +1323,18 @@ static ssize_t applesmc_store_fan_speed(struct device *dev, - scnprintf(newkey, sizeof(newkey), fan_speed_fmt[to_option(attr)], - to_index(attr)); - -- buffer[0] = (speed >> 6) & 0xff; -- buffer[1] = (speed << 2) & 0xff; -- ret = applesmc_write_key(newkey, buffer, 2); -+ entry = applesmc_get_entry_by_key(smc, newkey); -+ if (IS_ERR(entry)) -+ return PTR_ERR(entry); -+ -+ if (!strcmp(entry->type, FLOAT_TYPE)) { -+ speed = applesmc_u32_to_float(speed); -+ ret = applesmc_write_entry(smc, entry, (u8 *) &speed, 4); -+ } else { -+ buffer[0] = (speed >> 6) & 0xff; -+ buffer[1] = (speed << 2) & 0xff; -+ ret = applesmc_write_key(smc, newkey, buffer, 2); -+ } - - if (ret) - return ret; -@@ -882,15 +1345,30 @@ static ssize_t applesmc_store_fan_speed(struct device *dev, - static ssize_t applesmc_show_fan_manual(struct device *dev, - struct device_attribute *attr, char *sysfsbuf) - { -+ struct applesmc_device *smc = dev_get_drvdata(dev); - int ret; - u16 manual = 0; - u8 buffer[2]; -+ char newkey[5]; -+ bool has_newkey = false; -+ -+ scnprintf(newkey, sizeof(newkey), FAN_MANUAL_FMT, to_index(attr)); -+ -+ ret = applesmc_has_key(smc, newkey, &has_newkey); -+ if (ret) -+ return ret; -+ -+ if (has_newkey) { -+ ret = applesmc_read_key(smc, newkey, buffer, 1); -+ manual = buffer[0]; -+ } else { -+ ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2); -+ manual = ((buffer[0] << 8 | buffer[1]) >> to_index(attr)) & 0x01; -+ } - -- ret = applesmc_read_key(FANS_MANUAL, buffer, 2); - if (ret) - return ret; - -- manual = ((buffer[0] << 8 | buffer[1]) >> to_index(attr)) & 0x01; - return sysfs_emit(sysfsbuf, "%d\n", manual); - } - -@@ -898,29 +1376,42 @@ static ssize_t applesmc_store_fan_manual(struct device *dev, - struct device_attribute *attr, - const char *sysfsbuf, size_t count) - { -+ struct applesmc_device *smc = dev_get_drvdata(dev); - int ret; - u8 buffer[2]; -+ char newkey[5]; -+ bool has_newkey = false; - unsigned long input; - u16 val; - - 
if (kstrtoul(sysfsbuf, 10, &input) < 0) - return -EINVAL; - -- ret = applesmc_read_key(FANS_MANUAL, buffer, 2); -+ scnprintf(newkey, sizeof(newkey), FAN_MANUAL_FMT, to_index(attr)); -+ -+ ret = applesmc_has_key(smc, newkey, &has_newkey); - if (ret) -- goto out; -+ return ret; - -- val = (buffer[0] << 8 | buffer[1]); -+ if (has_newkey) { -+ buffer[0] = input & 1; -+ ret = applesmc_write_key(smc, newkey, buffer, 1); -+ } else { -+ ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2); -+ val = (buffer[0] << 8 | buffer[1]); -+ if (ret) -+ goto out; - -- if (input) -- val = val | (0x01 << to_index(attr)); -- else -- val = val & ~(0x01 << to_index(attr)); -+ if (input) -+ val = val | (0x01 << to_index(attr)); -+ else -+ val = val & ~(0x01 << to_index(attr)); - -- buffer[0] = (val >> 8) & 0xFF; -- buffer[1] = val & 0xFF; -+ buffer[0] = (val >> 8) & 0xFF; -+ buffer[1] = val & 0xFF; - -- ret = applesmc_write_key(FANS_MANUAL, buffer, 2); -+ ret = applesmc_write_key(smc, FANS_MANUAL, buffer, 2); -+ } - - out: - if (ret) -@@ -932,13 +1423,14 @@ static ssize_t applesmc_store_fan_manual(struct device *dev, - static ssize_t applesmc_show_fan_position(struct device *dev, - struct device_attribute *attr, char *sysfsbuf) - { -+ struct applesmc_device *smc = dev_get_drvdata(dev); - int ret; - char newkey[5]; - u8 buffer[17]; - - scnprintf(newkey, sizeof(newkey), FAN_ID_FMT, to_index(attr)); - -- ret = applesmc_read_key(newkey, buffer, 16); -+ ret = applesmc_read_key(smc, newkey, buffer, 16); - buffer[16] = 0; - - if (ret) -@@ -950,43 +1442,79 @@ static ssize_t applesmc_show_fan_position(struct device *dev, - static ssize_t applesmc_calibrate_show(struct device *dev, - struct device_attribute *attr, char *sysfsbuf) - { -- return sysfs_emit(sysfsbuf, "(%d,%d)\n", rest_x, rest_y); -+ struct applesmc_device *smc = dev_get_drvdata(dev); -+ -+ return sysfs_emit(sysfsbuf, "(%d,%d)\n", smc->rest_x, smc->rest_y); - } - - static ssize_t applesmc_calibrate_store(struct device *dev, - struct 
device_attribute *attr, const char *sysfsbuf, size_t count) - { -- applesmc_calibrate(); -+ struct applesmc_device *smc = dev_get_drvdata(dev); -+ -+ applesmc_calibrate(smc); - - return count; - } - - static void applesmc_backlight_set(struct work_struct *work) - { -- applesmc_write_key(BACKLIGHT_KEY, backlight_state, 2); -+ struct applesmc_device *smc = container_of(work, struct applesmc_device, backlight_work); -+ -+ applesmc_write_key(smc, BACKLIGHT_KEY, smc->backlight_state, 2); - } --static DECLARE_WORK(backlight_work, &applesmc_backlight_set); - - static void applesmc_brightness_set(struct led_classdev *led_cdev, - enum led_brightness value) - { -+ struct applesmc_device *smc = dev_get_drvdata(led_cdev->dev); - int ret; - -- backlight_state[0] = value; -- ret = queue_work(applesmc_led_wq, &backlight_work); -+ smc->backlight_state[0] = value; -+ ret = queue_work(smc->backlight_wq, &smc->backlight_work); - - if (debug && (!ret)) - dev_dbg(led_cdev->dev, "work was already on the queue.\n"); - } - -+static ssize_t applesmc_BCLM_store(struct device *dev, -+ struct device_attribute *attr, char *sysfsbuf, size_t count) -+{ -+ struct applesmc_device *smc = dev_get_drvdata(dev); -+ u8 val; -+ -+ if (kstrtou8(sysfsbuf, 10, &val) < 0) -+ return -EINVAL; -+ -+ if (val < 0 || val > 100) -+ return -EINVAL; -+ -+ if (applesmc_write_key(smc, "BCLM", &val, 1)) -+ return -ENODEV; -+ return count; -+} -+ -+static ssize_t applesmc_BCLM_show(struct device *dev, -+ struct device_attribute *attr, char *sysfsbuf) -+{ -+ struct applesmc_device *smc = dev_get_drvdata(dev); -+ u8 val; -+ -+ if (applesmc_read_key(smc, "BCLM", &val, 1)) -+ return -ENODEV; -+ -+ return sysfs_emit(sysfsbuf, "%d\n", val); -+} -+ - static ssize_t applesmc_key_count_show(struct device *dev, - struct device_attribute *attr, char *sysfsbuf) - { -+ struct applesmc_device *smc = dev_get_drvdata(dev); - int ret; - u8 buffer[4]; - u32 count; - -- ret = applesmc_read_key(KEY_COUNT_KEY, buffer, 4); -+ ret = 
applesmc_read_key(smc, KEY_COUNT_KEY, buffer, 4); - if (ret) - return ret; - -@@ -998,13 +1526,14 @@ static ssize_t applesmc_key_count_show(struct device *dev, - static ssize_t applesmc_key_at_index_read_show(struct device *dev, - struct device_attribute *attr, char *sysfsbuf) - { -+ struct applesmc_device *smc = dev_get_drvdata(dev); - const struct applesmc_entry *entry; - int ret; - -- entry = applesmc_get_entry_by_index(key_at_index); -+ entry = applesmc_get_entry_by_index(smc, smc->key_at_index); - if (IS_ERR(entry)) - return PTR_ERR(entry); -- ret = applesmc_read_entry(entry, sysfsbuf, entry->len); -+ ret = applesmc_read_entry(smc, entry, sysfsbuf, entry->len); - if (ret) - return ret; - -@@ -1014,9 +1543,10 @@ static ssize_t applesmc_key_at_index_read_show(struct device *dev, - static ssize_t applesmc_key_at_index_data_length_show(struct device *dev, - struct device_attribute *attr, char *sysfsbuf) - { -+ struct applesmc_device *smc = dev_get_drvdata(dev); - const struct applesmc_entry *entry; - -- entry = applesmc_get_entry_by_index(key_at_index); -+ entry = applesmc_get_entry_by_index(smc, smc->key_at_index); - if (IS_ERR(entry)) - return PTR_ERR(entry); - -@@ -1026,9 +1556,10 @@ static ssize_t applesmc_key_at_index_data_length_show(struct device *dev, - static ssize_t applesmc_key_at_index_type_show(struct device *dev, - struct device_attribute *attr, char *sysfsbuf) - { -+ struct applesmc_device *smc = dev_get_drvdata(dev); - const struct applesmc_entry *entry; - -- entry = applesmc_get_entry_by_index(key_at_index); -+ entry = applesmc_get_entry_by_index(smc, smc->key_at_index); - if (IS_ERR(entry)) - return PTR_ERR(entry); - -@@ -1038,9 +1569,10 @@ static ssize_t applesmc_key_at_index_type_show(struct device *dev, - static ssize_t applesmc_key_at_index_name_show(struct device *dev, - struct device_attribute *attr, char *sysfsbuf) - { -+ struct applesmc_device *smc = dev_get_drvdata(dev); - const struct applesmc_entry *entry; - -- entry = 
applesmc_get_entry_by_index(key_at_index); -+ entry = applesmc_get_entry_by_index(smc, smc->key_at_index); - if (IS_ERR(entry)) - return PTR_ERR(entry); - -@@ -1050,28 +1582,25 @@ static ssize_t applesmc_key_at_index_name_show(struct device *dev, - static ssize_t applesmc_key_at_index_show(struct device *dev, - struct device_attribute *attr, char *sysfsbuf) - { -- return sysfs_emit(sysfsbuf, "%d\n", key_at_index); -+ struct applesmc_device *smc = dev_get_drvdata(dev); -+ -+ return sysfs_emit(sysfsbuf, "%d\n", smc->key_at_index); - } - - static ssize_t applesmc_key_at_index_store(struct device *dev, - struct device_attribute *attr, const char *sysfsbuf, size_t count) - { -+ struct applesmc_device *smc = dev_get_drvdata(dev); - unsigned long newkey; - - if (kstrtoul(sysfsbuf, 10, &newkey) < 0 -- || newkey >= smcreg.key_count) -+ || newkey >= smc->reg.key_count) - return -EINVAL; - -- key_at_index = newkey; -+ smc->key_at_index = newkey; - return count; - } - --static struct led_classdev applesmc_backlight = { -- .name = "smc::kbd_backlight", -- .default_trigger = "nand-disk", -- .brightness_set = applesmc_brightness_set, --}; -- - static struct applesmc_node_group info_group[] = { - { "name", applesmc_name_show }, - { "key_count", applesmc_key_count_show }, -@@ -1111,19 +1640,25 @@ static struct applesmc_node_group temp_group[] = { - { } - }; - -+static struct applesmc_node_group BCLM_group[] = { -+ { "battery_charge_limit", applesmc_BCLM_show, applesmc_BCLM_store }, -+ { } -+}; -+ - /* Module stuff */ - - /* - * applesmc_destroy_nodes - remove files and free associated memory - */ --static void applesmc_destroy_nodes(struct applesmc_node_group *groups) -+static void applesmc_destroy_nodes(struct applesmc_device *smc, -+ struct applesmc_node_group *groups) - { - struct applesmc_node_group *grp; - struct applesmc_dev_attr *node; - - for (grp = groups; grp->nodes; grp++) { - for (node = grp->nodes; node->sda.dev_attr.attr.name; node++) -- 
sysfs_remove_file(&pdev->dev.kobj, -+ sysfs_remove_file(&smc->dev->dev.kobj, - &node->sda.dev_attr.attr); - kfree(grp->nodes); - grp->nodes = NULL; -@@ -1133,7 +1668,8 @@ static void applesmc_destroy_nodes(struct applesmc_node_group *groups) - /* - * applesmc_create_nodes - create a two-dimensional group of sysfs files - */ --static int applesmc_create_nodes(struct applesmc_node_group *groups, int num) -+static int applesmc_create_nodes(struct applesmc_device *smc, -+ struct applesmc_node_group *groups, int num) - { - struct applesmc_node_group *grp; - struct applesmc_dev_attr *node; -@@ -1157,7 +1693,7 @@ static int applesmc_create_nodes(struct applesmc_node_group *groups, int num) - sysfs_attr_init(attr); - attr->name = node->name; - attr->mode = 0444 | (grp->store ? 0200 : 0); -- ret = sysfs_create_file(&pdev->dev.kobj, attr); -+ ret = sysfs_create_file(&smc->dev->dev.kobj, attr); - if (ret) { - attr->name = NULL; - goto out; -@@ -1167,57 +1703,56 @@ static int applesmc_create_nodes(struct applesmc_node_group *groups, int num) - - return 0; - out: -- applesmc_destroy_nodes(groups); -+ applesmc_destroy_nodes(smc, groups); - return ret; - } - - /* Create accelerometer resources */ --static int applesmc_create_accelerometer(void) -+static int applesmc_create_accelerometer(struct applesmc_device *smc) - { - int ret; -- -- if (!smcreg.has_accelerometer) -+ if (!smc->reg.has_accelerometer) - return 0; - -- ret = applesmc_create_nodes(accelerometer_group, 1); -+ ret = applesmc_create_nodes(smc, accelerometer_group, 1); - if (ret) - goto out; - -- applesmc_idev = input_allocate_device(); -- if (!applesmc_idev) { -+ smc->idev = input_allocate_device(); -+ if (!smc->idev) { - ret = -ENOMEM; - goto out_sysfs; - } - - /* initial calibrate for the input device */ -- applesmc_calibrate(); -+ applesmc_calibrate(smc); - - /* initialize the input device */ -- applesmc_idev->name = "applesmc"; -- applesmc_idev->id.bustype = BUS_HOST; -- applesmc_idev->dev.parent = &pdev->dev; -- 
input_set_abs_params(applesmc_idev, ABS_X, -+ smc->idev->name = "applesmc"; -+ smc->idev->id.bustype = BUS_HOST; -+ smc->idev->dev.parent = &smc->dev->dev; -+ input_set_abs_params(smc->idev, ABS_X, - -256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT); -- input_set_abs_params(applesmc_idev, ABS_Y, -+ input_set_abs_params(smc->idev, ABS_Y, - -256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT); - -- ret = input_setup_polling(applesmc_idev, applesmc_idev_poll); -+ ret = input_setup_polling(smc->idev, applesmc_idev_poll); - if (ret) - goto out_idev; - -- input_set_poll_interval(applesmc_idev, APPLESMC_POLL_INTERVAL); -+ input_set_poll_interval(smc->idev, APPLESMC_POLL_INTERVAL); - -- ret = input_register_device(applesmc_idev); -+ ret = input_register_device(smc->idev); - if (ret) - goto out_idev; - - return 0; - - out_idev: -- input_free_device(applesmc_idev); -+ input_free_device(smc->idev); - - out_sysfs: -- applesmc_destroy_nodes(accelerometer_group); -+ applesmc_destroy_nodes(smc, accelerometer_group); - - out: - pr_warn("driver init failed (ret=%d)!\n", ret); -@@ -1225,44 +1760,55 @@ static int applesmc_create_accelerometer(void) - } - - /* Release all resources used by the accelerometer */ --static void applesmc_release_accelerometer(void) -+static void applesmc_release_accelerometer(struct applesmc_device *smc) - { -- if (!smcreg.has_accelerometer) -+ if (!smc->reg.has_accelerometer) - return; -- input_unregister_device(applesmc_idev); -- applesmc_destroy_nodes(accelerometer_group); -+ input_unregister_device(smc->idev); -+ applesmc_destroy_nodes(smc, accelerometer_group); - } - --static int applesmc_create_light_sensor(void) -+static int applesmc_create_light_sensor(struct applesmc_device *smc) - { -- if (!smcreg.num_light_sensors) -+ if (!smc->reg.num_light_sensors) - return 0; -- return applesmc_create_nodes(light_sensor_group, 1); -+ return applesmc_create_nodes(smc, light_sensor_group, 1); - } - --static void applesmc_release_light_sensor(void) -+static 
void applesmc_release_light_sensor(struct applesmc_device *smc) - { -- if (!smcreg.num_light_sensors) -+ if (!smc->reg.num_light_sensors) - return; -- applesmc_destroy_nodes(light_sensor_group); -+ applesmc_destroy_nodes(smc, light_sensor_group); - } - --static int applesmc_create_key_backlight(void) -+static int applesmc_create_key_backlight(struct applesmc_device *smc) - { -- if (!smcreg.has_key_backlight) -+ int ret; -+ -+ if (!smc->reg.has_key_backlight) - return 0; -- applesmc_led_wq = create_singlethread_workqueue("applesmc-led"); -- if (!applesmc_led_wq) -+ smc->backlight_wq = create_singlethread_workqueue("applesmc-led"); -+ if (!smc->backlight_wq) - return -ENOMEM; -- return led_classdev_register(&pdev->dev, &applesmc_backlight); -+ -+ INIT_WORK(&smc->backlight_work, applesmc_backlight_set); -+ smc->backlight_dev.name = "smc::kbd_backlight"; -+ smc->backlight_dev.default_trigger = "nand-disk"; -+ smc->backlight_dev.brightness_set = applesmc_brightness_set; -+ ret = led_classdev_register(&smc->dev->dev, &smc->backlight_dev); -+ if (ret) -+ destroy_workqueue(smc->backlight_wq); -+ -+ return ret; - } - --static void applesmc_release_key_backlight(void) -+static void applesmc_release_key_backlight(struct applesmc_device *smc) - { -- if (!smcreg.has_key_backlight) -+ if (!smc->reg.has_key_backlight) - return; -- led_classdev_unregister(&applesmc_backlight); -- destroy_workqueue(applesmc_led_wq); -+ led_classdev_unregister(&smc->backlight_dev); -+ destroy_workqueue(smc->backlight_wq); - } - - static int applesmc_dmi_match(const struct dmi_system_id *id) -@@ -1291,6 +1837,10 @@ static const struct dmi_system_id applesmc_whitelist[] __initconst = { - DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), - DMI_MATCH(DMI_PRODUCT_NAME, "Macmini") }, - }, -+ { applesmc_dmi_match, "Apple iMacPro", { -+ DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "iMacPro") }, -+ }, - { applesmc_dmi_match, "Apple MacPro", { - DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), - 
DMI_MATCH(DMI_PRODUCT_NAME, "MacPro") }, -@@ -1306,90 +1856,91 @@ static const struct dmi_system_id applesmc_whitelist[] __initconst = { - { .ident = NULL } - }; - --static int __init applesmc_init(void) -+static int applesmc_create_modules(struct applesmc_device *smc) - { - int ret; - -- if (!dmi_check_system(applesmc_whitelist)) { -- pr_warn("supported laptop not found!\n"); -- ret = -ENODEV; -- goto out; -- } -- -- if (!request_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS, -- "applesmc")) { -- ret = -ENXIO; -- goto out; -- } -- -- ret = platform_driver_register(&applesmc_driver); -- if (ret) -- goto out_region; -- -- pdev = platform_device_register_simple("applesmc", APPLESMC_DATA_PORT, -- NULL, 0); -- if (IS_ERR(pdev)) { -- ret = PTR_ERR(pdev); -- goto out_driver; -- } -- -- /* create register cache */ -- ret = applesmc_init_smcreg(); -+ ret = applesmc_create_nodes(smc, info_group, 1); - if (ret) -- goto out_device; -- -- ret = applesmc_create_nodes(info_group, 1); -+ goto out; -+ ret = applesmc_create_nodes(smc, BCLM_group, 1); - if (ret) -- goto out_smcreg; -+ goto out_info; - -- ret = applesmc_create_nodes(fan_group, smcreg.fan_count); -+ ret = applesmc_create_nodes(smc, fan_group, smc->reg.fan_count); - if (ret) -- goto out_info; -+ goto out_bclm; - -- ret = applesmc_create_nodes(temp_group, smcreg.index_count); -+ ret = applesmc_create_nodes(smc, temp_group, smc->reg.index_count); - if (ret) - goto out_fans; - -- ret = applesmc_create_accelerometer(); -+ ret = applesmc_create_accelerometer(smc); - if (ret) - goto out_temperature; - -- ret = applesmc_create_light_sensor(); -+ ret = applesmc_create_light_sensor(smc); - if (ret) - goto out_accelerometer; - -- ret = applesmc_create_key_backlight(); -+ ret = applesmc_create_key_backlight(smc); - if (ret) - goto out_light_sysfs; - -- hwmon_dev = hwmon_device_register(&pdev->dev); -- if (IS_ERR(hwmon_dev)) { -- ret = PTR_ERR(hwmon_dev); -+ smc->hwmon_dev = hwmon_device_register(&smc->dev->dev); -+ if 
(IS_ERR(smc->hwmon_dev)) { -+ ret = PTR_ERR(smc->hwmon_dev); - goto out_light_ledclass; - } - - return 0; - - out_light_ledclass: -- applesmc_release_key_backlight(); -+ applesmc_release_key_backlight(smc); - out_light_sysfs: -- applesmc_release_light_sensor(); -+ applesmc_release_light_sensor(smc); - out_accelerometer: -- applesmc_release_accelerometer(); -+ applesmc_release_accelerometer(smc); - out_temperature: -- applesmc_destroy_nodes(temp_group); -+ applesmc_destroy_nodes(smc, temp_group); - out_fans: -- applesmc_destroy_nodes(fan_group); -+ applesmc_destroy_nodes(smc, fan_group); -+out_bclm: -+ applesmc_destroy_nodes(smc, BCLM_group); - out_info: -- applesmc_destroy_nodes(info_group); --out_smcreg: -- applesmc_destroy_smcreg(); --out_device: -- platform_device_unregister(pdev); --out_driver: -- platform_driver_unregister(&applesmc_driver); --out_region: -- release_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS); -+ applesmc_destroy_nodes(smc, info_group); -+out: -+ return ret; -+} -+ -+static void applesmc_destroy_modules(struct applesmc_device *smc) -+{ -+ hwmon_device_unregister(smc->hwmon_dev); -+ applesmc_release_key_backlight(smc); -+ applesmc_release_light_sensor(smc); -+ applesmc_release_accelerometer(smc); -+ applesmc_destroy_nodes(smc, temp_group); -+ applesmc_destroy_nodes(smc, fan_group); -+ applesmc_destroy_nodes(smc, BCLM_group); -+ applesmc_destroy_nodes(smc, info_group); -+} -+ -+static int __init applesmc_init(void) -+{ -+ int ret; -+ -+ if (!dmi_check_system(applesmc_whitelist)) { -+ pr_warn("supported laptop not found!\n"); -+ ret = -ENODEV; -+ goto out; -+ } -+ -+ ret = acpi_bus_register_driver(&applesmc_driver); -+ if (ret) -+ goto out; -+ -+ return 0; -+ - out: - pr_warn("driver init failed (ret=%d)!\n", ret); - return ret; -@@ -1397,23 +1948,14 @@ static int __init applesmc_init(void) - - static void __exit applesmc_exit(void) - { -- hwmon_device_unregister(hwmon_dev); -- applesmc_release_key_backlight(); -- 
applesmc_release_light_sensor(); -- applesmc_release_accelerometer(); -- applesmc_destroy_nodes(temp_group); -- applesmc_destroy_nodes(fan_group); -- applesmc_destroy_nodes(info_group); -- applesmc_destroy_smcreg(); -- platform_device_unregister(pdev); -- platform_driver_unregister(&applesmc_driver); -- release_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS); -+ acpi_bus_unregister_driver(&applesmc_driver); - } - - module_init(applesmc_init); - module_exit(applesmc_exit); - - MODULE_AUTHOR("Nicolas Boichat"); -+MODULE_AUTHOR("Paul Pawlowski"); - MODULE_DESCRIPTION("Apple SMC"); - MODULE_LICENSE("GPL v2"); - MODULE_DEVICE_TABLE(dmi, applesmc_whitelist); -diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c -index ca150618d32f..4e692b272ae9 100644 ---- a/drivers/input/mouse/bcm5974.c -+++ b/drivers/input/mouse/bcm5974.c -@@ -83,6 +83,24 @@ - #define USB_DEVICE_ID_APPLE_WELLSPRING9_ISO 0x0273 - #define USB_DEVICE_ID_APPLE_WELLSPRING9_JIS 0x0274 - -+/* T2-Attached Devices */ -+/* MacbookAir8,1 (2018) */ -+#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K 0x027a -+/* MacbookPro15,2 (2018) */ -+#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132 0x027b -+/* MacbookPro15,1 (2018) */ -+#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680 0x027c -+/* MacbookPro15,4 (2019) */ -+#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213 0x027d -+/* MacbookPro16,2 (2020) */ -+#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K 0x027e -+/* MacbookPro16,3 (2020) */ -+#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223 0x027f -+/* MacbookAir9,1 (2020) */ -+#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K 0x0280 -+/* MacbookPro16,1 (2019)*/ -+#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F 0x0340 -+ - #define BCM5974_DEVICE(prod) { \ - .match_flags = (USB_DEVICE_ID_MATCH_DEVICE | \ - USB_DEVICE_ID_MATCH_INT_CLASS | \ -@@ -147,6 +165,22 @@ static const struct usb_device_id bcm5974_table[] = { - BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI), - BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_ISO), - 
BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_JIS), -+ /* MacbookAir8,1 */ -+ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K), -+ /* MacbookPro15,2 */ -+ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132), -+ /* MacbookPro15,1 */ -+ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680), -+ /* MacbookPro15,4 */ -+ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213), -+ /* MacbookPro16,2 */ -+ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K), -+ /* MacbookPro16,3 */ -+ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223), -+ /* MacbookAir9,1 */ -+ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K), -+ /* MacbookPro16,1 */ -+ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F), - /* Terminating entry */ - {} - }; -@@ -483,6 +517,110 @@ static const struct bcm5974_config bcm5974_config_table[] = { - { SN_COORD, -203, 6803 }, - { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } - }, -+ { -+ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K, -+ 0, -+ 0, -+ HAS_INTEGRATED_BUTTON, -+ 0, sizeof(struct bt_data), -+ 0x83, DATAFORMAT(TYPE4), -+ { SN_PRESSURE, 0, 300 }, -+ { SN_WIDTH, 0, 2048 }, -+ { SN_COORD, -6243, 6749 }, -+ { SN_COORD, -170, 7685 }, -+ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } -+ }, -+ { -+ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132, -+ 0, -+ 0, -+ HAS_INTEGRATED_BUTTON, -+ 0, sizeof(struct bt_data), -+ 0x83, DATAFORMAT(TYPE4), -+ { SN_PRESSURE, 0, 300 }, -+ { SN_WIDTH, 0, 2048 }, -+ { SN_COORD, -6243, 6749 }, -+ { SN_COORD, -170, 7685 }, -+ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } -+ }, -+ { -+ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680, -+ 0, -+ 0, -+ HAS_INTEGRATED_BUTTON, -+ 0, sizeof(struct bt_data), -+ 0x83, DATAFORMAT(TYPE4), -+ { SN_PRESSURE, 0, 300 }, -+ { SN_WIDTH, 0, 2048 }, -+ { SN_COORD, -7456, 7976 }, -+ { SN_COORD, -1768, 7685 }, -+ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } -+ }, -+ { -+ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213, -+ 0, -+ 0, -+ 
HAS_INTEGRATED_BUTTON, -+ 0, sizeof(struct bt_data), -+ 0x83, DATAFORMAT(TYPE4), -+ { SN_PRESSURE, 0, 300 }, -+ { SN_WIDTH, 0, 2048 }, -+ { SN_COORD, -6243, 6749 }, -+ { SN_COORD, -170, 7685 }, -+ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } -+ }, -+ { -+ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K, -+ 0, -+ 0, -+ HAS_INTEGRATED_BUTTON, -+ 0, sizeof(struct bt_data), -+ 0x83, DATAFORMAT(TYPE4), -+ { SN_PRESSURE, 0, 300 }, -+ { SN_WIDTH, 0, 2048 }, -+ { SN_COORD, -7823, 8329 }, -+ { SN_COORD, -370, 7925 }, -+ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } -+ }, -+ { -+ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223, -+ 0, -+ 0, -+ HAS_INTEGRATED_BUTTON, -+ 0, sizeof(struct bt_data), -+ 0x83, DATAFORMAT(TYPE4), -+ { SN_PRESSURE, 0, 300 }, -+ { SN_WIDTH, 0, 2048 }, -+ { SN_COORD, -6243, 6749 }, -+ { SN_COORD, -170, 7685 }, -+ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } -+ }, -+ { -+ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K, -+ 0, -+ 0, -+ HAS_INTEGRATED_BUTTON, -+ 0, sizeof(struct bt_data), -+ 0x83, DATAFORMAT(TYPE4), -+ { SN_PRESSURE, 0, 300 }, -+ { SN_WIDTH, 0, 2048 }, -+ { SN_COORD, -6243, 6749 }, -+ { SN_COORD, -170, 7685 }, -+ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } -+ }, -+ { -+ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F, -+ 0, -+ 0, -+ HAS_INTEGRATED_BUTTON, -+ 0, sizeof(struct bt_data), -+ 0x83, DATAFORMAT(TYPE4), -+ { SN_PRESSURE, 0, 300 }, -+ { SN_WIDTH, 0, 2048 }, -+ { SN_COORD, -8916, 9918 }, -+ { SN_COORD, -1934, 9835 }, -+ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } -+ }, - {} - }; - -diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c -index 78748e8d2dba..2b2b558cebe6 100644 ---- a/drivers/pci/vgaarb.c -+++ b/drivers/pci/vgaarb.c -@@ -143,6 +143,7 @@ void vga_set_default_device(struct pci_dev *pdev) - pci_dev_put(vga_default); - vga_default = pci_dev_get(pdev); - } -+EXPORT_SYMBOL_GPL(vga_set_default_device); - - /** - * vga_remove_vgacon - deactivate VGA console -diff --git 
a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c -index 1417e230edbd..e69785af8e1d 100644 ---- a/drivers/platform/x86/apple-gmux.c -+++ b/drivers/platform/x86/apple-gmux.c -@@ -21,6 +21,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -107,6 +108,10 @@ struct apple_gmux_config { - - # define MMIO_GMUX_MAX_BRIGHTNESS 0xffff - -+static bool force_igd; -+module_param(force_igd, bool, 0); -+MODULE_PARM_DESC(force_idg, "Switch gpu to igd on module load. Make sure that you have apple-set-os set up and the iGPU is in `lspci -s 00:02.0`. (default: false) (bool)"); -+ - static u8 gmux_pio_read8(struct apple_gmux_data *gmux_data, int port) - { - return inb(gmux_data->iostart + port); -@@ -945,6 +950,19 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) - gmux_enable_interrupts(gmux_data); - gmux_read_switch_state(gmux_data); - -+ if (force_igd) { -+ struct pci_dev *pdev; -+ -+ pdev = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(2, 0)); -+ if (pdev) { -+ pr_info("Switching to IGD"); -+ gmux_switchto(VGA_SWITCHEROO_IGD); -+ vga_set_default_device(pdev); -+ } else { -+ pr_err("force_idg is true, but couldn't find iGPU at 00:02.0! Is apple-set-os working?"); -+ } -+ } -+ - /* - * Retina MacBook Pros cannot switch the panel's AUX separately - * and need eDP pre-calibration. 
They are distinguishable from -diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig -index db4a392841b1..580df4ce4f9f 100644 ---- a/drivers/staging/Kconfig -+++ b/drivers/staging/Kconfig -@@ -66,4 +66,6 @@ source "drivers/staging/fieldbus/Kconfig" - - source "drivers/staging/vme_user/Kconfig" - -+source "drivers/staging/apple-bce/Kconfig" -+ - endif # STAGING -diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile -index 5390879b5d1b..528be2d3b546 100644 ---- a/drivers/staging/Makefile -+++ b/drivers/staging/Makefile -@@ -22,3 +22,4 @@ obj-$(CONFIG_GREYBUS) += greybus/ - obj-$(CONFIG_BCM2835_VCHIQ) += vc04_services/ - obj-$(CONFIG_XIL_AXIS_FIFO) += axis-fifo/ - obj-$(CONFIG_FIELDBUS_DEV) += fieldbus/ -+obj-$(CONFIG_APPLE_BCE) += apple-bce/ -diff --git a/drivers/staging/apple-bce/Kconfig b/drivers/staging/apple-bce/Kconfig -new file mode 100644 -index 000000000000..fe92bc441e89 ---- /dev/null -+++ b/drivers/staging/apple-bce/Kconfig -@@ -0,0 +1,18 @@ -+config APPLE_BCE -+ tristate "Apple BCE driver (VHCI and Audio support)" -+ default m -+ depends on X86 -+ select SOUND -+ select SND -+ select SND_PCM -+ select SND_JACK -+ help -+ VHCI and audio support on Apple MacBooks with the T2 Chip. -+ This driver is divided in three components: -+ - BCE (Buffer Copy Engine): which establishes a basic communication -+ channel with the T2 chip. This component is required by the other two: -+ - VHCI (Virtual Host Controller Interface): Access to keyboard, mouse -+ and other system devices depend on this virtual USB host controller -+ - Audio: a driver for the T2 audio interface. -+ -+ If "M" is selected, the module will be called apple-bce.' 
-diff --git a/drivers/staging/apple-bce/Makefile b/drivers/staging/apple-bce/Makefile -new file mode 100644 -index 000000000000..8cfbd3f64af6 ---- /dev/null -+++ b/drivers/staging/apple-bce/Makefile -@@ -0,0 +1,28 @@ -+modname := apple-bce -+obj-$(CONFIG_APPLE_BCE) += $(modname).o -+ -+apple-bce-objs := apple_bce.o mailbox.o queue.o queue_dma.o vhci/vhci.o vhci/queue.o vhci/transfer.o audio/audio.o audio/protocol.o audio/protocol_bce.o audio/pcm.o -+ -+MY_CFLAGS += -DWITHOUT_NVME_PATCH -+#MY_CFLAGS += -g -DDEBUG -+ccflags-y += ${MY_CFLAGS} -+CC += ${MY_CFLAGS} -+ -+KVERSION := $(KERNELRELEASE) -+ifeq ($(origin KERNELRELEASE), undefined) -+KVERSION := $(shell uname -r) -+endif -+ -+KDIR := /lib/modules/$(KVERSION)/build -+PWD := $(shell pwd) -+ -+.PHONY: all -+ -+all: -+ $(MAKE) -C $(KDIR) M=$(PWD) modules -+ -+clean: -+ $(MAKE) -C $(KDIR) M=$(PWD) clean -+ -+install: -+ $(MAKE) -C $(KDIR) M=$(PWD) modules_install -diff --git a/drivers/staging/apple-bce/apple_bce.c b/drivers/staging/apple-bce/apple_bce.c -new file mode 100644 -index 000000000000..5e2f2f3b973c ---- /dev/null -+++ b/drivers/staging/apple-bce/apple_bce.c -@@ -0,0 +1,444 @@ -+#include "apple_bce.h" -+#include -+#include -+#include "audio/audio.h" -+#include -+ -+static dev_t bce_chrdev; -+static struct class *bce_class; -+ -+struct apple_bce_device *global_bce; -+ -+static int bce_create_command_queues(struct apple_bce_device *bce); -+static void bce_free_command_queues(struct apple_bce_device *bce); -+static irqreturn_t bce_handle_mb_irq(int irq, void *dev); -+static irqreturn_t bce_handle_dma_irq(int irq, void *dev); -+static int bce_fw_version_handshake(struct apple_bce_device *bce); -+static int bce_register_command_queue(struct apple_bce_device *bce, struct bce_queue_memcfg *cfg, int is_sq); -+ -+static int apple_bce_probe(struct pci_dev *dev, const struct pci_device_id *id) -+{ -+ struct apple_bce_device *bce = NULL; -+ int status = 0; -+ int nvec; -+ -+ pr_info("apple-bce: capturing our 
device\n"); -+ -+ if (pci_enable_device(dev)) -+ return -ENODEV; -+ if (pci_request_regions(dev, "apple-bce")) { -+ status = -ENODEV; -+ goto fail; -+ } -+ pci_set_master(dev); -+ nvec = pci_alloc_irq_vectors(dev, 1, 8, PCI_IRQ_MSI); -+ if (nvec < 5) { -+ status = -EINVAL; -+ goto fail; -+ } -+ -+ bce = kzalloc(sizeof(struct apple_bce_device), GFP_KERNEL); -+ if (!bce) { -+ status = -ENOMEM; -+ goto fail; -+ } -+ -+ bce->pci = dev; -+ pci_set_drvdata(dev, bce); -+ -+ bce->devt = bce_chrdev; -+ bce->dev = device_create(bce_class, &dev->dev, bce->devt, NULL, "apple-bce"); -+ if (IS_ERR_OR_NULL(bce->dev)) { -+ status = PTR_ERR(bce_class); -+ goto fail; -+ } -+ -+ bce->reg_mem_mb = pci_iomap(dev, 4, 0); -+ bce->reg_mem_dma = pci_iomap(dev, 2, 0); -+ -+ if (IS_ERR_OR_NULL(bce->reg_mem_mb) || IS_ERR_OR_NULL(bce->reg_mem_dma)) { -+ dev_warn(&dev->dev, "apple-bce: Failed to pci_iomap required regions\n"); -+ goto fail; -+ } -+ -+ bce_mailbox_init(&bce->mbox, bce->reg_mem_mb); -+ bce_timestamp_init(&bce->timestamp, bce->reg_mem_mb); -+ -+ spin_lock_init(&bce->queues_lock); -+ ida_init(&bce->queue_ida); -+ -+ if ((status = pci_request_irq(dev, 0, bce_handle_mb_irq, NULL, dev, "bce_mbox"))) -+ goto fail; -+ if ((status = pci_request_irq(dev, 4, NULL, bce_handle_dma_irq, dev, "bce_dma"))) -+ goto fail_interrupt_0; -+ -+ if ((status = dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(37)))) { -+ dev_warn(&dev->dev, "dma: Setting mask failed\n"); -+ goto fail_interrupt; -+ } -+ -+ /* Gets the function 0's interface. This is needed because Apple only accepts DMA on our function if function 0 -+ is a bus master, so we need to work around this. 
*/ -+ bce->pci0 = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); -+#ifndef WITHOUT_NVME_PATCH -+ if ((status = pci_enable_device_mem(bce->pci0))) { -+ dev_warn(&dev->dev, "apple-bce: failed to enable function 0\n"); -+ goto fail_dev0; -+ } -+#endif -+ pci_set_master(bce->pci0); -+ -+ bce_timestamp_start(&bce->timestamp, true); -+ -+ if ((status = bce_fw_version_handshake(bce))) -+ goto fail_ts; -+ pr_info("apple-bce: handshake done\n"); -+ -+ if ((status = bce_create_command_queues(bce))) { -+ pr_info("apple-bce: Creating command queues failed\n"); -+ goto fail_ts; -+ } -+ -+ global_bce = bce; -+ -+ bce_vhci_create(bce, &bce->vhci); -+ -+ return 0; -+ -+fail_ts: -+ bce_timestamp_stop(&bce->timestamp); -+#ifndef WITHOUT_NVME_PATCH -+ pci_disable_device(bce->pci0); -+fail_dev0: -+#endif -+ pci_dev_put(bce->pci0); -+fail_interrupt: -+ pci_free_irq(dev, 4, dev); -+fail_interrupt_0: -+ pci_free_irq(dev, 0, dev); -+fail: -+ if (bce && bce->dev) { -+ device_destroy(bce_class, bce->devt); -+ -+ if (!IS_ERR_OR_NULL(bce->reg_mem_mb)) -+ pci_iounmap(dev, bce->reg_mem_mb); -+ if (!IS_ERR_OR_NULL(bce->reg_mem_dma)) -+ pci_iounmap(dev, bce->reg_mem_dma); -+ -+ kfree(bce); -+ } -+ -+ pci_free_irq_vectors(dev); -+ pci_release_regions(dev); -+ pci_disable_device(dev); -+ -+ if (!status) -+ status = -EINVAL; -+ return status; -+} -+ -+static int bce_create_command_queues(struct apple_bce_device *bce) -+{ -+ int status; -+ struct bce_queue_memcfg *cfg; -+ -+ bce->cmd_cq = bce_alloc_cq(bce, 0, 0x20); -+ bce->cmd_cmdq = bce_alloc_cmdq(bce, 1, 0x20); -+ if (bce->cmd_cq == NULL || bce->cmd_cmdq == NULL) { -+ status = -ENOMEM; -+ goto err; -+ } -+ bce->queues[0] = (struct bce_queue *) bce->cmd_cq; -+ bce->queues[1] = (struct bce_queue *) bce->cmd_cmdq->sq; -+ -+ cfg = kzalloc(sizeof(struct bce_queue_memcfg), GFP_KERNEL); -+ if (!cfg) { -+ status = -ENOMEM; -+ goto err; -+ } -+ bce_get_cq_memcfg(bce->cmd_cq, cfg); -+ if ((status = bce_register_command_queue(bce, cfg, false))) 
-+ goto err; -+ bce_get_sq_memcfg(bce->cmd_cmdq->sq, bce->cmd_cq, cfg); -+ if ((status = bce_register_command_queue(bce, cfg, true))) -+ goto err; -+ kfree(cfg); -+ -+ return 0; -+ -+err: -+ if (bce->cmd_cq) -+ bce_free_cq(bce, bce->cmd_cq); -+ if (bce->cmd_cmdq) -+ bce_free_cmdq(bce, bce->cmd_cmdq); -+ return status; -+} -+ -+static void bce_free_command_queues(struct apple_bce_device *bce) -+{ -+ bce_free_cq(bce, bce->cmd_cq); -+ bce_free_cmdq(bce, bce->cmd_cmdq); -+ bce->cmd_cq = NULL; -+ bce->queues[0] = NULL; -+} -+ -+static irqreturn_t bce_handle_mb_irq(int irq, void *dev) -+{ -+ struct apple_bce_device *bce = pci_get_drvdata(dev); -+ bce_mailbox_handle_interrupt(&bce->mbox); -+ return IRQ_HANDLED; -+} -+ -+static irqreturn_t bce_handle_dma_irq(int irq, void *dev) -+{ -+ int i; -+ struct apple_bce_device *bce = pci_get_drvdata(dev); -+ spin_lock(&bce->queues_lock); -+ for (i = 0; i < BCE_MAX_QUEUE_COUNT; i++) -+ if (bce->queues[i] && bce->queues[i]->type == BCE_QUEUE_CQ) -+ bce_handle_cq_completions(bce, (struct bce_queue_cq *) bce->queues[i]); -+ spin_unlock(&bce->queues_lock); -+ return IRQ_HANDLED; -+} -+ -+static int bce_fw_version_handshake(struct apple_bce_device *bce) -+{ -+ u64 result; -+ int status; -+ -+ if ((status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_SET_FW_PROTOCOL_VERSION, BC_PROTOCOL_VERSION), -+ &result))) -+ return status; -+ if (BCE_MB_TYPE(result) != BCE_MB_SET_FW_PROTOCOL_VERSION || -+ BCE_MB_VALUE(result) != BC_PROTOCOL_VERSION) { -+ pr_err("apple-bce: FW version handshake failed %x:%llx\n", BCE_MB_TYPE(result), BCE_MB_VALUE(result)); -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+static int bce_register_command_queue(struct apple_bce_device *bce, struct bce_queue_memcfg *cfg, int is_sq) -+{ -+ int status; -+ int cmd_type; -+ u64 result; -+ // OS X uses an bidirectional direction, but that's not really needed -+ dma_addr_t a = dma_map_single(&bce->pci->dev, cfg, sizeof(struct bce_queue_memcfg), DMA_TO_DEVICE); -+ if 
(dma_mapping_error(&bce->pci->dev, a)) -+ return -ENOMEM; -+ cmd_type = is_sq ? BCE_MB_REGISTER_COMMAND_SQ : BCE_MB_REGISTER_COMMAND_CQ; -+ status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(cmd_type, a), &result); -+ dma_unmap_single(&bce->pci->dev, a, sizeof(struct bce_queue_memcfg), DMA_TO_DEVICE); -+ if (status) -+ return status; -+ if (BCE_MB_TYPE(result) != BCE_MB_REGISTER_COMMAND_QUEUE_REPLY) -+ return -EINVAL; -+ return 0; -+} -+ -+static void apple_bce_remove(struct pci_dev *dev) -+{ -+ struct apple_bce_device *bce = pci_get_drvdata(dev); -+ bce->is_being_removed = true; -+ -+ bce_vhci_destroy(&bce->vhci); -+ -+ bce_timestamp_stop(&bce->timestamp); -+#ifndef WITHOUT_NVME_PATCH -+ pci_disable_device(bce->pci0); -+#endif -+ pci_dev_put(bce->pci0); -+ pci_free_irq(dev, 0, dev); -+ pci_free_irq(dev, 4, dev); -+ bce_free_command_queues(bce); -+ pci_iounmap(dev, bce->reg_mem_mb); -+ pci_iounmap(dev, bce->reg_mem_dma); -+ device_destroy(bce_class, bce->devt); -+ pci_free_irq_vectors(dev); -+ pci_release_regions(dev); -+ pci_disable_device(dev); -+ kfree(bce); -+} -+ -+static int bce_save_state_and_sleep(struct apple_bce_device *bce) -+{ -+ int attempt, status = 0; -+ u64 resp; -+ dma_addr_t dma_addr; -+ void *dma_ptr = NULL; -+ size_t size = max(PAGE_SIZE, 4096UL); -+ -+ for (attempt = 0; attempt < 5; ++attempt) { -+ pr_debug("apple-bce: suspend: attempt %i, buffer size %li\n", attempt, size); -+ dma_ptr = dma_alloc_coherent(&bce->pci->dev, size, &dma_addr, GFP_KERNEL); -+ if (!dma_ptr) { -+ pr_err("apple-bce: suspend failed (data alloc failed)\n"); -+ break; -+ } -+ BUG_ON((dma_addr % 4096) != 0); -+ status = bce_mailbox_send(&bce->mbox, -+ BCE_MB_MSG(BCE_MB_SAVE_STATE_AND_SLEEP, (dma_addr & ~(4096LLU - 1)) | (size / 4096)), &resp); -+ if (status) { -+ pr_err("apple-bce: suspend failed (mailbox send)\n"); -+ break; -+ } -+ if (BCE_MB_TYPE(resp) == BCE_MB_SAVE_RESTORE_STATE_COMPLETE) { -+ bce->saved_data_dma_addr = dma_addr; -+ bce->saved_data_dma_ptr = dma_ptr; -+ 
bce->saved_data_dma_size = size; -+ return 0; -+ } else if (BCE_MB_TYPE(resp) == BCE_MB_SAVE_STATE_AND_SLEEP_FAILURE) { -+ dma_free_coherent(&bce->pci->dev, size, dma_ptr, dma_addr); -+ /* The 0x10ff magic value was extracted from Apple's driver */ -+ size = (BCE_MB_VALUE(resp) + 0x10ff) & ~(4096LLU - 1); -+ pr_debug("apple-bce: suspend: device requested a larger buffer (%li)\n", size); -+ continue; -+ } else { -+ pr_err("apple-bce: suspend failed (invalid device response)\n"); -+ status = -EINVAL; -+ break; -+ } -+ } -+ if (dma_ptr) -+ dma_free_coherent(&bce->pci->dev, size, dma_ptr, dma_addr); -+ if (!status) -+ return bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_SLEEP_NO_STATE, 0), &resp); -+ return status; -+} -+ -+static int bce_restore_state_and_wake(struct apple_bce_device *bce) -+{ -+ int status; -+ u64 resp; -+ if (!bce->saved_data_dma_ptr) { -+ if ((status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_RESTORE_NO_STATE, 0), &resp))) { -+ pr_err("apple-bce: resume with no state failed (mailbox send)\n"); -+ return status; -+ } -+ if (BCE_MB_TYPE(resp) != BCE_MB_RESTORE_NO_STATE) { -+ pr_err("apple-bce: resume with no state failed (invalid device response)\n"); -+ return -EINVAL; -+ } -+ return 0; -+ } -+ -+ if ((status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_RESTORE_STATE_AND_WAKE, -+ (bce->saved_data_dma_addr & ~(4096LLU - 1)) | (bce->saved_data_dma_size / 4096)), &resp))) { -+ pr_err("apple-bce: resume with state failed (mailbox send)\n"); -+ goto finish_with_state; -+ } -+ if (BCE_MB_TYPE(resp) != BCE_MB_SAVE_RESTORE_STATE_COMPLETE) { -+ pr_err("apple-bce: resume with state failed (invalid device response)\n"); -+ status = -EINVAL; -+ goto finish_with_state; -+ } -+ -+finish_with_state: -+ dma_free_coherent(&bce->pci->dev, bce->saved_data_dma_size, bce->saved_data_dma_ptr, bce->saved_data_dma_addr); -+ bce->saved_data_dma_ptr = NULL; -+ return status; -+} -+ -+static int apple_bce_suspend(struct device *dev) -+{ -+ struct apple_bce_device 
*bce = pci_get_drvdata(to_pci_dev(dev)); -+ int status; -+ -+ bce_timestamp_stop(&bce->timestamp); -+ -+ if ((status = bce_save_state_and_sleep(bce))) -+ return status; -+ -+ return 0; -+} -+ -+static int apple_bce_resume(struct device *dev) -+{ -+ struct apple_bce_device *bce = pci_get_drvdata(to_pci_dev(dev)); -+ int status; -+ -+ pci_set_master(bce->pci); -+ pci_set_master(bce->pci0); -+ -+ if ((status = bce_restore_state_and_wake(bce))) -+ return status; -+ -+ bce_timestamp_start(&bce->timestamp, false); -+ -+ return 0; -+} -+ -+static struct pci_device_id apple_bce_ids[ ] = { -+ { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x1801) }, -+ { 0, }, -+}; -+ -+struct dev_pm_ops apple_bce_pci_driver_pm = { -+ .suspend = apple_bce_suspend, -+ .resume = apple_bce_resume -+}; -+struct pci_driver apple_bce_pci_driver = { -+ .name = "apple-bce", -+ .id_table = apple_bce_ids, -+ .probe = apple_bce_probe, -+ .remove = apple_bce_remove, -+ .driver = { -+ .pm = &apple_bce_pci_driver_pm -+ } -+}; -+ -+ -+static int __init apple_bce_module_init(void) -+{ -+ int result; -+ if ((result = alloc_chrdev_region(&bce_chrdev, 0, 1, "apple-bce"))) -+ goto fail_chrdev; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(6,4,0) -+ bce_class = class_create(THIS_MODULE, "apple-bce"); -+#else -+ bce_class = class_create("apple-bce"); -+#endif -+ if (IS_ERR(bce_class)) { -+ result = PTR_ERR(bce_class); -+ goto fail_class; -+ } -+ if ((result = bce_vhci_module_init())) { -+ pr_err("apple-bce: bce-vhci init failed"); -+ goto fail_class; -+ } -+ -+ result = pci_register_driver(&apple_bce_pci_driver); -+ if (result) -+ goto fail_drv; -+ -+ aaudio_module_init(); -+ -+ return 0; -+ -+fail_drv: -+ pci_unregister_driver(&apple_bce_pci_driver); -+fail_class: -+ class_destroy(bce_class); -+fail_chrdev: -+ unregister_chrdev_region(bce_chrdev, 1); -+ if (!result) -+ result = -EINVAL; -+ return result; -+} -+static void __exit apple_bce_module_exit(void) -+{ -+ pci_unregister_driver(&apple_bce_pci_driver); -+ -+ 
aaudio_module_exit(); -+ bce_vhci_module_exit(); -+ class_destroy(bce_class); -+ unregister_chrdev_region(bce_chrdev, 1); -+} -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("MrARM"); -+MODULE_DESCRIPTION("Apple BCE Driver"); -+MODULE_VERSION("0.01"); -+MODULE_ALIAS("pci:v0000106Bd00001801sv*sd*bc*sc*i*"); -+module_init(apple_bce_module_init); -+module_exit(apple_bce_module_exit); -diff --git a/drivers/staging/apple-bce/apple_bce.h b/drivers/staging/apple-bce/apple_bce.h -new file mode 100644 -index 000000000000..f13ab8d5742e ---- /dev/null -+++ b/drivers/staging/apple-bce/apple_bce.h -@@ -0,0 +1,38 @@ -+#pragma once -+ -+#include -+#include -+#include "mailbox.h" -+#include "queue.h" -+#include "vhci/vhci.h" -+ -+#define BC_PROTOCOL_VERSION 0x20001 -+#define BCE_MAX_QUEUE_COUNT 0x100 -+ -+#define BCE_QUEUE_USER_MIN 2 -+#define BCE_QUEUE_USER_MAX (BCE_MAX_QUEUE_COUNT - 1) -+ -+struct apple_bce_device { -+ struct pci_dev *pci, *pci0; -+ dev_t devt; -+ struct device *dev; -+ void __iomem *reg_mem_mb; -+ void __iomem *reg_mem_dma; -+ struct bce_mailbox mbox; -+ struct bce_timestamp timestamp; -+ struct bce_queue *queues[BCE_MAX_QUEUE_COUNT]; -+ struct spinlock queues_lock; -+ struct ida queue_ida; -+ struct bce_queue_cq *cmd_cq; -+ struct bce_queue_cmdq *cmd_cmdq; -+ struct bce_queue_sq *int_sq_list[BCE_MAX_QUEUE_COUNT]; -+ bool is_being_removed; -+ -+ dma_addr_t saved_data_dma_addr; -+ void *saved_data_dma_ptr; -+ size_t saved_data_dma_size; -+ -+ struct bce_vhci vhci; -+}; -+ -+extern struct apple_bce_device *global_bce; -\ No newline at end of file -diff --git a/drivers/staging/apple-bce/audio/audio.c b/drivers/staging/apple-bce/audio/audio.c -new file mode 100644 -index 000000000000..bd16ddd16c1d ---- /dev/null -+++ b/drivers/staging/apple-bce/audio/audio.c -@@ -0,0 +1,711 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "audio.h" -+#include "pcm.h" -+#include -+ -+static int aaudio_alsa_index = SNDRV_DEFAULT_IDX1; 
-+static char *aaudio_alsa_id = SNDRV_DEFAULT_STR1; -+ -+static dev_t aaudio_chrdev; -+static struct class *aaudio_class; -+ -+static int aaudio_init_cmd(struct aaudio_device *a); -+static int aaudio_init_bs(struct aaudio_device *a); -+static void aaudio_init_dev(struct aaudio_device *a, aaudio_device_id_t dev_id); -+static void aaudio_free_dev(struct aaudio_subdevice *sdev); -+ -+static int aaudio_probe(struct pci_dev *dev, const struct pci_device_id *id) -+{ -+ struct aaudio_device *aaudio = NULL; -+ struct aaudio_subdevice *sdev = NULL; -+ int status = 0; -+ u32 cfg; -+ -+ pr_info("aaudio: capturing our device\n"); -+ -+ if (pci_enable_device(dev)) -+ return -ENODEV; -+ if (pci_request_regions(dev, "aaudio")) { -+ status = -ENODEV; -+ goto fail; -+ } -+ pci_set_master(dev); -+ -+ aaudio = kzalloc(sizeof(struct aaudio_device), GFP_KERNEL); -+ if (!aaudio) { -+ status = -ENOMEM; -+ goto fail; -+ } -+ -+ aaudio->bce = global_bce; -+ if (!aaudio->bce) { -+ dev_warn(&dev->dev, "aaudio: No BCE available\n"); -+ status = -EINVAL; -+ goto fail; -+ } -+ -+ aaudio->pci = dev; -+ pci_set_drvdata(dev, aaudio); -+ -+ aaudio->devt = aaudio_chrdev; -+ aaudio->dev = device_create(aaudio_class, &dev->dev, aaudio->devt, NULL, "aaudio"); -+ if (IS_ERR_OR_NULL(aaudio->dev)) { -+ status = PTR_ERR(aaudio_class); -+ goto fail; -+ } -+ device_link_add(aaudio->dev, aaudio->bce->dev, DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_CONSUMER); -+ -+ init_completion(&aaudio->remote_alive); -+ INIT_LIST_HEAD(&aaudio->subdevice_list); -+ -+ /* Init: set an unknown flag in the bitset */ -+ if (pci_read_config_dword(dev, 4, &cfg)) -+ dev_warn(&dev->dev, "aaudio: pci_read_config_dword fail\n"); -+ if (pci_write_config_dword(dev, 4, cfg | 6u)) -+ dev_warn(&dev->dev, "aaudio: pci_write_config_dword fail\n"); -+ -+ dev_info(aaudio->dev, "aaudio: bs len = %llx\n", pci_resource_len(dev, 0)); -+ aaudio->reg_mem_bs_dma = pci_resource_start(dev, 0); -+ aaudio->reg_mem_bs = pci_iomap(dev, 0, 0); -+ 
aaudio->reg_mem_cfg = pci_iomap(dev, 4, 0); -+ -+ aaudio->reg_mem_gpr = (u32 __iomem *) ((u8 __iomem *) aaudio->reg_mem_cfg + 0xC000); -+ -+ if (IS_ERR_OR_NULL(aaudio->reg_mem_bs) || IS_ERR_OR_NULL(aaudio->reg_mem_cfg)) { -+ dev_warn(&dev->dev, "aaudio: Failed to pci_iomap required regions\n"); -+ goto fail; -+ } -+ -+ if (aaudio_bce_init(aaudio)) { -+ dev_warn(&dev->dev, "aaudio: Failed to init BCE command transport\n"); -+ goto fail; -+ } -+ -+ if (snd_card_new(aaudio->dev, aaudio_alsa_index, aaudio_alsa_id, THIS_MODULE, 0, &aaudio->card)) { -+ dev_err(&dev->dev, "aaudio: Failed to create ALSA card\n"); -+ goto fail; -+ } -+ -+ strcpy(aaudio->card->shortname, "Apple T2 Audio"); -+ strcpy(aaudio->card->longname, "Apple T2 Audio"); -+ strcpy(aaudio->card->mixername, "Apple T2 Audio"); -+ /* Dynamic alsa ids start at 100 */ -+ aaudio->next_alsa_id = 100; -+ -+ if (aaudio_init_cmd(aaudio)) { -+ dev_err(&dev->dev, "aaudio: Failed to initialize over BCE\n"); -+ goto fail_snd; -+ } -+ -+ if (aaudio_init_bs(aaudio)) { -+ dev_err(&dev->dev, "aaudio: Failed to initialize BufferStruct\n"); -+ goto fail_snd; -+ } -+ -+ if ((status = aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_ON))) { -+ dev_err(&dev->dev, "Failed to set remote access\n"); -+ return status; -+ } -+ -+ if (snd_card_register(aaudio->card)) { -+ dev_err(&dev->dev, "aaudio: Failed to register ALSA sound device\n"); -+ goto fail_snd; -+ } -+ -+ list_for_each_entry(sdev, &aaudio->subdevice_list, list) { -+ struct aaudio_buffer_struct_device *dev = &aaudio->bs->devices[sdev->buf_id]; -+ -+ if (sdev->out_stream_cnt == 1 && !strcmp(dev->name, "Speaker")) { -+ struct snd_pcm_hardware *hw = sdev->out_streams[0].alsa_hw_desc; -+ -+ snprintf(aaudio->card->driver, sizeof(aaudio->card->driver) / sizeof(char), "AppleT2x%d", hw->channels_min); -+ } -+ } -+ -+ return 0; -+ -+fail_snd: -+ snd_card_free(aaudio->card); -+fail: -+ if (aaudio && aaudio->dev) -+ device_destroy(aaudio_class, aaudio->devt); -+ 
kfree(aaudio); -+ -+ if (!IS_ERR_OR_NULL(aaudio->reg_mem_bs)) -+ pci_iounmap(dev, aaudio->reg_mem_bs); -+ if (!IS_ERR_OR_NULL(aaudio->reg_mem_cfg)) -+ pci_iounmap(dev, aaudio->reg_mem_cfg); -+ -+ pci_release_regions(dev); -+ pci_disable_device(dev); -+ -+ if (!status) -+ status = -EINVAL; -+ return status; -+} -+ -+ -+ -+static void aaudio_remove(struct pci_dev *dev) -+{ -+ struct aaudio_subdevice *sdev; -+ struct aaudio_device *aaudio = pci_get_drvdata(dev); -+ -+ snd_card_free(aaudio->card); -+ while (!list_empty(&aaudio->subdevice_list)) { -+ sdev = list_first_entry(&aaudio->subdevice_list, struct aaudio_subdevice, list); -+ list_del(&sdev->list); -+ aaudio_free_dev(sdev); -+ } -+ pci_iounmap(dev, aaudio->reg_mem_bs); -+ pci_iounmap(dev, aaudio->reg_mem_cfg); -+ device_destroy(aaudio_class, aaudio->devt); -+ pci_free_irq_vectors(dev); -+ pci_release_regions(dev); -+ pci_disable_device(dev); -+ kfree(aaudio); -+} -+ -+static int aaudio_suspend(struct device *dev) -+{ -+ struct aaudio_device *aaudio = pci_get_drvdata(to_pci_dev(dev)); -+ -+ if (aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_OFF)) -+ dev_warn(aaudio->dev, "Failed to reset remote access\n"); -+ -+ pci_disable_device(aaudio->pci); -+ return 0; -+} -+ -+static int aaudio_resume(struct device *dev) -+{ -+ int status; -+ struct aaudio_device *aaudio = pci_get_drvdata(to_pci_dev(dev)); -+ -+ if ((status = pci_enable_device(aaudio->pci))) -+ return status; -+ pci_set_master(aaudio->pci); -+ -+ if ((status = aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_ON))) { -+ dev_err(aaudio->dev, "Failed to set remote access\n"); -+ return status; -+ } -+ -+ return 0; -+} -+ -+static int aaudio_init_cmd(struct aaudio_device *a) -+{ -+ int status; -+ struct aaudio_send_ctx sctx; -+ struct aaudio_msg buf; -+ u64 dev_cnt, dev_i; -+ aaudio_device_id_t *dev_l; -+ -+ if ((status = aaudio_send(a, &sctx, 500, -+ aaudio_msg_write_alive_notification, 1, 3))) { -+ dev_err(a->dev, "Sending alive 
notification failed\n"); -+ return status; -+ } -+ -+ if (wait_for_completion_timeout(&a->remote_alive, msecs_to_jiffies(500)) == 0) { -+ dev_err(a->dev, "Timed out waiting for remote\n"); -+ return -ETIMEDOUT; -+ } -+ dev_info(a->dev, "Continuing init\n"); -+ -+ buf = aaudio_reply_alloc(); -+ if ((status = aaudio_cmd_get_device_list(a, &buf, &dev_l, &dev_cnt))) { -+ dev_err(a->dev, "Failed to get device list\n"); -+ aaudio_reply_free(&buf); -+ return status; -+ } -+ for (dev_i = 0; dev_i < dev_cnt; ++dev_i) -+ aaudio_init_dev(a, dev_l[dev_i]); -+ aaudio_reply_free(&buf); -+ -+ return 0; -+} -+ -+static void aaudio_init_stream_info(struct aaudio_subdevice *sdev, struct aaudio_stream *strm); -+static void aaudio_handle_jack_connection_change(struct aaudio_subdevice *sdev); -+ -+static void aaudio_init_dev(struct aaudio_device *a, aaudio_device_id_t dev_id) -+{ -+ struct aaudio_subdevice *sdev; -+ struct aaudio_msg buf = aaudio_reply_alloc(); -+ u64 uid_len, stream_cnt, i; -+ aaudio_object_id_t *stream_list; -+ char *uid; -+ -+ sdev = kzalloc(sizeof(struct aaudio_subdevice), GFP_KERNEL); -+ -+ if (aaudio_cmd_get_property(a, &buf, dev_id, dev_id, AAUDIO_PROP(AAUDIO_PROP_SCOPE_GLOBAL, AAUDIO_PROP_UID, 0), -+ NULL, 0, (void **) &uid, &uid_len) || uid_len > AAUDIO_DEVICE_MAX_UID_LEN) { -+ dev_err(a->dev, "Failed to get device uid for device %llx\n", dev_id); -+ goto fail; -+ } -+ dev_info(a->dev, "Remote device %llx %.*s\n", dev_id, (int) uid_len, uid); -+ -+ sdev->a = a; -+ INIT_LIST_HEAD(&sdev->list); -+ sdev->dev_id = dev_id; -+ sdev->buf_id = AAUDIO_BUFFER_ID_NONE; -+ strncpy(sdev->uid, uid, uid_len); -+ sdev->uid[uid_len + 1] = '\0'; -+ -+ if (aaudio_cmd_get_primitive_property(a, dev_id, dev_id, -+ AAUDIO_PROP(AAUDIO_PROP_SCOPE_INPUT, AAUDIO_PROP_LATENCY, 0), NULL, 0, &sdev->in_latency, sizeof(u32))) -+ dev_warn(a->dev, "Failed to query device input latency\n"); -+ if (aaudio_cmd_get_primitive_property(a, dev_id, dev_id, -+ AAUDIO_PROP(AAUDIO_PROP_SCOPE_OUTPUT, 
AAUDIO_PROP_LATENCY, 0), NULL, 0, &sdev->out_latency, sizeof(u32))) -+ dev_warn(a->dev, "Failed to query device output latency\n"); -+ -+ if (aaudio_cmd_get_input_stream_list(a, &buf, dev_id, &stream_list, &stream_cnt)) { -+ dev_err(a->dev, "Failed to get input stream list for device %llx\n", dev_id); -+ goto fail; -+ } -+ if (stream_cnt > AAUDIO_DEIVCE_MAX_INPUT_STREAMS) { -+ dev_warn(a->dev, "Device %s input stream count %llu is larger than the supported count of %u\n", -+ sdev->uid, stream_cnt, AAUDIO_DEIVCE_MAX_INPUT_STREAMS); -+ stream_cnt = AAUDIO_DEIVCE_MAX_INPUT_STREAMS; -+ } -+ sdev->in_stream_cnt = stream_cnt; -+ for (i = 0; i < stream_cnt; i++) { -+ sdev->in_streams[i].id = stream_list[i]; -+ sdev->in_streams[i].buffer_cnt = 0; -+ aaudio_init_stream_info(sdev, &sdev->in_streams[i]); -+ sdev->in_streams[i].latency += sdev->in_latency; -+ } -+ -+ if (aaudio_cmd_get_output_stream_list(a, &buf, dev_id, &stream_list, &stream_cnt)) { -+ dev_err(a->dev, "Failed to get output stream list for device %llx\n", dev_id); -+ goto fail; -+ } -+ if (stream_cnt > AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS) { -+ dev_warn(a->dev, "Device %s input stream count %llu is larger than the supported count of %u\n", -+ sdev->uid, stream_cnt, AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS); -+ stream_cnt = AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS; -+ } -+ sdev->out_stream_cnt = stream_cnt; -+ for (i = 0; i < stream_cnt; i++) { -+ sdev->out_streams[i].id = stream_list[i]; -+ sdev->out_streams[i].buffer_cnt = 0; -+ aaudio_init_stream_info(sdev, &sdev->out_streams[i]); -+ sdev->out_streams[i].latency += sdev->in_latency; -+ } -+ -+ if (sdev->is_pcm) -+ aaudio_create_pcm(sdev); -+ /* Headphone Jack status */ -+ if (!strcmp(sdev->uid, "Codec Output")) { -+ if (snd_jack_new(a->card, sdev->uid, SND_JACK_HEADPHONE, &sdev->jack, true, false)) -+ dev_warn(a->dev, "Failed to create an attached jack for %s\n", sdev->uid); -+ aaudio_cmd_property_listener(a, sdev->dev_id, sdev->dev_id, -+ AAUDIO_PROP(AAUDIO_PROP_SCOPE_OUTPUT, 
AAUDIO_PROP_JACK_PLUGGED, 0)); -+ aaudio_handle_jack_connection_change(sdev); -+ } -+ -+ aaudio_reply_free(&buf); -+ -+ list_add_tail(&sdev->list, &a->subdevice_list); -+ return; -+ -+fail: -+ aaudio_reply_free(&buf); -+ kfree(sdev); -+} -+ -+static void aaudio_init_stream_info(struct aaudio_subdevice *sdev, struct aaudio_stream *strm) -+{ -+ if (aaudio_cmd_get_primitive_property(sdev->a, sdev->dev_id, strm->id, -+ AAUDIO_PROP(AAUDIO_PROP_SCOPE_GLOBAL, AAUDIO_PROP_PHYS_FORMAT, 0), NULL, 0, -+ &strm->desc, sizeof(strm->desc))) -+ dev_warn(sdev->a->dev, "Failed to query stream descriptor\n"); -+ if (aaudio_cmd_get_primitive_property(sdev->a, sdev->dev_id, strm->id, -+ AAUDIO_PROP(AAUDIO_PROP_SCOPE_GLOBAL, AAUDIO_PROP_LATENCY, 0), NULL, 0, &strm->latency, sizeof(u32))) -+ dev_warn(sdev->a->dev, "Failed to query stream latency\n"); -+ if (strm->desc.format_id == AAUDIO_FORMAT_LPCM) -+ sdev->is_pcm = true; -+} -+ -+static void aaudio_free_dev(struct aaudio_subdevice *sdev) -+{ -+ size_t i; -+ for (i = 0; i < sdev->in_stream_cnt; i++) { -+ if (sdev->in_streams[i].alsa_hw_desc) -+ kfree(sdev->in_streams[i].alsa_hw_desc); -+ if (sdev->in_streams[i].buffers) -+ kfree(sdev->in_streams[i].buffers); -+ } -+ for (i = 0; i < sdev->out_stream_cnt; i++) { -+ if (sdev->out_streams[i].alsa_hw_desc) -+ kfree(sdev->out_streams[i].alsa_hw_desc); -+ if (sdev->out_streams[i].buffers) -+ kfree(sdev->out_streams[i].buffers); -+ } -+ kfree(sdev); -+} -+ -+static struct aaudio_subdevice *aaudio_find_dev_by_dev_id(struct aaudio_device *a, aaudio_device_id_t dev_id) -+{ -+ struct aaudio_subdevice *sdev; -+ list_for_each_entry(sdev, &a->subdevice_list, list) { -+ if (dev_id == sdev->dev_id) -+ return sdev; -+ } -+ return NULL; -+} -+ -+static struct aaudio_subdevice *aaudio_find_dev_by_uid(struct aaudio_device *a, const char *uid) -+{ -+ struct aaudio_subdevice *sdev; -+ list_for_each_entry(sdev, &a->subdevice_list, list) { -+ if (!strcmp(uid, sdev->uid)) -+ return sdev; -+ } -+ return NULL; 
-+} -+ -+static void aaudio_init_bs_stream(struct aaudio_device *a, struct aaudio_stream *strm, -+ struct aaudio_buffer_struct_stream *bs_strm); -+static void aaudio_init_bs_stream_host(struct aaudio_device *a, struct aaudio_stream *strm, -+ struct aaudio_buffer_struct_stream *bs_strm); -+ -+static int aaudio_init_bs(struct aaudio_device *a) -+{ -+ int i, j; -+ struct aaudio_buffer_struct_device *dev; -+ struct aaudio_subdevice *sdev; -+ u32 ver, sig, bs_base; -+ -+ ver = ioread32(&a->reg_mem_gpr[0]); -+ if (ver < 3) { -+ dev_err(a->dev, "aaudio: Bad GPR version (%u)", ver); -+ return -EINVAL; -+ } -+ sig = ioread32(&a->reg_mem_gpr[1]); -+ if (sig != AAUDIO_SIG) { -+ dev_err(a->dev, "aaudio: Bad GPR sig (%x)", sig); -+ return -EINVAL; -+ } -+ bs_base = ioread32(&a->reg_mem_gpr[2]); -+ a->bs = (struct aaudio_buffer_struct *) ((u8 *) a->reg_mem_bs + bs_base); -+ if (a->bs->signature != AAUDIO_SIG) { -+ dev_err(a->dev, "aaudio: Bad BufferStruct sig (%x)", a->bs->signature); -+ return -EINVAL; -+ } -+ dev_info(a->dev, "aaudio: BufferStruct ver = %i\n", a->bs->version); -+ dev_info(a->dev, "aaudio: Num devices = %i\n", a->bs->num_devices); -+ for (i = 0; i < a->bs->num_devices; i++) { -+ dev = &a->bs->devices[i]; -+ dev_info(a->dev, "aaudio: Device %i %s\n", i, dev->name); -+ -+ sdev = aaudio_find_dev_by_uid(a, dev->name); -+ if (!sdev) { -+ dev_err(a->dev, "aaudio: Subdevice not found for BufferStruct device %s\n", dev->name); -+ continue; -+ } -+ sdev->buf_id = (u8) i; -+ dev->num_input_streams = 0; -+ for (j = 0; j < dev->num_output_streams; j++) { -+ dev_info(a->dev, "aaudio: Device %i Stream %i: Output; Buffer Count = %i\n", i, j, -+ dev->output_streams[j].num_buffers); -+ if (j < sdev->out_stream_cnt) -+ aaudio_init_bs_stream(a, &sdev->out_streams[j], &dev->output_streams[j]); -+ } -+ } -+ -+ list_for_each_entry(sdev, &a->subdevice_list, list) { -+ if (sdev->buf_id != AAUDIO_BUFFER_ID_NONE) -+ continue; -+ sdev->buf_id = i; -+ dev_info(a->dev, "aaudio: Created 
device %i %s\n", i, sdev->uid); -+ strcpy(a->bs->devices[i].name, sdev->uid); -+ a->bs->devices[i].num_input_streams = 0; -+ a->bs->devices[i].num_output_streams = 0; -+ a->bs->num_devices = ++i; -+ } -+ list_for_each_entry(sdev, &a->subdevice_list, list) { -+ if (sdev->in_stream_cnt == 1) { -+ dev_info(a->dev, "aaudio: Device %i Host Stream; Input\n", sdev->buf_id); -+ aaudio_init_bs_stream_host(a, &sdev->in_streams[0], &a->bs->devices[sdev->buf_id].input_streams[0]); -+ a->bs->devices[sdev->buf_id].num_input_streams = 1; -+ wmb(); -+ -+ if (aaudio_cmd_set_input_stream_address_ranges(a, sdev->dev_id)) { -+ dev_err(a->dev, "aaudio: Failed to set input stream address ranges\n"); -+ } -+ } -+ } -+ -+ return 0; -+} -+ -+static void aaudio_init_bs_stream(struct aaudio_device *a, struct aaudio_stream *strm, -+ struct aaudio_buffer_struct_stream *bs_strm) -+{ -+ size_t i; -+ strm->buffer_cnt = bs_strm->num_buffers; -+ if (bs_strm->num_buffers > AAUDIO_DEIVCE_MAX_BUFFER_COUNT) { -+ dev_warn(a->dev, "BufferStruct buffer count %u exceeds driver limit of %u\n", bs_strm->num_buffers, -+ AAUDIO_DEIVCE_MAX_BUFFER_COUNT); -+ strm->buffer_cnt = AAUDIO_DEIVCE_MAX_BUFFER_COUNT; -+ } -+ if (!strm->buffer_cnt) -+ return; -+ strm->buffers = kmalloc_array(strm->buffer_cnt, sizeof(struct aaudio_dma_buf), GFP_KERNEL); -+ if (!strm->buffers) { -+ dev_err(a->dev, "Buffer list allocation failed\n"); -+ return; -+ } -+ for (i = 0; i < strm->buffer_cnt; i++) { -+ strm->buffers[i].dma_addr = a->reg_mem_bs_dma + (dma_addr_t) bs_strm->buffers[i].address; -+ strm->buffers[i].ptr = a->reg_mem_bs + bs_strm->buffers[i].address; -+ strm->buffers[i].size = bs_strm->buffers[i].size; -+ } -+ -+ if (strm->buffer_cnt == 1) { -+ strm->alsa_hw_desc = kmalloc(sizeof(struct snd_pcm_hardware), GFP_KERNEL); -+ if (aaudio_create_hw_info(&strm->desc, strm->alsa_hw_desc, strm->buffers[0].size)) { -+ kfree(strm->alsa_hw_desc); -+ strm->alsa_hw_desc = NULL; -+ } -+ } -+} -+ -+static void 
aaudio_init_bs_stream_host(struct aaudio_device *a, struct aaudio_stream *strm, -+ struct aaudio_buffer_struct_stream *bs_strm) -+{ -+ size_t size; -+ dma_addr_t dma_addr; -+ void *dma_ptr; -+ size = strm->desc.bytes_per_packet * 16640; -+ dma_ptr = dma_alloc_coherent(&a->pci->dev, size, &dma_addr, GFP_KERNEL); -+ if (!dma_ptr) { -+ dev_err(a->dev, "dma_alloc_coherent failed\n"); -+ return; -+ } -+ bs_strm->buffers[0].address = dma_addr; -+ bs_strm->buffers[0].size = size; -+ bs_strm->num_buffers = 1; -+ -+ memset(dma_ptr, 0, size); -+ -+ strm->buffer_cnt = 1; -+ strm->buffers = kmalloc_array(strm->buffer_cnt, sizeof(struct aaudio_dma_buf), GFP_KERNEL); -+ if (!strm->buffers) { -+ dev_err(a->dev, "Buffer list allocation failed\n"); -+ return; -+ } -+ strm->buffers[0].dma_addr = dma_addr; -+ strm->buffers[0].ptr = dma_ptr; -+ strm->buffers[0].size = size; -+ -+ strm->alsa_hw_desc = kmalloc(sizeof(struct snd_pcm_hardware), GFP_KERNEL); -+ if (aaudio_create_hw_info(&strm->desc, strm->alsa_hw_desc, strm->buffers[0].size)) { -+ kfree(strm->alsa_hw_desc); -+ strm->alsa_hw_desc = NULL; -+ } -+} -+ -+static void aaudio_handle_prop_change(struct aaudio_device *a, struct aaudio_msg *msg); -+ -+void aaudio_handle_notification(struct aaudio_device *a, struct aaudio_msg *msg) -+{ -+ struct aaudio_send_ctx sctx; -+ struct aaudio_msg_base base; -+ if (aaudio_msg_read_base(msg, &base)) -+ return; -+ switch (base.msg) { -+ case AAUDIO_MSG_NOTIFICATION_BOOT: -+ dev_info(a->dev, "Received boot notification from remote\n"); -+ -+ /* Resend the alive notify */ -+ if (aaudio_send(a, &sctx, 500, -+ aaudio_msg_write_alive_notification, 1, 3)) { -+ pr_err("Sending alive notification failed\n"); -+ } -+ break; -+ case AAUDIO_MSG_NOTIFICATION_ALIVE: -+ dev_info(a->dev, "Received alive notification from remote\n"); -+ complete_all(&a->remote_alive); -+ break; -+ case AAUDIO_MSG_PROPERTY_CHANGED: -+ aaudio_handle_prop_change(a, msg); -+ break; -+ default: -+ dev_info(a->dev, "Unhandled 
notification %i", base.msg); -+ break; -+ } -+} -+ -+struct aaudio_prop_change_work_struct { -+ struct work_struct ws; -+ struct aaudio_device *a; -+ aaudio_device_id_t dev; -+ aaudio_object_id_t obj; -+ struct aaudio_prop_addr prop; -+}; -+ -+static void aaudio_handle_jack_connection_change(struct aaudio_subdevice *sdev) -+{ -+ u32 plugged; -+ if (!sdev->jack) -+ return; -+ /* NOTE: Apple made the plug status scoped to the input and output streams. This makes no sense for us, so I just -+ * always pick the OUTPUT status. */ -+ if (aaudio_cmd_get_primitive_property(sdev->a, sdev->dev_id, sdev->dev_id, -+ AAUDIO_PROP(AAUDIO_PROP_SCOPE_OUTPUT, AAUDIO_PROP_JACK_PLUGGED, 0), NULL, 0, &plugged, sizeof(plugged))) { -+ dev_err(sdev->a->dev, "Failed to get jack enable status\n"); -+ return; -+ } -+ dev_dbg(sdev->a->dev, "Jack is now %s\n", plugged ? "plugged" : "unplugged"); -+ snd_jack_report(sdev->jack, plugged ? sdev->jack->type : 0); -+} -+ -+void aaudio_handle_prop_change_work(struct work_struct *ws) -+{ -+ struct aaudio_prop_change_work_struct *work = container_of(ws, struct aaudio_prop_change_work_struct, ws); -+ struct aaudio_subdevice *sdev; -+ -+ sdev = aaudio_find_dev_by_dev_id(work->a, work->dev); -+ if (!sdev) { -+ dev_err(work->a->dev, "Property notification change: device not found\n"); -+ goto done; -+ } -+ dev_dbg(work->a->dev, "Property changed for device: %s\n", sdev->uid); -+ -+ if (work->prop.scope == AAUDIO_PROP_SCOPE_OUTPUT && work->prop.selector == AAUDIO_PROP_JACK_PLUGGED) { -+ aaudio_handle_jack_connection_change(sdev); -+ } -+ -+done: -+ kfree(work); -+} -+ -+void aaudio_handle_prop_change(struct aaudio_device *a, struct aaudio_msg *msg) -+{ -+ /* NOTE: This is a scheduled work because this callback will generally need to query device information and this -+ * is not possible when we are in the reply parsing code's context. 
*/ -+ struct aaudio_prop_change_work_struct *work; -+ work = kmalloc(sizeof(struct aaudio_prop_change_work_struct), GFP_KERNEL); -+ work->a = a; -+ INIT_WORK(&work->ws, aaudio_handle_prop_change_work); -+ aaudio_msg_read_property_changed(msg, &work->dev, &work->obj, &work->prop); -+ schedule_work(&work->ws); -+} -+ -+#define aaudio_send_cmd_response(a, sctx, msg, fn, ...) \ -+ if (aaudio_send_with_tag(a, sctx, ((struct aaudio_msg_header *) msg->data)->tag, 500, fn, ##__VA_ARGS__)) \ -+ pr_err("aaudio: Failed to reply to a command\n"); -+ -+void aaudio_handle_cmd_timestamp(struct aaudio_device *a, struct aaudio_msg *msg) -+{ -+ ktime_t time_os = ktime_get_boottime(); -+ struct aaudio_send_ctx sctx; -+ struct aaudio_subdevice *sdev; -+ u64 devid, timestamp, update_seed; -+ aaudio_msg_read_update_timestamp(msg, &devid, ×tamp, &update_seed); -+ dev_dbg(a->dev, "Received timestamp update for dev=%llx ts=%llx seed=%llx\n", devid, timestamp, update_seed); -+ -+ sdev = aaudio_find_dev_by_dev_id(a, devid); -+ aaudio_handle_timestamp(sdev, time_os, timestamp); -+ -+ aaudio_send_cmd_response(a, &sctx, msg, -+ aaudio_msg_write_update_timestamp_response); -+} -+ -+void aaudio_handle_command(struct aaudio_device *a, struct aaudio_msg *msg) -+{ -+ struct aaudio_msg_base base; -+ if (aaudio_msg_read_base(msg, &base)) -+ return; -+ switch (base.msg) { -+ case AAUDIO_MSG_UPDATE_TIMESTAMP: -+ aaudio_handle_cmd_timestamp(a, msg); -+ break; -+ default: -+ dev_info(a->dev, "Unhandled device command %i", base.msg); -+ break; -+ } -+} -+ -+static struct pci_device_id aaudio_ids[ ] = { -+ { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x1803) }, -+ { 0, }, -+}; -+ -+struct dev_pm_ops aaudio_pci_driver_pm = { -+ .suspend = aaudio_suspend, -+ .resume = aaudio_resume -+}; -+struct pci_driver aaudio_pci_driver = { -+ .name = "aaudio", -+ .id_table = aaudio_ids, -+ .probe = aaudio_probe, -+ .remove = aaudio_remove, -+ .driver = { -+ .pm = &aaudio_pci_driver_pm -+ } -+}; -+ -+ -+int aaudio_module_init(void) 
-+{ -+ int result; -+ if ((result = alloc_chrdev_region(&aaudio_chrdev, 0, 1, "aaudio"))) -+ goto fail_chrdev; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(6,4,0) -+ aaudio_class = class_create(THIS_MODULE, "aaudio"); -+#else -+ aaudio_class = class_create("aaudio"); -+#endif -+ if (IS_ERR(aaudio_class)) { -+ result = PTR_ERR(aaudio_class); -+ goto fail_class; -+ } -+ -+ result = pci_register_driver(&aaudio_pci_driver); -+ if (result) -+ goto fail_drv; -+ return 0; -+ -+fail_drv: -+ pci_unregister_driver(&aaudio_pci_driver); -+fail_class: -+ class_destroy(aaudio_class); -+fail_chrdev: -+ unregister_chrdev_region(aaudio_chrdev, 1); -+ if (!result) -+ result = -EINVAL; -+ return result; -+} -+ -+void aaudio_module_exit(void) -+{ -+ pci_unregister_driver(&aaudio_pci_driver); -+ class_destroy(aaudio_class); -+ unregister_chrdev_region(aaudio_chrdev, 1); -+} -+ -+struct aaudio_alsa_pcm_id_mapping aaudio_alsa_id_mappings[] = { -+ {"Speaker", 0}, -+ {"Digital Mic", 1}, -+ {"Codec Output", 2}, -+ {"Codec Input", 3}, -+ {"Bridge Loopback", 4}, -+ {} -+}; -+ -+module_param_named(index, aaudio_alsa_index, int, 0444); -+MODULE_PARM_DESC(index, "Index value for Apple Internal Audio soundcard."); -+module_param_named(id, aaudio_alsa_id, charp, 0444); -+MODULE_PARM_DESC(id, "ID string for Apple Internal Audio soundcard."); -diff --git a/drivers/staging/apple-bce/audio/audio.h b/drivers/staging/apple-bce/audio/audio.h -new file mode 100644 -index 000000000000..004bc1e22ea4 ---- /dev/null -+++ b/drivers/staging/apple-bce/audio/audio.h -@@ -0,0 +1,125 @@ -+#ifndef AAUDIO_H -+#define AAUDIO_H -+ -+#include -+#include -+#include "../apple_bce.h" -+#include "protocol_bce.h" -+#include "description.h" -+ -+#define AAUDIO_SIG 0x19870423 -+ -+#define AAUDIO_DEVICE_MAX_UID_LEN 128 -+#define AAUDIO_DEIVCE_MAX_INPUT_STREAMS 1 -+#define AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS 1 -+#define AAUDIO_DEIVCE_MAX_BUFFER_COUNT 1 -+ -+#define AAUDIO_BUFFER_ID_NONE 0xffu -+ -+struct snd_card; -+struct snd_pcm; 
-+struct snd_pcm_hardware; -+struct snd_jack; -+ -+struct __attribute__((packed)) __attribute__((aligned(4))) aaudio_buffer_struct_buffer { -+ size_t address; -+ size_t size; -+ size_t pad[4]; -+}; -+struct aaudio_buffer_struct_stream { -+ u8 num_buffers; -+ struct aaudio_buffer_struct_buffer buffers[100]; -+ char filler[32]; -+}; -+struct aaudio_buffer_struct_device { -+ char name[128]; -+ u8 num_input_streams; -+ u8 num_output_streams; -+ struct aaudio_buffer_struct_stream input_streams[5]; -+ struct aaudio_buffer_struct_stream output_streams[5]; -+ char filler[128]; -+}; -+struct aaudio_buffer_struct { -+ u32 version; -+ u32 signature; -+ u32 flags; -+ u8 num_devices; -+ struct aaudio_buffer_struct_device devices[20]; -+}; -+ -+struct aaudio_device; -+struct aaudio_dma_buf { -+ dma_addr_t dma_addr; -+ void *ptr; -+ size_t size; -+}; -+struct aaudio_stream { -+ aaudio_object_id_t id; -+ size_t buffer_cnt; -+ struct aaudio_dma_buf *buffers; -+ -+ struct aaudio_apple_description desc; -+ struct snd_pcm_hardware *alsa_hw_desc; -+ u32 latency; -+ -+ bool waiting_for_first_ts; -+ -+ ktime_t remote_timestamp; -+ snd_pcm_sframes_t frame_min; -+ int started; -+}; -+struct aaudio_subdevice { -+ struct aaudio_device *a; -+ struct list_head list; -+ aaudio_device_id_t dev_id; -+ u32 in_latency, out_latency; -+ u8 buf_id; -+ int alsa_id; -+ char uid[AAUDIO_DEVICE_MAX_UID_LEN + 1]; -+ size_t in_stream_cnt; -+ struct aaudio_stream in_streams[AAUDIO_DEIVCE_MAX_INPUT_STREAMS]; -+ size_t out_stream_cnt; -+ struct aaudio_stream out_streams[AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS]; -+ bool is_pcm; -+ struct snd_pcm *pcm; -+ struct snd_jack *jack; -+}; -+struct aaudio_alsa_pcm_id_mapping { -+ const char *name; -+ int alsa_id; -+}; -+ -+struct aaudio_device { -+ struct pci_dev *pci; -+ dev_t devt; -+ struct device *dev; -+ void __iomem *reg_mem_bs; -+ dma_addr_t reg_mem_bs_dma; -+ void __iomem *reg_mem_cfg; -+ -+ u32 __iomem *reg_mem_gpr; -+ -+ struct aaudio_buffer_struct *bs; -+ -+ struct 
apple_bce_device *bce; -+ struct aaudio_bce bcem; -+ -+ struct snd_card *card; -+ -+ struct list_head subdevice_list; -+ int next_alsa_id; -+ -+ struct completion remote_alive; -+}; -+ -+void aaudio_handle_notification(struct aaudio_device *a, struct aaudio_msg *msg); -+void aaudio_handle_prop_change_work(struct work_struct *ws); -+void aaudio_handle_cmd_timestamp(struct aaudio_device *a, struct aaudio_msg *msg); -+void aaudio_handle_command(struct aaudio_device *a, struct aaudio_msg *msg); -+ -+int aaudio_module_init(void); -+void aaudio_module_exit(void); -+ -+extern struct aaudio_alsa_pcm_id_mapping aaudio_alsa_id_mappings[]; -+ -+#endif //AAUDIO_H -diff --git a/drivers/staging/apple-bce/audio/description.h b/drivers/staging/apple-bce/audio/description.h -new file mode 100644 -index 000000000000..dfef3ab68f27 ---- /dev/null -+++ b/drivers/staging/apple-bce/audio/description.h -@@ -0,0 +1,42 @@ -+#ifndef AAUDIO_DESCRIPTION_H -+#define AAUDIO_DESCRIPTION_H -+ -+#include -+ -+struct aaudio_apple_description { -+ u64 sample_rate_double; -+ u32 format_id; -+ u32 format_flags; -+ u32 bytes_per_packet; -+ u32 frames_per_packet; -+ u32 bytes_per_frame; -+ u32 channels_per_frame; -+ u32 bits_per_channel; -+ u32 reserved; -+}; -+ -+enum { -+ AAUDIO_FORMAT_LPCM = 0x6c70636d // 'lpcm' -+}; -+ -+enum { -+ AAUDIO_FORMAT_FLAG_FLOAT = 1, -+ AAUDIO_FORMAT_FLAG_BIG_ENDIAN = 2, -+ AAUDIO_FORMAT_FLAG_SIGNED = 4, -+ AAUDIO_FORMAT_FLAG_PACKED = 8, -+ AAUDIO_FORMAT_FLAG_ALIGNED_HIGH = 16, -+ AAUDIO_FORMAT_FLAG_NON_INTERLEAVED = 32, -+ AAUDIO_FORMAT_FLAG_NON_MIXABLE = 64 -+}; -+ -+static inline u64 aaudio_double_to_u64(u64 d) -+{ -+ u8 sign = (u8) ((d >> 63) & 1); -+ s32 exp = (s32) ((d >> 52) & 0x7ff) - 1023; -+ u64 fr = d & ((1LL << 52) - 1); -+ if (sign || exp < 0) -+ return 0; -+ return (u64) ((1LL << exp) + (fr >> (52 - exp))); -+} -+ -+#endif //AAUDIO_DESCRIPTION_H -diff --git a/drivers/staging/apple-bce/audio/pcm.c b/drivers/staging/apple-bce/audio/pcm.c -new file mode 100644 
-index 000000000000..1026e10a9ac5 ---- /dev/null -+++ b/drivers/staging/apple-bce/audio/pcm.c -@@ -0,0 +1,308 @@ -+#include "pcm.h" -+#include "audio.h" -+ -+static u64 aaudio_get_alsa_fmtbit(struct aaudio_apple_description *desc) -+{ -+ if (desc->format_flags & AAUDIO_FORMAT_FLAG_FLOAT) { -+ if (desc->bits_per_channel == 32) { -+ if (desc->format_flags & AAUDIO_FORMAT_FLAG_BIG_ENDIAN) -+ return SNDRV_PCM_FMTBIT_FLOAT_BE; -+ else -+ return SNDRV_PCM_FMTBIT_FLOAT_LE; -+ } else if (desc->bits_per_channel == 64) { -+ if (desc->format_flags & AAUDIO_FORMAT_FLAG_BIG_ENDIAN) -+ return SNDRV_PCM_FMTBIT_FLOAT64_BE; -+ else -+ return SNDRV_PCM_FMTBIT_FLOAT64_LE; -+ } else { -+ pr_err("aaudio: unsupported bits per channel for float format: %u\n", desc->bits_per_channel); -+ return 0; -+ } -+ } -+#define DEFINE_BPC_OPTION(val, b) \ -+ case val: \ -+ if (desc->format_flags & AAUDIO_FORMAT_FLAG_BIG_ENDIAN) { \ -+ if (desc->format_flags & AAUDIO_FORMAT_FLAG_SIGNED) \ -+ return SNDRV_PCM_FMTBIT_S ## b ## BE; \ -+ else \ -+ return SNDRV_PCM_FMTBIT_U ## b ## BE; \ -+ } else { \ -+ if (desc->format_flags & AAUDIO_FORMAT_FLAG_SIGNED) \ -+ return SNDRV_PCM_FMTBIT_S ## b ## LE; \ -+ else \ -+ return SNDRV_PCM_FMTBIT_U ## b ## LE; \ -+ } -+ if (desc->format_flags & AAUDIO_FORMAT_FLAG_PACKED) { -+ switch (desc->bits_per_channel) { -+ case 8: -+ case 16: -+ case 32: -+ break; -+ DEFINE_BPC_OPTION(24, 24_3) -+ default: -+ pr_err("aaudio: unsupported bits per channel for packed format: %u\n", desc->bits_per_channel); -+ return 0; -+ } -+ } -+ if (desc->format_flags & AAUDIO_FORMAT_FLAG_ALIGNED_HIGH) { -+ switch (desc->bits_per_channel) { -+ DEFINE_BPC_OPTION(24, 32_) -+ default: -+ pr_err("aaudio: unsupported bits per channel for high-aligned format: %u\n", desc->bits_per_channel); -+ return 0; -+ } -+ } -+ switch (desc->bits_per_channel) { -+ case 8: -+ if (desc->format_flags & AAUDIO_FORMAT_FLAG_SIGNED) -+ return SNDRV_PCM_FMTBIT_S8; -+ else -+ return SNDRV_PCM_FMTBIT_U8; -+ 
DEFINE_BPC_OPTION(16, 16_) -+ DEFINE_BPC_OPTION(24, 24_) -+ DEFINE_BPC_OPTION(32, 32_) -+ default: -+ pr_err("aaudio: unsupported bits per channel: %u\n", desc->bits_per_channel); -+ return 0; -+ } -+} -+int aaudio_create_hw_info(struct aaudio_apple_description *desc, struct snd_pcm_hardware *alsa_hw, -+ size_t buf_size) -+{ -+ uint rate; -+ alsa_hw->info = (SNDRV_PCM_INFO_MMAP | -+ SNDRV_PCM_INFO_BLOCK_TRANSFER | -+ SNDRV_PCM_INFO_MMAP_VALID | -+ SNDRV_PCM_INFO_DOUBLE); -+ if (desc->format_flags & AAUDIO_FORMAT_FLAG_NON_MIXABLE) -+ pr_warn("aaudio: unsupported hw flag: NON_MIXABLE\n"); -+ if (!(desc->format_flags & AAUDIO_FORMAT_FLAG_NON_INTERLEAVED)) -+ alsa_hw->info |= SNDRV_PCM_INFO_INTERLEAVED; -+ alsa_hw->formats = aaudio_get_alsa_fmtbit(desc); -+ if (!alsa_hw->formats) -+ return -EINVAL; -+ rate = (uint) aaudio_double_to_u64(desc->sample_rate_double); -+ alsa_hw->rates = snd_pcm_rate_to_rate_bit(rate); -+ alsa_hw->rate_min = rate; -+ alsa_hw->rate_max = rate; -+ alsa_hw->channels_min = desc->channels_per_frame; -+ alsa_hw->channels_max = desc->channels_per_frame; -+ alsa_hw->buffer_bytes_max = buf_size; -+ alsa_hw->period_bytes_min = desc->bytes_per_packet; -+ alsa_hw->period_bytes_max = desc->bytes_per_packet; -+ alsa_hw->periods_min = (uint) (buf_size / desc->bytes_per_packet); -+ alsa_hw->periods_max = (uint) (buf_size / desc->bytes_per_packet); -+ pr_debug("aaudio_create_hw_info: format = %llu, rate = %u/%u. 
channels = %u, periods = %u, period size = %lu\n", -+ alsa_hw->formats, alsa_hw->rate_min, alsa_hw->rates, alsa_hw->channels_min, alsa_hw->periods_min, -+ alsa_hw->period_bytes_min); -+ return 0; -+} -+ -+static struct aaudio_stream *aaudio_pcm_stream(struct snd_pcm_substream *substream) -+{ -+ struct aaudio_subdevice *sdev = snd_pcm_substream_chip(substream); -+ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) -+ return &sdev->out_streams[substream->number]; -+ else -+ return &sdev->in_streams[substream->number]; -+} -+ -+static int aaudio_pcm_open(struct snd_pcm_substream *substream) -+{ -+ pr_debug("aaudio_pcm_open\n"); -+ substream->runtime->hw = *aaudio_pcm_stream(substream)->alsa_hw_desc; -+ -+ return 0; -+} -+ -+static int aaudio_pcm_close(struct snd_pcm_substream *substream) -+{ -+ pr_debug("aaudio_pcm_close\n"); -+ return 0; -+} -+ -+static int aaudio_pcm_prepare(struct snd_pcm_substream *substream) -+{ -+ return 0; -+} -+ -+static int aaudio_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hw_params) -+{ -+ struct aaudio_stream *astream = aaudio_pcm_stream(substream); -+ pr_debug("aaudio_pcm_hw_params\n"); -+ -+ if (!astream->buffer_cnt || !astream->buffers) -+ return -EINVAL; -+ -+ substream->runtime->dma_area = astream->buffers[0].ptr; -+ substream->runtime->dma_addr = astream->buffers[0].dma_addr; -+ substream->runtime->dma_bytes = astream->buffers[0].size; -+ return 0; -+} -+ -+static int aaudio_pcm_hw_free(struct snd_pcm_substream *substream) -+{ -+ pr_debug("aaudio_pcm_hw_free\n"); -+ return 0; -+} -+ -+static void aaudio_pcm_start(struct snd_pcm_substream *substream) -+{ -+ struct aaudio_subdevice *sdev = snd_pcm_substream_chip(substream); -+ struct aaudio_stream *stream = aaudio_pcm_stream(substream); -+ void *buf; -+ size_t s; -+ ktime_t time_start, time_end; -+ bool back_buffer; -+ time_start = ktime_get(); -+ -+ back_buffer = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK); -+ -+ if (back_buffer) { -+ s = 
frames_to_bytes(substream->runtime, substream->runtime->control->appl_ptr); -+ buf = kmalloc(s, GFP_KERNEL); -+ memcpy_fromio(buf, substream->runtime->dma_area, s); -+ time_end = ktime_get(); -+ pr_debug("aaudio: Backed up the buffer in %lluns [%li]\n", ktime_to_ns(time_end - time_start), -+ substream->runtime->control->appl_ptr); -+ } -+ -+ stream->waiting_for_first_ts = true; -+ stream->frame_min = stream->latency; -+ -+ aaudio_cmd_start_io(sdev->a, sdev->dev_id); -+ if (back_buffer) -+ memcpy_toio(substream->runtime->dma_area, buf, s); -+ -+ time_end = ktime_get(); -+ pr_debug("aaudio: Started the audio device in %lluns\n", ktime_to_ns(time_end - time_start)); -+} -+ -+static int aaudio_pcm_trigger(struct snd_pcm_substream *substream, int cmd) -+{ -+ struct aaudio_subdevice *sdev = snd_pcm_substream_chip(substream); -+ struct aaudio_stream *stream = aaudio_pcm_stream(substream); -+ pr_debug("aaudio_pcm_trigger %x\n", cmd); -+ -+ /* We only supports triggers on the #0 buffer */ -+ if (substream->number != 0) -+ return 0; -+ switch (cmd) { -+ case SNDRV_PCM_TRIGGER_START: -+ aaudio_pcm_start(substream); -+ stream->started = 1; -+ break; -+ case SNDRV_PCM_TRIGGER_STOP: -+ aaudio_cmd_stop_io(sdev->a, sdev->dev_id); -+ stream->started = 0; -+ break; -+ default: -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+static snd_pcm_uframes_t aaudio_pcm_pointer(struct snd_pcm_substream *substream) -+{ -+ struct aaudio_stream *stream = aaudio_pcm_stream(substream); -+ ktime_t time_from_start; -+ snd_pcm_sframes_t frames; -+ snd_pcm_sframes_t buffer_time_length; -+ -+ if (!stream->started || stream->waiting_for_first_ts) { -+ pr_warn("aaudio_pcm_pointer while not started\n"); -+ return 0; -+ } -+ -+ /* Approximate the pointer based on the last received timestamp */ -+ time_from_start = ktime_get_boottime() - stream->remote_timestamp; -+ buffer_time_length = NSEC_PER_SEC * substream->runtime->buffer_size / substream->runtime->rate; -+ frames = (ktime_to_ns(time_from_start) % 
buffer_time_length) * substream->runtime->buffer_size / buffer_time_length; -+ if (ktime_to_ns(time_from_start) < buffer_time_length) { -+ if (frames < stream->frame_min) -+ frames = stream->frame_min; -+ else -+ stream->frame_min = 0; -+ } else { -+ if (ktime_to_ns(time_from_start) < 2 * buffer_time_length) -+ stream->frame_min = frames; -+ else -+ stream->frame_min = 0; /* Heavy desync */ -+ } -+ frames -= stream->latency; -+ if (frames < 0) -+ frames += ((-frames - 1) / substream->runtime->buffer_size + 1) * substream->runtime->buffer_size; -+ return (snd_pcm_uframes_t) frames; -+} -+ -+static struct snd_pcm_ops aaudio_pcm_ops = { -+ .open = aaudio_pcm_open, -+ .close = aaudio_pcm_close, -+ .ioctl = snd_pcm_lib_ioctl, -+ .hw_params = aaudio_pcm_hw_params, -+ .hw_free = aaudio_pcm_hw_free, -+ .prepare = aaudio_pcm_prepare, -+ .trigger = aaudio_pcm_trigger, -+ .pointer = aaudio_pcm_pointer, -+ .mmap = snd_pcm_lib_mmap_iomem -+}; -+ -+int aaudio_create_pcm(struct aaudio_subdevice *sdev) -+{ -+ struct snd_pcm *pcm; -+ struct aaudio_alsa_pcm_id_mapping *id_mapping; -+ int err; -+ -+ if (!sdev->is_pcm || (sdev->in_stream_cnt == 0 && sdev->out_stream_cnt == 0)) { -+ return -EINVAL; -+ } -+ -+ for (id_mapping = aaudio_alsa_id_mappings; id_mapping->name; id_mapping++) { -+ if (!strcmp(sdev->uid, id_mapping->name)) { -+ sdev->alsa_id = id_mapping->alsa_id; -+ break; -+ } -+ } -+ if (!id_mapping->name) -+ sdev->alsa_id = sdev->a->next_alsa_id++; -+ err = snd_pcm_new(sdev->a->card, sdev->uid, sdev->alsa_id, -+ (int) sdev->out_stream_cnt, (int) sdev->in_stream_cnt, &pcm); -+ if (err < 0) -+ return err; -+ pcm->private_data = sdev; -+ pcm->nonatomic = 1; -+ sdev->pcm = pcm; -+ strcpy(pcm->name, sdev->uid); -+ snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &aaudio_pcm_ops); -+ snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &aaudio_pcm_ops); -+ return 0; -+} -+ -+static void aaudio_handle_stream_timestamp(struct snd_pcm_substream *substream, ktime_t timestamp) -+{ -+ unsigned 
long flags; -+ struct aaudio_stream *stream; -+ -+ stream = aaudio_pcm_stream(substream); -+ snd_pcm_stream_lock_irqsave(substream, flags); -+ stream->remote_timestamp = timestamp; -+ if (stream->waiting_for_first_ts) { -+ stream->waiting_for_first_ts = false; -+ snd_pcm_stream_unlock_irqrestore(substream, flags); -+ return; -+ } -+ snd_pcm_stream_unlock_irqrestore(substream, flags); -+ snd_pcm_period_elapsed(substream); -+} -+ -+void aaudio_handle_timestamp(struct aaudio_subdevice *sdev, ktime_t os_timestamp, u64 dev_timestamp) -+{ -+ struct snd_pcm_substream *substream; -+ -+ substream = sdev->pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream; -+ if (substream) -+ aaudio_handle_stream_timestamp(substream, dev_timestamp); -+ substream = sdev->pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream; -+ if (substream) -+ aaudio_handle_stream_timestamp(substream, os_timestamp); -+} -diff --git a/drivers/staging/apple-bce/audio/pcm.h b/drivers/staging/apple-bce/audio/pcm.h -new file mode 100644 -index 000000000000..ea5f35fbe408 ---- /dev/null -+++ b/drivers/staging/apple-bce/audio/pcm.h -@@ -0,0 +1,16 @@ -+#ifndef AAUDIO_PCM_H -+#define AAUDIO_PCM_H -+ -+#include -+#include -+ -+struct aaudio_subdevice; -+struct aaudio_apple_description; -+struct snd_pcm_hardware; -+ -+int aaudio_create_hw_info(struct aaudio_apple_description *desc, struct snd_pcm_hardware *alsa_hw, size_t buf_size); -+int aaudio_create_pcm(struct aaudio_subdevice *sdev); -+ -+void aaudio_handle_timestamp(struct aaudio_subdevice *sdev, ktime_t os_timestamp, u64 dev_timestamp); -+ -+#endif //AAUDIO_PCM_H -diff --git a/drivers/staging/apple-bce/audio/protocol.c b/drivers/staging/apple-bce/audio/protocol.c -new file mode 100644 -index 000000000000..2314813aeead ---- /dev/null -+++ b/drivers/staging/apple-bce/audio/protocol.c -@@ -0,0 +1,347 @@ -+#include "protocol.h" -+#include "protocol_bce.h" -+#include "audio.h" -+ -+int aaudio_msg_read_base(struct aaudio_msg *msg, struct aaudio_msg_base *base) -+{ -+ if 
(msg->size < sizeof(struct aaudio_msg_header) + sizeof(struct aaudio_msg_base) * 2) -+ return -EINVAL; -+ *base = *((struct aaudio_msg_base *) ((struct aaudio_msg_header *) msg->data + 1)); -+ return 0; -+} -+ -+#define READ_START(type) \ -+ size_t offset = sizeof(struct aaudio_msg_header) + sizeof(struct aaudio_msg_base); (void)offset; \ -+ if (((struct aaudio_msg_base *) ((struct aaudio_msg_header *) msg->data + 1))->msg != type) \ -+ return -EINVAL; -+#define READ_DEVID_VAR(devid) *devid = ((struct aaudio_msg_header *) msg->data)->device_id -+#define READ_VAL(type) ({ offset += sizeof(type); *((type *) ((u8 *) msg->data + offset - sizeof(type))); }) -+#define READ_VAR(type, var) *var = READ_VAL(type) -+ -+int aaudio_msg_read_start_io_response(struct aaudio_msg *msg) -+{ -+ READ_START(AAUDIO_MSG_START_IO_RESPONSE); -+ return 0; -+} -+ -+int aaudio_msg_read_stop_io_response(struct aaudio_msg *msg) -+{ -+ READ_START(AAUDIO_MSG_STOP_IO_RESPONSE); -+ return 0; -+} -+ -+int aaudio_msg_read_update_timestamp(struct aaudio_msg *msg, aaudio_device_id_t *devid, -+ u64 *timestamp, u64 *update_seed) -+{ -+ READ_START(AAUDIO_MSG_UPDATE_TIMESTAMP); -+ READ_DEVID_VAR(devid); -+ READ_VAR(u64, timestamp); -+ READ_VAR(u64, update_seed); -+ return 0; -+} -+ -+int aaudio_msg_read_get_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj, -+ struct aaudio_prop_addr *prop, void **data, u64 *data_size) -+{ -+ READ_START(AAUDIO_MSG_GET_PROPERTY_RESPONSE); -+ READ_VAR(aaudio_object_id_t, obj); -+ READ_VAR(u32, &prop->element); -+ READ_VAR(u32, &prop->scope); -+ READ_VAR(u32, &prop->selector); -+ READ_VAR(u64, data_size); -+ *data = ((u8 *) msg->data + offset); -+ /* offset += data_size; */ -+ return 0; -+} -+ -+int aaudio_msg_read_set_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj) -+{ -+ READ_START(AAUDIO_MSG_SET_PROPERTY_RESPONSE); -+ READ_VAR(aaudio_object_id_t, obj); -+ return 0; -+} -+ -+int aaudio_msg_read_property_listener_response(struct aaudio_msg 
*msg, aaudio_object_id_t *obj, -+ struct aaudio_prop_addr *prop) -+{ -+ READ_START(AAUDIO_MSG_PROPERTY_LISTENER_RESPONSE); -+ READ_VAR(aaudio_object_id_t, obj); -+ READ_VAR(u32, &prop->element); -+ READ_VAR(u32, &prop->scope); -+ READ_VAR(u32, &prop->selector); -+ return 0; -+} -+ -+int aaudio_msg_read_property_changed(struct aaudio_msg *msg, aaudio_device_id_t *devid, aaudio_object_id_t *obj, -+ struct aaudio_prop_addr *prop) -+{ -+ READ_START(AAUDIO_MSG_PROPERTY_CHANGED); -+ READ_DEVID_VAR(devid); -+ READ_VAR(aaudio_object_id_t, obj); -+ READ_VAR(u32, &prop->element); -+ READ_VAR(u32, &prop->scope); -+ READ_VAR(u32, &prop->selector); -+ return 0; -+} -+ -+int aaudio_msg_read_set_input_stream_address_ranges_response(struct aaudio_msg *msg) -+{ -+ READ_START(AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES_RESPONSE); -+ return 0; -+} -+ -+int aaudio_msg_read_get_input_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt) -+{ -+ READ_START(AAUDIO_MSG_GET_INPUT_STREAM_LIST_RESPONSE); -+ READ_VAR(u64, str_cnt); -+ *str_l = (aaudio_device_id_t *) ((u8 *) msg->data + offset); -+ /* offset += str_cnt * sizeof(aaudio_object_id_t); */ -+ return 0; -+} -+ -+int aaudio_msg_read_get_output_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt) -+{ -+ READ_START(AAUDIO_MSG_GET_OUTPUT_STREAM_LIST_RESPONSE); -+ READ_VAR(u64, str_cnt); -+ *str_l = (aaudio_device_id_t *) ((u8 *) msg->data + offset); -+ /* offset += str_cnt * sizeof(aaudio_object_id_t); */ -+ return 0; -+} -+ -+int aaudio_msg_read_set_remote_access_response(struct aaudio_msg *msg) -+{ -+ READ_START(AAUDIO_MSG_SET_REMOTE_ACCESS_RESPONSE); -+ return 0; -+} -+ -+int aaudio_msg_read_get_device_list_response(struct aaudio_msg *msg, aaudio_device_id_t **dev_l, u64 *dev_cnt) -+{ -+ READ_START(AAUDIO_MSG_GET_DEVICE_LIST_RESPONSE); -+ READ_VAR(u64, dev_cnt); -+ *dev_l = (aaudio_device_id_t *) ((u8 *) msg->data + offset); -+ /* offset += dev_cnt * 
sizeof(aaudio_device_id_t); */ -+ return 0; -+} -+ -+#define WRITE_START_OF_TYPE(typev, devid) \ -+ size_t offset = sizeof(struct aaudio_msg_header); (void) offset; \ -+ ((struct aaudio_msg_header *) msg->data)->type = (typev); \ -+ ((struct aaudio_msg_header *) msg->data)->device_id = (devid); -+#define WRITE_START_COMMAND(devid) WRITE_START_OF_TYPE(AAUDIO_MSG_TYPE_COMMAND, devid) -+#define WRITE_START_RESPONSE() WRITE_START_OF_TYPE(AAUDIO_MSG_TYPE_RESPONSE, 0) -+#define WRITE_START_NOTIFICATION() WRITE_START_OF_TYPE(AAUDIO_MSG_TYPE_NOTIFICATION, 0) -+#define WRITE_VAL(type, value) { *((type *) ((u8 *) msg->data + offset)) = value; offset += sizeof(value); } -+#define WRITE_BIN(value, size) { memcpy((u8 *) msg->data + offset, value, size); offset += size; } -+#define WRITE_BASE(type) WRITE_VAL(u32, type) WRITE_VAL(u32, 0) -+#define WRITE_END() { msg->size = offset; } -+ -+void aaudio_msg_write_start_io(struct aaudio_msg *msg, aaudio_device_id_t dev) -+{ -+ WRITE_START_COMMAND(dev); -+ WRITE_BASE(AAUDIO_MSG_START_IO); -+ WRITE_END(); -+} -+ -+void aaudio_msg_write_stop_io(struct aaudio_msg *msg, aaudio_device_id_t dev) -+{ -+ WRITE_START_COMMAND(dev); -+ WRITE_BASE(AAUDIO_MSG_STOP_IO); -+ WRITE_END(); -+} -+ -+void aaudio_msg_write_get_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj, -+ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size) -+{ -+ WRITE_START_COMMAND(dev); -+ WRITE_BASE(AAUDIO_MSG_GET_PROPERTY); -+ WRITE_VAL(aaudio_object_id_t, obj); -+ WRITE_VAL(u32, prop.element); -+ WRITE_VAL(u32, prop.scope); -+ WRITE_VAL(u32, prop.selector); -+ WRITE_VAL(u64, qualifier_size); -+ WRITE_BIN(qualifier, qualifier_size); -+ WRITE_END(); -+} -+ -+void aaudio_msg_write_set_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj, -+ struct aaudio_prop_addr prop, void *data, u64 data_size, void *qualifier, u64 qualifier_size) -+{ -+ WRITE_START_COMMAND(dev); -+ WRITE_BASE(AAUDIO_MSG_SET_PROPERTY); -+ 
WRITE_VAL(aaudio_object_id_t, obj); -+ WRITE_VAL(u32, prop.element); -+ WRITE_VAL(u32, prop.scope); -+ WRITE_VAL(u32, prop.selector); -+ WRITE_VAL(u64, data_size); -+ WRITE_BIN(data, data_size); -+ WRITE_VAL(u64, qualifier_size); -+ WRITE_BIN(qualifier, qualifier_size); -+ WRITE_END(); -+} -+ -+void aaudio_msg_write_property_listener(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj, -+ struct aaudio_prop_addr prop) -+{ -+ WRITE_START_COMMAND(dev); -+ WRITE_BASE(AAUDIO_MSG_PROPERTY_LISTENER); -+ WRITE_VAL(aaudio_object_id_t, obj); -+ WRITE_VAL(u32, prop.element); -+ WRITE_VAL(u32, prop.scope); -+ WRITE_VAL(u32, prop.selector); -+ WRITE_END(); -+} -+ -+void aaudio_msg_write_set_input_stream_address_ranges(struct aaudio_msg *msg, aaudio_device_id_t devid) -+{ -+ WRITE_START_COMMAND(devid); -+ WRITE_BASE(AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES); -+ WRITE_END(); -+} -+ -+void aaudio_msg_write_get_input_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid) -+{ -+ WRITE_START_COMMAND(devid); -+ WRITE_BASE(AAUDIO_MSG_GET_INPUT_STREAM_LIST); -+ WRITE_END(); -+} -+ -+void aaudio_msg_write_get_output_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid) -+{ -+ WRITE_START_COMMAND(devid); -+ WRITE_BASE(AAUDIO_MSG_GET_OUTPUT_STREAM_LIST); -+ WRITE_END(); -+} -+ -+void aaudio_msg_write_set_remote_access(struct aaudio_msg *msg, u64 mode) -+{ -+ WRITE_START_COMMAND(0); -+ WRITE_BASE(AAUDIO_MSG_SET_REMOTE_ACCESS); -+ WRITE_VAL(u64, mode); -+ WRITE_END(); -+} -+ -+void aaudio_msg_write_alive_notification(struct aaudio_msg *msg, u32 proto_ver, u32 msg_ver) -+{ -+ WRITE_START_NOTIFICATION(); -+ WRITE_BASE(AAUDIO_MSG_NOTIFICATION_ALIVE); -+ WRITE_VAL(u32, proto_ver); -+ WRITE_VAL(u32, msg_ver); -+ WRITE_END(); -+} -+ -+void aaudio_msg_write_update_timestamp_response(struct aaudio_msg *msg) -+{ -+ WRITE_START_RESPONSE(); -+ WRITE_BASE(AAUDIO_MSG_UPDATE_TIMESTAMP_RESPONSE); -+ WRITE_END(); -+} -+ -+void aaudio_msg_write_get_device_list(struct 
aaudio_msg *msg) -+{ -+ WRITE_START_COMMAND(0); -+ WRITE_BASE(AAUDIO_MSG_GET_DEVICE_LIST); -+ WRITE_END(); -+} -+ -+#define CMD_SHARED_VARS_NO_REPLY \ -+ int status = 0; \ -+ struct aaudio_send_ctx sctx; -+#define CMD_SHARED_VARS \ -+ CMD_SHARED_VARS_NO_REPLY \ -+ struct aaudio_msg reply = aaudio_reply_alloc(); \ -+ struct aaudio_msg *buf = &reply; -+#define CMD_SEND_REQUEST(fn, ...) \ -+ if ((status = aaudio_send_cmd_sync(a, &sctx, buf, 500, fn, ##__VA_ARGS__))) \ -+ return status; -+#define CMD_DEF_SHARED_AND_SEND(fn, ...) \ -+ CMD_SHARED_VARS \ -+ CMD_SEND_REQUEST(fn, ##__VA_ARGS__); -+#define CMD_DEF_SHARED_NO_REPLY_AND_SEND(fn, ...) \ -+ CMD_SHARED_VARS_NO_REPLY \ -+ CMD_SEND_REQUEST(fn, ##__VA_ARGS__); -+#define CMD_HNDL_REPLY_NO_FREE(fn, ...) \ -+ status = fn(buf, ##__VA_ARGS__); \ -+ return status; -+#define CMD_HNDL_REPLY_AND_FREE(fn, ...) \ -+ status = fn(buf, ##__VA_ARGS__); \ -+ aaudio_reply_free(&reply); \ -+ return status; -+ -+int aaudio_cmd_start_io(struct aaudio_device *a, aaudio_device_id_t devid) -+{ -+ CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_start_io, devid); -+ CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_start_io_response); -+} -+int aaudio_cmd_stop_io(struct aaudio_device *a, aaudio_device_id_t devid) -+{ -+ CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_stop_io, devid); -+ CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_stop_io_response); -+} -+int aaudio_cmd_get_property(struct aaudio_device *a, struct aaudio_msg *buf, -+ aaudio_device_id_t devid, aaudio_object_id_t obj, -+ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void **data, u64 *data_size) -+{ -+ CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_property, devid, obj, prop, qualifier, qualifier_size); -+ CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_property_response, &obj, &prop, data, data_size); -+} -+int aaudio_cmd_get_primitive_property(struct aaudio_device *a, -+ aaudio_device_id_t devid, aaudio_object_id_t obj, -+ struct aaudio_prop_addr prop, void *qualifier, u64 
qualifier_size, void *data, u64 data_size) -+{ -+ int status; -+ struct aaudio_msg reply = aaudio_reply_alloc(); -+ void *r_data; -+ u64 r_data_size; -+ if ((status = aaudio_cmd_get_property(a, &reply, devid, obj, prop, qualifier, qualifier_size, -+ &r_data, &r_data_size))) -+ goto finish; -+ if (r_data_size != data_size) { -+ status = -EINVAL; -+ goto finish; -+ } -+ memcpy(data, r_data, data_size); -+finish: -+ aaudio_reply_free(&reply); -+ return status; -+} -+int aaudio_cmd_set_property(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj, -+ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size) -+{ -+ CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_set_property, devid, obj, prop, data, data_size, -+ qualifier, qualifier_size); -+ CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_set_property_response, &obj); -+} -+int aaudio_cmd_property_listener(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj, -+ struct aaudio_prop_addr prop) -+{ -+ CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_property_listener, devid, obj, prop); -+ CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_property_listener_response, &obj, &prop); -+} -+int aaudio_cmd_set_input_stream_address_ranges(struct aaudio_device *a, aaudio_device_id_t devid) -+{ -+ CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_set_input_stream_address_ranges, devid); -+ CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_set_input_stream_address_ranges_response); -+} -+int aaudio_cmd_get_input_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid, -+ aaudio_object_id_t **str_l, u64 *str_cnt) -+{ -+ CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_input_stream_list, devid); -+ CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_input_stream_list_response, str_l, str_cnt); -+} -+int aaudio_cmd_get_output_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid, -+ aaudio_object_id_t **str_l, u64 *str_cnt) -+{ -+ 
CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_output_stream_list, devid); -+ CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_output_stream_list_response, str_l, str_cnt); -+} -+int aaudio_cmd_set_remote_access(struct aaudio_device *a, u64 mode) -+{ -+ CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_set_remote_access, mode); -+ CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_set_remote_access_response); -+} -+int aaudio_cmd_get_device_list(struct aaudio_device *a, struct aaudio_msg *buf, -+ aaudio_device_id_t **dev_l, u64 *dev_cnt) -+{ -+ CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_device_list); -+ CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_device_list_response, dev_l, dev_cnt); -+} -\ No newline at end of file -diff --git a/drivers/staging/apple-bce/audio/protocol.h b/drivers/staging/apple-bce/audio/protocol.h -new file mode 100644 -index 000000000000..3427486f3f57 ---- /dev/null -+++ b/drivers/staging/apple-bce/audio/protocol.h -@@ -0,0 +1,147 @@ -+#ifndef AAUDIO_PROTOCOL_H -+#define AAUDIO_PROTOCOL_H -+ -+#include -+ -+struct aaudio_device; -+ -+typedef u64 aaudio_device_id_t; -+typedef u64 aaudio_object_id_t; -+ -+struct aaudio_msg { -+ void *data; -+ size_t size; -+}; -+ -+struct __attribute__((packed)) aaudio_msg_header { -+ char tag[4]; -+ u8 type; -+ aaudio_device_id_t device_id; // Idk, use zero for commands? 
-+}; -+struct __attribute__((packed)) aaudio_msg_base { -+ u32 msg; -+ u32 status; -+}; -+ -+struct aaudio_prop_addr { -+ u32 scope; -+ u32 selector; -+ u32 element; -+}; -+#define AAUDIO_PROP(scope, sel, el) (struct aaudio_prop_addr) { scope, sel, el } -+ -+enum { -+ AAUDIO_MSG_TYPE_COMMAND = 1, -+ AAUDIO_MSG_TYPE_RESPONSE = 2, -+ AAUDIO_MSG_TYPE_NOTIFICATION = 3 -+}; -+ -+enum { -+ AAUDIO_MSG_START_IO = 0, -+ AAUDIO_MSG_START_IO_RESPONSE = 1, -+ AAUDIO_MSG_STOP_IO = 2, -+ AAUDIO_MSG_STOP_IO_RESPONSE = 3, -+ AAUDIO_MSG_UPDATE_TIMESTAMP = 4, -+ AAUDIO_MSG_GET_PROPERTY = 7, -+ AAUDIO_MSG_GET_PROPERTY_RESPONSE = 8, -+ AAUDIO_MSG_SET_PROPERTY = 9, -+ AAUDIO_MSG_SET_PROPERTY_RESPONSE = 10, -+ AAUDIO_MSG_PROPERTY_LISTENER = 11, -+ AAUDIO_MSG_PROPERTY_LISTENER_RESPONSE = 12, -+ AAUDIO_MSG_PROPERTY_CHANGED = 13, -+ AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES = 18, -+ AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES_RESPONSE = 19, -+ AAUDIO_MSG_GET_INPUT_STREAM_LIST = 24, -+ AAUDIO_MSG_GET_INPUT_STREAM_LIST_RESPONSE = 25, -+ AAUDIO_MSG_GET_OUTPUT_STREAM_LIST = 26, -+ AAUDIO_MSG_GET_OUTPUT_STREAM_LIST_RESPONSE = 27, -+ AAUDIO_MSG_SET_REMOTE_ACCESS = 32, -+ AAUDIO_MSG_SET_REMOTE_ACCESS_RESPONSE = 33, -+ AAUDIO_MSG_UPDATE_TIMESTAMP_RESPONSE = 34, -+ -+ AAUDIO_MSG_NOTIFICATION_ALIVE = 100, -+ AAUDIO_MSG_GET_DEVICE_LIST = 101, -+ AAUDIO_MSG_GET_DEVICE_LIST_RESPONSE = 102, -+ AAUDIO_MSG_NOTIFICATION_BOOT = 104 -+}; -+ -+enum { -+ AAUDIO_REMOTE_ACCESS_OFF = 0, -+ AAUDIO_REMOTE_ACCESS_ON = 2 -+}; -+ -+enum { -+ AAUDIO_PROP_SCOPE_GLOBAL = 0x676c6f62, // 'glob' -+ AAUDIO_PROP_SCOPE_INPUT = 0x696e7074, // 'inpt' -+ AAUDIO_PROP_SCOPE_OUTPUT = 0x6f757470 // 'outp' -+}; -+ -+enum { -+ AAUDIO_PROP_UID = 0x75696420, // 'uid ' -+ AAUDIO_PROP_BOOL_VALUE = 0x6263766c, // 'bcvl' -+ AAUDIO_PROP_JACK_PLUGGED = 0x6a61636b, // 'jack' -+ AAUDIO_PROP_SEL_VOLUME = 0x64656176, // 'deav' -+ AAUDIO_PROP_LATENCY = 0x6c746e63, // 'ltnc' -+ AAUDIO_PROP_PHYS_FORMAT = 0x70667420 // 'pft ' -+}; -+ -+int 
aaudio_msg_read_base(struct aaudio_msg *msg, struct aaudio_msg_base *base); -+ -+int aaudio_msg_read_start_io_response(struct aaudio_msg *msg); -+int aaudio_msg_read_stop_io_response(struct aaudio_msg *msg); -+int aaudio_msg_read_update_timestamp(struct aaudio_msg *msg, aaudio_device_id_t *devid, -+ u64 *timestamp, u64 *update_seed); -+int aaudio_msg_read_get_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj, -+ struct aaudio_prop_addr *prop, void **data, u64 *data_size); -+int aaudio_msg_read_set_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj); -+int aaudio_msg_read_property_listener_response(struct aaudio_msg *msg,aaudio_object_id_t *obj, -+ struct aaudio_prop_addr *prop); -+int aaudio_msg_read_property_changed(struct aaudio_msg *msg, aaudio_device_id_t *devid, aaudio_object_id_t *obj, -+ struct aaudio_prop_addr *prop); -+int aaudio_msg_read_set_input_stream_address_ranges_response(struct aaudio_msg *msg); -+int aaudio_msg_read_get_input_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt); -+int aaudio_msg_read_get_output_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt); -+int aaudio_msg_read_set_remote_access_response(struct aaudio_msg *msg); -+int aaudio_msg_read_get_device_list_response(struct aaudio_msg *msg, aaudio_device_id_t **dev_l, u64 *dev_cnt); -+ -+void aaudio_msg_write_start_io(struct aaudio_msg *msg, aaudio_device_id_t dev); -+void aaudio_msg_write_stop_io(struct aaudio_msg *msg, aaudio_device_id_t dev); -+void aaudio_msg_write_get_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj, -+ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size); -+void aaudio_msg_write_set_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj, -+ struct aaudio_prop_addr prop, void *data, u64 data_size, void *qualifier, u64 qualifier_size); -+void aaudio_msg_write_property_listener(struct aaudio_msg 
*msg, aaudio_device_id_t dev, aaudio_object_id_t obj, -+ struct aaudio_prop_addr prop); -+void aaudio_msg_write_set_input_stream_address_ranges(struct aaudio_msg *msg, aaudio_device_id_t devid); -+void aaudio_msg_write_get_input_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid); -+void aaudio_msg_write_get_output_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid); -+void aaudio_msg_write_set_remote_access(struct aaudio_msg *msg, u64 mode); -+void aaudio_msg_write_alive_notification(struct aaudio_msg *msg, u32 proto_ver, u32 msg_ver); -+void aaudio_msg_write_update_timestamp_response(struct aaudio_msg *msg); -+void aaudio_msg_write_get_device_list(struct aaudio_msg *msg); -+ -+ -+int aaudio_cmd_start_io(struct aaudio_device *a, aaudio_device_id_t devid); -+int aaudio_cmd_stop_io(struct aaudio_device *a, aaudio_device_id_t devid); -+int aaudio_cmd_get_property(struct aaudio_device *a, struct aaudio_msg *buf, -+ aaudio_device_id_t devid, aaudio_object_id_t obj, -+ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void **data, u64 *data_size); -+int aaudio_cmd_get_primitive_property(struct aaudio_device *a, -+ aaudio_device_id_t devid, aaudio_object_id_t obj, -+ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size); -+int aaudio_cmd_set_property(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj, -+ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size); -+int aaudio_cmd_property_listener(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj, -+ struct aaudio_prop_addr prop); -+int aaudio_cmd_set_input_stream_address_ranges(struct aaudio_device *a, aaudio_device_id_t devid); -+int aaudio_cmd_get_input_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid, -+ aaudio_object_id_t **str_l, u64 *str_cnt); -+int aaudio_cmd_get_output_stream_list(struct aaudio_device *a, struct 
aaudio_msg *buf, aaudio_device_id_t devid, -+ aaudio_object_id_t **str_l, u64 *str_cnt); -+int aaudio_cmd_set_remote_access(struct aaudio_device *a, u64 mode); -+int aaudio_cmd_get_device_list(struct aaudio_device *a, struct aaudio_msg *buf, -+ aaudio_device_id_t **dev_l, u64 *dev_cnt); -+ -+ -+ -+#endif //AAUDIO_PROTOCOL_H -diff --git a/drivers/staging/apple-bce/audio/protocol_bce.c b/drivers/staging/apple-bce/audio/protocol_bce.c -new file mode 100644 -index 000000000000..28f2dfd44d67 ---- /dev/null -+++ b/drivers/staging/apple-bce/audio/protocol_bce.c -@@ -0,0 +1,226 @@ -+#include "protocol_bce.h" -+ -+#include "audio.h" -+ -+static void aaudio_bce_out_queue_completion(struct bce_queue_sq *sq); -+static void aaudio_bce_in_queue_completion(struct bce_queue_sq *sq); -+static int aaudio_bce_queue_init(struct aaudio_device *dev, struct aaudio_bce_queue *q, const char *name, int direction, -+ bce_sq_completion cfn); -+void aaudio_bce_in_queue_submit_pending(struct aaudio_bce_queue *q, size_t count); -+ -+int aaudio_bce_init(struct aaudio_device *dev) -+{ -+ int status; -+ struct aaudio_bce *bce = &dev->bcem; -+ bce->cq = bce_create_cq(dev->bce, 0x80); -+ spin_lock_init(&bce->spinlock); -+ if (!bce->cq) -+ return -EINVAL; -+ if ((status = aaudio_bce_queue_init(dev, &bce->qout, "com.apple.BridgeAudio.IntelToARM", DMA_TO_DEVICE, -+ aaudio_bce_out_queue_completion))) { -+ return status; -+ } -+ if ((status = aaudio_bce_queue_init(dev, &bce->qin, "com.apple.BridgeAudio.ARMToIntel", DMA_FROM_DEVICE, -+ aaudio_bce_in_queue_completion))) { -+ return status; -+ } -+ aaudio_bce_in_queue_submit_pending(&bce->qin, bce->qin.el_count); -+ return 0; -+} -+ -+int aaudio_bce_queue_init(struct aaudio_device *dev, struct aaudio_bce_queue *q, const char *name, int direction, -+ bce_sq_completion cfn) -+{ -+ q->cq = dev->bcem.cq; -+ q->el_size = AAUDIO_BCE_QUEUE_ELEMENT_SIZE; -+ q->el_count = AAUDIO_BCE_QUEUE_ELEMENT_COUNT; -+ /* NOTE: The Apple impl uses 0x80 as the queue size, however 
we use 21 (in fact 20) to simplify the impl */ -+ q->sq = bce_create_sq(dev->bce, q->cq, name, (u32) (q->el_count + 1), direction, cfn, dev); -+ if (!q->sq) -+ return -EINVAL; -+ -+ q->data = dma_alloc_coherent(&dev->bce->pci->dev, q->el_size * q->el_count, &q->dma_addr, GFP_KERNEL); -+ if (!q->data) { -+ bce_destroy_sq(dev->bce, q->sq); -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+static void aaudio_send_create_tag(struct aaudio_bce *b, int *tagn, char tag[4]) -+{ -+ char tag_zero[5]; -+ b->tag_num = (b->tag_num + 1) % AAUDIO_BCE_QUEUE_TAG_COUNT; -+ *tagn = b->tag_num; -+ snprintf(tag_zero, 5, "S%03d", b->tag_num); -+ *((u32 *) tag) = *((u32 *) tag_zero); -+} -+ -+int __aaudio_send_prepare(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, char *tag) -+{ -+ int status; -+ size_t index; -+ void *dptr; -+ struct aaudio_msg_header *header; -+ if ((status = bce_reserve_submission(b->qout.sq, &ctx->timeout))) -+ return status; -+ spin_lock_irqsave(&b->spinlock, ctx->irq_flags); -+ index = b->qout.data_tail; -+ dptr = (u8 *) b->qout.data + index * b->qout.el_size; -+ ctx->msg.data = dptr; -+ header = dptr; -+ if (tag) -+ *((u32 *) header->tag) = *((u32 *) tag); -+ else -+ aaudio_send_create_tag(b, &ctx->tag_n, header->tag); -+ return 0; -+} -+ -+void __aaudio_send(struct aaudio_bce *b, struct aaudio_send_ctx *ctx) -+{ -+ struct bce_qe_submission *s = bce_next_submission(b->qout.sq); -+#ifdef DEBUG -+ pr_debug("aaudio: Sending command data\n"); -+ print_hex_dump(KERN_DEBUG, "aaudio:OUT ", DUMP_PREFIX_NONE, 32, 1, ctx->msg.data, ctx->msg.size, true); -+#endif -+ bce_set_submission_single(s, b->qout.dma_addr + (dma_addr_t) (ctx->msg.data - b->qout.data), ctx->msg.size); -+ bce_submit_to_device(b->qout.sq); -+ b->qout.data_tail = (b->qout.data_tail + 1) % b->qout.el_count; -+ spin_unlock_irqrestore(&b->spinlock, ctx->irq_flags); -+} -+ -+int __aaudio_send_cmd_sync(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, struct aaudio_msg *reply) -+{ -+ struct 
aaudio_bce_queue_entry ent; -+ DECLARE_COMPLETION_ONSTACK(cmpl); -+ ent.msg = reply; -+ ent.cmpl = &cmpl; -+ b->pending_entries[ctx->tag_n] = &ent; -+ __aaudio_send(b, ctx); /* unlocks the spinlock */ -+ ctx->timeout = wait_for_completion_timeout(&cmpl, ctx->timeout); -+ if (ctx->timeout == 0) { -+ /* Remove the pending queue entry; this will be normally handled by the completion route but -+ * during a timeout it won't */ -+ spin_lock_irqsave(&b->spinlock, ctx->irq_flags); -+ if (b->pending_entries[ctx->tag_n] == &ent) -+ b->pending_entries[ctx->tag_n] = NULL; -+ spin_unlock_irqrestore(&b->spinlock, ctx->irq_flags); -+ return -ETIMEDOUT; -+ } -+ return 0; -+} -+ -+static void aaudio_handle_reply(struct aaudio_bce *b, struct aaudio_msg *reply) -+{ -+ const char *tag; -+ int tagn; -+ unsigned long irq_flags; -+ char tag_zero[5]; -+ struct aaudio_bce_queue_entry *entry; -+ -+ tag = ((struct aaudio_msg_header *) reply->data)->tag; -+ if (tag[0] != 'S') { -+ pr_err("aaudio_handle_reply: Unexpected tag: %.4s\n", tag); -+ return; -+ } -+ *((u32 *) tag_zero) = *((u32 *) tag); -+ tag_zero[4] = 0; -+ if (kstrtoint(&tag_zero[1], 10, &tagn)) { -+ pr_err("aaudio_handle_reply: Tag parse failed: %.4s\n", tag); -+ return; -+ } -+ -+ spin_lock_irqsave(&b->spinlock, irq_flags); -+ entry = b->pending_entries[tagn]; -+ if (entry) { -+ if (reply->size < entry->msg->size) -+ entry->msg->size = reply->size; -+ memcpy(entry->msg->data, reply->data, entry->msg->size); -+ complete(entry->cmpl); -+ -+ b->pending_entries[tagn] = NULL; -+ } else { -+ pr_err("aaudio_handle_reply: No queued item found for tag: %.4s\n", tag); -+ } -+ spin_unlock_irqrestore(&b->spinlock, irq_flags); -+} -+ -+static void aaudio_bce_out_queue_completion(struct bce_queue_sq *sq) -+{ -+ while (bce_next_completion(sq)) { -+ //pr_info("aaudio: Send confirmed\n"); -+ bce_notify_submission_complete(sq); -+ } -+} -+ -+static void aaudio_bce_in_queue_handle_msg(struct aaudio_device *a, struct aaudio_msg *msg); -+ -+static 
void aaudio_bce_in_queue_completion(struct bce_queue_sq *sq) -+{ -+ struct aaudio_msg msg; -+ struct aaudio_device *dev = sq->userdata; -+ struct aaudio_bce_queue *q = &dev->bcem.qin; -+ struct bce_sq_completion_data *c; -+ size_t cnt = 0; -+ -+ mb(); -+ while ((c = bce_next_completion(sq))) { -+ msg.data = (u8 *) q->data + q->data_head * q->el_size; -+ msg.size = c->data_size; -+#ifdef DEBUG -+ pr_debug("aaudio: Received command data %llx\n", c->data_size); -+ print_hex_dump(KERN_DEBUG, "aaudio:IN ", DUMP_PREFIX_NONE, 32, 1, msg.data, min(msg.size, 128UL), true); -+#endif -+ aaudio_bce_in_queue_handle_msg(dev, &msg); -+ -+ q->data_head = (q->data_head + 1) % q->el_count; -+ -+ bce_notify_submission_complete(sq); -+ ++cnt; -+ } -+ aaudio_bce_in_queue_submit_pending(q, cnt); -+} -+ -+static void aaudio_bce_in_queue_handle_msg(struct aaudio_device *a, struct aaudio_msg *msg) -+{ -+ struct aaudio_msg_header *header = (struct aaudio_msg_header *) msg->data; -+ if (msg->size < sizeof(struct aaudio_msg_header)) { -+ pr_err("aaudio: Msg size smaller than header (%lx)", msg->size); -+ return; -+ } -+ if (header->type == AAUDIO_MSG_TYPE_RESPONSE) { -+ aaudio_handle_reply(&a->bcem, msg); -+ } else if (header->type == AAUDIO_MSG_TYPE_COMMAND) { -+ aaudio_handle_command(a, msg); -+ } else if (header->type == AAUDIO_MSG_TYPE_NOTIFICATION) { -+ aaudio_handle_notification(a, msg); -+ } -+} -+ -+void aaudio_bce_in_queue_submit_pending(struct aaudio_bce_queue *q, size_t count) -+{ -+ struct bce_qe_submission *s; -+ while (count--) { -+ if (bce_reserve_submission(q->sq, NULL)) { -+ pr_err("aaudio: Failed to reserve an event queue submission\n"); -+ break; -+ } -+ s = bce_next_submission(q->sq); -+ bce_set_submission_single(s, q->dma_addr + (dma_addr_t) (q->data_tail * q->el_size), q->el_size); -+ q->data_tail = (q->data_tail + 1) % q->el_count; -+ } -+ bce_submit_to_device(q->sq); -+} -+ -+struct aaudio_msg aaudio_reply_alloc(void) -+{ -+ struct aaudio_msg ret; -+ ret.size = 
AAUDIO_BCE_QUEUE_ELEMENT_SIZE; -+ ret.data = kmalloc(ret.size, GFP_KERNEL); -+ return ret; -+} -+ -+void aaudio_reply_free(struct aaudio_msg *reply) -+{ -+ kfree(reply->data); -+} -diff --git a/drivers/staging/apple-bce/audio/protocol_bce.h b/drivers/staging/apple-bce/audio/protocol_bce.h -new file mode 100644 -index 000000000000..14d26c05ddf9 ---- /dev/null -+++ b/drivers/staging/apple-bce/audio/protocol_bce.h -@@ -0,0 +1,72 @@ -+#ifndef AAUDIO_PROTOCOL_BCE_H -+#define AAUDIO_PROTOCOL_BCE_H -+ -+#include "protocol.h" -+#include "../queue.h" -+ -+#define AAUDIO_BCE_QUEUE_ELEMENT_SIZE 0x1000 -+#define AAUDIO_BCE_QUEUE_ELEMENT_COUNT 20 -+ -+#define AAUDIO_BCE_QUEUE_TAG_COUNT 1000 -+ -+struct aaudio_device; -+ -+struct aaudio_bce_queue_entry { -+ struct aaudio_msg *msg; -+ struct completion *cmpl; -+}; -+struct aaudio_bce_queue { -+ struct bce_queue_cq *cq; -+ struct bce_queue_sq *sq; -+ void *data; -+ dma_addr_t dma_addr; -+ size_t data_head, data_tail; -+ size_t el_size, el_count; -+}; -+struct aaudio_bce { -+ struct bce_queue_cq *cq; -+ struct aaudio_bce_queue qin; -+ struct aaudio_bce_queue qout; -+ int tag_num; -+ struct aaudio_bce_queue_entry *pending_entries[AAUDIO_BCE_QUEUE_TAG_COUNT]; -+ struct spinlock spinlock; -+}; -+ -+struct aaudio_send_ctx { -+ int status; -+ int tag_n; -+ unsigned long irq_flags; -+ struct aaudio_msg msg; -+ unsigned long timeout; -+}; -+ -+int aaudio_bce_init(struct aaudio_device *dev); -+int __aaudio_send_prepare(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, char *tag); -+void __aaudio_send(struct aaudio_bce *b, struct aaudio_send_ctx *ctx); -+int __aaudio_send_cmd_sync(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, struct aaudio_msg *reply); -+ -+#define aaudio_send_with_tag(a, ctx, tag, tout, fn, ...) 
({ \ -+ (ctx)->timeout = msecs_to_jiffies(tout); \ -+ (ctx)->status = __aaudio_send_prepare(&(a)->bcem, (ctx), (tag)); \ -+ if (!(ctx)->status) { \ -+ fn(&(ctx)->msg, ##__VA_ARGS__); \ -+ __aaudio_send(&(a)->bcem, (ctx)); \ -+ } \ -+ (ctx)->status; \ -+}) -+#define aaudio_send(a, ctx, tout, fn, ...) aaudio_send_with_tag(a, ctx, NULL, tout, fn, ##__VA_ARGS__) -+ -+#define aaudio_send_cmd_sync(a, ctx, reply, tout, fn, ...) ({ \ -+ (ctx)->timeout = msecs_to_jiffies(tout); \ -+ (ctx)->status = __aaudio_send_prepare(&(a)->bcem, (ctx), NULL); \ -+ if (!(ctx)->status) { \ -+ fn(&(ctx)->msg, ##__VA_ARGS__); \ -+ (ctx)->status = __aaudio_send_cmd_sync(&(a)->bcem, (ctx), (reply)); \ -+ } \ -+ (ctx)->status; \ -+}) -+ -+struct aaudio_msg aaudio_reply_alloc(void); -+void aaudio_reply_free(struct aaudio_msg *reply); -+ -+#endif //AAUDIO_PROTOCOL_BCE_H -diff --git a/drivers/staging/apple-bce/mailbox.c b/drivers/staging/apple-bce/mailbox.c -new file mode 100644 -index 000000000000..e24bd35215c0 ---- /dev/null -+++ b/drivers/staging/apple-bce/mailbox.c -@@ -0,0 +1,151 @@ -+#include "mailbox.h" -+#include -+#include "apple_bce.h" -+ -+#define REG_MBOX_OUT_BASE 0x820 -+#define REG_MBOX_REPLY_COUNTER 0x108 -+#define REG_MBOX_REPLY_BASE 0x810 -+#define REG_TIMESTAMP_BASE 0xC000 -+ -+#define BCE_MBOX_TIMEOUT_MS 200 -+ -+void bce_mailbox_init(struct bce_mailbox *mb, void __iomem *reg_mb) -+{ -+ mb->reg_mb = reg_mb; -+ init_completion(&mb->mb_completion); -+} -+ -+int bce_mailbox_send(struct bce_mailbox *mb, u64 msg, u64* recv) -+{ -+ u32 __iomem *regb; -+ -+ if (atomic_cmpxchg(&mb->mb_status, 0, 1) != 0) { -+ return -EEXIST; // We don't support two messages at once -+ } -+ reinit_completion(&mb->mb_completion); -+ -+ pr_debug("bce_mailbox_send: %llx\n", msg); -+ regb = (u32*) ((u8*) mb->reg_mb + REG_MBOX_OUT_BASE); -+ iowrite32((u32) msg, regb); -+ iowrite32((u32) (msg >> 32), regb + 1); -+ iowrite32(0, regb + 2); -+ iowrite32(0, regb + 3); -+ -+ 
wait_for_completion_timeout(&mb->mb_completion, msecs_to_jiffies(BCE_MBOX_TIMEOUT_MS)); -+ if (atomic_read(&mb->mb_status) != 2) { // Didn't get the reply -+ atomic_set(&mb->mb_status, 0); -+ return -ETIMEDOUT; -+ } -+ -+ *recv = mb->mb_result; -+ pr_debug("bce_mailbox_send: reply %llx\n", *recv); -+ -+ atomic_set(&mb->mb_status, 0); -+ return 0; -+} -+ -+static int bce_mailbox_retrive_response(struct bce_mailbox *mb) -+{ -+ u32 __iomem *regb; -+ u32 lo, hi; -+ int count, counter; -+ u32 res = ioread32((u8*) mb->reg_mb + REG_MBOX_REPLY_COUNTER); -+ count = (res >> 20) & 0xf; -+ counter = count; -+ pr_debug("bce_mailbox_retrive_response count=%i\n", count); -+ while (counter--) { -+ regb = (u32*) ((u8*) mb->reg_mb + REG_MBOX_REPLY_BASE); -+ lo = ioread32(regb); -+ hi = ioread32(regb + 1); -+ ioread32(regb + 2); -+ ioread32(regb + 3); -+ pr_debug("bce_mailbox_retrive_response %llx\n", ((u64) hi << 32) | lo); -+ mb->mb_result = ((u64) hi << 32) | lo; -+ } -+ return count > 0 ? 0 : -ENODATA; -+} -+ -+int bce_mailbox_handle_interrupt(struct bce_mailbox *mb) -+{ -+ int status = bce_mailbox_retrive_response(mb); -+ if (!status) { -+ atomic_set(&mb->mb_status, 2); -+ complete(&mb->mb_completion); -+ } -+ return status; -+} -+ -+static void bc_send_timestamp(struct timer_list *tl); -+ -+void bce_timestamp_init(struct bce_timestamp *ts, void __iomem *reg) -+{ -+ u32 __iomem *regb; -+ -+ spin_lock_init(&ts->stop_sl); -+ ts->stopped = false; -+ -+ ts->reg = reg; -+ -+ regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE); -+ -+ ioread32(regb); -+ mb(); -+ -+ timer_setup(&ts->timer, bc_send_timestamp, 0); -+} -+ -+void bce_timestamp_start(struct bce_timestamp *ts, bool is_initial) -+{ -+ unsigned long flags; -+ u32 __iomem *regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE); -+ -+ if (is_initial) { -+ iowrite32((u32) -4, regb + 2); -+ iowrite32((u32) -1, regb); -+ } else { -+ iowrite32((u32) -3, regb + 2); -+ iowrite32((u32) -1, regb); -+ } -+ -+ spin_lock_irqsave(&ts->stop_sl, 
flags); -+ ts->stopped = false; -+ spin_unlock_irqrestore(&ts->stop_sl, flags); -+ mod_timer(&ts->timer, jiffies + msecs_to_jiffies(150)); -+} -+ -+void bce_timestamp_stop(struct bce_timestamp *ts) -+{ -+ unsigned long flags; -+ u32 __iomem *regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE); -+ -+ spin_lock_irqsave(&ts->stop_sl, flags); -+ ts->stopped = true; -+ spin_unlock_irqrestore(&ts->stop_sl, flags); -+ del_timer_sync(&ts->timer); -+ -+ iowrite32((u32) -2, regb + 2); -+ iowrite32((u32) -1, regb); -+} -+ -+static void bc_send_timestamp(struct timer_list *tl) -+{ -+ struct bce_timestamp *ts; -+ unsigned long flags; -+ u32 __iomem *regb; -+ ktime_t bt; -+ -+ ts = container_of(tl, struct bce_timestamp, timer); -+ regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE); -+ local_irq_save(flags); -+ ioread32(regb + 2); -+ mb(); -+ bt = ktime_get_boottime(); -+ iowrite32((u32) bt, regb + 2); -+ iowrite32((u32) (bt >> 32), regb); -+ -+ spin_lock(&ts->stop_sl); -+ if (!ts->stopped) -+ mod_timer(&ts->timer, jiffies + msecs_to_jiffies(150)); -+ spin_unlock(&ts->stop_sl); -+ local_irq_restore(flags); -+} -\ No newline at end of file -diff --git a/drivers/staging/apple-bce/mailbox.h b/drivers/staging/apple-bce/mailbox.h -new file mode 100644 -index 000000000000..f3323f95ba51 ---- /dev/null -+++ b/drivers/staging/apple-bce/mailbox.h -@@ -0,0 +1,53 @@ -+#ifndef BCE_MAILBOX_H -+#define BCE_MAILBOX_H -+ -+#include -+#include -+#include -+ -+struct bce_mailbox { -+ void __iomem *reg_mb; -+ -+ atomic_t mb_status; // possible statuses: 0 (no msg), 1 (has active msg), 2 (got reply) -+ struct completion mb_completion; -+ uint64_t mb_result; -+}; -+ -+enum bce_message_type { -+ BCE_MB_REGISTER_COMMAND_SQ = 0x7, // to-device -+ BCE_MB_REGISTER_COMMAND_CQ = 0x8, // to-device -+ BCE_MB_REGISTER_COMMAND_QUEUE_REPLY = 0xB, // to-host -+ BCE_MB_SET_FW_PROTOCOL_VERSION = 0xC, // both -+ BCE_MB_SLEEP_NO_STATE = 0x14, // to-device -+ BCE_MB_RESTORE_NO_STATE = 0x15, // to-device -+ 
BCE_MB_SAVE_STATE_AND_SLEEP = 0x17, // to-device -+ BCE_MB_RESTORE_STATE_AND_WAKE = 0x18, // to-device -+ BCE_MB_SAVE_STATE_AND_SLEEP_FAILURE = 0x19, // from-device -+ BCE_MB_SAVE_RESTORE_STATE_COMPLETE = 0x1A, // from-device -+}; -+ -+#define BCE_MB_MSG(type, value) (((u64) (type) << 58) | ((value) & 0x3FFFFFFFFFFFFFFLL)) -+#define BCE_MB_TYPE(v) ((u32) (v >> 58)) -+#define BCE_MB_VALUE(v) (v & 0x3FFFFFFFFFFFFFFLL) -+ -+void bce_mailbox_init(struct bce_mailbox *mb, void __iomem *reg_mb); -+ -+int bce_mailbox_send(struct bce_mailbox *mb, u64 msg, u64* recv); -+ -+int bce_mailbox_handle_interrupt(struct bce_mailbox *mb); -+ -+ -+struct bce_timestamp { -+ void __iomem *reg; -+ struct timer_list timer; -+ struct spinlock stop_sl; -+ bool stopped; -+}; -+ -+void bce_timestamp_init(struct bce_timestamp *ts, void __iomem *reg); -+ -+void bce_timestamp_start(struct bce_timestamp *ts, bool is_initial); -+ -+void bce_timestamp_stop(struct bce_timestamp *ts); -+ -+#endif //BCEDRIVER_MAILBOX_H -diff --git a/drivers/staging/apple-bce/queue.c b/drivers/staging/apple-bce/queue.c -new file mode 100644 -index 000000000000..bc9cd3bc6f0c ---- /dev/null -+++ b/drivers/staging/apple-bce/queue.c -@@ -0,0 +1,390 @@ -+#include "queue.h" -+#include "apple_bce.h" -+ -+#define REG_DOORBELL_BASE 0x44000 -+ -+struct bce_queue_cq *bce_alloc_cq(struct apple_bce_device *dev, int qid, u32 el_count) -+{ -+ struct bce_queue_cq *q; -+ q = kzalloc(sizeof(struct bce_queue_cq), GFP_KERNEL); -+ q->qid = qid; -+ q->type = BCE_QUEUE_CQ; -+ q->el_count = el_count; -+ q->data = dma_alloc_coherent(&dev->pci->dev, el_count * sizeof(struct bce_qe_completion), -+ &q->dma_handle, GFP_KERNEL); -+ if (!q->data) { -+ pr_err("DMA queue memory alloc failed\n"); -+ kfree(q); -+ return NULL; -+ } -+ return q; -+} -+ -+void bce_get_cq_memcfg(struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg) -+{ -+ cfg->qid = (u16) cq->qid; -+ cfg->el_count = (u16) cq->el_count; -+ cfg->vector_or_cq = 0; -+ cfg->_pad = 0; -+ 
cfg->addr = cq->dma_handle; -+ cfg->length = cq->el_count * sizeof(struct bce_qe_completion); -+} -+ -+void bce_free_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq) -+{ -+ dma_free_coherent(&dev->pci->dev, cq->el_count * sizeof(struct bce_qe_completion), cq->data, cq->dma_handle); -+ kfree(cq); -+} -+ -+static void bce_handle_cq_completion(struct apple_bce_device *dev, struct bce_qe_completion *e, size_t *ce) -+{ -+ struct bce_queue *target; -+ struct bce_queue_sq *target_sq; -+ struct bce_sq_completion_data *cmpl; -+ if (e->qid >= BCE_MAX_QUEUE_COUNT) { -+ pr_err("Device sent a response for qid (%u) >= BCE_MAX_QUEUE_COUNT\n", e->qid); -+ return; -+ } -+ target = dev->queues[e->qid]; -+ if (!target || target->type != BCE_QUEUE_SQ) { -+ pr_err("Device sent a response for qid (%u), which does not exist\n", e->qid); -+ return; -+ } -+ target_sq = (struct bce_queue_sq *) target; -+ if (target_sq->completion_tail != e->completion_index) { -+ pr_err("Completion index mismatch; this is likely going to make this driver unusable\n"); -+ return; -+ } -+ if (!target_sq->has_pending_completions) { -+ target_sq->has_pending_completions = true; -+ dev->int_sq_list[(*ce)++] = target_sq; -+ } -+ cmpl = &target_sq->completion_data[e->completion_index]; -+ cmpl->status = e->status; -+ cmpl->data_size = e->data_size; -+ cmpl->result = e->result; -+ wmb(); -+ target_sq->completion_tail = (target_sq->completion_tail + 1) % target_sq->el_count; -+} -+ -+void bce_handle_cq_completions(struct apple_bce_device *dev, struct bce_queue_cq *cq) -+{ -+ size_t ce = 0; -+ struct bce_qe_completion *e; -+ struct bce_queue_sq *sq; -+ e = bce_cq_element(cq, cq->index); -+ if (!(e->flags & BCE_COMPLETION_FLAG_PENDING)) -+ return; -+ mb(); -+ while (true) { -+ e = bce_cq_element(cq, cq->index); -+ if (!(e->flags & BCE_COMPLETION_FLAG_PENDING)) -+ break; -+ // pr_info("apple-bce: compl: %i: %i %llx %llx", e->qid, e->status, e->data_size, e->result); -+ bce_handle_cq_completion(dev, e, &ce); -+ 
e->flags = 0; -+ cq->index = (cq->index + 1) % cq->el_count; -+ } -+ mb(); -+ iowrite32(cq->index, (u32 *) ((u8 *) dev->reg_mem_dma + REG_DOORBELL_BASE) + cq->qid); -+ while (ce) { -+ --ce; -+ sq = dev->int_sq_list[ce]; -+ sq->completion(sq); -+ sq->has_pending_completions = false; -+ } -+} -+ -+ -+struct bce_queue_sq *bce_alloc_sq(struct apple_bce_device *dev, int qid, u32 el_size, u32 el_count, -+ bce_sq_completion compl, void *userdata) -+{ -+ struct bce_queue_sq *q; -+ q = kzalloc(sizeof(struct bce_queue_sq), GFP_KERNEL); -+ q->qid = qid; -+ q->type = BCE_QUEUE_SQ; -+ q->el_size = el_size; -+ q->el_count = el_count; -+ q->data = dma_alloc_coherent(&dev->pci->dev, el_count * el_size, -+ &q->dma_handle, GFP_KERNEL); -+ q->completion = compl; -+ q->userdata = userdata; -+ q->completion_data = kzalloc(sizeof(struct bce_sq_completion_data) * el_count, GFP_KERNEL); -+ q->reg_mem_dma = dev->reg_mem_dma; -+ atomic_set(&q->available_commands, el_count - 1); -+ init_completion(&q->available_command_completion); -+ atomic_set(&q->available_command_completion_waiting_count, 0); -+ if (!q->data) { -+ pr_err("DMA queue memory alloc failed\n"); -+ kfree(q); -+ return NULL; -+ } -+ return q; -+} -+ -+void bce_get_sq_memcfg(struct bce_queue_sq *sq, struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg) -+{ -+ cfg->qid = (u16) sq->qid; -+ cfg->el_count = (u16) sq->el_count; -+ cfg->vector_or_cq = (u16) cq->qid; -+ cfg->_pad = 0; -+ cfg->addr = sq->dma_handle; -+ cfg->length = sq->el_count * sq->el_size; -+} -+ -+void bce_free_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq) -+{ -+ dma_free_coherent(&dev->pci->dev, sq->el_count * sq->el_size, sq->data, sq->dma_handle); -+ kfree(sq); -+} -+ -+int bce_reserve_submission(struct bce_queue_sq *sq, unsigned long *timeout) -+{ -+ while (atomic_dec_if_positive(&sq->available_commands) < 0) { -+ if (!timeout || !*timeout) -+ return -EAGAIN; -+ atomic_inc(&sq->available_command_completion_waiting_count); -+ *timeout = 
wait_for_completion_timeout(&sq->available_command_completion, *timeout); -+ if (!*timeout) { -+ if (atomic_dec_if_positive(&sq->available_command_completion_waiting_count) < 0) -+ try_wait_for_completion(&sq->available_command_completion); /* consume the pending completion */ -+ } -+ } -+ return 0; -+} -+ -+void bce_cancel_submission_reservation(struct bce_queue_sq *sq) -+{ -+ atomic_inc(&sq->available_commands); -+} -+ -+void *bce_next_submission(struct bce_queue_sq *sq) -+{ -+ void *ret = bce_sq_element(sq, sq->tail); -+ sq->tail = (sq->tail + 1) % sq->el_count; -+ return ret; -+} -+ -+void bce_submit_to_device(struct bce_queue_sq *sq) -+{ -+ mb(); -+ iowrite32(sq->tail, (u32 *) ((u8 *) sq->reg_mem_dma + REG_DOORBELL_BASE) + sq->qid); -+} -+ -+void bce_notify_submission_complete(struct bce_queue_sq *sq) -+{ -+ sq->head = (sq->head + 1) % sq->el_count; -+ atomic_inc(&sq->available_commands); -+ if (atomic_dec_if_positive(&sq->available_command_completion_waiting_count) >= 0) { -+ complete(&sq->available_command_completion); -+ } -+} -+ -+void bce_set_submission_single(struct bce_qe_submission *element, dma_addr_t addr, size_t size) -+{ -+ element->addr = addr; -+ element->length = size; -+ element->segl_addr = element->segl_length = 0; -+} -+ -+static void bce_cmdq_completion(struct bce_queue_sq *q); -+ -+struct bce_queue_cmdq *bce_alloc_cmdq(struct apple_bce_device *dev, int qid, u32 el_count) -+{ -+ struct bce_queue_cmdq *q; -+ q = kzalloc(sizeof(struct bce_queue_cmdq), GFP_KERNEL); -+ q->sq = bce_alloc_sq(dev, qid, BCE_CMD_SIZE, el_count, bce_cmdq_completion, q); -+ if (!q->sq) { -+ kfree(q); -+ return NULL; -+ } -+ spin_lock_init(&q->lck); -+ q->tres = kzalloc(sizeof(struct bce_queue_cmdq_result_el*) * el_count, GFP_KERNEL); -+ if (!q->tres) { -+ kfree(q); -+ return NULL; -+ } -+ return q; -+} -+ -+void bce_free_cmdq(struct apple_bce_device *dev, struct bce_queue_cmdq *cmdq) -+{ -+ bce_free_sq(dev, cmdq->sq); -+ kfree(cmdq->tres); -+ kfree(cmdq); -+} -+ 
-+void bce_cmdq_completion(struct bce_queue_sq *q) -+{ -+ struct bce_queue_cmdq_result_el *el; -+ struct bce_queue_cmdq *cmdq = q->userdata; -+ struct bce_sq_completion_data *result; -+ -+ spin_lock(&cmdq->lck); -+ while ((result = bce_next_completion(q))) { -+ el = cmdq->tres[cmdq->sq->head]; -+ if (el) { -+ el->result = result->result; -+ el->status = result->status; -+ mb(); -+ complete(&el->cmpl); -+ } else { -+ pr_err("apple-bce: Unexpected command queue completion\n"); -+ } -+ cmdq->tres[cmdq->sq->head] = NULL; -+ bce_notify_submission_complete(q); -+ } -+ spin_unlock(&cmdq->lck); -+} -+ -+static __always_inline void *bce_cmd_start(struct bce_queue_cmdq *cmdq, struct bce_queue_cmdq_result_el *res) -+{ -+ void *ret; -+ unsigned long timeout; -+ init_completion(&res->cmpl); -+ mb(); -+ -+ timeout = msecs_to_jiffies(1000L * 60 * 5); /* wait for up to ~5 minutes */ -+ if (bce_reserve_submission(cmdq->sq, &timeout)) -+ return NULL; -+ -+ spin_lock(&cmdq->lck); -+ cmdq->tres[cmdq->sq->tail] = res; -+ ret = bce_next_submission(cmdq->sq); -+ return ret; -+} -+ -+static __always_inline void bce_cmd_finish(struct bce_queue_cmdq *cmdq, struct bce_queue_cmdq_result_el *res) -+{ -+ bce_submit_to_device(cmdq->sq); -+ spin_unlock(&cmdq->lck); -+ -+ wait_for_completion(&res->cmpl); -+ mb(); -+} -+ -+u32 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct bce_queue_memcfg *cfg, const char *name, bool isdirout) -+{ -+ struct bce_queue_cmdq_result_el res; -+ struct bce_cmdq_register_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res); -+ if (!cmd) -+ return (u32) -1; -+ cmd->cmd = BCE_CMD_REGISTER_MEMORY_QUEUE; -+ cmd->flags = (u16) ((name ? 2 : 0) | (isdirout ? 
1 : 0)); -+ cmd->qid = cfg->qid; -+ cmd->el_count = cfg->el_count; -+ cmd->vector_or_cq = cfg->vector_or_cq; -+ memset(cmd->name, 0, sizeof(cmd->name)); -+ if (name) { -+ cmd->name_len = (u16) min(strlen(name), (size_t) sizeof(cmd->name)); -+ memcpy(cmd->name, name, cmd->name_len); -+ } else { -+ cmd->name_len = 0; -+ } -+ cmd->addr = cfg->addr; -+ cmd->length = cfg->length; -+ -+ bce_cmd_finish(cmdq, &res); -+ return res.status; -+} -+ -+u32 bce_cmd_unregister_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid) -+{ -+ struct bce_queue_cmdq_result_el res; -+ struct bce_cmdq_simple_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res); -+ if (!cmd) -+ return (u32) -1; -+ cmd->cmd = BCE_CMD_UNREGISTER_MEMORY_QUEUE; -+ cmd->flags = 0; -+ cmd->qid = qid; -+ bce_cmd_finish(cmdq, &res); -+ return res.status; -+} -+ -+u32 bce_cmd_flush_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid) -+{ -+ struct bce_queue_cmdq_result_el res; -+ struct bce_cmdq_simple_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res); -+ if (!cmd) -+ return (u32) -1; -+ cmd->cmd = BCE_CMD_FLUSH_MEMORY_QUEUE; -+ cmd->flags = 0; -+ cmd->qid = qid; -+ bce_cmd_finish(cmdq, &res); -+ return res.status; -+} -+ -+ -+struct bce_queue_cq *bce_create_cq(struct apple_bce_device *dev, u32 el_count) -+{ -+ struct bce_queue_cq *cq; -+ struct bce_queue_memcfg cfg; -+ int qid = ida_simple_get(&dev->queue_ida, BCE_QUEUE_USER_MIN, BCE_QUEUE_USER_MAX, GFP_KERNEL); -+ if (qid < 0) -+ return NULL; -+ cq = bce_alloc_cq(dev, qid, el_count); -+ if (!cq) -+ return NULL; -+ bce_get_cq_memcfg(cq, &cfg); -+ if (bce_cmd_register_queue(dev->cmd_cmdq, &cfg, NULL, false) != 0) { -+ pr_err("apple-bce: CQ registration failed (%i)", qid); -+ bce_free_cq(dev, cq); -+ ida_simple_remove(&dev->queue_ida, (uint) qid); -+ return NULL; -+ } -+ dev->queues[qid] = (struct bce_queue *) cq; -+ return cq; -+} -+ -+struct bce_queue_sq *bce_create_sq(struct apple_bce_device *dev, struct bce_queue_cq *cq, const char *name, u32 el_count, -+ int direction, 
bce_sq_completion compl, void *userdata) -+{ -+ struct bce_queue_sq *sq; -+ struct bce_queue_memcfg cfg; -+ int qid; -+ if (cq == NULL) -+ return NULL; /* cq can not be null */ -+ if (name == NULL) -+ return NULL; /* name can not be null */ -+ if (direction != DMA_TO_DEVICE && direction != DMA_FROM_DEVICE) -+ return NULL; /* unsupported direction */ -+ qid = ida_simple_get(&dev->queue_ida, BCE_QUEUE_USER_MIN, BCE_QUEUE_USER_MAX, GFP_KERNEL); -+ if (qid < 0) -+ return NULL; -+ sq = bce_alloc_sq(dev, qid, sizeof(struct bce_qe_submission), el_count, compl, userdata); -+ if (!sq) -+ return NULL; -+ bce_get_sq_memcfg(sq, cq, &cfg); -+ if (bce_cmd_register_queue(dev->cmd_cmdq, &cfg, name, direction != DMA_FROM_DEVICE) != 0) { -+ pr_err("apple-bce: SQ registration failed (%i)", qid); -+ bce_free_sq(dev, sq); -+ ida_simple_remove(&dev->queue_ida, (uint) qid); -+ return NULL; -+ } -+ spin_lock(&dev->queues_lock); -+ dev->queues[qid] = (struct bce_queue *) sq; -+ spin_unlock(&dev->queues_lock); -+ return sq; -+} -+ -+void bce_destroy_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq) -+{ -+ if (!dev->is_being_removed && bce_cmd_unregister_memory_queue(dev->cmd_cmdq, (u16) cq->qid)) -+ pr_err("apple-bce: CQ unregister failed"); -+ spin_lock(&dev->queues_lock); -+ dev->queues[cq->qid] = NULL; -+ spin_unlock(&dev->queues_lock); -+ ida_simple_remove(&dev->queue_ida, (uint) cq->qid); -+ bce_free_cq(dev, cq); -+} -+ -+void bce_destroy_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq) -+{ -+ if (!dev->is_being_removed && bce_cmd_unregister_memory_queue(dev->cmd_cmdq, (u16) sq->qid)) -+ pr_err("apple-bce: CQ unregister failed"); -+ spin_lock(&dev->queues_lock); -+ dev->queues[sq->qid] = NULL; -+ spin_unlock(&dev->queues_lock); -+ ida_simple_remove(&dev->queue_ida, (uint) sq->qid); -+ bce_free_sq(dev, sq); -+} -\ No newline at end of file -diff --git a/drivers/staging/apple-bce/queue.h b/drivers/staging/apple-bce/queue.h -new file mode 100644 -index 
000000000000..8368ac5dfca8 ---- /dev/null -+++ b/drivers/staging/apple-bce/queue.h -@@ -0,0 +1,177 @@ -+#ifndef BCE_QUEUE_H -+#define BCE_QUEUE_H -+ -+#include -+#include -+ -+#define BCE_CMD_SIZE 0x40 -+ -+struct apple_bce_device; -+ -+enum bce_queue_type { -+ BCE_QUEUE_CQ, BCE_QUEUE_SQ -+}; -+struct bce_queue { -+ int qid; -+ int type; -+}; -+struct bce_queue_cq { -+ int qid; -+ int type; -+ u32 el_count; -+ dma_addr_t dma_handle; -+ void *data; -+ -+ u32 index; -+}; -+struct bce_queue_sq; -+typedef void (*bce_sq_completion)(struct bce_queue_sq *q); -+struct bce_sq_completion_data { -+ u32 status; -+ u64 data_size; -+ u64 result; -+}; -+struct bce_queue_sq { -+ int qid; -+ int type; -+ u32 el_size; -+ u32 el_count; -+ dma_addr_t dma_handle; -+ void *data; -+ void *userdata; -+ void __iomem *reg_mem_dma; -+ -+ atomic_t available_commands; -+ struct completion available_command_completion; -+ atomic_t available_command_completion_waiting_count; -+ u32 head, tail; -+ -+ u32 completion_cidx, completion_tail; -+ struct bce_sq_completion_data *completion_data; -+ bool has_pending_completions; -+ bce_sq_completion completion; -+}; -+ -+struct bce_queue_cmdq_result_el { -+ struct completion cmpl; -+ u32 status; -+ u64 result; -+}; -+struct bce_queue_cmdq { -+ struct bce_queue_sq *sq; -+ struct spinlock lck; -+ struct bce_queue_cmdq_result_el **tres; -+}; -+ -+struct bce_queue_memcfg { -+ u16 qid; -+ u16 el_count; -+ u16 vector_or_cq; -+ u16 _pad; -+ u64 addr; -+ u64 length; -+}; -+ -+enum bce_qe_completion_status { -+ BCE_COMPLETION_SUCCESS = 0, -+ BCE_COMPLETION_ERROR = 1, -+ BCE_COMPLETION_ABORTED = 2, -+ BCE_COMPLETION_NO_SPACE = 3, -+ BCE_COMPLETION_OVERRUN = 4 -+}; -+enum bce_qe_completion_flags { -+ BCE_COMPLETION_FLAG_PENDING = 0x8000 -+}; -+struct bce_qe_completion { -+ u64 result; -+ u64 data_size; -+ u16 qid; -+ u16 completion_index; -+ u16 status; // bce_qe_completion_status -+ u16 flags; // bce_qe_completion_flags -+}; -+ -+struct bce_qe_submission { -+ u64 
length; -+ u64 addr; -+ -+ u64 segl_addr; -+ u64 segl_length; -+}; -+ -+enum bce_cmdq_command { -+ BCE_CMD_REGISTER_MEMORY_QUEUE = 0x20, -+ BCE_CMD_UNREGISTER_MEMORY_QUEUE = 0x30, -+ BCE_CMD_FLUSH_MEMORY_QUEUE = 0x40, -+ BCE_CMD_SET_MEMORY_QUEUE_PROPERTY = 0x50 -+}; -+struct bce_cmdq_simple_memory_queue_cmd { -+ u16 cmd; // bce_cmdq_command -+ u16 flags; -+ u16 qid; -+}; -+struct bce_cmdq_register_memory_queue_cmd { -+ u16 cmd; // bce_cmdq_command -+ u16 flags; -+ u16 qid; -+ u16 _pad; -+ u16 el_count; -+ u16 vector_or_cq; -+ u16 _pad2; -+ u16 name_len; -+ char name[0x20]; -+ u64 addr; -+ u64 length; -+}; -+ -+static __always_inline void *bce_sq_element(struct bce_queue_sq *q, int i) { -+ return (void *) ((u8 *) q->data + q->el_size * i); -+} -+static __always_inline void *bce_cq_element(struct bce_queue_cq *q, int i) { -+ return (void *) ((struct bce_qe_completion *) q->data + i); -+} -+ -+static __always_inline struct bce_sq_completion_data *bce_next_completion(struct bce_queue_sq *sq) { -+ struct bce_sq_completion_data *res; -+ rmb(); -+ if (sq->completion_cidx == sq->completion_tail) -+ return NULL; -+ res = &sq->completion_data[sq->completion_cidx]; -+ sq->completion_cidx = (sq->completion_cidx + 1) % sq->el_count; -+ return res; -+} -+ -+struct bce_queue_cq *bce_alloc_cq(struct apple_bce_device *dev, int qid, u32 el_count); -+void bce_get_cq_memcfg(struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg); -+void bce_free_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq); -+void bce_handle_cq_completions(struct apple_bce_device *dev, struct bce_queue_cq *cq); -+ -+struct bce_queue_sq *bce_alloc_sq(struct apple_bce_device *dev, int qid, u32 el_size, u32 el_count, -+ bce_sq_completion compl, void *userdata); -+void bce_get_sq_memcfg(struct bce_queue_sq *sq, struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg); -+void bce_free_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq); -+int bce_reserve_submission(struct bce_queue_sq *sq, unsigned long 
*timeout); -+void bce_cancel_submission_reservation(struct bce_queue_sq *sq); -+void *bce_next_submission(struct bce_queue_sq *sq); -+void bce_submit_to_device(struct bce_queue_sq *sq); -+void bce_notify_submission_complete(struct bce_queue_sq *sq); -+ -+void bce_set_submission_single(struct bce_qe_submission *element, dma_addr_t addr, size_t size); -+ -+struct bce_queue_cmdq *bce_alloc_cmdq(struct apple_bce_device *dev, int qid, u32 el_count); -+void bce_free_cmdq(struct apple_bce_device *dev, struct bce_queue_cmdq *cmdq); -+ -+u32 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct bce_queue_memcfg *cfg, const char *name, bool isdirout); -+u32 bce_cmd_unregister_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid); -+u32 bce_cmd_flush_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid); -+ -+ -+/* User API - Creates and registers the queue */ -+ -+struct bce_queue_cq *bce_create_cq(struct apple_bce_device *dev, u32 el_count); -+struct bce_queue_sq *bce_create_sq(struct apple_bce_device *dev, struct bce_queue_cq *cq, const char *name, u32 el_count, -+ int direction, bce_sq_completion compl, void *userdata); -+void bce_destroy_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq); -+void bce_destroy_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq); -+ -+#endif //BCEDRIVER_MAILBOX_H -diff --git a/drivers/staging/apple-bce/queue_dma.c b/drivers/staging/apple-bce/queue_dma.c -new file mode 100644 -index 000000000000..b236613285c0 ---- /dev/null -+++ b/drivers/staging/apple-bce/queue_dma.c -@@ -0,0 +1,220 @@ -+#include "queue_dma.h" -+#include -+#include -+#include "queue.h" -+ -+static int bce_alloc_scatterlist_from_vm(struct sg_table *tbl, void *data, size_t len); -+static struct bce_segment_list_element_hostinfo *bce_map_segment_list( -+ struct device *dev, struct scatterlist *pages, int pagen); -+static void bce_unmap_segement_list(struct device *dev, struct bce_segment_list_element_hostinfo *list); -+ -+int bce_map_dma_buffer(struct device *dev, 
struct bce_dma_buffer *buf, struct sg_table scatterlist, -+ enum dma_data_direction dir) -+{ -+ int cnt; -+ -+ buf->direction = dir; -+ buf->scatterlist = scatterlist; -+ buf->seglist_hostinfo = NULL; -+ -+ cnt = dma_map_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, dir); -+ if (cnt != buf->scatterlist.nents) { -+ pr_err("apple-bce: DMA scatter list mapping returned an unexpected count: %i\n", cnt); -+ dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, dir); -+ return -EIO; -+ } -+ if (cnt == 1) -+ return 0; -+ -+ buf->seglist_hostinfo = bce_map_segment_list(dev, buf->scatterlist.sgl, buf->scatterlist.nents); -+ if (!buf->seglist_hostinfo) { -+ pr_err("apple-bce: Creating segment list failed\n"); -+ dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, dir); -+ return -EIO; -+ } -+ return 0; -+} -+ -+int bce_map_dma_buffer_vm(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len, -+ enum dma_data_direction dir) -+{ -+ int status; -+ struct sg_table scatterlist; -+ if ((status = bce_alloc_scatterlist_from_vm(&scatterlist, data, len))) -+ return status; -+ if ((status = bce_map_dma_buffer(dev, buf, scatterlist, dir))) { -+ sg_free_table(&scatterlist); -+ return status; -+ } -+ return 0; -+} -+ -+int bce_map_dma_buffer_km(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len, -+ enum dma_data_direction dir) -+{ -+ /* Kernel memory is continuous which is great for us. 
*/ -+ int status; -+ struct sg_table scatterlist; -+ if ((status = sg_alloc_table(&scatterlist, 1, GFP_KERNEL))) { -+ sg_free_table(&scatterlist); -+ return status; -+ } -+ sg_set_buf(scatterlist.sgl, data, (uint) len); -+ if ((status = bce_map_dma_buffer(dev, buf, scatterlist, dir))) { -+ sg_free_table(&scatterlist); -+ return status; -+ } -+ return 0; -+} -+ -+void bce_unmap_dma_buffer(struct device *dev, struct bce_dma_buffer *buf) -+{ -+ dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, buf->direction); -+ bce_unmap_segement_list(dev, buf->seglist_hostinfo); -+} -+ -+ -+static int bce_alloc_scatterlist_from_vm(struct sg_table *tbl, void *data, size_t len) -+{ -+ int status, i; -+ struct page **pages; -+ size_t off, start_page, end_page, page_count; -+ off = (size_t) data % PAGE_SIZE; -+ start_page = (size_t) data / PAGE_SIZE; -+ end_page = ((size_t) data + len - 1) / PAGE_SIZE; -+ page_count = end_page - start_page + 1; -+ -+ if (page_count > PAGE_SIZE / sizeof(struct page *)) -+ pages = vmalloc(page_count * sizeof(struct page *)); -+ else -+ pages = kmalloc(page_count * sizeof(struct page *), GFP_KERNEL); -+ -+ for (i = 0; i < page_count; i++) -+ pages[i] = vmalloc_to_page((void *) ((start_page + i) * PAGE_SIZE)); -+ -+ if ((status = sg_alloc_table_from_pages(tbl, pages, page_count, (unsigned int) off, len, GFP_KERNEL))) { -+ sg_free_table(tbl); -+ } -+ -+ if (page_count > PAGE_SIZE / sizeof(struct page *)) -+ vfree(pages); -+ else -+ kfree(pages); -+ return status; -+} -+ -+#define BCE_ELEMENTS_PER_PAGE ((PAGE_SIZE - sizeof(struct bce_segment_list_header)) \ -+ / sizeof(struct bce_segment_list_element)) -+#define BCE_ELEMENTS_PER_ADDITIONAL_PAGE (PAGE_SIZE / sizeof(struct bce_segment_list_element)) -+ -+static struct bce_segment_list_element_hostinfo *bce_map_segment_list( -+ struct device *dev, struct scatterlist *pages, int pagen) -+{ -+ size_t ptr, pptr = 0; -+ struct bce_segment_list_header theader; /* a temp header, to store the initial seg 
*/ -+ struct bce_segment_list_header *header; -+ struct bce_segment_list_element *el, *el_end; -+ struct bce_segment_list_element_hostinfo *out, *pout, *out_root; -+ struct scatterlist *sg; -+ int i; -+ header = &theader; -+ out = out_root = NULL; -+ el = el_end = NULL; -+ for_each_sg(pages, sg, pagen, i) { -+ if (el >= el_end) { -+ /* allocate a new page, this will be also done for the first element */ -+ ptr = __get_free_page(GFP_KERNEL); -+ if (pptr && ptr == pptr + PAGE_SIZE) { -+ out->page_count++; -+ header->element_count += BCE_ELEMENTS_PER_ADDITIONAL_PAGE; -+ el_end += BCE_ELEMENTS_PER_ADDITIONAL_PAGE; -+ } else { -+ header = (void *) ptr; -+ header->element_count = BCE_ELEMENTS_PER_PAGE; -+ header->data_size = 0; -+ header->next_segl_addr = 0; -+ header->next_segl_length = 0; -+ el = (void *) (header + 1); -+ el_end = el + BCE_ELEMENTS_PER_PAGE; -+ -+ if (out) { -+ out->next = kmalloc(sizeof(struct bce_segment_list_element_hostinfo), GFP_KERNEL); -+ out = out->next; -+ } else { -+ out_root = out = kmalloc(sizeof(struct bce_segment_list_element_hostinfo), GFP_KERNEL); -+ } -+ out->page_start = (void *) ptr; -+ out->page_count = 1; -+ out->dma_start = DMA_MAPPING_ERROR; -+ out->next = NULL; -+ } -+ pptr = ptr; -+ } -+ el->addr = sg->dma_address; -+ el->length = sg->length; -+ header->data_size += el->length; -+ } -+ -+ /* DMA map */ -+ out = out_root; -+ pout = NULL; -+ while (out) { -+ out->dma_start = dma_map_single(dev, out->page_start, out->page_count * PAGE_SIZE, DMA_TO_DEVICE); -+ if (dma_mapping_error(dev, out->dma_start)) -+ goto error; -+ if (pout) { -+ header = pout->page_start; -+ header->next_segl_addr = out->dma_start; -+ header->next_segl_length = out->page_count * PAGE_SIZE; -+ } -+ pout = out; -+ out = out->next; -+ } -+ return out_root; -+ -+ error: -+ bce_unmap_segement_list(dev, out_root); -+ return NULL; -+} -+ -+static void bce_unmap_segement_list(struct device *dev, struct bce_segment_list_element_hostinfo *list) -+{ -+ struct 
bce_segment_list_element_hostinfo *next; -+ while (list) { -+ if (list->dma_start != DMA_MAPPING_ERROR) -+ dma_unmap_single(dev, list->dma_start, list->page_count * PAGE_SIZE, DMA_TO_DEVICE); -+ next = list->next; -+ kfree(list); -+ list = next; -+ } -+} -+ -+int bce_set_submission_buf(struct bce_qe_submission *element, struct bce_dma_buffer *buf, size_t offset, size_t length) -+{ -+ struct bce_segment_list_element_hostinfo *seg; -+ struct bce_segment_list_header *seg_header; -+ -+ seg = buf->seglist_hostinfo; -+ if (!seg) { -+ element->addr = buf->scatterlist.sgl->dma_address + offset; -+ element->length = length; -+ element->segl_addr = 0; -+ element->segl_length = 0; -+ return 0; -+ } -+ -+ while (seg) { -+ seg_header = seg->page_start; -+ if (offset <= seg_header->data_size) -+ break; -+ offset -= seg_header->data_size; -+ seg = seg->next; -+ } -+ if (!seg) -+ return -EINVAL; -+ element->addr = offset; -+ element->length = buf->scatterlist.sgl->dma_length; -+ element->segl_addr = seg->dma_start; -+ element->segl_length = seg->page_count * PAGE_SIZE; -+ return 0; -+} -\ No newline at end of file -diff --git a/drivers/staging/apple-bce/queue_dma.h b/drivers/staging/apple-bce/queue_dma.h -new file mode 100644 -index 000000000000..f8a57e50e7a3 ---- /dev/null -+++ b/drivers/staging/apple-bce/queue_dma.h -@@ -0,0 +1,50 @@ -+#ifndef BCE_QUEUE_DMA_H -+#define BCE_QUEUE_DMA_H -+ -+#include -+ -+struct bce_qe_submission; -+ -+struct bce_segment_list_header { -+ u64 element_count; -+ u64 data_size; -+ -+ u64 next_segl_addr; -+ u64 next_segl_length; -+}; -+struct bce_segment_list_element { -+ u64 addr; -+ u64 length; -+}; -+ -+struct bce_segment_list_element_hostinfo { -+ struct bce_segment_list_element_hostinfo *next; -+ void *page_start; -+ size_t page_count; -+ dma_addr_t dma_start; -+}; -+ -+ -+struct bce_dma_buffer { -+ enum dma_data_direction direction; -+ struct sg_table scatterlist; -+ struct bce_segment_list_element_hostinfo *seglist_hostinfo; -+}; -+ -+/* NOTE: 
Takes ownership of the sg_table if it succeeds. Ownership is not transferred on failure. */ -+int bce_map_dma_buffer(struct device *dev, struct bce_dma_buffer *buf, struct sg_table scatterlist, -+ enum dma_data_direction dir); -+ -+/* Creates a buffer from virtual memory (vmalloc) */ -+int bce_map_dma_buffer_vm(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len, -+ enum dma_data_direction dir); -+ -+/* Creates a buffer from kernel memory (kmalloc) */ -+int bce_map_dma_buffer_km(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len, -+ enum dma_data_direction dir); -+ -+void bce_unmap_dma_buffer(struct device *dev, struct bce_dma_buffer *buf); -+ -+int bce_set_submission_buf(struct bce_qe_submission *element, struct bce_dma_buffer *buf, size_t offset, size_t length); -+ -+#endif //BCE_QUEUE_DMA_H -diff --git a/drivers/staging/apple-bce/vhci/command.h b/drivers/staging/apple-bce/vhci/command.h -new file mode 100644 -index 000000000000..26619e0bccfa ---- /dev/null -+++ b/drivers/staging/apple-bce/vhci/command.h -@@ -0,0 +1,204 @@ -+#ifndef BCE_VHCI_COMMAND_H -+#define BCE_VHCI_COMMAND_H -+ -+#include "queue.h" -+#include -+#include -+ -+#define BCE_VHCI_CMD_TIMEOUT_SHORT msecs_to_jiffies(2000) -+#define BCE_VHCI_CMD_TIMEOUT_LONG msecs_to_jiffies(30000) -+ -+#define BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2 2 -+#define BCE_VHCI_BULK_MAX_ACTIVE_URBS (1 << BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2) -+ -+typedef u8 bce_vhci_port_t; -+typedef u8 bce_vhci_device_t; -+ -+enum bce_vhci_command { -+ BCE_VHCI_CMD_CONTROLLER_ENABLE = 1, -+ BCE_VHCI_CMD_CONTROLLER_DISABLE = 2, -+ BCE_VHCI_CMD_CONTROLLER_START = 3, -+ BCE_VHCI_CMD_CONTROLLER_PAUSE = 4, -+ -+ BCE_VHCI_CMD_PORT_POWER_ON = 0x10, -+ BCE_VHCI_CMD_PORT_POWER_OFF = 0x11, -+ BCE_VHCI_CMD_PORT_RESUME = 0x12, -+ BCE_VHCI_CMD_PORT_SUSPEND = 0x13, -+ BCE_VHCI_CMD_PORT_RESET = 0x14, -+ BCE_VHCI_CMD_PORT_DISABLE = 0x15, -+ BCE_VHCI_CMD_PORT_STATUS = 0x16, -+ -+ BCE_VHCI_CMD_DEVICE_CREATE = 0x30, -+ 
BCE_VHCI_CMD_DEVICE_DESTROY = 0x31, -+ -+ BCE_VHCI_CMD_ENDPOINT_CREATE = 0x40, -+ BCE_VHCI_CMD_ENDPOINT_DESTROY = 0x41, -+ BCE_VHCI_CMD_ENDPOINT_SET_STATE = 0x42, -+ BCE_VHCI_CMD_ENDPOINT_RESET = 0x44, -+ -+ /* Device to host only */ -+ BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE = 0x43, -+ BCE_VHCI_CMD_TRANSFER_REQUEST = 0x1000, -+ BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS = 0x1005 -+}; -+ -+enum bce_vhci_endpoint_state { -+ BCE_VHCI_ENDPOINT_ACTIVE = 0, -+ BCE_VHCI_ENDPOINT_PAUSED = 1, -+ BCE_VHCI_ENDPOINT_STALLED = 2 -+}; -+ -+static inline int bce_vhci_cmd_controller_enable(struct bce_vhci_command_queue *q, u8 busNum, u16 *portMask) -+{ -+ int status; -+ struct bce_vhci_message cmd, res; -+ cmd.cmd = BCE_VHCI_CMD_CONTROLLER_ENABLE; -+ cmd.param1 = 0x7100u | busNum; -+ status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG); -+ if (!status) -+ *portMask = (u16) res.param2; -+ return status; -+} -+static inline int bce_vhci_cmd_controller_disable(struct bce_vhci_command_queue *q) -+{ -+ struct bce_vhci_message cmd, res; -+ cmd.cmd = BCE_VHCI_CMD_CONTROLLER_DISABLE; -+ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG); -+} -+static inline int bce_vhci_cmd_controller_start(struct bce_vhci_command_queue *q) -+{ -+ struct bce_vhci_message cmd, res; -+ cmd.cmd = BCE_VHCI_CMD_CONTROLLER_START; -+ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG); -+} -+static inline int bce_vhci_cmd_controller_pause(struct bce_vhci_command_queue *q) -+{ -+ struct bce_vhci_message cmd, res; -+ cmd.cmd = BCE_VHCI_CMD_CONTROLLER_PAUSE; -+ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG); -+} -+ -+static inline int bce_vhci_cmd_port_power_on(struct bce_vhci_command_queue *q, bce_vhci_port_t port) -+{ -+ struct bce_vhci_message cmd, res; -+ cmd.cmd = BCE_VHCI_CMD_PORT_POWER_ON; -+ cmd.param1 = port; -+ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); -+} 
-+static inline int bce_vhci_cmd_port_power_off(struct bce_vhci_command_queue *q, bce_vhci_port_t port) -+{ -+ struct bce_vhci_message cmd, res; -+ cmd.cmd = BCE_VHCI_CMD_PORT_POWER_OFF; -+ cmd.param1 = port; -+ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); -+} -+static inline int bce_vhci_cmd_port_resume(struct bce_vhci_command_queue *q, bce_vhci_port_t port) -+{ -+ struct bce_vhci_message cmd, res; -+ cmd.cmd = BCE_VHCI_CMD_PORT_RESUME; -+ cmd.param1 = port; -+ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG); -+} -+static inline int bce_vhci_cmd_port_suspend(struct bce_vhci_command_queue *q, bce_vhci_port_t port) -+{ -+ struct bce_vhci_message cmd, res; -+ cmd.cmd = BCE_VHCI_CMD_PORT_SUSPEND; -+ cmd.param1 = port; -+ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG); -+} -+static inline int bce_vhci_cmd_port_reset(struct bce_vhci_command_queue *q, bce_vhci_port_t port, u32 timeout) -+{ -+ struct bce_vhci_message cmd, res; -+ cmd.cmd = BCE_VHCI_CMD_PORT_RESET; -+ cmd.param1 = port; -+ cmd.param2 = timeout; -+ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); -+} -+static inline int bce_vhci_cmd_port_disable(struct bce_vhci_command_queue *q, bce_vhci_port_t port) -+{ -+ struct bce_vhci_message cmd, res; -+ cmd.cmd = BCE_VHCI_CMD_PORT_DISABLE; -+ cmd.param1 = port; -+ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); -+} -+static inline int bce_vhci_cmd_port_status(struct bce_vhci_command_queue *q, bce_vhci_port_t port, -+ u32 clearFlags, u32 *resStatus) -+{ -+ int status; -+ struct bce_vhci_message cmd, res; -+ cmd.cmd = BCE_VHCI_CMD_PORT_STATUS; -+ cmd.param1 = port; -+ cmd.param2 = clearFlags & 0x560000; -+ status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); -+ if (status >= 0) -+ *resStatus = (u32) res.param2; -+ return status; -+} -+ -+static inline int 
bce_vhci_cmd_device_create(struct bce_vhci_command_queue *q, bce_vhci_port_t port, -+ bce_vhci_device_t *dev) -+{ -+ int status; -+ struct bce_vhci_message cmd, res; -+ cmd.cmd = BCE_VHCI_CMD_DEVICE_CREATE; -+ cmd.param1 = port; -+ status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); -+ if (!status) -+ *dev = (bce_vhci_device_t) res.param2; -+ return status; -+} -+static inline int bce_vhci_cmd_device_destroy(struct bce_vhci_command_queue *q, bce_vhci_device_t dev) -+{ -+ struct bce_vhci_message cmd, res; -+ cmd.cmd = BCE_VHCI_CMD_DEVICE_DESTROY; -+ cmd.param1 = dev; -+ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG); -+} -+ -+static inline int bce_vhci_cmd_endpoint_create(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, -+ struct usb_endpoint_descriptor *desc) -+{ -+ struct bce_vhci_message cmd, res; -+ int endpoint_type = usb_endpoint_type(desc); -+ int maxp = usb_endpoint_maxp(desc); -+ int maxp_burst = usb_endpoint_maxp_mult(desc) * maxp; -+ u8 max_active_requests_pow2 = 0; -+ cmd.cmd = BCE_VHCI_CMD_ENDPOINT_CREATE; -+ cmd.param1 = dev | ((desc->bEndpointAddress & 0x8Fu) << 8); -+ if (endpoint_type == USB_ENDPOINT_XFER_BULK) -+ max_active_requests_pow2 = BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2; -+ cmd.param2 = endpoint_type | ((max_active_requests_pow2 & 0xf) << 4) | (maxp << 16) | ((u64) maxp_burst << 32); -+ if (endpoint_type == USB_ENDPOINT_XFER_INT) -+ cmd.param2 |= (desc->bInterval - 1) << 8; -+ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); -+} -+static inline int bce_vhci_cmd_endpoint_destroy(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, u8 endpoint) -+{ -+ struct bce_vhci_message cmd, res; -+ cmd.cmd = BCE_VHCI_CMD_ENDPOINT_DESTROY; -+ cmd.param1 = dev | (endpoint << 8); -+ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); -+} -+static inline int bce_vhci_cmd_endpoint_set_state(struct bce_vhci_command_queue *q, 
bce_vhci_device_t dev, u8 endpoint, -+ enum bce_vhci_endpoint_state newState, enum bce_vhci_endpoint_state *retState) -+{ -+ int status; -+ struct bce_vhci_message cmd, res; -+ cmd.cmd = BCE_VHCI_CMD_ENDPOINT_SET_STATE; -+ cmd.param1 = dev | (endpoint << 8); -+ cmd.param2 = (u64) newState; -+ status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); -+ if (status != BCE_VHCI_INTERNAL_ERROR && status != BCE_VHCI_NO_POWER) -+ *retState = (enum bce_vhci_endpoint_state) res.param2; -+ return status; -+} -+static inline int bce_vhci_cmd_endpoint_reset(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, u8 endpoint) -+{ -+ struct bce_vhci_message cmd, res; -+ cmd.cmd = BCE_VHCI_CMD_ENDPOINT_RESET; -+ cmd.param1 = dev | (endpoint << 8); -+ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); -+} -+ -+ -+#endif //BCE_VHCI_COMMAND_H -diff --git a/drivers/staging/apple-bce/vhci/queue.c b/drivers/staging/apple-bce/vhci/queue.c -new file mode 100644 -index 000000000000..7b0b5027157b ---- /dev/null -+++ b/drivers/staging/apple-bce/vhci/queue.c -@@ -0,0 +1,268 @@ -+#include "queue.h" -+#include "vhci.h" -+#include "../apple_bce.h" -+ -+ -+static void bce_vhci_message_queue_completion(struct bce_queue_sq *sq); -+ -+int bce_vhci_message_queue_create(struct bce_vhci *vhci, struct bce_vhci_message_queue *ret, const char *name) -+{ -+ int status; -+ ret->cq = bce_create_cq(vhci->dev, VHCI_EVENT_QUEUE_EL_COUNT); -+ if (!ret->cq) -+ return -EINVAL; -+ ret->sq = bce_create_sq(vhci->dev, ret->cq, name, VHCI_EVENT_QUEUE_EL_COUNT, DMA_TO_DEVICE, -+ bce_vhci_message_queue_completion, ret); -+ if (!ret->sq) { -+ status = -EINVAL; -+ goto fail_cq; -+ } -+ ret->data = dma_alloc_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT, -+ &ret->dma_addr, GFP_KERNEL); -+ if (!ret->data) { -+ status = -EINVAL; -+ goto fail_sq; -+ } -+ return 0; -+ -+fail_sq: -+ bce_destroy_sq(vhci->dev, ret->sq); -+ 
ret->sq = NULL; -+fail_cq: -+ bce_destroy_cq(vhci->dev, ret->cq); -+ ret->cq = NULL; -+ return status; -+} -+ -+void bce_vhci_message_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_message_queue *q) -+{ -+ if (!q->cq) -+ return; -+ dma_free_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT, -+ q->data, q->dma_addr); -+ bce_destroy_sq(vhci->dev, q->sq); -+ bce_destroy_cq(vhci->dev, q->cq); -+} -+ -+void bce_vhci_message_queue_write(struct bce_vhci_message_queue *q, struct bce_vhci_message *req) -+{ -+ int sidx; -+ struct bce_qe_submission *s; -+ sidx = q->sq->tail; -+ s = bce_next_submission(q->sq); -+ pr_debug("bce-vhci: Send message: %x s=%x p1=%x p2=%llx\n", req->cmd, req->status, req->param1, req->param2); -+ q->data[sidx] = *req; -+ bce_set_submission_single(s, q->dma_addr + sizeof(struct bce_vhci_message) * sidx, -+ sizeof(struct bce_vhci_message)); -+ bce_submit_to_device(q->sq); -+} -+ -+static void bce_vhci_message_queue_completion(struct bce_queue_sq *sq) -+{ -+ while (bce_next_completion(sq)) -+ bce_notify_submission_complete(sq); -+} -+ -+ -+ -+static void bce_vhci_event_queue_completion(struct bce_queue_sq *sq); -+ -+int __bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name, -+ bce_sq_completion compl) -+{ -+ ret->vhci = vhci; -+ -+ ret->sq = bce_create_sq(vhci->dev, vhci->ev_cq, name, VHCI_EVENT_QUEUE_EL_COUNT, DMA_FROM_DEVICE, compl, ret); -+ if (!ret->sq) -+ return -EINVAL; -+ ret->data = dma_alloc_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT, -+ &ret->dma_addr, GFP_KERNEL); -+ if (!ret->data) { -+ bce_destroy_sq(vhci->dev, ret->sq); -+ ret->sq = NULL; -+ return -EINVAL; -+ } -+ -+ init_completion(&ret->queue_empty_completion); -+ bce_vhci_event_queue_submit_pending(ret, VHCI_EVENT_PENDING_COUNT); -+ return 0; -+} -+ -+int bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, 
const char *name, -+ bce_vhci_event_queue_callback cb) -+{ -+ ret->cb = cb; -+ return __bce_vhci_event_queue_create(vhci, ret, name, bce_vhci_event_queue_completion); -+} -+ -+void bce_vhci_event_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_event_queue *q) -+{ -+ if (!q->sq) -+ return; -+ dma_free_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT, -+ q->data, q->dma_addr); -+ bce_destroy_sq(vhci->dev, q->sq); -+} -+ -+static void bce_vhci_event_queue_completion(struct bce_queue_sq *sq) -+{ -+ struct bce_sq_completion_data *cd; -+ struct bce_vhci_event_queue *ev = sq->userdata; -+ struct bce_vhci_message *msg; -+ size_t cnt = 0; -+ -+ while ((cd = bce_next_completion(sq))) { -+ if (cd->status == BCE_COMPLETION_ABORTED) { /* We flushed the queue */ -+ bce_notify_submission_complete(sq); -+ continue; -+ } -+ msg = &ev->data[sq->head]; -+ pr_debug("bce-vhci: Got event: %x s=%x p1=%x p2=%llx\n", msg->cmd, msg->status, msg->param1, msg->param2); -+ ev->cb(ev, msg); -+ -+ bce_notify_submission_complete(sq); -+ ++cnt; -+ } -+ bce_vhci_event_queue_submit_pending(ev, cnt); -+ if (atomic_read(&sq->available_commands) == sq->el_count - 1) -+ complete(&ev->queue_empty_completion); -+} -+ -+void bce_vhci_event_queue_submit_pending(struct bce_vhci_event_queue *q, size_t count) -+{ -+ int idx; -+ struct bce_qe_submission *s; -+ while (count--) { -+ if (bce_reserve_submission(q->sq, NULL)) { -+ pr_err("bce-vhci: Failed to reserve an event queue submission\n"); -+ break; -+ } -+ idx = q->sq->tail; -+ s = bce_next_submission(q->sq); -+ bce_set_submission_single(s, -+ q->dma_addr + idx * sizeof(struct bce_vhci_message), sizeof(struct bce_vhci_message)); -+ } -+ bce_submit_to_device(q->sq); -+} -+ -+void bce_vhci_event_queue_pause(struct bce_vhci_event_queue *q) -+{ -+ unsigned long timeout; -+ reinit_completion(&q->queue_empty_completion); -+ if (bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, q->sq->qid)) -+ pr_warn("bce-vhci: 
failed to flush event queue\n"); -+ timeout = msecs_to_jiffies(5000); -+ while (atomic_read(&q->sq->available_commands) != q->sq->el_count - 1) { -+ timeout = wait_for_completion_timeout(&q->queue_empty_completion, timeout); -+ if (timeout == 0) { -+ pr_err("bce-vhci: waiting for queue to be flushed timed out\n"); -+ break; -+ } -+ } -+} -+ -+void bce_vhci_event_queue_resume(struct bce_vhci_event_queue *q) -+{ -+ if (atomic_read(&q->sq->available_commands) != q->sq->el_count - 1) { -+ pr_err("bce-vhci: resume of a queue with pending submissions\n"); -+ return; -+ } -+ bce_vhci_event_queue_submit_pending(q, VHCI_EVENT_PENDING_COUNT); -+} -+ -+void bce_vhci_command_queue_create(struct bce_vhci_command_queue *ret, struct bce_vhci_message_queue *mq) -+{ -+ ret->mq = mq; -+ ret->completion.result = NULL; -+ init_completion(&ret->completion.completion); -+ spin_lock_init(&ret->completion_lock); -+ mutex_init(&ret->mutex); -+} -+ -+void bce_vhci_command_queue_destroy(struct bce_vhci_command_queue *cq) -+{ -+ spin_lock(&cq->completion_lock); -+ if (cq->completion.result) { -+ memset(cq->completion.result, 0, sizeof(struct bce_vhci_message)); -+ cq->completion.result->status = BCE_VHCI_ABORT; -+ complete(&cq->completion.completion); -+ cq->completion.result = NULL; -+ } -+ spin_unlock(&cq->completion_lock); -+ mutex_lock(&cq->mutex); -+ mutex_unlock(&cq->mutex); -+ mutex_destroy(&cq->mutex); -+} -+ -+void bce_vhci_command_queue_deliver_completion(struct bce_vhci_command_queue *cq, struct bce_vhci_message *msg) -+{ -+ struct bce_vhci_command_queue_completion *c = &cq->completion; -+ -+ spin_lock(&cq->completion_lock); -+ if (c->result) { -+ *c->result = *msg; -+ complete(&c->completion); -+ c->result = NULL; -+ } -+ spin_unlock(&cq->completion_lock); -+} -+ -+static int __bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, struct bce_vhci_message *req, -+ struct bce_vhci_message *res, unsigned long timeout) -+{ -+ int status; -+ struct 
bce_vhci_command_queue_completion *c; -+ struct bce_vhci_message creq; -+ c = &cq->completion; -+ -+ if ((status = bce_reserve_submission(cq->mq->sq, &timeout))) -+ return status; -+ -+ spin_lock(&cq->completion_lock); -+ c->result = res; -+ reinit_completion(&c->completion); -+ spin_unlock(&cq->completion_lock); -+ -+ bce_vhci_message_queue_write(cq->mq, req); -+ -+ if (!wait_for_completion_timeout(&c->completion, timeout)) { -+ /* we ran out of time, send cancellation */ -+ pr_debug("bce-vhci: command timed out req=%x\n", req->cmd); -+ if ((status = bce_reserve_submission(cq->mq->sq, &timeout))) -+ return status; -+ -+ creq = *req; -+ creq.cmd |= 0x4000; -+ bce_vhci_message_queue_write(cq->mq, &creq); -+ -+ if (!wait_for_completion_timeout(&c->completion, 1000)) { -+ pr_err("bce-vhci: Possible desync, cmd cancel timed out\n"); -+ -+ spin_lock(&cq->completion_lock); -+ c->result = NULL; -+ spin_unlock(&cq->completion_lock); -+ return -ETIMEDOUT; -+ } -+ if ((res->cmd & ~0x8000) == creq.cmd) -+ return -ETIMEDOUT; -+ /* reply for the previous command most likely arrived */ -+ } -+ -+ if ((res->cmd & ~0x8000) != req->cmd) { -+ pr_err("bce-vhci: Possible desync, cmd reply mismatch req=%x, res=%x\n", req->cmd, res->cmd); -+ return -EIO; -+ } -+ if (res->status == BCE_VHCI_SUCCESS) -+ return 0; -+ return res->status; -+} -+ -+int bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, struct bce_vhci_message *req, -+ struct bce_vhci_message *res, unsigned long timeout) -+{ -+ int status; -+ mutex_lock(&cq->mutex); -+ status = __bce_vhci_command_queue_execute(cq, req, res, timeout); -+ mutex_unlock(&cq->mutex); -+ return status; -+} -diff --git a/drivers/staging/apple-bce/vhci/queue.h b/drivers/staging/apple-bce/vhci/queue.h -new file mode 100644 -index 000000000000..adb705b6ba1d ---- /dev/null -+++ b/drivers/staging/apple-bce/vhci/queue.h -@@ -0,0 +1,76 @@ -+#ifndef BCE_VHCI_QUEUE_H -+#define BCE_VHCI_QUEUE_H -+ -+#include -+#include "../queue.h" -+ -+#define 
VHCI_EVENT_QUEUE_EL_COUNT 256 -+#define VHCI_EVENT_PENDING_COUNT 32 -+ -+struct bce_vhci; -+struct bce_vhci_event_queue; -+ -+enum bce_vhci_message_status { -+ BCE_VHCI_SUCCESS = 1, -+ BCE_VHCI_ERROR = 2, -+ BCE_VHCI_USB_PIPE_STALL = 3, -+ BCE_VHCI_ABORT = 4, -+ BCE_VHCI_BAD_ARGUMENT = 5, -+ BCE_VHCI_OVERRUN = 6, -+ BCE_VHCI_INTERNAL_ERROR = 7, -+ BCE_VHCI_NO_POWER = 8, -+ BCE_VHCI_UNSUPPORTED = 9 -+}; -+struct bce_vhci_message { -+ u16 cmd; -+ u16 status; // bce_vhci_message_status -+ u32 param1; -+ u64 param2; -+}; -+ -+struct bce_vhci_message_queue { -+ struct bce_queue_cq *cq; -+ struct bce_queue_sq *sq; -+ struct bce_vhci_message *data; -+ dma_addr_t dma_addr; -+}; -+typedef void (*bce_vhci_event_queue_callback)(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg); -+struct bce_vhci_event_queue { -+ struct bce_vhci *vhci; -+ struct bce_queue_sq *sq; -+ struct bce_vhci_message *data; -+ dma_addr_t dma_addr; -+ bce_vhci_event_queue_callback cb; -+ struct completion queue_empty_completion; -+}; -+struct bce_vhci_command_queue_completion { -+ struct bce_vhci_message *result; -+ struct completion completion; -+}; -+struct bce_vhci_command_queue { -+ struct bce_vhci_message_queue *mq; -+ struct bce_vhci_command_queue_completion completion; -+ struct spinlock completion_lock; -+ struct mutex mutex; -+}; -+ -+int bce_vhci_message_queue_create(struct bce_vhci *vhci, struct bce_vhci_message_queue *ret, const char *name); -+void bce_vhci_message_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_message_queue *q); -+void bce_vhci_message_queue_write(struct bce_vhci_message_queue *q, struct bce_vhci_message *req); -+ -+int __bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name, -+ bce_sq_completion compl); -+int bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name, -+ bce_vhci_event_queue_callback cb); -+void bce_vhci_event_queue_destroy(struct bce_vhci *vhci, 
struct bce_vhci_event_queue *q); -+void bce_vhci_event_queue_submit_pending(struct bce_vhci_event_queue *q, size_t count); -+void bce_vhci_event_queue_pause(struct bce_vhci_event_queue *q); -+void bce_vhci_event_queue_resume(struct bce_vhci_event_queue *q); -+ -+void bce_vhci_command_queue_create(struct bce_vhci_command_queue *ret, struct bce_vhci_message_queue *mq); -+void bce_vhci_command_queue_destroy(struct bce_vhci_command_queue *cq); -+int bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, struct bce_vhci_message *req, -+ struct bce_vhci_message *res, unsigned long timeout); -+void bce_vhci_command_queue_deliver_completion(struct bce_vhci_command_queue *cq, struct bce_vhci_message *msg); -+ -+#endif //BCE_VHCI_QUEUE_H -diff --git a/drivers/staging/apple-bce/vhci/transfer.c b/drivers/staging/apple-bce/vhci/transfer.c -new file mode 100644 -index 000000000000..8226363d69c8 ---- /dev/null -+++ b/drivers/staging/apple-bce/vhci/transfer.c -@@ -0,0 +1,661 @@ -+#include "transfer.h" -+#include "../queue.h" -+#include "vhci.h" -+#include "../apple_bce.h" -+#include -+ -+static void bce_vhci_transfer_queue_completion(struct bce_queue_sq *sq); -+static void bce_vhci_transfer_queue_giveback(struct bce_vhci_transfer_queue *q); -+static void bce_vhci_transfer_queue_remove_pending(struct bce_vhci_transfer_queue *q); -+ -+static int bce_vhci_urb_init(struct bce_vhci_urb *vurb); -+static int bce_vhci_urb_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg); -+static int bce_vhci_urb_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c); -+ -+static void bce_vhci_transfer_queue_reset_w(struct work_struct *work); -+ -+void bce_vhci_create_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q, -+ struct usb_host_endpoint *endp, bce_vhci_device_t dev_addr, enum dma_data_direction dir) -+{ -+ char name[0x21]; -+ INIT_LIST_HEAD(&q->evq); -+ INIT_LIST_HEAD(&q->giveback_urb_list); -+ spin_lock_init(&q->urb_lock); 
-+ mutex_init(&q->pause_lock); -+ q->vhci = vhci; -+ q->endp = endp; -+ q->dev_addr = dev_addr; -+ q->endp_addr = (u8) (endp->desc.bEndpointAddress & 0x8F); -+ q->state = BCE_VHCI_ENDPOINT_ACTIVE; -+ q->active = true; -+ q->stalled = false; -+ q->max_active_requests = 1; -+ if (usb_endpoint_type(&endp->desc) == USB_ENDPOINT_XFER_BULK) -+ q->max_active_requests = BCE_VHCI_BULK_MAX_ACTIVE_URBS; -+ q->remaining_active_requests = q->max_active_requests; -+ q->cq = bce_create_cq(vhci->dev, 0x100); -+ INIT_WORK(&q->w_reset, bce_vhci_transfer_queue_reset_w); -+ q->sq_in = NULL; -+ if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) { -+ snprintf(name, sizeof(name), "VHC1-%i-%02x", dev_addr, 0x80 | usb_endpoint_num(&endp->desc)); -+ q->sq_in = bce_create_sq(vhci->dev, q->cq, name, 0x100, DMA_FROM_DEVICE, -+ bce_vhci_transfer_queue_completion, q); -+ } -+ q->sq_out = NULL; -+ if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) { -+ snprintf(name, sizeof(name), "VHC1-%i-%02x", dev_addr, usb_endpoint_num(&endp->desc)); -+ q->sq_out = bce_create_sq(vhci->dev, q->cq, name, 0x100, DMA_TO_DEVICE, -+ bce_vhci_transfer_queue_completion, q); -+ } -+} -+ -+void bce_vhci_destroy_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q) -+{ -+ bce_vhci_transfer_queue_giveback(q); -+ bce_vhci_transfer_queue_remove_pending(q); -+ if (q->sq_in) -+ bce_destroy_sq(vhci->dev, q->sq_in); -+ if (q->sq_out) -+ bce_destroy_sq(vhci->dev, q->sq_out); -+ bce_destroy_cq(vhci->dev, q->cq); -+} -+ -+static inline bool bce_vhci_transfer_queue_can_init_urb(struct bce_vhci_transfer_queue *q) -+{ -+ return q->remaining_active_requests > 0; -+} -+ -+static void bce_vhci_transfer_queue_defer_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg) -+{ -+ struct bce_vhci_list_message *lm; -+ lm = kmalloc(sizeof(struct bce_vhci_list_message), GFP_KERNEL); -+ INIT_LIST_HEAD(&lm->list); -+ lm->msg = *msg; -+ list_add_tail(&lm->list, &q->evq); -+} -+ -+static void 
bce_vhci_transfer_queue_giveback(struct bce_vhci_transfer_queue *q) -+{ -+ unsigned long flags; -+ struct urb *urb; -+ spin_lock_irqsave(&q->urb_lock, flags); -+ while (!list_empty(&q->giveback_urb_list)) { -+ urb = list_first_entry(&q->giveback_urb_list, struct urb, urb_list); -+ list_del(&urb->urb_list); -+ -+ spin_unlock_irqrestore(&q->urb_lock, flags); -+ usb_hcd_giveback_urb(q->vhci->hcd, urb, urb->status); -+ spin_lock_irqsave(&q->urb_lock, flags); -+ } -+ spin_unlock_irqrestore(&q->urb_lock, flags); -+} -+ -+static void bce_vhci_transfer_queue_init_pending_urbs(struct bce_vhci_transfer_queue *q); -+ -+static void bce_vhci_transfer_queue_deliver_pending(struct bce_vhci_transfer_queue *q) -+{ -+ struct urb *urb; -+ struct bce_vhci_list_message *lm; -+ -+ while (!list_empty(&q->endp->urb_list) && !list_empty(&q->evq)) { -+ urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list); -+ -+ lm = list_first_entry(&q->evq, struct bce_vhci_list_message, list); -+ if (bce_vhci_urb_update(urb->hcpriv, &lm->msg) == -EAGAIN) -+ break; -+ list_del(&lm->list); -+ kfree(lm); -+ } -+ -+ /* some of the URBs could have been completed, so initialize more URBs if possible */ -+ bce_vhci_transfer_queue_init_pending_urbs(q); -+} -+ -+static void bce_vhci_transfer_queue_remove_pending(struct bce_vhci_transfer_queue *q) -+{ -+ unsigned long flags; -+ struct bce_vhci_list_message *lm; -+ spin_lock_irqsave(&q->urb_lock, flags); -+ while (!list_empty(&q->evq)) { -+ lm = list_first_entry(&q->evq, struct bce_vhci_list_message, list); -+ list_del(&lm->list); -+ kfree(lm); -+ } -+ spin_unlock_irqrestore(&q->urb_lock, flags); -+} -+ -+void bce_vhci_transfer_queue_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg) -+{ -+ unsigned long flags; -+ struct bce_vhci_urb *turb; -+ struct urb *urb; -+ spin_lock_irqsave(&q->urb_lock, flags); -+ bce_vhci_transfer_queue_deliver_pending(q); -+ -+ if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST && -+ (!list_empty(&q->evq) || 
list_empty(&q->endp->urb_list))) { -+ bce_vhci_transfer_queue_defer_event(q, msg); -+ goto complete; -+ } -+ if (list_empty(&q->endp->urb_list)) { -+ pr_err("bce-vhci: [%02x] Unexpected transfer queue event\n", q->endp_addr); -+ goto complete; -+ } -+ urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list); -+ turb = urb->hcpriv; -+ if (bce_vhci_urb_update(turb, msg) == -EAGAIN) { -+ bce_vhci_transfer_queue_defer_event(q, msg); -+ } else { -+ bce_vhci_transfer_queue_init_pending_urbs(q); -+ } -+ -+complete: -+ spin_unlock_irqrestore(&q->urb_lock, flags); -+ bce_vhci_transfer_queue_giveback(q); -+} -+ -+static void bce_vhci_transfer_queue_completion(struct bce_queue_sq *sq) -+{ -+ unsigned long flags; -+ struct bce_sq_completion_data *c; -+ struct urb *urb; -+ struct bce_vhci_transfer_queue *q = sq->userdata; -+ spin_lock_irqsave(&q->urb_lock, flags); -+ while ((c = bce_next_completion(sq))) { -+ if (c->status == BCE_COMPLETION_ABORTED) { /* We flushed the queue */ -+ pr_debug("bce-vhci: [%02x] Got an abort completion\n", q->endp_addr); -+ bce_notify_submission_complete(sq); -+ continue; -+ } -+ if (list_empty(&q->endp->urb_list)) { -+ pr_err("bce-vhci: [%02x] Got a completion while no requests are pending\n", q->endp_addr); -+ continue; -+ } -+ pr_debug("bce-vhci: [%02x] Got a transfer queue completion\n", q->endp_addr); -+ urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list); -+ bce_vhci_urb_transfer_completion(urb->hcpriv, c); -+ bce_notify_submission_complete(sq); -+ } -+ bce_vhci_transfer_queue_deliver_pending(q); -+ spin_unlock_irqrestore(&q->urb_lock, flags); -+ bce_vhci_transfer_queue_giveback(q); -+} -+ -+int bce_vhci_transfer_queue_do_pause(struct bce_vhci_transfer_queue *q) -+{ -+ unsigned long flags; -+ int status; -+ u8 endp_addr = (u8) (q->endp->desc.bEndpointAddress & 0x8F); -+ spin_lock_irqsave(&q->urb_lock, flags); -+ q->active = false; -+ spin_unlock_irqrestore(&q->urb_lock, flags); -+ if (q->sq_out) { -+ pr_err("bce-vhci: Not 
implemented: wait for pending output requests\n"); -+ } -+ bce_vhci_transfer_queue_remove_pending(q); -+ if ((status = bce_vhci_cmd_endpoint_set_state( -+ &q->vhci->cq, q->dev_addr, endp_addr, BCE_VHCI_ENDPOINT_PAUSED, &q->state))) -+ return status; -+ if (q->state != BCE_VHCI_ENDPOINT_PAUSED) -+ return -EINVAL; -+ if (q->sq_in) -+ bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_in->qid); -+ if (q->sq_out) -+ bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_out->qid); -+ return 0; -+} -+ -+static void bce_vhci_urb_resume(struct bce_vhci_urb *urb); -+ -+int bce_vhci_transfer_queue_do_resume(struct bce_vhci_transfer_queue *q) -+{ -+ unsigned long flags; -+ int status; -+ struct urb *urb, *urbt; -+ struct bce_vhci_urb *vurb; -+ u8 endp_addr = (u8) (q->endp->desc.bEndpointAddress & 0x8F); -+ if ((status = bce_vhci_cmd_endpoint_set_state( -+ &q->vhci->cq, q->dev_addr, endp_addr, BCE_VHCI_ENDPOINT_ACTIVE, &q->state))) -+ return status; -+ if (q->state != BCE_VHCI_ENDPOINT_ACTIVE) -+ return -EINVAL; -+ spin_lock_irqsave(&q->urb_lock, flags); -+ q->active = true; -+ list_for_each_entry_safe(urb, urbt, &q->endp->urb_list, urb_list) { -+ vurb = urb->hcpriv; -+ if (vurb->state == BCE_VHCI_URB_INIT_PENDING) { -+ if (!bce_vhci_transfer_queue_can_init_urb(q)) -+ break; -+ bce_vhci_urb_init(vurb); -+ } else { -+ bce_vhci_urb_resume(vurb); -+ } -+ } -+ bce_vhci_transfer_queue_deliver_pending(q); -+ spin_unlock_irqrestore(&q->urb_lock, flags); -+ return 0; -+} -+ -+int bce_vhci_transfer_queue_pause(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src) -+{ -+ int ret = 0; -+ mutex_lock(&q->pause_lock); -+ if ((q->paused_by & src) != src) { -+ if (!q->paused_by) -+ ret = bce_vhci_transfer_queue_do_pause(q); -+ if (!ret) -+ q->paused_by |= src; -+ } -+ mutex_unlock(&q->pause_lock); -+ return ret; -+} -+ -+int bce_vhci_transfer_queue_resume(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src) -+{ -+ int ret = 0; -+ 
mutex_lock(&q->pause_lock); -+ if (q->paused_by & src) { -+ if (!(q->paused_by & ~src)) -+ ret = bce_vhci_transfer_queue_do_resume(q); -+ if (!ret) -+ q->paused_by &= ~src; -+ } -+ mutex_unlock(&q->pause_lock); -+ return ret; -+} -+ -+static void bce_vhci_transfer_queue_reset_w(struct work_struct *work) -+{ -+ unsigned long flags; -+ struct bce_vhci_transfer_queue *q = container_of(work, struct bce_vhci_transfer_queue, w_reset); -+ -+ mutex_lock(&q->pause_lock); -+ spin_lock_irqsave(&q->urb_lock, flags); -+ if (!q->stalled) { -+ spin_unlock_irqrestore(&q->urb_lock, flags); -+ mutex_unlock(&q->pause_lock); -+ return; -+ } -+ q->active = false; -+ spin_unlock_irqrestore(&q->urb_lock, flags); -+ q->paused_by |= BCE_VHCI_PAUSE_INTERNAL_WQ; -+ bce_vhci_transfer_queue_remove_pending(q); -+ if (q->sq_in) -+ bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_in->qid); -+ if (q->sq_out) -+ bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_out->qid); -+ bce_vhci_cmd_endpoint_reset(&q->vhci->cq, q->dev_addr, (u8) (q->endp->desc.bEndpointAddress & 0x8F)); -+ spin_lock_irqsave(&q->urb_lock, flags); -+ q->stalled = false; -+ spin_unlock_irqrestore(&q->urb_lock, flags); -+ mutex_unlock(&q->pause_lock); -+ bce_vhci_transfer_queue_resume(q, BCE_VHCI_PAUSE_INTERNAL_WQ); -+} -+ -+void bce_vhci_transfer_queue_request_reset(struct bce_vhci_transfer_queue *q) -+{ -+ queue_work(q->vhci->tq_state_wq, &q->w_reset); -+} -+ -+static void bce_vhci_transfer_queue_init_pending_urbs(struct bce_vhci_transfer_queue *q) -+{ -+ struct urb *urb, *urbt; -+ struct bce_vhci_urb *vurb; -+ list_for_each_entry_safe(urb, urbt, &q->endp->urb_list, urb_list) { -+ vurb = urb->hcpriv; -+ if (!bce_vhci_transfer_queue_can_init_urb(q)) -+ break; -+ if (vurb->state == BCE_VHCI_URB_INIT_PENDING) -+ bce_vhci_urb_init(vurb); -+ } -+} -+ -+ -+ -+static int bce_vhci_urb_data_start(struct bce_vhci_urb *urb, unsigned long *timeout); -+ -+int bce_vhci_urb_create(struct bce_vhci_transfer_queue *q, 
struct urb *urb) -+{ -+ unsigned long flags; -+ int status = 0; -+ struct bce_vhci_urb *vurb; -+ vurb = kzalloc(sizeof(struct bce_vhci_urb), GFP_KERNEL); -+ urb->hcpriv = vurb; -+ -+ vurb->q = q; -+ vurb->urb = urb; -+ vurb->dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; -+ vurb->is_control = (usb_endpoint_num(&urb->ep->desc) == 0); -+ -+ spin_lock_irqsave(&q->urb_lock, flags); -+ status = usb_hcd_link_urb_to_ep(q->vhci->hcd, urb); -+ if (status) { -+ spin_unlock_irqrestore(&q->urb_lock, flags); -+ urb->hcpriv = NULL; -+ kfree(vurb); -+ return status; -+ } -+ -+ if (q->active) { -+ if (bce_vhci_transfer_queue_can_init_urb(vurb->q)) -+ status = bce_vhci_urb_init(vurb); -+ else -+ vurb->state = BCE_VHCI_URB_INIT_PENDING; -+ } else { -+ if (q->stalled) -+ bce_vhci_transfer_queue_request_reset(q); -+ vurb->state = BCE_VHCI_URB_INIT_PENDING; -+ } -+ if (status) { -+ usb_hcd_unlink_urb_from_ep(q->vhci->hcd, urb); -+ urb->hcpriv = NULL; -+ kfree(vurb); -+ } else { -+ bce_vhci_transfer_queue_deliver_pending(q); -+ } -+ spin_unlock_irqrestore(&q->urb_lock, flags); -+ pr_debug("bce-vhci: [%02x] URB enqueued (dir = %s, size = %i)\n", q->endp_addr, -+ usb_urb_dir_in(urb) ? 
"IN" : "OUT", urb->transfer_buffer_length); -+ return status; -+} -+ -+static int bce_vhci_urb_init(struct bce_vhci_urb *vurb) -+{ -+ int status = 0; -+ -+ if (vurb->q->remaining_active_requests == 0) { -+ pr_err("bce-vhci: cannot init request (remaining_active_requests = 0)\n"); -+ return -EINVAL; -+ } -+ -+ if (vurb->is_control) { -+ vurb->state = BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST; -+ } else { -+ status = bce_vhci_urb_data_start(vurb, NULL); -+ } -+ -+ if (!status) { -+ --vurb->q->remaining_active_requests; -+ } -+ return status; -+} -+ -+static void bce_vhci_urb_complete(struct bce_vhci_urb *urb, int status) -+{ -+ struct bce_vhci_transfer_queue *q = urb->q; -+ struct bce_vhci *vhci = q->vhci; -+ struct urb *real_urb = urb->urb; -+ pr_debug("bce-vhci: [%02x] URB complete %i\n", q->endp_addr, status); -+ usb_hcd_unlink_urb_from_ep(vhci->hcd, real_urb); -+ real_urb->hcpriv = NULL; -+ real_urb->status = status; -+ if (urb->state != BCE_VHCI_URB_INIT_PENDING) -+ ++urb->q->remaining_active_requests; -+ kfree(urb); -+ list_add_tail(&real_urb->urb_list, &q->giveback_urb_list); -+} -+ -+int bce_vhci_urb_request_cancel(struct bce_vhci_transfer_queue *q, struct urb *urb, int status) -+{ -+ struct bce_vhci_urb *vurb; -+ unsigned long flags; -+ int ret; -+ -+ spin_lock_irqsave(&q->urb_lock, flags); -+ if ((ret = usb_hcd_check_unlink_urb(q->vhci->hcd, urb, status))) { -+ spin_unlock_irqrestore(&q->urb_lock, flags); -+ return ret; -+ } -+ -+ vurb = urb->hcpriv; -+ /* If the URB wasn't posted to the device yet, we can still remove it on the host without pausing the queue. 
*/ -+ if (vurb->state != BCE_VHCI_URB_INIT_PENDING) { -+ pr_debug("bce-vhci: [%02x] Cancelling URB\n", q->endp_addr); -+ -+ spin_unlock_irqrestore(&q->urb_lock, flags); -+ bce_vhci_transfer_queue_pause(q, BCE_VHCI_PAUSE_INTERNAL_WQ); -+ spin_lock_irqsave(&q->urb_lock, flags); -+ -+ ++q->remaining_active_requests; -+ } -+ -+ usb_hcd_unlink_urb_from_ep(q->vhci->hcd, urb); -+ -+ spin_unlock_irqrestore(&q->urb_lock, flags); -+ -+ usb_hcd_giveback_urb(q->vhci->hcd, urb, status); -+ -+ if (vurb->state != BCE_VHCI_URB_INIT_PENDING) -+ bce_vhci_transfer_queue_resume(q, BCE_VHCI_PAUSE_INTERNAL_WQ); -+ -+ kfree(vurb); -+ -+ return 0; -+} -+ -+static int bce_vhci_urb_data_transfer_in(struct bce_vhci_urb *urb, unsigned long *timeout) -+{ -+ struct bce_vhci_message msg; -+ struct bce_qe_submission *s; -+ u32 tr_len; -+ int reservation1, reservation2 = -EFAULT; -+ -+ pr_debug("bce-vhci: [%02x] DMA from device %llx %x\n", urb->q->endp_addr, -+ (u64) urb->urb->transfer_dma, urb->urb->transfer_buffer_length); -+ -+ /* Reserve both a message and a submission, so we don't run into issues later. 
*/ -+ reservation1 = bce_reserve_submission(urb->q->vhci->msg_asynchronous.sq, timeout); -+ if (!reservation1) -+ reservation2 = bce_reserve_submission(urb->q->sq_in, timeout); -+ if (reservation1 || reservation2) { -+ pr_err("bce-vhci: Failed to reserve a submission for URB data transfer\n"); -+ if (!reservation1) -+ bce_cancel_submission_reservation(urb->q->vhci->msg_asynchronous.sq); -+ return -ENOMEM; -+ } -+ -+ urb->send_offset = urb->receive_offset; -+ -+ tr_len = urb->urb->transfer_buffer_length - urb->send_offset; -+ -+ spin_lock(&urb->q->vhci->msg_asynchronous_lock); -+ msg.cmd = BCE_VHCI_CMD_TRANSFER_REQUEST; -+ msg.status = 0; -+ msg.param1 = ((urb->urb->ep->desc.bEndpointAddress & 0x8Fu) << 8) | urb->q->dev_addr; -+ msg.param2 = tr_len; -+ bce_vhci_message_queue_write(&urb->q->vhci->msg_asynchronous, &msg); -+ spin_unlock(&urb->q->vhci->msg_asynchronous_lock); -+ -+ s = bce_next_submission(urb->q->sq_in); -+ bce_set_submission_single(s, urb->urb->transfer_dma + urb->send_offset, tr_len); -+ bce_submit_to_device(urb->q->sq_in); -+ -+ urb->state = BCE_VHCI_URB_WAITING_FOR_COMPLETION; -+ return 0; -+} -+ -+static int bce_vhci_urb_data_start(struct bce_vhci_urb *urb, unsigned long *timeout) -+{ -+ if (urb->dir == DMA_TO_DEVICE) { -+ if (urb->urb->transfer_buffer_length > 0) -+ urb->state = BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST; -+ else -+ urb->state = BCE_VHCI_URB_DATA_TRANSFER_COMPLETE; -+ return 0; -+ } else { -+ return bce_vhci_urb_data_transfer_in(urb, timeout); -+ } -+} -+ -+static int bce_vhci_urb_send_out_data(struct bce_vhci_urb *urb, dma_addr_t addr, size_t size) -+{ -+ struct bce_qe_submission *s; -+ unsigned long timeout = 0; -+ if (bce_reserve_submission(urb->q->sq_out, &timeout)) { -+ pr_err("bce-vhci: Failed to reserve a submission for URB data transfer\n"); -+ return -EPIPE; -+ } -+ -+ pr_debug("bce-vhci: [%02x] DMA to device %llx %lx\n", urb->q->endp_addr, (u64) addr, size); -+ -+ s = bce_next_submission(urb->q->sq_out); -+ 
bce_set_submission_single(s, addr, size); -+ bce_submit_to_device(urb->q->sq_out); -+ return 0; -+} -+ -+static int bce_vhci_urb_data_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg) -+{ -+ u32 tr_len; -+ int status; -+ if (urb->state == BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST) { -+ if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST) { -+ tr_len = min(urb->urb->transfer_buffer_length - urb->send_offset, (u32) msg->param2); -+ if ((status = bce_vhci_urb_send_out_data(urb, urb->urb->transfer_dma + urb->send_offset, tr_len))) -+ return status; -+ urb->send_offset += tr_len; -+ urb->state = BCE_VHCI_URB_WAITING_FOR_COMPLETION; -+ return 0; -+ } -+ } -+ -+ /* 0x1000 in out queues aren't really unexpected */ -+ if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST && urb->q->sq_out != NULL) -+ return -EAGAIN; -+ pr_err("bce-vhci: [%02x] %s URB unexpected message (state = %x, msg: %x %x %x %llx)\n", -+ urb->q->endp_addr, (urb->is_control ? "Control (data update)" : "Data"), urb->state, -+ msg->cmd, msg->status, msg->param1, msg->param2); -+ return -EAGAIN; -+} -+ -+static int bce_vhci_urb_data_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c) -+{ -+ if (urb->state == BCE_VHCI_URB_WAITING_FOR_COMPLETION) { -+ urb->receive_offset += c->data_size; -+ if (urb->dir == DMA_FROM_DEVICE || urb->receive_offset >= urb->urb->transfer_buffer_length) { -+ urb->urb->actual_length = (u32) urb->receive_offset; -+ urb->state = BCE_VHCI_URB_DATA_TRANSFER_COMPLETE; -+ if (!urb->is_control) { -+ bce_vhci_urb_complete(urb, 0); -+ return -ENOENT; -+ } -+ } -+ } else { -+ pr_err("bce-vhci: [%02x] Data URB unexpected completion\n", urb->q->endp_addr); -+ } -+ return 0; -+} -+ -+ -+static int bce_vhci_urb_control_check_status(struct bce_vhci_urb *urb) -+{ -+ struct bce_vhci_transfer_queue *q = urb->q; -+ if (urb->received_status == 0) -+ return 0; -+ if (urb->state == BCE_VHCI_URB_DATA_TRANSFER_COMPLETE || -+ (urb->received_status != BCE_VHCI_SUCCESS && urb->state 
!= BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST && -+ urb->state != BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION)) { -+ urb->state = BCE_VHCI_URB_CONTROL_COMPLETE; -+ if (urb->received_status != BCE_VHCI_SUCCESS) { -+ pr_err("bce-vhci: [%02x] URB failed: %x\n", urb->q->endp_addr, urb->received_status); -+ urb->q->active = false; -+ urb->q->stalled = true; -+ bce_vhci_urb_complete(urb, -EPIPE); -+ if (!list_empty(&q->endp->urb_list)) -+ bce_vhci_transfer_queue_request_reset(q); -+ return -ENOENT; -+ } -+ bce_vhci_urb_complete(urb, 0); -+ return -ENOENT; -+ } -+ return 0; -+} -+ -+static int bce_vhci_urb_control_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg) -+{ -+ int status; -+ if (msg->cmd == BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS) { -+ urb->received_status = msg->status; -+ return bce_vhci_urb_control_check_status(urb); -+ } -+ -+ if (urb->state == BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST) { -+ if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST) { -+ if (bce_vhci_urb_send_out_data(urb, urb->urb->setup_dma, sizeof(struct usb_ctrlrequest))) { -+ pr_err("bce-vhci: [%02x] Failed to start URB setup transfer\n", urb->q->endp_addr); -+ return 0; /* TODO: fail the URB? 
*/ -+ } -+ urb->state = BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION; -+ pr_debug("bce-vhci: [%02x] Sent setup %llx\n", urb->q->endp_addr, urb->urb->setup_dma); -+ return 0; -+ } -+ } else if (urb->state == BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST || -+ urb->state == BCE_VHCI_URB_WAITING_FOR_COMPLETION) { -+ if ((status = bce_vhci_urb_data_update(urb, msg))) -+ return status; -+ return bce_vhci_urb_control_check_status(urb); -+ } -+ -+ /* 0x1000 in out queues aren't really unexpected */ -+ if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST && urb->q->sq_out != NULL) -+ return -EAGAIN; -+ pr_err("bce-vhci: [%02x] Control URB unexpected message (state = %x, msg: %x %x %x %llx)\n", urb->q->endp_addr, -+ urb->state, msg->cmd, msg->status, msg->param1, msg->param2); -+ return -EAGAIN; -+} -+ -+static int bce_vhci_urb_control_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c) -+{ -+ int status; -+ unsigned long timeout; -+ -+ if (urb->state == BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION) { -+ if (c->data_size != sizeof(struct usb_ctrlrequest)) -+ pr_err("bce-vhci: [%02x] transfer complete data size mistmatch for usb_ctrlrequest (%llx instead of %lx)\n", -+ urb->q->endp_addr, c->data_size, sizeof(struct usb_ctrlrequest)); -+ -+ timeout = 1000; -+ status = bce_vhci_urb_data_start(urb, &timeout); -+ if (status) { -+ bce_vhci_urb_complete(urb, status); -+ return -ENOENT; -+ } -+ return 0; -+ } else if (urb->state == BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST || -+ urb->state == BCE_VHCI_URB_WAITING_FOR_COMPLETION) { -+ if ((status = bce_vhci_urb_data_transfer_completion(urb, c))) -+ return status; -+ return bce_vhci_urb_control_check_status(urb); -+ } else { -+ pr_err("bce-vhci: [%02x] Control URB unexpected completion (state = %x)\n", urb->q->endp_addr, urb->state); -+ } -+ return 0; -+} -+ -+static int bce_vhci_urb_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg) -+{ -+ if (urb->state == BCE_VHCI_URB_INIT_PENDING) -+ return 
-EAGAIN; -+ if (urb->is_control) -+ return bce_vhci_urb_control_update(urb, msg); -+ else -+ return bce_vhci_urb_data_update(urb, msg); -+} -+ -+static int bce_vhci_urb_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c) -+{ -+ if (urb->is_control) -+ return bce_vhci_urb_control_transfer_completion(urb, c); -+ else -+ return bce_vhci_urb_data_transfer_completion(urb, c); -+} -+ -+static void bce_vhci_urb_resume(struct bce_vhci_urb *urb) -+{ -+ int status = 0; -+ if (urb->state == BCE_VHCI_URB_WAITING_FOR_COMPLETION) { -+ status = bce_vhci_urb_data_transfer_in(urb, NULL); -+ } -+ if (status) -+ bce_vhci_urb_complete(urb, status); -+} -diff --git a/drivers/staging/apple-bce/vhci/transfer.h b/drivers/staging/apple-bce/vhci/transfer.h -new file mode 100644 -index 000000000000..89ecad6bcf8f ---- /dev/null -+++ b/drivers/staging/apple-bce/vhci/transfer.h -@@ -0,0 +1,73 @@ -+#ifndef BCEDRIVER_TRANSFER_H -+#define BCEDRIVER_TRANSFER_H -+ -+#include -+#include "queue.h" -+#include "command.h" -+#include "../queue.h" -+ -+struct bce_vhci_list_message { -+ struct list_head list; -+ struct bce_vhci_message msg; -+}; -+enum bce_vhci_pause_source { -+ BCE_VHCI_PAUSE_INTERNAL_WQ = 1, -+ BCE_VHCI_PAUSE_FIRMWARE = 2, -+ BCE_VHCI_PAUSE_SUSPEND = 4, -+ BCE_VHCI_PAUSE_SHUTDOWN = 8 -+}; -+struct bce_vhci_transfer_queue { -+ struct bce_vhci *vhci; -+ struct usb_host_endpoint *endp; -+ enum bce_vhci_endpoint_state state; -+ u32 max_active_requests, remaining_active_requests; -+ bool active, stalled; -+ u32 paused_by; -+ bce_vhci_device_t dev_addr; -+ u8 endp_addr; -+ struct bce_queue_cq *cq; -+ struct bce_queue_sq *sq_in; -+ struct bce_queue_sq *sq_out; -+ struct list_head evq; -+ struct spinlock urb_lock; -+ struct mutex pause_lock; -+ struct list_head giveback_urb_list; -+ -+ struct work_struct w_reset; -+}; -+enum bce_vhci_urb_state { -+ BCE_VHCI_URB_INIT_PENDING, -+ -+ BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST, -+ BCE_VHCI_URB_WAITING_FOR_COMPLETION, -+ 
BCE_VHCI_URB_DATA_TRANSFER_COMPLETE, -+ -+ BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST, -+ BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION, -+ BCE_VHCI_URB_CONTROL_COMPLETE -+}; -+struct bce_vhci_urb { -+ struct urb *urb; -+ struct bce_vhci_transfer_queue *q; -+ enum dma_data_direction dir; -+ bool is_control; -+ enum bce_vhci_urb_state state; -+ int received_status; -+ u32 send_offset; -+ u32 receive_offset; -+}; -+ -+void bce_vhci_create_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q, -+ struct usb_host_endpoint *endp, bce_vhci_device_t dev_addr, enum dma_data_direction dir); -+void bce_vhci_destroy_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q); -+void bce_vhci_transfer_queue_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg); -+int bce_vhci_transfer_queue_do_pause(struct bce_vhci_transfer_queue *q); -+int bce_vhci_transfer_queue_do_resume(struct bce_vhci_transfer_queue *q); -+int bce_vhci_transfer_queue_pause(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src); -+int bce_vhci_transfer_queue_resume(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src); -+void bce_vhci_transfer_queue_request_reset(struct bce_vhci_transfer_queue *q); -+ -+int bce_vhci_urb_create(struct bce_vhci_transfer_queue *q, struct urb *urb); -+int bce_vhci_urb_request_cancel(struct bce_vhci_transfer_queue *q, struct urb *urb, int status); -+ -+#endif //BCEDRIVER_TRANSFER_H -diff --git a/drivers/staging/apple-bce/vhci/vhci.c b/drivers/staging/apple-bce/vhci/vhci.c -new file mode 100644 -index 000000000000..eb26f55000d8 ---- /dev/null -+++ b/drivers/staging/apple-bce/vhci/vhci.c -@@ -0,0 +1,759 @@ -+#include "vhci.h" -+#include "../apple_bce.h" -+#include "command.h" -+#include -+#include -+#include -+#include -+ -+static dev_t bce_vhci_chrdev; -+static struct class *bce_vhci_class; -+static const struct hc_driver bce_vhci_driver; -+static u16 bce_vhci_port_mask = U16_MAX; -+ -+static int 
bce_vhci_create_event_queues(struct bce_vhci *vhci); -+static void bce_vhci_destroy_event_queues(struct bce_vhci *vhci); -+static int bce_vhci_create_message_queues(struct bce_vhci *vhci); -+static void bce_vhci_destroy_message_queues(struct bce_vhci *vhci); -+static void bce_vhci_handle_firmware_events_w(struct work_struct *ws); -+static void bce_vhci_firmware_event_completion(struct bce_queue_sq *sq); -+ -+int bce_vhci_create(struct apple_bce_device *dev, struct bce_vhci *vhci) -+{ -+ int status; -+ -+ spin_lock_init(&vhci->hcd_spinlock); -+ -+ vhci->dev = dev; -+ -+ vhci->vdevt = bce_vhci_chrdev; -+ vhci->vdev = device_create(bce_vhci_class, dev->dev, vhci->vdevt, NULL, "bce-vhci"); -+ if (IS_ERR_OR_NULL(vhci->vdev)) { -+ status = PTR_ERR(vhci->vdev); -+ goto fail_dev; -+ } -+ -+ if ((status = bce_vhci_create_message_queues(vhci))) -+ goto fail_mq; -+ if ((status = bce_vhci_create_event_queues(vhci))) -+ goto fail_eq; -+ -+ vhci->tq_state_wq = alloc_ordered_workqueue("bce-vhci-tq-state", 0); -+ INIT_WORK(&vhci->w_fw_events, bce_vhci_handle_firmware_events_w); -+ -+ vhci->hcd = usb_create_hcd(&bce_vhci_driver, vhci->vdev, "bce-vhci"); -+ if (!vhci->hcd) { -+ status = -ENOMEM; -+ goto fail_hcd; -+ } -+ vhci->hcd->self.sysdev = &dev->pci->dev; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(5,4,0) -+ vhci->hcd->self.uses_dma = 1; -+#endif -+ *((struct bce_vhci **) vhci->hcd->hcd_priv) = vhci; -+ vhci->hcd->speed = HCD_USB2; -+ -+ if ((status = usb_add_hcd(vhci->hcd, 0, 0))) -+ goto fail_hcd; -+ -+ return 0; -+ -+fail_hcd: -+ bce_vhci_destroy_event_queues(vhci); -+fail_eq: -+ bce_vhci_destroy_message_queues(vhci); -+fail_mq: -+ device_destroy(bce_vhci_class, vhci->vdevt); -+fail_dev: -+ if (!status) -+ status = -EINVAL; -+ return status; -+} -+ -+void bce_vhci_destroy(struct bce_vhci *vhci) -+{ -+ usb_remove_hcd(vhci->hcd); -+ bce_vhci_destroy_event_queues(vhci); -+ bce_vhci_destroy_message_queues(vhci); -+ device_destroy(bce_vhci_class, vhci->vdevt); -+} -+ -+struct 
bce_vhci *bce_vhci_from_hcd(struct usb_hcd *hcd) -+{ -+ return *((struct bce_vhci **) hcd->hcd_priv); -+} -+ -+int bce_vhci_start(struct usb_hcd *hcd) -+{ -+ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); -+ int status; -+ u16 port_mask = 0; -+ bce_vhci_port_t port_no = 0; -+ if ((status = bce_vhci_cmd_controller_enable(&vhci->cq, 1, &port_mask))) -+ return status; -+ vhci->port_mask = port_mask; -+ vhci->port_power_mask = 0; -+ if ((status = bce_vhci_cmd_controller_start(&vhci->cq))) -+ return status; -+ port_mask = vhci->port_mask; -+ while (port_mask) { -+ port_no += 1; -+ port_mask >>= 1; -+ } -+ vhci->port_count = port_no; -+ return 0; -+} -+ -+void bce_vhci_stop(struct usb_hcd *hcd) -+{ -+ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); -+ bce_vhci_cmd_controller_disable(&vhci->cq); -+} -+ -+static int bce_vhci_hub_status_data(struct usb_hcd *hcd, char *buf) -+{ -+ return 0; -+} -+ -+static int bce_vhci_reset_device(struct bce_vhci *vhci, int index, u16 timeout); -+ -+static int bce_vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, char *buf, u16 wLength) -+{ -+ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); -+ int status; -+ struct usb_hub_descriptor *hd; -+ struct usb_hub_status *hs; -+ struct usb_port_status *ps; -+ u32 port_status; -+ // pr_info("bce-vhci: bce_vhci_hub_control %x %i %i [bufl=%i]\n", typeReq, wValue, wIndex, wLength); -+ if (typeReq == GetHubDescriptor && wLength >= sizeof(struct usb_hub_descriptor)) { -+ hd = (struct usb_hub_descriptor *) buf; -+ memset(hd, 0, sizeof(*hd)); -+ hd->bDescLength = sizeof(struct usb_hub_descriptor); -+ hd->bDescriptorType = USB_DT_HUB; -+ hd->bNbrPorts = (u8) vhci->port_count; -+ hd->wHubCharacteristics = HUB_CHAR_INDV_PORT_LPSM | HUB_CHAR_INDV_PORT_OCPM; -+ hd->bPwrOn2PwrGood = 0; -+ hd->bHubContrCurrent = 0; -+ return 0; -+ } else if (typeReq == GetHubStatus && wLength >= sizeof(struct usb_hub_status)) { -+ hs = (struct usb_hub_status *) buf; -+ memset(hs, 0, sizeof(*hs)); -+ 
hs->wHubStatus = 0; -+ hs->wHubChange = 0; -+ return 0; -+ } else if (typeReq == GetPortStatus && wLength >= 4 /* usb 2.0 */) { -+ ps = (struct usb_port_status *) buf; -+ ps->wPortStatus = 0; -+ ps->wPortChange = 0; -+ -+ if (vhci->port_power_mask & BIT(wIndex)) -+ ps->wPortStatus |= USB_PORT_STAT_POWER; -+ -+ if (!(bce_vhci_port_mask & BIT(wIndex))) -+ return 0; -+ -+ if ((status = bce_vhci_cmd_port_status(&vhci->cq, (u8) wIndex, 0, &port_status))) -+ return status; -+ -+ if (port_status & 16) -+ ps->wPortStatus |= USB_PORT_STAT_ENABLE | USB_PORT_STAT_HIGH_SPEED; -+ if (port_status & 4) -+ ps->wPortStatus |= USB_PORT_STAT_CONNECTION; -+ if (port_status & 2) -+ ps->wPortStatus |= USB_PORT_STAT_OVERCURRENT; -+ if (port_status & 8) -+ ps->wPortStatus |= USB_PORT_STAT_RESET; -+ if (port_status & 0x60) -+ ps->wPortStatus |= USB_PORT_STAT_SUSPEND; -+ -+ if (port_status & 0x40000) -+ ps->wPortChange |= USB_PORT_STAT_C_CONNECTION; -+ -+ pr_debug("bce-vhci: Translated status %x to %x:%x\n", port_status, ps->wPortStatus, ps->wPortChange); -+ return 0; -+ } else if (typeReq == SetPortFeature) { -+ if (wValue == USB_PORT_FEAT_POWER) { -+ status = bce_vhci_cmd_port_power_on(&vhci->cq, (u8) wIndex); -+ /* As far as I am aware, power status is not part of the port status so store it separately */ -+ if (!status) -+ vhci->port_power_mask |= BIT(wIndex); -+ return status; -+ } -+ if (wValue == USB_PORT_FEAT_RESET) { -+ return bce_vhci_reset_device(vhci, wIndex, wValue); -+ } -+ if (wValue == USB_PORT_FEAT_SUSPEND) { -+ /* TODO: Am I supposed to also suspend the endpoints? 
*/ -+ pr_debug("bce-vhci: Suspending port %i\n", wIndex); -+ return bce_vhci_cmd_port_suspend(&vhci->cq, (u8) wIndex); -+ } -+ } else if (typeReq == ClearPortFeature) { -+ if (wValue == USB_PORT_FEAT_ENABLE) -+ return bce_vhci_cmd_port_disable(&vhci->cq, (u8) wIndex); -+ if (wValue == USB_PORT_FEAT_POWER) { -+ status = bce_vhci_cmd_port_power_off(&vhci->cq, (u8) wIndex); -+ if (!status) -+ vhci->port_power_mask &= ~BIT(wIndex); -+ return status; -+ } -+ if (wValue == USB_PORT_FEAT_C_CONNECTION) -+ return bce_vhci_cmd_port_status(&vhci->cq, (u8) wIndex, 0x40000, &port_status); -+ if (wValue == USB_PORT_FEAT_C_RESET) { /* I don't think I can transfer it in any way */ -+ return 0; -+ } -+ if (wValue == USB_PORT_FEAT_SUSPEND) { -+ pr_debug("bce-vhci: Resuming port %i\n", wIndex); -+ return bce_vhci_cmd_port_resume(&vhci->cq, (u8) wIndex); -+ } -+ } -+ pr_err("bce-vhci: bce_vhci_hub_control unhandled request: %x %i %i [bufl=%i]\n", typeReq, wValue, wIndex, wLength); -+ dump_stack(); -+ return -EIO; -+} -+ -+static int bce_vhci_enable_device(struct usb_hcd *hcd, struct usb_device *udev) -+{ -+ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); -+ struct bce_vhci_device *vdev; -+ bce_vhci_device_t devid; -+ pr_info("bce_vhci_enable_device\n"); -+ -+ if (vhci->port_to_device[udev->portnum]) -+ return 0; -+ -+ /* We need to early address the device */ -+ if (bce_vhci_cmd_device_create(&vhci->cq, udev->portnum, &devid)) -+ return -EIO; -+ -+ pr_info("bce_vhci_cmd_device_create %i -> %i\n", udev->portnum, devid); -+ -+ vdev = kzalloc(sizeof(struct bce_vhci_device), GFP_KERNEL); -+ vhci->port_to_device[udev->portnum] = devid; -+ vhci->devices[devid] = vdev; -+ -+ bce_vhci_create_transfer_queue(vhci, &vdev->tq[0], &udev->ep0, devid, DMA_BIDIRECTIONAL); -+ udev->ep0.hcpriv = &vdev->tq[0]; -+ vdev->tq_mask |= BIT(0); -+ -+ bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &udev->ep0.desc); -+ return 0; -+} -+ -+static int bce_vhci_address_device(struct usb_hcd *hcd, struct usb_device 
*udev, unsigned int timeout_ms) //TODO: follow timeout -+{ -+ /* This is the same as enable_device, but instead in the old scheme */ -+ return bce_vhci_enable_device(hcd, udev); -+} -+ -+static void bce_vhci_free_device(struct usb_hcd *hcd, struct usb_device *udev) -+{ -+ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); -+ int i; -+ bce_vhci_device_t devid; -+ struct bce_vhci_device *dev; -+ pr_info("bce_vhci_free_device %i\n", udev->portnum); -+ if (!vhci->port_to_device[udev->portnum]) -+ return; -+ devid = vhci->port_to_device[udev->portnum]; -+ dev = vhci->devices[devid]; -+ for (i = 0; i < 32; i++) { -+ if (dev->tq_mask & BIT(i)) { -+ bce_vhci_transfer_queue_pause(&dev->tq[i], BCE_VHCI_PAUSE_SHUTDOWN); -+ bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) i); -+ bce_vhci_destroy_transfer_queue(vhci, &dev->tq[i]); -+ } -+ } -+ vhci->devices[devid] = NULL; -+ vhci->port_to_device[udev->portnum] = 0; -+ bce_vhci_cmd_device_destroy(&vhci->cq, devid); -+ kfree(dev); -+} -+ -+static int bce_vhci_reset_device(struct bce_vhci *vhci, int index, u16 timeout) -+{ -+ struct bce_vhci_device *dev = NULL; -+ bce_vhci_device_t devid; -+ int i; -+ int status; -+ enum dma_data_direction dir; -+ pr_info("bce_vhci_reset_device %i\n", index); -+ -+ devid = vhci->port_to_device[index]; -+ if (devid) { -+ dev = vhci->devices[devid]; -+ -+ for (i = 0; i < 32; i++) { -+ if (dev->tq_mask & BIT(i)) { -+ bce_vhci_transfer_queue_pause(&dev->tq[i], BCE_VHCI_PAUSE_SHUTDOWN); -+ bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) i); -+ bce_vhci_destroy_transfer_queue(vhci, &dev->tq[i]); -+ } -+ } -+ vhci->devices[devid] = NULL; -+ vhci->port_to_device[index] = 0; -+ bce_vhci_cmd_device_destroy(&vhci->cq, devid); -+ } -+ status = bce_vhci_cmd_port_reset(&vhci->cq, (u8) index, timeout); -+ -+ if (dev) { -+ if ((status = bce_vhci_cmd_device_create(&vhci->cq, index, &devid))) -+ return status; -+ vhci->devices[devid] = dev; -+ vhci->port_to_device[index] = devid; -+ -+ for (i = 0; i < 32; 
i++) { -+ if (dev->tq_mask & BIT(i)) { -+ dir = usb_endpoint_dir_in(&dev->tq[i].endp->desc) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; -+ if (i == 0) -+ dir = DMA_BIDIRECTIONAL; -+ bce_vhci_create_transfer_queue(vhci, &dev->tq[i], dev->tq[i].endp, devid, dir); -+ bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &dev->tq[i].endp->desc); -+ } -+ } -+ } -+ -+ return status; -+} -+ -+static int bce_vhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) -+{ -+ return 0; -+} -+ -+static int bce_vhci_get_frame_number(struct usb_hcd *hcd) -+{ -+ return 0; -+} -+ -+static int bce_vhci_bus_suspend(struct usb_hcd *hcd) -+{ -+ int i, j; -+ int status; -+ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); -+ pr_info("bce_vhci: suspend started\n"); -+ -+ pr_info("bce_vhci: suspend endpoints\n"); -+ for (i = 0; i < 16; i++) { -+ if (!vhci->port_to_device[i]) -+ continue; -+ for (j = 0; j < 32; j++) { -+ if (!(vhci->devices[vhci->port_to_device[i]]->tq_mask & BIT(j))) -+ continue; -+ bce_vhci_transfer_queue_pause(&vhci->devices[vhci->port_to_device[i]]->tq[j], -+ BCE_VHCI_PAUSE_SUSPEND); -+ } -+ } -+ -+ pr_info("bce_vhci: suspend ports\n"); -+ for (i = 0; i < 16; i++) { -+ if (!vhci->port_to_device[i]) -+ continue; -+ bce_vhci_cmd_port_suspend(&vhci->cq, i); -+ } -+ pr_info("bce_vhci: suspend controller\n"); -+ if ((status = bce_vhci_cmd_controller_pause(&vhci->cq))) -+ return status; -+ -+ bce_vhci_event_queue_pause(&vhci->ev_commands); -+ bce_vhci_event_queue_pause(&vhci->ev_system); -+ bce_vhci_event_queue_pause(&vhci->ev_isochronous); -+ bce_vhci_event_queue_pause(&vhci->ev_interrupt); -+ bce_vhci_event_queue_pause(&vhci->ev_asynchronous); -+ pr_info("bce_vhci: suspend done\n"); -+ return 0; -+} -+ -+static int bce_vhci_bus_resume(struct usb_hcd *hcd) -+{ -+ int i, j; -+ int status; -+ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); -+ pr_info("bce_vhci: resume started\n"); -+ -+ bce_vhci_event_queue_resume(&vhci->ev_system); -+ 
bce_vhci_event_queue_resume(&vhci->ev_isochronous); -+ bce_vhci_event_queue_resume(&vhci->ev_interrupt); -+ bce_vhci_event_queue_resume(&vhci->ev_asynchronous); -+ bce_vhci_event_queue_resume(&vhci->ev_commands); -+ -+ pr_info("bce_vhci: resume controller\n"); -+ if ((status = bce_vhci_cmd_controller_start(&vhci->cq))) -+ return status; -+ -+ pr_info("bce_vhci: resume ports\n"); -+ for (i = 0; i < 16; i++) { -+ if (!vhci->port_to_device[i]) -+ continue; -+ bce_vhci_cmd_port_resume(&vhci->cq, i); -+ } -+ pr_info("bce_vhci: resume endpoints\n"); -+ for (i = 0; i < 16; i++) { -+ if (!vhci->port_to_device[i]) -+ continue; -+ for (j = 0; j < 32; j++) { -+ if (!(vhci->devices[vhci->port_to_device[i]]->tq_mask & BIT(j))) -+ continue; -+ bce_vhci_transfer_queue_resume(&vhci->devices[vhci->port_to_device[i]]->tq[j], -+ BCE_VHCI_PAUSE_SUSPEND); -+ } -+ } -+ -+ pr_info("bce_vhci: resume done\n"); -+ return 0; -+} -+ -+static int bce_vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) -+{ -+ struct bce_vhci_transfer_queue *q = urb->ep->hcpriv; -+ pr_debug("bce_vhci_urb_enqueue %i:%x\n", q->dev_addr, urb->ep->desc.bEndpointAddress); -+ if (!q) -+ return -ENOENT; -+ return bce_vhci_urb_create(q, urb); -+} -+ -+static int bce_vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) -+{ -+ struct bce_vhci_transfer_queue *q = urb->ep->hcpriv; -+ pr_debug("bce_vhci_urb_dequeue %x\n", urb->ep->desc.bEndpointAddress); -+ return bce_vhci_urb_request_cancel(q, urb, status); -+} -+ -+static void bce_vhci_endpoint_reset(struct usb_hcd *hcd, struct usb_host_endpoint *ep) -+{ -+ struct bce_vhci_transfer_queue *q = ep->hcpriv; -+ pr_debug("bce_vhci_endpoint_reset\n"); -+ if (q) -+ bce_vhci_transfer_queue_request_reset(q); -+} -+ -+static u8 bce_vhci_endpoint_index(u8 addr) -+{ -+ if (addr & 0x80) -+ return (u8) (0x10 + (addr & 0xf)); -+ return (u8) (addr & 0xf); -+} -+ -+static int bce_vhci_add_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct 
usb_host_endpoint *endp) -+{ -+ u8 endp_index = bce_vhci_endpoint_index(endp->desc.bEndpointAddress); -+ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); -+ bce_vhci_device_t devid = vhci->port_to_device[udev->portnum]; -+ struct bce_vhci_device *vdev = vhci->devices[devid]; -+ pr_debug("bce_vhci_add_endpoint %x/%x:%x\n", udev->portnum, devid, endp_index); -+ -+ if (udev->bus->root_hub == udev) /* The USB hub */ -+ return 0; -+ if (vdev == NULL) -+ return -ENODEV; -+ if (vdev->tq_mask & BIT(endp_index)) { -+ endp->hcpriv = &vdev->tq[endp_index]; -+ return 0; -+ } -+ -+ bce_vhci_create_transfer_queue(vhci, &vdev->tq[endp_index], endp, devid, -+ usb_endpoint_dir_in(&endp->desc) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); -+ endp->hcpriv = &vdev->tq[endp_index]; -+ vdev->tq_mask |= BIT(endp_index); -+ -+ bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &endp->desc); -+ return 0; -+} -+ -+static int bce_vhci_drop_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *endp) -+{ -+ u8 endp_index = bce_vhci_endpoint_index(endp->desc.bEndpointAddress); -+ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); -+ bce_vhci_device_t devid = vhci->port_to_device[udev->portnum]; -+ struct bce_vhci_transfer_queue *q = endp->hcpriv; -+ struct bce_vhci_device *vdev = vhci->devices[devid]; -+ pr_info("bce_vhci_drop_endpoint %x:%x\n", udev->portnum, endp_index); -+ if (!q) { -+ if (vdev && vdev->tq_mask & BIT(endp_index)) { -+ pr_err("something deleted the hcpriv?\n"); -+ q = &vdev->tq[endp_index]; -+ } else { -+ return 0; -+ } -+ } -+ -+ bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) (endp->desc.bEndpointAddress & 0x8Fu)); -+ vhci->devices[devid]->tq_mask &= ~BIT(endp_index); -+ bce_vhci_destroy_transfer_queue(vhci, q); -+ return 0; -+} -+ -+static int bce_vhci_create_message_queues(struct bce_vhci *vhci) -+{ -+ if (bce_vhci_message_queue_create(vhci, &vhci->msg_commands, "VHC1HostCommands") || -+ bce_vhci_message_queue_create(vhci, &vhci->msg_system, 
"VHC1HostSystemEvents") || -+ bce_vhci_message_queue_create(vhci, &vhci->msg_isochronous, "VHC1HostIsochronousEvents") || -+ bce_vhci_message_queue_create(vhci, &vhci->msg_interrupt, "VHC1HostInterruptEvents") || -+ bce_vhci_message_queue_create(vhci, &vhci->msg_asynchronous, "VHC1HostAsynchronousEvents")) { -+ bce_vhci_destroy_message_queues(vhci); -+ return -EINVAL; -+ } -+ spin_lock_init(&vhci->msg_asynchronous_lock); -+ bce_vhci_command_queue_create(&vhci->cq, &vhci->msg_commands); -+ return 0; -+} -+ -+static void bce_vhci_destroy_message_queues(struct bce_vhci *vhci) -+{ -+ bce_vhci_command_queue_destroy(&vhci->cq); -+ bce_vhci_message_queue_destroy(vhci, &vhci->msg_commands); -+ bce_vhci_message_queue_destroy(vhci, &vhci->msg_system); -+ bce_vhci_message_queue_destroy(vhci, &vhci->msg_isochronous); -+ bce_vhci_message_queue_destroy(vhci, &vhci->msg_interrupt); -+ bce_vhci_message_queue_destroy(vhci, &vhci->msg_asynchronous); -+} -+ -+static void bce_vhci_handle_system_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg); -+static void bce_vhci_handle_usb_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg); -+ -+static int bce_vhci_create_event_queues(struct bce_vhci *vhci) -+{ -+ vhci->ev_cq = bce_create_cq(vhci->dev, 0x100); -+ if (!vhci->ev_cq) -+ return -EINVAL; -+#define CREATE_EVENT_QUEUE(field, name, cb) bce_vhci_event_queue_create(vhci, &vhci->field, name, cb) -+ if (__bce_vhci_event_queue_create(vhci, &vhci->ev_commands, "VHC1FirmwareCommands", -+ bce_vhci_firmware_event_completion) || -+ CREATE_EVENT_QUEUE(ev_system, "VHC1FirmwareSystemEvents", bce_vhci_handle_system_event) || -+ CREATE_EVENT_QUEUE(ev_isochronous, "VHC1FirmwareIsochronousEvents", bce_vhci_handle_usb_event) || -+ CREATE_EVENT_QUEUE(ev_interrupt, "VHC1FirmwareInterruptEvents", bce_vhci_handle_usb_event) || -+ CREATE_EVENT_QUEUE(ev_asynchronous, "VHC1FirmwareAsynchronousEvents", bce_vhci_handle_usb_event)) { -+ bce_vhci_destroy_event_queues(vhci); -+ 
return -EINVAL; -+ } -+#undef CREATE_EVENT_QUEUE -+ return 0; -+} -+ -+static void bce_vhci_destroy_event_queues(struct bce_vhci *vhci) -+{ -+ bce_vhci_event_queue_destroy(vhci, &vhci->ev_commands); -+ bce_vhci_event_queue_destroy(vhci, &vhci->ev_system); -+ bce_vhci_event_queue_destroy(vhci, &vhci->ev_isochronous); -+ bce_vhci_event_queue_destroy(vhci, &vhci->ev_interrupt); -+ bce_vhci_event_queue_destroy(vhci, &vhci->ev_asynchronous); -+ if (vhci->ev_cq) -+ bce_destroy_cq(vhci->dev, vhci->ev_cq); -+} -+ -+static void bce_vhci_send_fw_event_response(struct bce_vhci *vhci, struct bce_vhci_message *req, u16 status) -+{ -+ unsigned long timeout = 1000; -+ struct bce_vhci_message r = *req; -+ r.cmd = (u16) (req->cmd | 0x8000u); -+ r.status = status; -+ r.param1 = req->param1; -+ r.param2 = 0; -+ -+ if (bce_reserve_submission(vhci->msg_system.sq, &timeout)) { -+ pr_err("bce-vhci: Cannot reserve submision for FW event reply\n"); -+ return; -+ } -+ bce_vhci_message_queue_write(&vhci->msg_system, &r); -+} -+ -+static int bce_vhci_handle_firmware_event(struct bce_vhci *vhci, struct bce_vhci_message *msg) -+{ -+ unsigned long flags; -+ bce_vhci_device_t devid; -+ u8 endp; -+ struct bce_vhci_device *dev; -+ struct bce_vhci_transfer_queue *tq; -+ if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE || msg->cmd == BCE_VHCI_CMD_ENDPOINT_SET_STATE) { -+ devid = (bce_vhci_device_t) (msg->param1 & 0xff); -+ endp = bce_vhci_endpoint_index((u8) ((msg->param1 >> 8) & 0xff)); -+ dev = vhci->devices[devid]; -+ if (!dev || !(dev->tq_mask & BIT(endp))) -+ return BCE_VHCI_BAD_ARGUMENT; -+ tq = &dev->tq[endp]; -+ } -+ -+ if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE) { -+ if (msg->param2 == BCE_VHCI_ENDPOINT_ACTIVE) { -+ bce_vhci_transfer_queue_resume(tq, BCE_VHCI_PAUSE_FIRMWARE); -+ return BCE_VHCI_SUCCESS; -+ } else if (msg->param2 == BCE_VHCI_ENDPOINT_PAUSED) { -+ bce_vhci_transfer_queue_pause(tq, BCE_VHCI_PAUSE_FIRMWARE); -+ return BCE_VHCI_SUCCESS; -+ } -+ return 
BCE_VHCI_BAD_ARGUMENT; -+ } else if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_SET_STATE) { -+ if (msg->param2 == BCE_VHCI_ENDPOINT_STALLED) { -+ tq->state = msg->param2; -+ spin_lock_irqsave(&tq->urb_lock, flags); -+ tq->stalled = true; -+ spin_unlock_irqrestore(&tq->urb_lock, flags); -+ return BCE_VHCI_SUCCESS; -+ } -+ return BCE_VHCI_BAD_ARGUMENT; -+ } -+ pr_warn("bce-vhci: Unhandled firmware event: %x s=%x p1=%x p2=%llx\n", -+ msg->cmd, msg->status, msg->param1, msg->param2); -+ return BCE_VHCI_BAD_ARGUMENT; -+} -+ -+static void bce_vhci_handle_firmware_events_w(struct work_struct *ws) -+{ -+ size_t cnt = 0; -+ int result; -+ struct bce_vhci *vhci = container_of(ws, struct bce_vhci, w_fw_events); -+ struct bce_queue_sq *sq = vhci->ev_commands.sq; -+ struct bce_sq_completion_data *cq; -+ struct bce_vhci_message *msg, *msg2 = NULL; -+ -+ while (true) { -+ if (msg2) { -+ msg = msg2; -+ msg2 = NULL; -+ } else if ((cq = bce_next_completion(sq))) { -+ if (cq->status == BCE_COMPLETION_ABORTED) { -+ bce_notify_submission_complete(sq); -+ continue; -+ } -+ msg = &vhci->ev_commands.data[sq->head]; -+ } else { -+ break; -+ } -+ -+ pr_debug("bce-vhci: Got fw event: %x s=%x p1=%x p2=%llx\n", msg->cmd, msg->status, msg->param1, msg->param2); -+ if ((cq = bce_next_completion(sq))) { -+ msg2 = &vhci->ev_commands.data[(sq->head + 1) % sq->el_count]; -+ pr_debug("bce-vhci: Got second fw event: %x s=%x p1=%x p2=%llx\n", -+ msg->cmd, msg->status, msg->param1, msg->param2); -+ if (cq->status != BCE_COMPLETION_ABORTED && -+ msg2->cmd == (msg->cmd | 0x4000) && msg2->param1 == msg->param1) { -+ /* Take two elements */ -+ pr_debug("bce-vhci: Cancelled\n"); -+ bce_vhci_send_fw_event_response(vhci, msg, BCE_VHCI_ABORT); -+ -+ bce_notify_submission_complete(sq); -+ bce_notify_submission_complete(sq); -+ msg2 = NULL; -+ cnt += 2; -+ continue; -+ } -+ -+ pr_warn("bce-vhci: Handle fw event - unexpected cancellation\n"); -+ } -+ -+ result = bce_vhci_handle_firmware_event(vhci, msg); -+ 
bce_vhci_send_fw_event_response(vhci, msg, (u16) result); -+ -+ -+ bce_notify_submission_complete(sq); -+ ++cnt; -+ } -+ bce_vhci_event_queue_submit_pending(&vhci->ev_commands, cnt); -+ if (atomic_read(&sq->available_commands) == sq->el_count - 1) { -+ pr_debug("bce-vhci: complete\n"); -+ complete(&vhci->ev_commands.queue_empty_completion); -+ } -+} -+ -+static void bce_vhci_firmware_event_completion(struct bce_queue_sq *sq) -+{ -+ struct bce_vhci_event_queue *q = sq->userdata; -+ queue_work(q->vhci->tq_state_wq, &q->vhci->w_fw_events); -+} -+ -+static void bce_vhci_handle_system_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg) -+{ -+ if (msg->cmd & 0x8000) { -+ bce_vhci_command_queue_deliver_completion(&q->vhci->cq, msg); -+ } else { -+ pr_warn("bce-vhci: Unhandled system event: %x s=%x p1=%x p2=%llx\n", -+ msg->cmd, msg->status, msg->param1, msg->param2); -+ } -+} -+ -+static void bce_vhci_handle_usb_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg) -+{ -+ bce_vhci_device_t devid; -+ u8 endp; -+ struct bce_vhci_device *dev; -+ if (msg->cmd & 0x8000) { -+ bce_vhci_command_queue_deliver_completion(&q->vhci->cq, msg); -+ } else if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST || msg->cmd == BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS) { -+ devid = (bce_vhci_device_t) (msg->param1 & 0xff); -+ endp = bce_vhci_endpoint_index((u8) ((msg->param1 >> 8) & 0xff)); -+ dev = q->vhci->devices[devid]; -+ if (!dev || (dev->tq_mask & BIT(endp)) == 0) { -+ pr_err("bce-vhci: Didn't find destination for transfer queue event\n"); -+ return; -+ } -+ bce_vhci_transfer_queue_event(&dev->tq[endp], msg); -+ } else { -+ pr_warn("bce-vhci: Unhandled USB event: %x s=%x p1=%x p2=%llx\n", -+ msg->cmd, msg->status, msg->param1, msg->param2); -+ } -+} -+ -+ -+ -+static const struct hc_driver bce_vhci_driver = { -+ .description = "bce-vhci", -+ .product_desc = "BCE VHCI Host Controller", -+ .hcd_priv_size = sizeof(struct bce_vhci *), -+ -+#if LINUX_VERSION_CODE < 
KERNEL_VERSION(5,4,0) -+ .flags = HCD_USB2, -+#else -+ .flags = HCD_USB2 | HCD_DMA, -+#endif -+ -+ .start = bce_vhci_start, -+ .stop = bce_vhci_stop, -+ .hub_status_data = bce_vhci_hub_status_data, -+ .hub_control = bce_vhci_hub_control, -+ .urb_enqueue = bce_vhci_urb_enqueue, -+ .urb_dequeue = bce_vhci_urb_dequeue, -+ .enable_device = bce_vhci_enable_device, -+ .free_dev = bce_vhci_free_device, -+ .address_device = bce_vhci_address_device, -+ .add_endpoint = bce_vhci_add_endpoint, -+ .drop_endpoint = bce_vhci_drop_endpoint, -+ .endpoint_reset = bce_vhci_endpoint_reset, -+ .check_bandwidth = bce_vhci_check_bandwidth, -+ .get_frame_number = bce_vhci_get_frame_number, -+ .bus_suspend = bce_vhci_bus_suspend, -+ .bus_resume = bce_vhci_bus_resume -+}; -+ -+ -+int __init bce_vhci_module_init(void) -+{ -+ int result; -+ if ((result = alloc_chrdev_region(&bce_vhci_chrdev, 0, 1, "bce-vhci"))) -+ goto fail_chrdev; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(6,4,0) -+ bce_vhci_class = class_create(THIS_MODULE, "bce-vhci"); -+#else -+ bce_vhci_class = class_create("bce-vhci"); -+#endif -+ if (IS_ERR(bce_vhci_class)) { -+ result = PTR_ERR(bce_vhci_class); -+ goto fail_class; -+ } -+ return 0; -+ -+fail_class: -+ class_destroy(bce_vhci_class); -+fail_chrdev: -+ unregister_chrdev_region(bce_vhci_chrdev, 1); -+ if (!result) -+ result = -EINVAL; -+ return result; -+} -+void __exit bce_vhci_module_exit(void) -+{ -+ class_destroy(bce_vhci_class); -+ unregister_chrdev_region(bce_vhci_chrdev, 1); -+} -+ -+module_param_named(vhci_port_mask, bce_vhci_port_mask, ushort, 0444); -+MODULE_PARM_DESC(vhci_port_mask, "Specifies which VHCI ports are enabled"); -diff --git a/drivers/staging/apple-bce/vhci/vhci.h b/drivers/staging/apple-bce/vhci/vhci.h -new file mode 100644 -index 000000000000..6c2e22622f4c ---- /dev/null -+++ b/drivers/staging/apple-bce/vhci/vhci.h -@@ -0,0 +1,52 @@ -+#ifndef BCE_VHCI_H -+#define BCE_VHCI_H -+ -+#include "queue.h" -+#include "transfer.h" -+ -+struct usb_hcd; 
-+struct bce_queue_cq; -+ -+struct bce_vhci_device { -+ struct bce_vhci_transfer_queue tq[32]; -+ u32 tq_mask; -+}; -+struct bce_vhci { -+ struct apple_bce_device *dev; -+ dev_t vdevt; -+ struct device *vdev; -+ struct usb_hcd *hcd; -+ struct spinlock hcd_spinlock; -+ struct bce_vhci_message_queue msg_commands; -+ struct bce_vhci_message_queue msg_system; -+ struct bce_vhci_message_queue msg_isochronous; -+ struct bce_vhci_message_queue msg_interrupt; -+ struct bce_vhci_message_queue msg_asynchronous; -+ struct spinlock msg_asynchronous_lock; -+ struct bce_vhci_command_queue cq; -+ struct bce_queue_cq *ev_cq; -+ struct bce_vhci_event_queue ev_commands; -+ struct bce_vhci_event_queue ev_system; -+ struct bce_vhci_event_queue ev_isochronous; -+ struct bce_vhci_event_queue ev_interrupt; -+ struct bce_vhci_event_queue ev_asynchronous; -+ u16 port_mask; -+ u8 port_count; -+ u16 port_power_mask; -+ bce_vhci_device_t port_to_device[16]; -+ struct bce_vhci_device *devices[16]; -+ struct workqueue_struct *tq_state_wq; -+ struct work_struct w_fw_events; -+}; -+ -+int __init bce_vhci_module_init(void); -+void __exit bce_vhci_module_exit(void); -+ -+int bce_vhci_create(struct apple_bce_device *dev, struct bce_vhci *vhci); -+void bce_vhci_destroy(struct bce_vhci *vhci); -+int bce_vhci_start(struct usb_hcd *hcd); -+void bce_vhci_stop(struct usb_hcd *hcd); -+ -+struct bce_vhci *bce_vhci_from_hcd(struct usb_hcd *hcd); -+ -+#endif //BCE_VHCI_H -diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c -index e02ba15f6e34..b35734d03109 100644 ---- a/drivers/usb/core/driver.c -+++ b/drivers/usb/core/driver.c -@@ -517,6 +517,19 @@ static int usb_unbind_interface(struct device *dev) - return 0; - } - -+static void usb_shutdown_interface(struct device *dev) -+{ -+ struct usb_interface *intf = to_usb_interface(dev); -+ struct usb_driver *driver; -+ -+ if (!dev->driver) -+ return; -+ -+ driver = to_usb_driver(dev->driver); -+ if (driver->shutdown) -+ driver->shutdown(intf); -+} 
-+ - /** - * usb_driver_claim_interface - bind a driver to an interface - * @driver: the driver to be bound -@@ -1059,6 +1072,7 @@ int usb_register_driver(struct usb_driver *new_driver, struct module *owner, - new_driver->driver.bus = &usb_bus_type; - new_driver->driver.probe = usb_probe_interface; - new_driver->driver.remove = usb_unbind_interface; -+ new_driver->driver.shutdown = usb_shutdown_interface; - new_driver->driver.owner = owner; - new_driver->driver.mod_name = mod_name; - new_driver->driver.dev_groups = new_driver->dev_groups; -diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c -index b610a2de4ae5..0cdbcf82554f 100644 ---- a/drivers/usb/storage/uas.c -+++ b/drivers/usb/storage/uas.c -@@ -1232,9 +1232,8 @@ static void uas_disconnect(struct usb_interface *intf) - * hang on reboot when the device is still in uas mode. Note the reset is - * necessary as some devices won't revert to usb-storage mode without it. - */ --static void uas_shutdown(struct device *dev) -+static void uas_shutdown(struct usb_interface *intf) - { -- struct usb_interface *intf = to_usb_interface(dev); - struct usb_device *udev = interface_to_usbdev(intf); - struct Scsi_Host *shost = usb_get_intfdata(intf); - struct uas_dev_info *devinfo = (struct uas_dev_info *)shost->hostdata; -@@ -1257,7 +1256,7 @@ static struct usb_driver uas_driver = { - .suspend = uas_suspend, - .resume = uas_resume, - .reset_resume = uas_reset_resume, -- .driver.shutdown = uas_shutdown, -+ .shutdown = uas_shutdown, - .id_table = uas_usb_ids, - }; - -diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h -index 428d81afe215..aa1604d92c1a 100644 ---- a/include/drm/drm_format_helper.h -+++ b/include/drm/drm_format_helper.h -@@ -96,6 +96,9 @@ void drm_fb_xrgb8888_to_rgba5551(struct iosys_map *dst, const unsigned int *dst_ - void drm_fb_xrgb8888_to_rgb888(struct iosys_map *dst, const unsigned int *dst_pitch, - const struct iosys_map *src, const struct drm_framebuffer *fb, - 
const struct drm_rect *clip, struct drm_format_conv_state *state); -+void drm_fb_xrgb8888_to_bgr888(struct iosys_map *dst, const unsigned int *dst_pitch, -+ const struct iosys_map *src, const struct drm_framebuffer *fb, -+ const struct drm_rect *clip, struct drm_format_conv_state *state); - void drm_fb_xrgb8888_to_argb8888(struct iosys_map *dst, const unsigned int *dst_pitch, - const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip, struct drm_format_conv_state *state); -diff --git a/include/linux/efi.h b/include/linux/efi.h -index 418e555459da..e28873eb19ed 100644 ---- a/include/linux/efi.h -+++ b/include/linux/efi.h -@@ -385,6 +385,7 @@ void efi_native_runtime_setup(void); - #define EFI_MEMORY_ATTRIBUTES_TABLE_GUID EFI_GUID(0xdcfa911d, 0x26eb, 0x469f, 0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20) - #define EFI_CONSOLE_OUT_DEVICE_GUID EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) - #define APPLE_PROPERTIES_PROTOCOL_GUID EFI_GUID(0x91bd12fe, 0xf6c3, 0x44fb, 0xa5, 0xb7, 0x51, 0x22, 0xab, 0x30, 0x3a, 0xe0) -+#define APPLE_SET_OS_PROTOCOL_GUID EFI_GUID(0xc5c5da95, 0x7d5c, 0x45e6, 0xb2, 0xf1, 0x3f, 0xd5, 0x2b, 0xb1, 0x00, 0x77) - #define EFI_TCG2_PROTOCOL_GUID EFI_GUID(0x607f766c, 0x7455, 0x42be, 0x93, 0x0b, 0xe4, 0xd7, 0x6d, 0xb2, 0x72, 0x0f) - #define EFI_TCG2_FINAL_EVENTS_TABLE_GUID EFI_GUID(0x1e2ed096, 0x30e2, 0x4254, 0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25) - #define EFI_LOAD_FILE_PROTOCOL_GUID EFI_GUID(0x56ec3091, 0x954c, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) -diff --git a/include/linux/hid.h b/include/linux/hid.h -index 8e06d89698e6..6cdb5a451453 100644 ---- a/include/linux/hid.h -+++ b/include/linux/hid.h -@@ -940,6 +940,8 @@ extern void hidinput_report_event(struct hid_device *hid, struct hid_report *rep - extern int hidinput_connect(struct hid_device *hid, unsigned int force); - extern void hidinput_disconnect(struct hid_device *); - -+struct hid_field 
*hid_find_field(struct hid_device *hdev, unsigned int report_type, -+ unsigned int application, unsigned int usage); - int hid_set_field(struct hid_field *, unsigned, __s32); - int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, - int interrupt); -diff --git a/include/linux/usb.h b/include/linux/usb.h -index 1913a13833f2..832997a9da0a 100644 ---- a/include/linux/usb.h -+++ b/include/linux/usb.h -@@ -1171,6 +1171,7 @@ extern ssize_t usb_show_dynids(struct usb_dynids *dynids, char *buf); - * post_reset method is called. - * @post_reset: Called by usb_reset_device() after the device - * has been reset -+ * @shutdown: Called at shut-down time to quiesce the device. - * @id_table: USB drivers use ID table to support hotplugging. - * Export this with MODULE_DEVICE_TABLE(usb,...). This must be set - * or your driver's probe function will never get called. -@@ -1222,6 +1223,8 @@ struct usb_driver { - int (*pre_reset)(struct usb_interface *intf); - int (*post_reset)(struct usb_interface *intf); - -+ void (*shutdown)(struct usb_interface *intf); -+ - const struct usb_device_id *id_table; - const struct attribute_group **dev_groups; - -diff --git a/lib/test_printf.c b/lib/test_printf.c -index 69b6a5e177f2..a318bb72a165 100644 ---- a/lib/test_printf.c -+++ b/lib/test_printf.c -@@ -745,18 +745,26 @@ static void __init fwnode_pointer(void) - static void __init fourcc_pointer(void) - { - struct { -+ char type; - u32 code; - char *str; - } const try[] = { -- { 0x3231564e, "NV12 little-endian (0x3231564e)", }, -- { 0xb231564e, "NV12 big-endian (0xb231564e)", }, -- { 0x10111213, ".... little-endian (0x10111213)", }, -- { 0x20303159, "Y10 little-endian (0x20303159)", }, -+ { 'c', 0x3231564e, "NV12 little-endian (0x3231564e)", }, -+ { 'c', 0xb231564e, "NV12 big-endian (0xb231564e)", }, -+ { 'c', 0x10111213, ".... 
little-endian (0x10111213)", }, -+ { 'c', 0x20303159, "Y10 little-endian (0x20303159)", }, -+ { 'h', 0x67503030, "gP00 (0x67503030)", }, -+ { 'r', 0x30305067, "gP00 (0x67503030)", }, -+ { 'l', cpu_to_le32(0x67503030), "gP00 (0x67503030)", }, -+ { 'b', cpu_to_be32(0x67503030), "gP00 (0x67503030)", }, - }; - unsigned int i; - -- for (i = 0; i < ARRAY_SIZE(try); i++) -- test(try[i].str, "%p4cc", &try[i].code); -+ for (i = 0; i < ARRAY_SIZE(try); i++) { -+ char fmt[] = { '%', 'p', '4', 'c', try[i].type, '\0' }; -+ -+ test(try[i].str, fmt, &try[i].code); -+ } - } - - static void __init -diff --git a/lib/vsprintf.c b/lib/vsprintf.c -index cdd4e2314bfc..4feaea1815fa 100644 ---- a/lib/vsprintf.c -+++ b/lib/vsprintf.c -@@ -1760,27 +1760,50 @@ char *fourcc_string(char *buf, char *end, const u32 *fourcc, - char output[sizeof("0123 little-endian (0x01234567)")]; - char *p = output; - unsigned int i; -+ bool pix_fmt = false; - u32 orig, val; - -- if (fmt[1] != 'c' || fmt[2] != 'c') -+ if (fmt[1] != 'c') - return error_string(buf, end, "(%p4?)", spec); - - if (check_pointer(&buf, end, fourcc, spec)) - return buf; - - orig = get_unaligned(fourcc); -- val = orig & ~BIT(31); -+ switch (fmt[2]) { -+ case 'h': -+ val = orig; -+ break; -+ case 'r': -+ val = orig = swab32(orig); -+ break; -+ case 'l': -+ val = orig = le32_to_cpu(orig); -+ break; -+ case 'b': -+ val = orig = be32_to_cpu(orig); -+ break; -+ case 'c': -+ /* Pixel formats are printed LSB-first */ -+ val = swab32(orig & ~BIT(31)); -+ pix_fmt = true; -+ break; -+ default: -+ return error_string(buf, end, "(%p4?)", spec); -+ } - - for (i = 0; i < sizeof(u32); i++) { -- unsigned char c = val >> (i * 8); -+ unsigned char c = val >> ((3 - i) * 8); - - /* Print non-control ASCII characters as-is, dot otherwise */ - *p++ = isascii(c) && isprint(c) ? c : '.'; - } - -- *p++ = ' '; -- strcpy(p, orig & BIT(31) ? "big-endian" : "little-endian"); -- p += strlen(p); -+ if (pix_fmt) { -+ *p++ = ' '; -+ strcpy(p, orig & BIT(31) ? 
"big-endian" : "little-endian"); -+ p += strlen(p); -+ } - - *p++ = ' '; - *p++ = '('; -@@ -2355,6 +2378,7 @@ char *rust_fmt_argument(char *buf, char *end, void *ptr); - * read the documentation (path below) first. - * - 'NF' For a netdev_features_t - * - '4cc' V4L2 or DRM FourCC code, with endianness and raw numerical value. -+ * - '4c[hlbr]' Generic FourCC code. - * - 'h[CDN]' For a variable-length buffer, it prints it as a hex string with - * a certain separator (' ' by default): - * C colon -diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl -index 2b812210b412..4c3a8cc6ef15 100755 ---- a/scripts/checkpatch.pl -+++ b/scripts/checkpatch.pl -@@ -6909,7 +6909,7 @@ sub process { - ($extension eq "f" && - defined $qualifier && $qualifier !~ /^w/) || - ($extension eq "4" && -- defined $qualifier && $qualifier !~ /^cc/)) { -+ defined $qualifier && $qualifier !~ /^c[chlbr]/)) { - $bad_specifier = $specifier; - last; - } --- -2.46.0.rc1 - -From 5e342e16601d0e39334a97b24dcda3a4f086ad1a Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Mon, 15 Jul 2024 13:27:23 +0200 -Subject: [PATCH 11/11] zstd - -Signed-off-by: Peter Jung ---- - include/linux/zstd.h | 2 +- - include/linux/zstd_errors.h | 23 +- - include/linux/zstd_lib.h | 850 +++++-- - lib/zstd/Makefile | 2 +- - lib/zstd/common/allocations.h | 56 + - lib/zstd/common/bits.h | 149 ++ - lib/zstd/common/bitstream.h | 127 +- - lib/zstd/common/compiler.h | 134 +- - lib/zstd/common/cpu.h | 3 +- - lib/zstd/common/debug.c | 9 +- - lib/zstd/common/debug.h | 34 +- - lib/zstd/common/entropy_common.c | 42 +- - lib/zstd/common/error_private.c | 12 +- - lib/zstd/common/error_private.h | 84 +- - lib/zstd/common/fse.h | 94 +- - lib/zstd/common/fse_decompress.c | 130 +- - lib/zstd/common/huf.h | 237 +- - lib/zstd/common/mem.h | 3 +- - lib/zstd/common/portability_macros.h | 28 +- - lib/zstd/common/zstd_common.c | 38 +- - lib/zstd/common/zstd_deps.h | 16 +- - lib/zstd/common/zstd_internal.h | 109 +- - lib/zstd/compress/clevels.h | 
3 +- - lib/zstd/compress/fse_compress.c | 74 +- - lib/zstd/compress/hist.c | 3 +- - lib/zstd/compress/hist.h | 3 +- - lib/zstd/compress/huf_compress.c | 441 ++-- - lib/zstd/compress/zstd_compress.c | 2111 ++++++++++++----- - lib/zstd/compress/zstd_compress_internal.h | 359 ++- - lib/zstd/compress/zstd_compress_literals.c | 155 +- - lib/zstd/compress/zstd_compress_literals.h | 25 +- - lib/zstd/compress/zstd_compress_sequences.c | 7 +- - lib/zstd/compress/zstd_compress_sequences.h | 3 +- - lib/zstd/compress/zstd_compress_superblock.c | 376 ++- - lib/zstd/compress/zstd_compress_superblock.h | 3 +- - lib/zstd/compress/zstd_cwksp.h | 169 +- - lib/zstd/compress/zstd_double_fast.c | 143 +- - lib/zstd/compress/zstd_double_fast.h | 17 +- - lib/zstd/compress/zstd_fast.c | 596 +++-- - lib/zstd/compress/zstd_fast.h | 6 +- - lib/zstd/compress/zstd_lazy.c | 732 +++--- - lib/zstd/compress/zstd_lazy.h | 138 +- - lib/zstd/compress/zstd_ldm.c | 21 +- - lib/zstd/compress/zstd_ldm.h | 3 +- - lib/zstd/compress/zstd_ldm_geartab.h | 3 +- - lib/zstd/compress/zstd_opt.c | 497 ++-- - lib/zstd/compress/zstd_opt.h | 41 +- - lib/zstd/decompress/huf_decompress.c | 887 ++++--- - lib/zstd/decompress/zstd_ddict.c | 9 +- - lib/zstd/decompress/zstd_ddict.h | 3 +- - lib/zstd/decompress/zstd_decompress.c | 358 ++- - lib/zstd/decompress/zstd_decompress_block.c | 708 +++--- - lib/zstd/decompress/zstd_decompress_block.h | 10 +- - .../decompress/zstd_decompress_internal.h | 9 +- - lib/zstd/decompress_sources.h | 2 +- - lib/zstd/zstd_common_module.c | 5 +- - lib/zstd/zstd_compress_module.c | 2 +- - lib/zstd/zstd_decompress_module.c | 4 +- - 58 files changed, 6577 insertions(+), 3531 deletions(-) - create mode 100644 lib/zstd/common/allocations.h - create mode 100644 lib/zstd/common/bits.h - -diff --git a/include/linux/zstd.h b/include/linux/zstd.h -index 113408eef6ec..f109d49f43f8 100644 ---- a/include/linux/zstd.h -+++ b/include/linux/zstd.h -@@ -1,6 +1,6 @@ - /* SPDX-License-Identifier: GPL-2.0+ OR 
BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -diff --git a/include/linux/zstd_errors.h b/include/linux/zstd_errors.h -index 58b6dd45a969..6d5cf55f0bf3 100644 ---- a/include/linux/zstd_errors.h -+++ b/include/linux/zstd_errors.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -17,8 +18,17 @@ - - - /* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ --#define ZSTDERRORLIB_VISIBILITY --#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY -+#define ZSTDERRORLIB_VISIBLE -+ -+#ifndef ZSTDERRORLIB_HIDDEN -+# if (__GNUC__ >= 4) && !defined(__MINGW32__) -+# define ZSTDERRORLIB_HIDDEN __attribute__ ((visibility ("hidden"))) -+# else -+# define ZSTDERRORLIB_HIDDEN -+# endif -+#endif -+ -+#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE - - /*-********************************************* - * Error codes list -@@ -43,14 +53,17 @@ typedef enum { - ZSTD_error_frameParameter_windowTooLarge = 16, - ZSTD_error_corruption_detected = 20, - ZSTD_error_checksum_wrong = 22, -+ ZSTD_error_literals_headerWrong = 24, - ZSTD_error_dictionary_corrupted = 30, - ZSTD_error_dictionary_wrong = 32, - ZSTD_error_dictionaryCreation_failed = 34, - ZSTD_error_parameter_unsupported = 40, -+ ZSTD_error_parameter_combination_unsupported = 41, - ZSTD_error_parameter_outOfBound = 42, - ZSTD_error_tableLog_tooLarge = 44, - ZSTD_error_maxSymbolValue_tooLarge = 46, - ZSTD_error_maxSymbolValue_tooSmall = 48, -+ ZSTD_error_stabilityCondition_notRespected = 50, - ZSTD_error_stage_wrong = 60, - ZSTD_error_init_missing = 62, - ZSTD_error_memory_allocation = 64, -@@ -58,11 
+71,15 @@ typedef enum { - ZSTD_error_dstSize_tooSmall = 70, - ZSTD_error_srcSize_wrong = 72, - ZSTD_error_dstBuffer_null = 74, -+ ZSTD_error_noForwardProgress_destFull = 80, -+ ZSTD_error_noForwardProgress_inputEmpty = 82, - /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ - ZSTD_error_frameIndex_tooLarge = 100, - ZSTD_error_seekableIO = 102, - ZSTD_error_dstBuffer_wrong = 104, - ZSTD_error_srcBuffer_wrong = 105, -+ ZSTD_error_sequenceProducer_failed = 106, -+ ZSTD_error_externalSequences_invalid = 107, - ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ - } ZSTD_ErrorCode; - -diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h -index 79d55465d5c1..6320fedcf8a4 100644 ---- a/include/linux/zstd_lib.h -+++ b/include/linux/zstd_lib.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -11,23 +12,42 @@ - #ifndef ZSTD_H_235446 - #define ZSTD_H_235446 - --/* ====== Dependency ======*/ -+/* ====== Dependencies ======*/ - #include /* INT_MAX */ - #include /* size_t */ - - - /* ===== ZSTDLIB_API : control library symbols visibility ===== */ --#ifndef ZSTDLIB_VISIBLE -+#define ZSTDLIB_VISIBLE -+ -+#ifndef ZSTDLIB_HIDDEN - # if (__GNUC__ >= 4) && !defined(__MINGW32__) --# define ZSTDLIB_VISIBLE __attribute__ ((visibility ("default"))) - # define ZSTDLIB_HIDDEN __attribute__ ((visibility ("hidden"))) - # else --# define ZSTDLIB_VISIBLE - # define ZSTDLIB_HIDDEN - # endif - #endif -+ - #define ZSTDLIB_API ZSTDLIB_VISIBLE - -+/* Deprecation warnings : -+ * Should these warnings be a problem, it is generally possible to disable them, -+ * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual. -+ * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS. -+ */ -+#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS -+# define ZSTD_DEPRECATED(message) /* disable deprecation warnings */ -+#else -+# if (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__) -+# define ZSTD_DEPRECATED(message) __attribute__((deprecated(message))) -+# elif (__GNUC__ >= 3) -+# define ZSTD_DEPRECATED(message) __attribute__((deprecated)) -+# else -+# pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler") -+# define ZSTD_DEPRECATED(message) -+# endif -+#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */ -+ - - /* ***************************************************************************** - Introduction -@@ -65,7 +85,7 @@ - /*------ Version ------*/ - #define ZSTD_VERSION_MAJOR 1 - #define ZSTD_VERSION_MINOR 5 --#define ZSTD_VERSION_RELEASE 2 -+#define ZSTD_VERSION_RELEASE 6 - #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) - - /*! 
ZSTD_versionNumber() : -@@ -107,7 +127,8 @@ ZSTDLIB_API const char* ZSTD_versionString(void); - ***************************************/ - /*! ZSTD_compress() : - * Compresses `src` content as a single zstd compressed frame into already allocated `dst`. -- * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. -+ * NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have -+ * enough space to successfully compress the data. - * @return : compressed size written into `dst` (<= `dstCapacity), - * or an error code if it fails (which can be tested using ZSTD_isError()). */ - ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, -@@ -156,7 +177,9 @@ ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t - * "empty", "unknown" and "error" results to the same return value (0), - * while ZSTD_getFrameContentSize() gives them separate return values. - * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ --ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); -+ZSTD_DEPRECATED("Replaced by ZSTD_getFrameContentSize") -+ZSTDLIB_API -+unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); - - /*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+ - * `src` should point to the start of a ZSTD frame or skippable frame. -@@ -168,8 +191,30 @@ ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize) - - - /*====== Helper functions ======*/ --#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? 
(((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ --ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ -+/* ZSTD_compressBound() : -+ * maximum compressed size in worst case single-pass scenario. -+ * When invoking `ZSTD_compress()` or any other one-pass compression function, -+ * it's recommended to provide @dstCapacity >= ZSTD_compressBound(srcSize) -+ * as it eliminates one potential failure scenario, -+ * aka not enough room in dst buffer to write the compressed frame. -+ * Note : ZSTD_compressBound() itself can fail, if @srcSize > ZSTD_MAX_INPUT_SIZE . -+ * In which case, ZSTD_compressBound() will return an error code -+ * which can be tested using ZSTD_isError(). -+ * -+ * ZSTD_COMPRESSBOUND() : -+ * same as ZSTD_compressBound(), but as a macro. -+ * It can be used to produce constants, which can be useful for static allocation, -+ * for example to size a static array on stack. -+ * Will produce constant value 0 if srcSize too large. -+ */ -+#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00ULL : 0xFF00FF00U) -+#define ZSTD_COMPRESSBOUND(srcSize) (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ -+ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ -+/* ZSTD_isError() : -+ * Most ZSTD_* functions returning a size_t value can be tested for error, -+ * using ZSTD_isError(). 
-+ * @return 1 if error, 0 otherwise -+ */ - ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ - ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ - ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ -@@ -183,7 +228,7 @@ ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compres - /*= Compression context - * When compressing many times, - * it is recommended to allocate a context just once, -- * and re-use it for each successive compression operation. -+ * and reuse it for each successive compression operation. - * This will make workload friendlier for system's memory. - * Note : re-using context is just a speed / resource optimization. - * It doesn't change the compression ratio, which remains identical. -@@ -196,9 +241,9 @@ ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer * - - /*! ZSTD_compressCCtx() : - * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. -- * Important : in order to behave similarly to `ZSTD_compress()`, -- * this function compresses at requested compression level, -- * __ignoring any other parameter__ . -+ * Important : in order to mirror `ZSTD_compress()` behavior, -+ * this function compresses at the requested compression level, -+ * __ignoring any other advanced parameter__ . - * If any advanced parameter was set using the advanced API, - * they will all be reset. Only `compressionLevel` remains. - */ -@@ -210,7 +255,7 @@ ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, - /*= Decompression context - * When decompressing many times, - * it is recommended to allocate a context only once, -- * and re-use it for each successive compression operation. -+ * and reuse it for each successive compression operation. - * This will make workload friendlier for system's memory. - * Use one context per thread for parallel execution. 
*/ - typedef struct ZSTD_DCtx_s ZSTD_DCtx; -@@ -220,7 +265,7 @@ ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer * - /*! ZSTD_decompressDCtx() : - * Same as ZSTD_decompress(), - * requires an allocated ZSTD_DCtx. -- * Compatible with sticky parameters. -+ * Compatible with sticky parameters (see below). - */ - ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, -@@ -236,12 +281,12 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, - * using ZSTD_CCtx_set*() functions. - * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. - * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! -- * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . -+ * __They do not apply to one-shot variants such as ZSTD_compressCCtx()__ . - * - * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). - * - * This API supersedes all other "advanced" API entry points in the experimental section. -- * In the future, we expect to remove from experimental API entry points which are redundant with this API. -+ * In the future, we expect to remove API entry points from experimental which are redundant with this API. - */ - - -@@ -324,6 +369,19 @@ typedef enum { - * The higher the value of selected strategy, the more complex it is, - * resulting in stronger and slower compression. - * Special: value 0 means "use default strategy". */ -+ -+ ZSTD_c_targetCBlockSize=130, /* v1.5.6+ -+ * Attempts to fit compressed block size into approximatively targetCBlockSize. -+ * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX. -+ * Note that it's not a guarantee, just a convergence target (default:0). -+ * No target when targetCBlockSize == 0. 
-+ * This is helpful in low bandwidth streaming environments to improve end-to-end latency, -+ * when a client can make use of partial documents (a prominent example being Chrome). -+ * Note: this parameter is stable since v1.5.6. -+ * It was present as an experimental parameter in earlier versions, -+ * but it's not recommended using it with earlier library versions -+ * due to massive performance regressions. -+ */ - /* LDM mode parameters */ - ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. - * This parameter is designed to improve compression ratio -@@ -403,7 +461,6 @@ typedef enum { - * ZSTD_c_forceMaxWindow - * ZSTD_c_forceAttachDict - * ZSTD_c_literalCompressionMode -- * ZSTD_c_targetCBlockSize - * ZSTD_c_srcSizeHint - * ZSTD_c_enableDedicatedDictSearch - * ZSTD_c_stableInBuffer -@@ -412,6 +469,9 @@ typedef enum { - * ZSTD_c_validateSequences - * ZSTD_c_useBlockSplitter - * ZSTD_c_useRowMatchFinder -+ * ZSTD_c_prefetchCDictTables -+ * ZSTD_c_enableSeqProducerFallback -+ * ZSTD_c_maxBlockSize - * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. - * note : never ever use experimentalParam? names directly; - * also, the enums values themselves are unstable and can still change. 
-@@ -421,7 +481,7 @@ typedef enum { - ZSTD_c_experimentalParam3=1000, - ZSTD_c_experimentalParam4=1001, - ZSTD_c_experimentalParam5=1002, -- ZSTD_c_experimentalParam6=1003, -+ /* was ZSTD_c_experimentalParam6=1003; is now ZSTD_c_targetCBlockSize */ - ZSTD_c_experimentalParam7=1004, - ZSTD_c_experimentalParam8=1005, - ZSTD_c_experimentalParam9=1006, -@@ -430,7 +490,11 @@ typedef enum { - ZSTD_c_experimentalParam12=1009, - ZSTD_c_experimentalParam13=1010, - ZSTD_c_experimentalParam14=1011, -- ZSTD_c_experimentalParam15=1012 -+ ZSTD_c_experimentalParam15=1012, -+ ZSTD_c_experimentalParam16=1013, -+ ZSTD_c_experimentalParam17=1014, -+ ZSTD_c_experimentalParam18=1015, -+ ZSTD_c_experimentalParam19=1016 - } ZSTD_cParameter; - - typedef struct { -@@ -493,7 +557,7 @@ typedef enum { - * They will be used to compress next frame. - * Resetting session never fails. - * - The parameters : changes all parameters back to "default". -- * This removes any reference to any dictionary too. -+ * This also removes any reference to any dictionary or external sequence producer. - * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) - * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) - * - Both : similar to resetting the session, followed by resetting parameters. -@@ -502,11 +566,13 @@ ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); - - /*! ZSTD_compress2() : - * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. -+ * (note that this entry point doesn't even expose a compression level parameter). - * ZSTD_compress2() always starts a new frame. - * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. 
- * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() - * - The function is always blocking, returns when compression is completed. -- * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. -+ * NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have -+ * enough space to successfully compress the data, though it is possible it fails for other reasons. - * @return : compressed size written into `dst` (<= `dstCapacity), - * or an error code if it fails (which can be tested using ZSTD_isError()). - */ -@@ -543,13 +609,17 @@ typedef enum { - * ZSTD_d_stableOutBuffer - * ZSTD_d_forceIgnoreChecksum - * ZSTD_d_refMultipleDDicts -+ * ZSTD_d_disableHuffmanAssembly -+ * ZSTD_d_maxBlockSize - * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. - * note : never ever use experimentalParam? names directly - */ - ZSTD_d_experimentalParam1=1000, - ZSTD_d_experimentalParam2=1001, - ZSTD_d_experimentalParam3=1002, -- ZSTD_d_experimentalParam4=1003 -+ ZSTD_d_experimentalParam4=1003, -+ ZSTD_d_experimentalParam5=1004, -+ ZSTD_d_experimentalParam6=1005 - - } ZSTD_dParameter; - -@@ -604,14 +674,14 @@ typedef struct ZSTD_outBuffer_s { - * A ZSTD_CStream object is required to track streaming operation. - * Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. - * ZSTD_CStream objects can be reused multiple times on consecutive compression operations. --* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. -+* It is recommended to reuse ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. - * - * For parallel execution, use one separate ZSTD_CStream per thread. - * - * note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. 
- * - * Parameters are sticky : when starting a new compression on the same context, --* it will re-use the same sticky parameters as previous compression session. -+* it will reuse the same sticky parameters as previous compression session. - * When in doubt, it's recommended to fully initialize the context before usage. - * Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), - * ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to -@@ -700,6 +770,11 @@ typedef enum { - * only ZSTD_e_end or ZSTD_e_flush operations are allowed. - * Before starting a new compression job, or changing compression parameters, - * it is required to fully flush internal buffers. -+ * - note: if an operation ends with an error, it may leave @cctx in an undefined state. -+ * Therefore, it's UB to invoke ZSTD_compressStream2() of ZSTD_compressStream() on such a state. -+ * In order to be re-employed after an error, a state must be reset, -+ * which can be done explicitly (ZSTD_CCtx_reset()), -+ * or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx()) - */ - ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, - ZSTD_outBuffer* output, -@@ -728,8 +803,6 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /*< recommended size for output - * This following is a legacy streaming API, available since v1.0+ . - * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). - * It is redundant, but remains fully supported. -- * Streaming in combination with advanced parameters and dictionary compression -- * can only be used through the new API. - ******************************************************************************/ - - /*! 
-@@ -738,6 +811,9 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /*< recommended size for output - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) - * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); -+ * -+ * Note that ZSTD_initCStream() clears any previously set dictionary. Use the new API -+ * to compress with a dictionary. - */ - ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); - /*! -@@ -758,7 +834,7 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); - * - * A ZSTD_DStream object is required to track streaming operations. - * Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. --* ZSTD_DStream objects can be re-used multiple times. -+* ZSTD_DStream objects can be reused multiple times. - * - * Use ZSTD_initDStream() to start a new decompression operation. - * @return : recommended first input size -@@ -788,13 +864,37 @@ ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); /* accept NULL pointer - - /*===== Streaming decompression functions =====*/ - --/* This function is redundant with the advanced API and equivalent to: -+/*! ZSTD_initDStream() : -+ * Initialize/reset DStream state for new decompression operation. -+ * Call before new decompression operation using same DStream. - * -+ * Note : This function is redundant with the advanced API and equivalent to: - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * ZSTD_DCtx_refDDict(zds, NULL); - */ - ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); - -+/*! ZSTD_decompressStream() : -+ * Streaming decompression function. -+ * Call repetitively to consume full input updating it as necessary. -+ * Function will update both input and output `pos` fields exposing current state via these fields: -+ * - `input.pos < input.size`, some input remaining and caller should provide remaining input -+ * on the next call. 
-+ * - `output.pos < output.size`, decoder finished and flushed all remaining buffers. -+ * - `output.pos == output.size`, potentially uncflushed data present in the internal buffers, -+ * call ZSTD_decompressStream() again to flush remaining data to output. -+ * Note : with no additional input, amount of data flushed <= ZSTD_BLOCKSIZE_MAX. -+ * -+ * @return : 0 when a frame is completely decoded and fully flushed, -+ * or an error code, which can be tested using ZSTD_isError(), -+ * or any other value > 0, which means there is some decoding or flushing to do to complete current frame. -+ * -+ * Note: when an operation returns with an error code, the @zds state may be left in undefined state. -+ * It's UB to invoke `ZSTD_decompressStream()` on such a state. -+ * In order to re-use such a state, it must be first reset, -+ * which can be done explicitly (`ZSTD_DCtx_reset()`), -+ * or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`) -+ */ - ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); - - ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ -@@ -913,7 +1013,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); - * If @return == 0, the dictID could not be decoded. - * This could for one of the following reasons : - * - The frame does not require a dictionary to be decoded (most common case). -- * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. -+ * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden piece of information. - * Note : this use case also happens when using a non-conformant dictionary. - * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). 
- * - This is not a Zstandard frame. -@@ -925,9 +1025,11 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); - * Advanced dictionary and prefix API (Requires v1.4.0+) - * - * This API allows dictionaries to be used with ZSTD_compress2(), -- * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). Dictionaries are sticky, and -- * only reset with the context is reset with ZSTD_reset_parameters or -- * ZSTD_reset_session_and_parameters. Prefixes are single-use. -+ * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). -+ * Dictionaries are sticky, they remain valid when same context is reused, -+ * they only reset when the context is reset -+ * with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters. -+ * In contrast, Prefixes are single-use. - ******************************************************************************/ - - -@@ -937,8 +1039,9 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, - * meaning "return to no-dictionary mode". -- * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. -- * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). -+ * Note 1 : Dictionary is sticky, it will be used for all future compressed frames, -+ * until parameters are reset, a new dictionary is loaded, or the dictionary -+ * is explicitly invalidated by loading a NULL dictionary. - * Note 2 : Loading a dictionary involves building tables. - * It's also a CPU consuming operation, with non-negligible impact on latency. - * Tables are dependent on compression parameters, and for this reason, -@@ -947,11 +1050,15 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); - * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. 
- * In such a case, dictionary buffer must outlive its users. - * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() -- * to precisely select how dictionary content must be interpreted. */ -+ * to precisely select how dictionary content must be interpreted. -+ * Note 5 : This method does not benefit from LDM (long distance mode). -+ * If you want to employ LDM on some large dictionary content, -+ * prefer employing ZSTD_CCtx_refPrefix() described below. -+ */ - ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); - - /*! ZSTD_CCtx_refCDict() : Requires v1.4.0+ -- * Reference a prepared dictionary, to be used for all next compressed frames. -+ * Reference a prepared dictionary, to be used for all future compressed frames. - * Note that compression parameters are enforced from within CDict, - * and supersede any compression parameter previously set within CCtx. - * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. -@@ -970,6 +1077,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); - * Decompression will need same prefix to properly regenerate data. - * Compressing with a prefix is similar in outcome as performing a diff and compressing it, - * but performs much faster, especially during decompression (compression speed is tunable with compression level). -+ * This method is compatible with LDM (long distance mode). - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary - * Note 1 : Prefix buffer is referenced. It **must** outlive compression. -@@ -986,9 +1094,9 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, - const void* prefix, size_t prefixSize); - - /*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+ -- * Create an internal DDict from dict buffer, -- * to be used to decompress next frames. 
-- * The dictionary remains valid for all future frames, until explicitly invalidated. -+ * Create an internal DDict from dict buffer, to be used to decompress all future frames. -+ * The dictionary remains valid for all future frames, until explicitly invalidated, or -+ * a new dictionary is loaded. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, - * meaning "return to no-dictionary mode". -@@ -1012,9 +1120,10 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, s - * The memory for the table is allocated on the first call to refDDict, and can be - * freed with ZSTD_freeDCtx(). - * -+ * If called with ZSTD_d_refMultipleDDicts disabled (the default), only one dictionary -+ * will be managed, and referencing a dictionary effectively "discards" any previous one. -+ * - * @result : 0, or an error code (which can be tested with ZSTD_isError()). -- * Note 1 : Currently, only one dictionary can be managed. -- * Referencing a new dictionary effectively "discards" any previous one. - * Special: referencing a NULL DDict means "return to no-dictionary mode". - * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. - */ -@@ -1071,24 +1180,6 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); - #define ZSTDLIB_STATIC_API ZSTDLIB_VISIBLE - #endif - --/* Deprecation warnings : -- * Should these warnings be a problem, it is generally possible to disable them, -- * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual. -- * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS. 
-- */ --#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS --# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API /* disable deprecation warnings */ --#else --# if (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__) --# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated(message))) --# elif (__GNUC__ >= 3) --# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated)) --# else --# pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler") --# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API --# endif --#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */ -- - /* ************************************************************************************** - * experimental API (static linking only) - **************************************************************************************** -@@ -1123,6 +1214,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); - #define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ - #define ZSTD_STRATEGY_MIN ZSTD_fast - #define ZSTD_STRATEGY_MAX ZSTD_btultra2 -+#define ZSTD_BLOCKSIZE_MAX_MIN (1 << 10) /* The minimum valid max blocksize. Maximum blocksizes smaller than this make compressBound() inaccurate. 
*/ - - - #define ZSTD_OVERLAPLOG_MIN 0 -@@ -1146,7 +1238,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); - #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) - - /* Advanced parameter bounds */ --#define ZSTD_TARGETCBLOCKSIZE_MIN 64 -+#define ZSTD_TARGETCBLOCKSIZE_MIN 1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */ - #define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX - #define ZSTD_SRCSIZEHINT_MIN 0 - #define ZSTD_SRCSIZEHINT_MAX INT_MAX -@@ -1303,7 +1395,7 @@ typedef enum { - } ZSTD_paramSwitch_e; - - /* ************************************* --* Frame size functions -+* Frame header and size functions - ***************************************/ - - /*! ZSTD_findDecompressedSize() : -@@ -1350,29 +1442,122 @@ ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size - * or an error code (if srcSize is too small) */ - ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); - -+typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; -+typedef struct { -+ unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ -+ unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ -+ unsigned blockSizeMax; -+ ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ -+ unsigned headerSize; -+ unsigned dictID; -+ unsigned checksumFlag; -+ unsigned _reserved1; -+ unsigned _reserved2; -+} ZSTD_frameHeader; -+ -+/*! ZSTD_getFrameHeader() : -+ * decode Frame Header, or requires larger `srcSize`. -+ * @return : 0, `zfhPtr` is correctly filled, -+ * >0, `srcSize` is too small, value is wanted `srcSize` amount, -+ * or an error code, which can be tested using ZSTD_isError() */ -+ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /*< doesn't consume input */ -+/*! 
ZSTD_getFrameHeader_advanced() : -+ * same as ZSTD_getFrameHeader(), -+ * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ -+ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); -+ -+/*! ZSTD_decompressionMargin() : -+ * Zstd supports in-place decompression, where the input and output buffers overlap. -+ * In this case, the output buffer must be at least (Margin + Output_Size) bytes large, -+ * and the input buffer must be at the end of the output buffer. -+ * -+ * _______________________ Output Buffer ________________________ -+ * | | -+ * | ____ Input Buffer ____| -+ * | | | -+ * v v v -+ * |---------------------------------------|-----------|----------| -+ * ^ ^ ^ -+ * |___________________ Output_Size ___________________|_ Margin _| -+ * -+ * NOTE: See also ZSTD_DECOMPRESSION_MARGIN(). -+ * NOTE: This applies only to single-pass decompression through ZSTD_decompress() or -+ * ZSTD_decompressDCtx(). -+ * NOTE: This function supports multi-frame input. -+ * -+ * @param src The compressed frame(s) -+ * @param srcSize The size of the compressed frame(s) -+ * @returns The decompression margin or an error that can be checked with ZSTD_isError(). -+ */ -+ZSTDLIB_STATIC_API size_t ZSTD_decompressionMargin(const void* src, size_t srcSize); -+ -+/*! ZSTD_DECOMPRESS_MARGIN() : -+ * Similar to ZSTD_decompressionMargin(), but instead of computing the margin from -+ * the compressed frame, compute it from the original size and the blockSizeLog. -+ * See ZSTD_decompressionMargin() for details. -+ * -+ * WARNING: This macro does not support multi-frame input, the input must be a single -+ * zstd frame. If you need that support use the function, or implement it yourself. -+ * -+ * @param originalSize The original uncompressed size of the data. -+ * @param blockSize The block size == MIN(windowSize, ZSTD_BLOCKSIZE_MAX). 
-+ * Unless you explicitly set the windowLog smaller than -+ * ZSTD_BLOCKSIZELOG_MAX you can just use ZSTD_BLOCKSIZE_MAX. -+ */ -+#define ZSTD_DECOMPRESSION_MARGIN(originalSize, blockSize) ((size_t)( \ -+ ZSTD_FRAMEHEADERSIZE_MAX /* Frame header */ + \ -+ 4 /* checksum */ + \ -+ ((originalSize) == 0 ? 0 : 3 * (((originalSize) + (blockSize) - 1) / blockSize)) /* 3 bytes per block */ + \ -+ (blockSize) /* One block of margin */ \ -+ )) -+ - typedef enum { - ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ - ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ - } ZSTD_sequenceFormat_e; - -+/*! ZSTD_sequenceBound() : -+ * `srcSize` : size of the input buffer -+ * @return : upper-bound for the number of sequences that can be generated -+ * from a buffer of srcSize bytes -+ * -+ * note : returns number of sequences - to get bytes, multiply by sizeof(ZSTD_Sequence). -+ */ -+ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize); -+ - /*! ZSTD_generateSequences() : -- * Generate sequences using ZSTD_compress2, given a source buffer. -+ * WARNING: This function is meant for debugging and informational purposes ONLY! -+ * Its implementation is flawed, and it will be deleted in a future version. -+ * It is not guaranteed to succeed, as there are several cases where it will give -+ * up and fail. You should NOT use this function in production code. -+ * -+ * This function is deprecated, and will be removed in a future version. -+ * -+ * Generate sequences using ZSTD_compress2(), given a source buffer. -+ * -+ * @param zc The compression context to be used for ZSTD_compress2(). Set any -+ * compression parameters you need on this context. -+ * @param outSeqs The output sequences buffer of size @p outSeqsSize -+ * @param outSeqsSize The size of the output sequences buffer. 
-+ * ZSTD_sequenceBound(srcSize) is an upper bound on the number -+ * of sequences that can be generated. -+ * @param src The source buffer to generate sequences from of size @p srcSize. -+ * @param srcSize The size of the source buffer. - * - * Each block will end with a dummy sequence - * with offset == 0, matchLength == 0, and litLength == length of last literals. - * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) - * simply acts as a block delimiter. - * -- * zc can be used to insert custom compression params. -- * This function invokes ZSTD_compress2 -- * -- * The output of this function can be fed into ZSTD_compressSequences() with CCtx -- * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters -- * @return : number of sequences generated -+ * @returns The number of sequences generated, necessarily less than -+ * ZSTD_sequenceBound(srcSize), or an error code that can be checked -+ * with ZSTD_isError(). - */ -- --ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, -- size_t outSeqsSize, const void* src, size_t srcSize); -+ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()") -+ZSTDLIB_STATIC_API size_t -+ZSTD_generateSequences(ZSTD_CCtx* zc, -+ ZSTD_Sequence* outSeqs, size_t outSeqsSize, -+ const void* src, size_t srcSize); - - /*! ZSTD_mergeBlockDelimiters() : - * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals -@@ -1388,7 +1573,9 @@ ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* o - ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); - - /*! ZSTD_compressSequences() : -- * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. -+ * Compress an array of ZSTD_Sequence, associated with @src buffer, into dst. -+ * @src contains the entire input (not just the literals). 
-+ * If @srcSize > sum(sequence.length), the remaining bytes are considered all literals - * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.) - * The entire source is compressed into a single frame. - * -@@ -1413,11 +1600,12 @@ ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, si - * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused. - * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly, - * and cannot emit an RLE block that disagrees with the repcode history -- * @return : final compressed size or a ZSTD error. -+ * @return : final compressed size, or a ZSTD error code. - */ --ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, -- const ZSTD_Sequence* inSeqs, size_t inSeqsSize, -- const void* src, size_t srcSize); -+ZSTDLIB_STATIC_API size_t -+ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize, -+ const ZSTD_Sequence* inSeqs, size_t inSeqsSize, -+ const void* src, size_t srcSize); - - - /*! ZSTD_writeSkippableFrame() : -@@ -1464,48 +1652,59 @@ ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size); - /*! ZSTD_estimate*() : - * These functions make it possible to estimate memory usage - * of a future {D,C}Ctx, before its creation. -+ * This is useful in combination with ZSTD_initStatic(), -+ * which makes it possible to employ a static buffer for ZSTD_CCtx* state. - * - * ZSTD_estimateCCtxSize() will provide a memory budget large enough -- * for any compression level up to selected one. -- * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate -- * does not include space for a window buffer. -- * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. 
-+ * to compress data of any size using one-shot compression ZSTD_compressCCtx() or ZSTD_compress2() -+ * associated with any compression level up to max specified one. - * The estimate will assume the input may be arbitrarily large, - * which is the worst case. - * -+ * Note that the size estimation is specific for one-shot compression, -+ * it is not valid for streaming (see ZSTD_estimateCStreamSize*()) -+ * nor other potential ways of using a ZSTD_CCtx* state. -+ * - * When srcSize can be bound by a known and rather "small" value, -- * this fact can be used to provide a tighter estimation -- * because the CCtx compression context will need less memory. -- * This tighter estimation can be provided by more advanced functions -+ * this knowledge can be used to provide a tighter budget estimation -+ * because the ZSTD_CCtx* state will need less memory for small inputs. -+ * This tighter estimation can be provided by employing more advanced functions - * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), - * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). - * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. - * -- * Note 2 : only single-threaded compression is supported. -+ * Note : only single-threaded compression is supported. - * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. - */ --ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel); -+ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int maxCompressionLevel); - ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); - ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); - ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void); - - /*! 
ZSTD_estimateCStreamSize() : -- * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. -- * It will also consider src size to be arbitrarily "large", which is worst case. -+ * ZSTD_estimateCStreamSize() will provide a memory budget large enough for streaming compression -+ * using any compression level up to the max specified one. -+ * It will also consider src size to be arbitrarily "large", which is a worst case scenario. - * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. - * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. - * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. - * Note : CStream size estimation is only correct for single-threaded compression. -- * ZSTD_DStream memory budget depends on window Size. -+ * ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. -+ * Note 2 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time. -+ * Size estimates assume that no external sequence producer is registered. -+ * -+ * ZSTD_DStream memory budget depends on frame's window Size. - * This information can be passed manually, using ZSTD_estimateDStreamSize, - * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); -+ * Any frame requesting a window size larger than max specified one will be rejected. - * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), - * an internal ?Dict will be created, which additional size is not estimated here. 
-- * In this case, get total size by adding ZSTD_estimate?DictSize */ --ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel); -+ * In this case, get total size by adding ZSTD_estimate?DictSize -+ */ -+ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int maxCompressionLevel); - ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); - ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); --ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t windowSize); -+ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t maxWindowSize); - ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); - - /*! ZSTD_estimate?DictSize() : -@@ -1649,22 +1848,45 @@ ZSTDLIB_STATIC_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); - * This function never fails (wide contract) */ - ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); - -+/*! ZSTD_CCtx_setCParams() : -+ * Set all parameters provided within @p cparams into the working @p cctx. -+ * Note : if modifying parameters during compression (MT mode only), -+ * note that changes to the .windowLog parameter will be ignored. -+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()). -+ * On failure, no parameters are updated. -+ */ -+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams); -+ -+/*! ZSTD_CCtx_setFParams() : -+ * Set all parameters provided within @p fparams into the working @p cctx. -+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()). -+ */ -+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams); -+ -+/*! ZSTD_CCtx_setParams() : -+ * Set all parameters provided within @p params into the working @p cctx. 
-+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()). -+ */ -+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params); -+ - /*! ZSTD_compress_advanced() : - * Note : this function is now DEPRECATED. - * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. - * This prototype will generate compilation warnings. */ - ZSTD_DEPRECATED("use ZSTD_compress2") -+ZSTDLIB_STATIC_API - size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, -- void* dst, size_t dstCapacity, -- const void* src, size_t srcSize, -- const void* dict,size_t dictSize, -- ZSTD_parameters params); -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize, -+ const void* dict,size_t dictSize, -+ ZSTD_parameters params); - - /*! ZSTD_compress_usingCDict_advanced() : - * Note : this function is now DEPRECATED. - * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. - * This prototype will generate compilation warnings. */ - ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary") -+ZSTDLIB_STATIC_API - size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, -@@ -1737,11 +1959,6 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo - */ - #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 - --/* Tries to fit compressed block size to be around targetCBlockSize. -- * No target when targetCBlockSize == 0. -- * There is no guarantee on compressed block size (default:0) */ --#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 -- - /* User's best guess of source size. - * Hint is not valid when srcSizeHint == 0. 
- * There is no guarantee that hint is close to actual source size, -@@ -1808,13 +2025,16 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo - * Experimental parameter. - * Default is 0 == disabled. Set to 1 to enable. - * -- * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same -- * between calls, except for the modifications that zstd makes to pos (the -- * caller must not modify pos). This is checked by the compressor, and -- * compression will fail if it ever changes. This means the only flush -- * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end -- * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos) -- * MUST not be modified during compression or you will get data corruption. -+ * Tells the compressor that input data presented with ZSTD_inBuffer -+ * will ALWAYS be the same between calls. -+ * Technically, the @src pointer must never be changed, -+ * and the @pos field can only be updated by zstd. -+ * However, it's possible to increase the @size field, -+ * allowing scenarios where more data can be appended after compressions starts. -+ * These conditions are checked by the compressor, -+ * and compression will fail if they are not respected. -+ * Also, data in the ZSTD_inBuffer within the range [src, src + pos) -+ * MUST not be modified during compression or it will result in data corruption. - * - * When this flag is enabled zstd won't allocate an input window buffer, - * because the user guarantees it can reference the ZSTD_inBuffer until -@@ -1822,18 +2042,15 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo - * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also - * avoid the memcpy() from the input buffer to the input window buffer. - * -- * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used. -- * That means this flag cannot be used with ZSTD_compressStream(). 
-- * - * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using - * this flag is ALWAYS memory safe, and will never access out-of-bounds -- * memory. However, compression WILL fail if you violate the preconditions. -+ * memory. However, compression WILL fail if conditions are not respected. - * -- * WARNING: The data in the ZSTD_inBuffer in the range [dst, dst + pos) MUST -- * not be modified during compression or you will get data corruption. This -- * is because zstd needs to reference data in the ZSTD_inBuffer to find -+ * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST -+ * not be modified during compression or it will result in data corruption. -+ * This is because zstd needs to reference data in the ZSTD_inBuffer to find - * matches. Normally zstd maintains its own window buffer for this purpose, -- * but passing this flag tells zstd to use the user provided buffer. -+ * but passing this flag tells zstd to rely on user provided buffer instead. - */ - #define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9 - -@@ -1878,7 +2095,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo - * Without validation, providing a sequence that does not conform to the zstd spec will cause - * undefined behavior, and may produce a corrupted block. - * -- * With validation enabled, a if sequence is invalid (see doc/zstd_compression_format.md for -+ * With validation enabled, if sequence is invalid (see doc/zstd_compression_format.md for - * specifics regarding offset/matchlength requirements) then the function will bail out and - * return an error. - * -@@ -1928,6 +2145,79 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo - */ - #define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15 - -+/* ZSTD_c_prefetchCDictTables -+ * Controlled with ZSTD_paramSwitch_e enum. Default is ZSTD_ps_auto. 
-+ * -+ * In some situations, zstd uses CDict tables in-place rather than copying them -+ * into the working context. (See docs on ZSTD_dictAttachPref_e above for details). -+ * In such situations, compression speed is seriously impacted when CDict tables are -+ * "cold" (outside CPU cache). This parameter instructs zstd to prefetch CDict tables -+ * when they are used in-place. -+ * -+ * For sufficiently small inputs, the cost of the prefetch will outweigh the benefit. -+ * For sufficiently large inputs, zstd will by default memcpy() CDict tables -+ * into the working context, so there is no need to prefetch. This parameter is -+ * targeted at a middle range of input sizes, where a prefetch is cheap enough to be -+ * useful but memcpy() is too expensive. The exact range of input sizes where this -+ * makes sense is best determined by careful experimentation. -+ * -+ * Note: for this parameter, ZSTD_ps_auto is currently equivalent to ZSTD_ps_disable, -+ * but in the future zstd may conditionally enable this feature via an auto-detection -+ * heuristic for cold CDicts. -+ * Use ZSTD_ps_disable to opt out of prefetching under any circumstances. -+ */ -+#define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16 -+ -+/* ZSTD_c_enableSeqProducerFallback -+ * Allowed values are 0 (disable) and 1 (enable). The default setting is 0. -+ * -+ * Controls whether zstd will fall back to an internal sequence producer if an -+ * external sequence producer is registered and returns an error code. This fallback -+ * is block-by-block: the internal sequence producer will only be called for blocks -+ * where the external sequence producer returns an error code. Fallback parsing will -+ * follow any other cParam settings, such as compression level, the same as in a -+ * normal (fully-internal) compression operation. -+ * -+ * The user is strongly encouraged to read the full Block-Level Sequence Producer API -+ * documentation (below) before setting this parameter. 
*/ -+#define ZSTD_c_enableSeqProducerFallback ZSTD_c_experimentalParam17 -+ -+/* ZSTD_c_maxBlockSize -+ * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB). -+ * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default. -+ * -+ * This parameter can be used to set an upper bound on the blocksize -+ * that overrides the default ZSTD_BLOCKSIZE_MAX. It cannot be used to set upper -+ * bounds greater than ZSTD_BLOCKSIZE_MAX or bounds lower than 1KB (will make -+ * compressBound() inaccurate). Only currently meant to be used for testing. -+ * -+ */ -+#define ZSTD_c_maxBlockSize ZSTD_c_experimentalParam18 -+ -+/* ZSTD_c_searchForExternalRepcodes -+ * This parameter affects how zstd parses external sequences, such as sequences -+ * provided through the compressSequences() API or from an external block-level -+ * sequence producer. -+ * -+ * If set to ZSTD_ps_enable, the library will check for repeated offsets in -+ * external sequences, even if those repcodes are not explicitly indicated in -+ * the "rep" field. Note that this is the only way to exploit repcode matches -+ * while using compressSequences() or an external sequence producer, since zstd -+ * currently ignores the "rep" field of external sequences. -+ * -+ * If set to ZSTD_ps_disable, the library will not exploit repeated offsets in -+ * external sequences, regardless of whether the "rep" field has been set. This -+ * reduces sequence compression overhead by about 25% while sacrificing some -+ * compression ratio. -+ * -+ * The default value is ZSTD_ps_auto, for which the library will enable/disable -+ * based on compression level. -+ * -+ * Note: for now, this param only has an effect if ZSTD_c_blockDelimiters is -+ * set to ZSTD_sf_explicitBlockDelimiters. That may change in the future. -+ */ -+#define ZSTD_c_searchForExternalRepcodes ZSTD_c_experimentalParam19 -+ - /*! 
ZSTD_CCtx_getParameter() : - * Get the requested compression parameter value, selected by enum ZSTD_cParameter, - * and store it into int* value. -@@ -2084,7 +2374,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete - * in the range [dst, dst + pos) MUST not be modified during decompression - * or you will get data corruption. - * -- * When this flags is enabled zstd won't allocate an output buffer, because -+ * When this flag is enabled zstd won't allocate an output buffer, because - * it can write directly to the ZSTD_outBuffer, but it will still allocate - * an input buffer large enough to fit any compressed block. This will also - * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. -@@ -2137,6 +2427,33 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete - */ - #define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 - -+/* ZSTD_d_disableHuffmanAssembly -+ * Set to 1 to disable the Huffman assembly implementation. -+ * The default value is 0, which allows zstd to use the Huffman assembly -+ * implementation if available. -+ * -+ * This parameter can be used to disable Huffman assembly at runtime. -+ * If you want to disable it at compile time you can define the macro -+ * ZSTD_DISABLE_ASM. -+ */ -+#define ZSTD_d_disableHuffmanAssembly ZSTD_d_experimentalParam5 -+ -+/* ZSTD_d_maxBlockSize -+ * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB). -+ * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default. -+ * -+ * Forces the decompressor to reject blocks whose content size is -+ * larger than the configured maxBlockSize. When maxBlockSize is -+ * larger than the windowSize, the windowSize is used instead. -+ * This saves memory on the decoder when you know all blocks are small. -+ * -+ * This option is typically used in conjunction with ZSTD_c_maxBlockSize. 
-+ * -+ * WARNING: This causes the decoder to reject otherwise valid frames -+ * that have block sizes larger than the configured maxBlockSize. -+ */ -+#define ZSTD_d_maxBlockSize ZSTD_d_experimentalParam6 -+ - - /*! ZSTD_DCtx_setFormat() : - * This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter(). -@@ -2145,6 +2462,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete - * such ZSTD_f_zstd1_magicless for example. - * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ - ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead") -+ZSTDLIB_STATIC_API - size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); - - /*! ZSTD_decompressStream_simpleArgs() : -@@ -2181,6 +2499,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressStream_simpleArgs ( - * This prototype will generate compilation warnings. - */ - ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") -+ZSTDLIB_STATIC_API - size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, - int compressionLevel, - unsigned long long pledgedSrcSize); -@@ -2198,17 +2517,15 @@ size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, - * This prototype will generate compilation warnings. - */ - ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") -+ZSTDLIB_STATIC_API - size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - int compressionLevel); - - /*! ZSTD_initCStream_advanced() : -- * This function is DEPRECATED, and is approximately equivalent to: -+ * This function is DEPRECATED, and is equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); -- * // Pseudocode: Set each zstd parameter and leave the rest as-is. 
-- * for ((param, value) : params) { -- * ZSTD_CCtx_setParameter(zcs, param, value); -- * } -+ * ZSTD_CCtx_setParams(zcs, params); - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); - * -@@ -2218,6 +2535,7 @@ size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, - * This prototype will generate compilation warnings. - */ - ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") -+ZSTDLIB_STATIC_API - size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - ZSTD_parameters params, -@@ -2232,15 +2550,13 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, - * This prototype will generate compilation warnings. - */ - ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") -+ZSTDLIB_STATIC_API - size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); - - /*! ZSTD_initCStream_usingCDict_advanced() : -- * This function is DEPRECATED, and is approximately equivalent to: -+ * This function is DEPRECATED, and is equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); -- * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. -- * for ((fParam, value) : fParams) { -- * ZSTD_CCtx_setParameter(zcs, fParam, value); -- * } -+ * ZSTD_CCtx_setFParams(zcs, fParams); - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * ZSTD_CCtx_refCDict(zcs, cdict); - * -@@ -2250,6 +2566,7 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); - * This prototype will generate compilation warnings. - */ - ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") -+ZSTDLIB_STATIC_API - size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, - const ZSTD_CDict* cdict, - ZSTD_frameParameters fParams, -@@ -2264,7 +2581,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, - * explicitly specified. 
- * - * start a new frame, using same parameters from previous frame. -- * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. -+ * This is typically useful to skip dictionary loading stage, since it will reuse it in-place. - * Note that zcs must be init at least once before using ZSTD_resetCStream(). - * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. - * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. -@@ -2274,6 +2591,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, - * This prototype will generate compilation warnings. - */ - ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") -+ZSTDLIB_STATIC_API - size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); - - -@@ -2319,8 +2637,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); - * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); - * - * note: no dictionary will be used if dict == NULL or dictSize < 8 -- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -+ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_loadDictionary, see zstd.h for detailed instructions") - ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); - - /*! -@@ -2330,8 +2648,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const vo - * ZSTD_DCtx_refDDict(zds, ddict); - * - * note : ddict is referenced, it must outlive decompression session -- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -+ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_refDDict, see zstd.h for detailed instructions") - ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); - - /*! 
-@@ -2339,18 +2657,202 @@ ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const Z - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * -- * re-use decompression parameters from previous init; saves dictionary loading -- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x -+ * reuse decompression parameters from previous init; saves dictionary loading - */ -+ZSTD_DEPRECATED("use ZSTD_DCtx_reset, see zstd.h for detailed instructions") - ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); - - -+/* ********************* BLOCK-LEVEL SEQUENCE PRODUCER API ********************* -+ * -+ * *** OVERVIEW *** -+ * The Block-Level Sequence Producer API allows users to provide their own custom -+ * sequence producer which libzstd invokes to process each block. The produced list -+ * of sequences (literals and matches) is then post-processed by libzstd to produce -+ * valid compressed blocks. -+ * -+ * This block-level offload API is a more granular complement of the existing -+ * frame-level offload API compressSequences() (introduced in v1.5.1). It offers -+ * an easier migration story for applications already integrated with libzstd: the -+ * user application continues to invoke the same compression functions -+ * ZSTD_compress2() or ZSTD_compressStream2() as usual, and transparently benefits -+ * from the specific advantages of the external sequence producer. For example, -+ * the sequence producer could be tuned to take advantage of known characteristics -+ * of the input, to offer better speed / ratio, or could leverage hardware -+ * acceleration not available within libzstd itself. -+ * -+ * See contrib/externalSequenceProducer for an example program employing the -+ * Block-Level Sequence Producer API. -+ * -+ * *** USAGE *** -+ * The user is responsible for implementing a function of type -+ * ZSTD_sequenceProducer_F. 
For each block, zstd will pass the following -+ * arguments to the user-provided function: -+ * -+ * - sequenceProducerState: a pointer to a user-managed state for the sequence -+ * producer. -+ * -+ * - outSeqs, outSeqsCapacity: an output buffer for the sequence producer. -+ * outSeqsCapacity is guaranteed >= ZSTD_sequenceBound(srcSize). The memory -+ * backing outSeqs is managed by the CCtx. -+ * -+ * - src, srcSize: an input buffer for the sequence producer to parse. -+ * srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX. -+ * -+ * - dict, dictSize: a history buffer, which may be empty, which the sequence -+ * producer may reference as it parses the src buffer. Currently, zstd will -+ * always pass dictSize == 0 into external sequence producers, but this will -+ * change in the future. -+ * -+ * - compressionLevel: a signed integer representing the zstd compression level -+ * set by the user for the current operation. The sequence producer may choose -+ * to use this information to change its compression strategy and speed/ratio -+ * tradeoff. Note: the compression level does not reflect zstd parameters set -+ * through the advanced API. -+ * -+ * - windowSize: a size_t representing the maximum allowed offset for external -+ * sequences. Note that sequence offsets are sometimes allowed to exceed the -+ * windowSize if a dictionary is present, see doc/zstd_compression_format.md -+ * for details. -+ * -+ * The user-provided function shall return a size_t representing the number of -+ * sequences written to outSeqs. This return value will be treated as an error -+ * code if it is greater than outSeqsCapacity. The return value must be non-zero -+ * if srcSize is non-zero. The ZSTD_SEQUENCE_PRODUCER_ERROR macro is provided -+ * for convenience, but any value greater than outSeqsCapacity will be treated as -+ * an error code. 
-+ * -+ * If the user-provided function does not return an error code, the sequences -+ * written to outSeqs must be a valid parse of the src buffer. Data corruption may -+ * occur if the parse is not valid. A parse is defined to be valid if the -+ * following conditions hold: -+ * - The sum of matchLengths and literalLengths must equal srcSize. -+ * - All sequences in the parse, except for the final sequence, must have -+ * matchLength >= ZSTD_MINMATCH_MIN. The final sequence must have -+ * matchLength >= ZSTD_MINMATCH_MIN or matchLength == 0. -+ * - All offsets must respect the windowSize parameter as specified in -+ * doc/zstd_compression_format.md. -+ * - If the final sequence has matchLength == 0, it must also have offset == 0. -+ * -+ * zstd will only validate these conditions (and fail compression if they do not -+ * hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence -+ * validation has a performance cost. -+ * -+ * If the user-provided function returns an error, zstd will either fall back -+ * to an internal sequence producer or fail the compression operation. The user can -+ * choose between the two behaviors by setting the ZSTD_c_enableSeqProducerFallback -+ * cParam. Fallback compression will follow any other cParam settings, such as -+ * compression level, the same as in a normal compression operation. -+ * -+ * The user shall instruct zstd to use a particular ZSTD_sequenceProducer_F -+ * function by calling -+ * ZSTD_registerSequenceProducer(cctx, -+ * sequenceProducerState, -+ * sequenceProducer) -+ * This setting will persist until the next parameter reset of the CCtx. -+ * -+ * The sequenceProducerState must be initialized by the user before calling -+ * ZSTD_registerSequenceProducer(). The user is responsible for destroying the -+ * sequenceProducerState. -+ * -+ * *** LIMITATIONS *** -+ * This API is compatible with all zstd compression APIs which respect advanced parameters. 
-+ * However, there are three limitations: -+ * -+ * First, the ZSTD_c_enableLongDistanceMatching cParam is not currently supported. -+ * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with a block-level -+ * external sequence producer. -+ * - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in some -+ * cases (see its documentation for details). Users must explicitly set -+ * ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an external -+ * sequence producer is registered. -+ * - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default -+ * whenever ZSTD_c_windowLog < 128MB, but that's subject to change. Users should -+ * check the docs on ZSTD_c_enableLongDistanceMatching whenever the Block-Level Sequence -+ * Producer API is used in conjunction with advanced settings (like ZSTD_c_windowLog). -+ * -+ * Second, history buffers are not currently supported. Concretely, zstd will always pass -+ * dictSize == 0 to the external sequence producer (for now). This has two implications: -+ * - Dictionaries are not currently supported. Compression will *not* fail if the user -+ * references a dictionary, but the dictionary won't have any effect. -+ * - Stream history is not currently supported. All advanced compression APIs, including -+ * streaming APIs, work with external sequence producers, but each block is treated as -+ * an independent chunk without history from previous blocks. -+ * -+ * Third, multi-threading within a single compression is not currently supported. In other words, -+ * COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external sequence producer is registered. -+ * Multi-threading across compressions is fine: simply create one CCtx per thread. -+ * -+ * Long-term, we plan to overcome all three limitations. There is no technical blocker to -+ * overcoming them. It is purely a question of engineering effort. 
-+ */ -+ -+#define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1)) -+ -+typedef size_t (*ZSTD_sequenceProducer_F) ( -+ void* sequenceProducerState, -+ ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, -+ const void* src, size_t srcSize, -+ const void* dict, size_t dictSize, -+ int compressionLevel, -+ size_t windowSize -+); -+ -+/*! ZSTD_registerSequenceProducer() : -+ * Instruct zstd to use a block-level external sequence producer function. -+ * -+ * The sequenceProducerState must be initialized by the caller, and the caller is -+ * responsible for managing its lifetime. This parameter is sticky across -+ * compressions. It will remain set until the user explicitly resets compression -+ * parameters. -+ * -+ * Sequence producer registration is considered to be an "advanced parameter", -+ * part of the "advanced API". This means it will only have an effect on compression -+ * APIs which respect advanced parameters, such as compress2() and compressStream2(). -+ * Older compression APIs such as compressCCtx(), which predate the introduction of -+ * "advanced parameters", will ignore any external sequence producer setting. -+ * -+ * The sequence producer can be "cleared" by registering a NULL function pointer. This -+ * removes all limitations described above in the "LIMITATIONS" section of the API docs. -+ * -+ * The user is strongly encouraged to read the full API documentation (above) before -+ * calling this function. */ -+ZSTDLIB_STATIC_API void -+ZSTD_registerSequenceProducer( -+ ZSTD_CCtx* cctx, -+ void* sequenceProducerState, -+ ZSTD_sequenceProducer_F sequenceProducer -+); -+ -+/*! ZSTD_CCtxParams_registerSequenceProducer() : -+ * Same as ZSTD_registerSequenceProducer(), but operates on ZSTD_CCtx_params. -+ * This is used for accurate size estimation with ZSTD_estimateCCtxSize_usingCCtxParams(), -+ * which is needed when creating a ZSTD_CCtx with ZSTD_initStaticCCtx(). 
-+ * -+ * If you are using the external sequence producer API in a scenario where ZSTD_initStaticCCtx() -+ * is required, then this function is for you. Otherwise, you probably don't need it. -+ * -+ * See tests/zstreamtest.c for example usage. */ -+ZSTDLIB_STATIC_API void -+ZSTD_CCtxParams_registerSequenceProducer( -+ ZSTD_CCtx_params* params, -+ void* sequenceProducerState, -+ ZSTD_sequenceProducer_F sequenceProducer -+); -+ -+ - /* ******************************************************************* --* Buffer-less and synchronous inner streaming functions -+* Buffer-less and synchronous inner streaming functions (DEPRECATED) -+* -+* This API is deprecated, and will be removed in a future version. -+* It allows streaming (de)compression with user allocated buffers. -+* However, it is hard to use, and not as well tested as the rest of -+* our API. - * --* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. --* But it's also a complex one, with several restrictions, documented below. --* Prefer normal streaming API for an easier experience. -+* Please use the normal streaming API instead: ZSTD_compressStream2, -+* and ZSTD_decompressStream. -+* If there is functionality that you need, but it doesn't provide, -+* please open an issue on our GitHub. - ********************************************************************* */ - - /* -@@ -2358,11 +2860,10 @@ ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); - - A ZSTD_CCtx object is required to track streaming operations. - Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. -- ZSTD_CCtx object can be re-used multiple times within successive compression operations. -+ ZSTD_CCtx object can be reused multiple times within successive compression operations. - - Start by initializing a context. - Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression. 
-- It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() - - Then, consume your input using ZSTD_compressContinue(). - There are some important considerations to keep in mind when using this advanced function : -@@ -2380,36 +2881,46 @@ ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); - It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. - Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. - -- `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. -+ `ZSTD_CCtx` object can be reused (ZSTD_compressBegin()) to compress again. - */ - - /*===== Buffer-less streaming compression functions =====*/ -+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") - ZSTDLIB_STATIC_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); -+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") - ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); -+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") - ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*< note: fails if cdict==NULL */ --ZSTDLIB_STATIC_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ - -+ZSTD_DEPRECATED("This function will likely be removed in a future release. 
It is misleading and has very limited utility.") -+ZSTDLIB_STATIC_API -+size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ -+ -+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") - ZSTDLIB_STATIC_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") - ZSTDLIB_STATIC_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - - /* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */ - ZSTD_DEPRECATED("use advanced API to access custom parameters") -+ZSTDLIB_STATIC_API - size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ - ZSTD_DEPRECATED("use advanced API to access custom parameters") -+ZSTDLIB_STATIC_API - size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ - /* - Buffer-less streaming decompression (synchronous mode) - - A ZSTD_DCtx object is required to track streaming operations. - Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. -- A ZSTD_DCtx object can be re-used multiple times. -+ A ZSTD_DCtx object can be reused multiple times. - - First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). 
- Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. - Data fragment must be large enough to ensure successful decoding. - `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. -- @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. -- >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. -+ result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. -+ >0 : `srcSize` is too small, please provide at least result bytes on next attempt. - errorCode, which can be tested using ZSTD_isError(). - - It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, -@@ -2428,7 +2939,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_ - - The most memory efficient way is to use a round buffer of sufficient size. - Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), -- which can @return an error code if required value is too large for current system (in 32-bits mode). -+ which can return an error code if required value is too large for current system (in 32-bits mode). - In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, - up to the moment there is not enough room left in the buffer to guarantee decoding another full block, - which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. -@@ -2448,7 +2959,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_ - ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). - ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. - -- @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). 
-+ result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). - It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. - It can also be an error code, which can be tested with ZSTD_isError(). - -@@ -2471,27 +2982,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_ - */ - - /*===== Buffer-less streaming decompression functions =====*/ --typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; --typedef struct { -- unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ -- unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ -- unsigned blockSizeMax; -- ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ -- unsigned headerSize; -- unsigned dictID; -- unsigned checksumFlag; --} ZSTD_frameHeader; - --/*! ZSTD_getFrameHeader() : -- * decode Frame Header, or requires larger `srcSize`. -- * @return : 0, `zfhPtr` is correctly filled, -- * >0, `srcSize` is too small, value is wanted `srcSize` amount, -- * or an error code, which can be tested using ZSTD_isError() */ --ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /*< doesn't consume input */ --/*! 
ZSTD_getFrameHeader_advanced() : -- * same as ZSTD_getFrameHeader(), -- * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ --ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); - ZSTDLIB_STATIC_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /*< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ - - ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); -@@ -2502,6 +2993,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); - ZSTDLIB_STATIC_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - - /* misc */ -+ZSTD_DEPRECATED("This function will likely be removed in the next minor release. It is misleading and has very limited utility.") - ZSTDLIB_STATIC_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); - typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; - ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); -@@ -2509,11 +3001,23 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); - - - --/* ============================ */ --/* Block level API */ --/* ============================ */ -+/* ========================================= */ -+/* Block level API (DEPRECATED) */ -+/* ========================================= */ - - /*! -+ -+ This API is deprecated in favor of the regular compression API. -+ You can get the frame header down to 2 bytes by setting: -+ - ZSTD_c_format = ZSTD_f_zstd1_magicless -+ - ZSTD_c_contentSizeFlag = 0 -+ - ZSTD_c_checksumFlag = 0 -+ - ZSTD_c_dictIDFlag = 0 -+ -+ This API is not as well tested as our normal API, so we recommend not using it. 
-+ We will be removing it in a future version. If the normal API doesn't provide -+ the functionality you need, please open a GitHub issue. -+ - Block functions produce and decode raw zstd blocks, without frame metadata. - Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). - But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. -@@ -2524,7 +3028,6 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); - - It is necessary to init context before starting - + compression : any ZSTD_compressBegin*() variant, including with dictionary - + decompression : any ZSTD_decompressBegin*() variant, including with dictionary -- + copyCCtx() and copyDCtx() can be used too - - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB - + If input is larger than a block size, it's necessary to split input data into multiple blocks - + For inputs larger than a single block, consider using regular ZSTD_compress() instead. -@@ -2541,11 +3044,14 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); - */ - - /*===== Raw zstd block functions =====*/ -+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") - ZSTDLIB_STATIC_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); -+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") - ZSTDLIB_STATIC_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") - ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. 
See docs.") - ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /*< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ - -- - #endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ - -diff --git a/lib/zstd/Makefile b/lib/zstd/Makefile -index 20f08c644b71..464c410b2768 100644 ---- a/lib/zstd/Makefile -+++ b/lib/zstd/Makefile -@@ -1,6 +1,6 @@ - # SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - # ################################################################ --# Copyright (c) Facebook, Inc. -+# Copyright (c) Meta Platforms, Inc. and affiliates. - # All rights reserved. - # - # This source code is licensed under both the BSD-style license (found in the -diff --git a/lib/zstd/common/allocations.h b/lib/zstd/common/allocations.h -new file mode 100644 -index 000000000000..16c3d08e8d1a ---- /dev/null -+++ b/lib/zstd/common/allocations.h -@@ -0,0 +1,56 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ -+/* -+ * Copyright (c) Meta Platforms, Inc. and affiliates. -+ * All rights reserved. -+ * -+ * This source code is licensed under both the BSD-style license (found in the -+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found -+ * in the COPYING file in the root directory of this source tree). -+ * You may select, at your option, one of the above-listed licenses. 
-+ */ -+ -+/* This file provides custom allocation primitives -+ */ -+ -+#define ZSTD_DEPS_NEED_MALLOC -+#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */ -+ -+#include "compiler.h" /* MEM_STATIC */ -+#define ZSTD_STATIC_LINKING_ONLY -+#include /* ZSTD_customMem */ -+ -+#ifndef ZSTD_ALLOCATIONS_H -+#define ZSTD_ALLOCATIONS_H -+ -+/* custom memory allocation functions */ -+ -+MEM_STATIC void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) -+{ -+ if (customMem.customAlloc) -+ return customMem.customAlloc(customMem.opaque, size); -+ return ZSTD_malloc(size); -+} -+ -+MEM_STATIC void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) -+{ -+ if (customMem.customAlloc) { -+ /* calloc implemented as malloc+memset; -+ * not as efficient as calloc, but next best guess for custom malloc */ -+ void* const ptr = customMem.customAlloc(customMem.opaque, size); -+ ZSTD_memset(ptr, 0, size); -+ return ptr; -+ } -+ return ZSTD_calloc(1, size); -+} -+ -+MEM_STATIC void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) -+{ -+ if (ptr!=NULL) { -+ if (customMem.customFree) -+ customMem.customFree(customMem.opaque, ptr); -+ else -+ ZSTD_free(ptr); -+ } -+} -+ -+#endif /* ZSTD_ALLOCATIONS_H */ -diff --git a/lib/zstd/common/bits.h b/lib/zstd/common/bits.h -new file mode 100644 -index 000000000000..aa3487ec4b6a ---- /dev/null -+++ b/lib/zstd/common/bits.h -@@ -0,0 +1,149 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ -+/* -+ * Copyright (c) Meta Platforms, Inc. and affiliates. -+ * All rights reserved. -+ * -+ * This source code is licensed under both the BSD-style license (found in the -+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found -+ * in the COPYING file in the root directory of this source tree). -+ * You may select, at your option, one of the above-listed licenses. 
-+ */ -+ -+#ifndef ZSTD_BITS_H -+#define ZSTD_BITS_H -+ -+#include "mem.h" -+ -+MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val) -+{ -+ assert(val != 0); -+ { -+ static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3, -+ 30, 22, 20, 15, 25, 17, 4, 8, -+ 31, 27, 13, 23, 21, 19, 16, 7, -+ 26, 12, 18, 6, 11, 5, 10, 9}; -+ return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27]; -+ } -+} -+ -+MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val) -+{ -+ assert(val != 0); -+# if (__GNUC__ >= 4) -+ return (unsigned)__builtin_ctz(val); -+# else -+ return ZSTD_countTrailingZeros32_fallback(val); -+# endif -+} -+ -+MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) { -+ assert(val != 0); -+ { -+ static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29, -+ 11, 14, 16, 18, 22, 25, 3, 30, -+ 8, 12, 20, 28, 15, 17, 24, 7, -+ 19, 27, 23, 6, 26, 5, 4, 31}; -+ val |= val >> 1; -+ val |= val >> 2; -+ val |= val >> 4; -+ val |= val >> 8; -+ val |= val >> 16; -+ return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27]; -+ } -+} -+ -+MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val) -+{ -+ assert(val != 0); -+# if (__GNUC__ >= 4) -+ return (unsigned)__builtin_clz(val); -+# else -+ return ZSTD_countLeadingZeros32_fallback(val); -+# endif -+} -+ -+MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val) -+{ -+ assert(val != 0); -+# if (__GNUC__ >= 4) && defined(__LP64__) -+ return (unsigned)__builtin_ctzll(val); -+# else -+ { -+ U32 mostSignificantWord = (U32)(val >> 32); -+ U32 leastSignificantWord = (U32)val; -+ if (leastSignificantWord == 0) { -+ return 32 + ZSTD_countTrailingZeros32(mostSignificantWord); -+ } else { -+ return ZSTD_countTrailingZeros32(leastSignificantWord); -+ } -+ } -+# endif -+} -+ -+MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val) -+{ -+ assert(val != 0); -+# if (__GNUC__ >= 4) -+ return (unsigned)(__builtin_clzll(val)); -+# else -+ { -+ U32 mostSignificantWord = (U32)(val >> 32); -+ U32 
leastSignificantWord = (U32)val; -+ if (mostSignificantWord == 0) { -+ return 32 + ZSTD_countLeadingZeros32(leastSignificantWord); -+ } else { -+ return ZSTD_countLeadingZeros32(mostSignificantWord); -+ } -+ } -+# endif -+} -+ -+MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val) -+{ -+ if (MEM_isLittleEndian()) { -+ if (MEM_64bits()) { -+ return ZSTD_countTrailingZeros64((U64)val) >> 3; -+ } else { -+ return ZSTD_countTrailingZeros32((U32)val) >> 3; -+ } -+ } else { /* Big Endian CPU */ -+ if (MEM_64bits()) { -+ return ZSTD_countLeadingZeros64((U64)val) >> 3; -+ } else { -+ return ZSTD_countLeadingZeros32((U32)val) >> 3; -+ } -+ } -+} -+ -+MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ -+{ -+ assert(val != 0); -+ return 31 - ZSTD_countLeadingZeros32(val); -+} -+ -+/* ZSTD_rotateRight_*(): -+ * Rotates a bitfield to the right by "count" bits. -+ * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts -+ */ -+MEM_STATIC -+U64 ZSTD_rotateRight_U64(U64 const value, U32 count) { -+ assert(count < 64); -+ count &= 0x3F; /* for fickle pattern recognition */ -+ return (value >> count) | (U64)(value << ((0U - count) & 0x3F)); -+} -+ -+MEM_STATIC -+U32 ZSTD_rotateRight_U32(U32 const value, U32 count) { -+ assert(count < 32); -+ count &= 0x1F; /* for fickle pattern recognition */ -+ return (value >> count) | (U32)(value << ((0U - count) & 0x1F)); -+} -+ -+MEM_STATIC -+U16 ZSTD_rotateRight_U16(U16 const value, U32 count) { -+ assert(count < 16); -+ count &= 0x0F; /* for fickle pattern recognition */ -+ return (value >> count) | (U16)(value << ((0U - count) & 0x0F)); -+} -+ -+#endif /* ZSTD_BITS_H */ -diff --git a/lib/zstd/common/bitstream.h b/lib/zstd/common/bitstream.h -index feef3a1b1d60..6a13f1f0f1e8 100644 ---- a/lib/zstd/common/bitstream.h -+++ b/lib/zstd/common/bitstream.h -@@ -1,7 +1,8 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* 
****************************************************************** - * bitstream - * Part of FSE library -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy -@@ -27,6 +28,7 @@ - #include "compiler.h" /* UNLIKELY() */ - #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ - #include "error_private.h" /* error codes and messages */ -+#include "bits.h" /* ZSTD_highbit32 */ - - - /*========================================= -@@ -79,19 +81,20 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); - /*-******************************************** - * bitStream decoding API (read backward) - **********************************************/ -+typedef size_t BitContainerType; - typedef struct { -- size_t bitContainer; -+ BitContainerType bitContainer; - unsigned bitsConsumed; - const char* ptr; - const char* start; - const char* limitPtr; - } BIT_DStream_t; - --typedef enum { BIT_DStream_unfinished = 0, -- BIT_DStream_endOfBuffer = 1, -- BIT_DStream_completed = 2, -- BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ -- /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ -+typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */ -+ BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */ -+ BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */ -+ BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */ -+ } BIT_DStream_status; /* result of BIT_reloadDStream() */ - - MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); - MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); -@@ -101,7 +104,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); - - /* Start by invoking BIT_initDStream(). 
- * A chunk of the bitStream is then stored into a local register. --* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). -+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType). - * You can then retrieve bitFields stored into the local register, **in reverse order**. - * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. - * A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. -@@ -122,33 +125,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); - MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); - /* faster, but works only if nbBits >= 1 */ - -- -- --/*-************************************************************** --* Internal functions --****************************************************************/ --MEM_STATIC unsigned BIT_highbit32 (U32 val) --{ -- assert(val != 0); -- { --# if (__GNUC__ >= 3) /* Use GCC Intrinsic */ -- return __builtin_clz (val) ^ 31; --# else /* Software version */ -- static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, -- 11, 14, 16, 18, 22, 25, 3, 30, -- 8, 12, 20, 28, 15, 17, 24, 7, -- 19, 27, 23, 6, 26, 5, 4, 31 }; -- U32 v = val; -- v |= v >> 1; -- v |= v >> 2; -- v |= v >> 4; -- v |= v >> 8; -- v |= v >> 16; -- return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; --# endif -- } --} -- - /*===== Local Constants =====*/ - static const unsigned BIT_mask[] = { - 0, 1, 3, 7, 0xF, 0x1F, -@@ -178,6 +154,12 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, - return 0; - } - -+FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) -+{ -+ assert(nbBits < BIT_MASK_SIZE); -+ return bitContainer & BIT_mask[nbBits]; -+} -+ - /*! BIT_addBits() : - * can add up to 31 bits into `bitC`. - * Note : does not check for register overflow ! 
*/ -@@ -187,7 +169,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, - DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); - assert(nbBits < BIT_MASK_SIZE); - assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); -- bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; -+ bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos; - bitC->bitPos += nbBits; - } - -@@ -266,35 +248,35 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si - bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); - bitD->bitContainer = MEM_readLEST(bitD->ptr); - { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; -- bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ -+ bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ - if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } - } else { - bitD->ptr = bitD->start; - bitD->bitContainer = *(const BYTE*)(bitD->start); - switch(srcSize) - { -- case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); -+ case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); - ZSTD_FALLTHROUGH; - -- case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); -+ case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); - ZSTD_FALLTHROUGH; - -- case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); -+ case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); - ZSTD_FALLTHROUGH; - -- case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; -+ case 4: bitD->bitContainer += 
(BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24; - ZSTD_FALLTHROUGH; - -- case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; -+ case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16; - ZSTD_FALLTHROUGH; - -- case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; -+ case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8; - ZSTD_FALLTHROUGH; - - default: break; - } - { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; -- bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; -+ bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; - if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ - } - bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; -@@ -303,12 +285,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si - return srcSize; - } - --MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start) -+FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start) - { - return bitContainer >> start; - } - --MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) -+FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits) - { - U32 const regMask = sizeof(bitContainer)*8 - 1; - /* if start > regMask, bitstream is corrupted, and result is undefined */ -@@ -325,19 +307,13 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c - #endif - } - --MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) --{ -- assert(nbBits < BIT_MASK_SIZE); -- return bitContainer & BIT_mask[nbBits]; --} -- - /*! BIT_lookBits() : - * Provides next n bits from local register. - * local register is not modified. - * On 32-bits, maxNbBits==24. 
- * On 64-bits, maxNbBits==56. - * @return : value extracted */ --MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) -+FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) - { - /* arbitrate between double-shift and shift+mask */ - #if 1 -@@ -360,7 +336,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) - return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); - } - --MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) -+FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) - { - bitD->bitsConsumed += nbBits; - } -@@ -369,7 +345,7 @@ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) - * Read (consume) next n bits from local register and update. - * Pay attention to not read more than nbBits contained into local register. - * @return : extracted value. */ --MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) -+FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) - { - size_t const value = BIT_lookBits(bitD, nbBits); - BIT_skipBits(bitD, nbBits); -@@ -377,7 +353,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned n - } - - /*! BIT_readBitsFast() : -- * unsafe version; only works only if nbBits >= 1 */ -+ * unsafe version; only works if nbBits >= 1 */ - MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) - { - size_t const value = BIT_lookBitsFast(bitD, nbBits); -@@ -386,6 +362,21 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) - return value; - } - -+/*! BIT_reloadDStream_internal() : -+ * Simple variant of BIT_reloadDStream(), with two conditions: -+ * 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8 -+ * 2. 
look window is valid after shifted down : bitD->ptr >= bitD->start -+ */ -+MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD) -+{ -+ assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); -+ bitD->ptr -= bitD->bitsConsumed >> 3; -+ assert(bitD->ptr >= bitD->start); -+ bitD->bitsConsumed &= 7; -+ bitD->bitContainer = MEM_readLEST(bitD->ptr); -+ return BIT_DStream_unfinished; -+} -+ - /*! BIT_reloadDStreamFast() : - * Similar to BIT_reloadDStream(), but with two differences: - * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! -@@ -396,31 +387,35 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) - { - if (UNLIKELY(bitD->ptr < bitD->limitPtr)) - return BIT_DStream_overflow; -- assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); -- bitD->ptr -= bitD->bitsConsumed >> 3; -- bitD->bitsConsumed &= 7; -- bitD->bitContainer = MEM_readLEST(bitD->ptr); -- return BIT_DStream_unfinished; -+ return BIT_reloadDStream_internal(bitD); - } - - /*! BIT_reloadDStream() : - * Refill `bitD` from buffer previously set in BIT_initDStream() . -- * This function is safe, it guarantees it will not read beyond src buffer. -+ * This function is safe, it guarantees it will not never beyond src buffer. - * @return : status of `BIT_DStream_t` internal register. 
- * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ --MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) -+FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) - { -- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ -+ /* note : once in overflow mode, a bitstream remains in this mode until it's reset */ -+ if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) { -+ static const BitContainerType zeroFilled = 0; -+ bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */ -+ /* overflow detected, erroneous scenario or end of stream: no update */ - return BIT_DStream_overflow; -+ } -+ -+ assert(bitD->ptr >= bitD->start); - - if (bitD->ptr >= bitD->limitPtr) { -- return BIT_reloadDStreamFast(bitD); -+ return BIT_reloadDStream_internal(bitD); - } - if (bitD->ptr == bitD->start) { -+ /* reached end of bitStream => no update */ - if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; - return BIT_DStream_completed; - } -- /* start < ptr < limitPtr */ -+ /* start < ptr < limitPtr => cautious update */ - { U32 nbBytes = bitD->bitsConsumed >> 3; - BIT_DStream_status result = BIT_DStream_unfinished; - if (bitD->ptr - nbBytes < bitD->start) { -diff --git a/lib/zstd/common/compiler.h b/lib/zstd/common/compiler.h -index c42d39faf9bd..508ee25537bb 100644 ---- a/lib/zstd/common/compiler.h -+++ b/lib/zstd/common/compiler.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -11,6 +12,8 @@ - #ifndef ZSTD_COMPILER_H - #define ZSTD_COMPILER_H - -+#include -+ - #include "portability_macros.h" - - /*-******************************************************* -@@ -41,12 +44,15 @@ - */ - #define WIN_CDECL - -+/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ -+#define UNUSED_ATTR __attribute__((unused)) -+ - /* - * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant - * parameters. They must be inlined for the compiler to eliminate the constant - * branches. - */ --#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR -+#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR UNUSED_ATTR - /* - * HINT_INLINE is used to help the compiler generate better code. It is *not* - * used for "templates", so it can be tweaked based on the compilers -@@ -61,11 +67,21 @@ - #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 - # define HINT_INLINE static INLINE_KEYWORD - #else --# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR -+# define HINT_INLINE FORCE_INLINE_TEMPLATE - #endif - --/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ --#define UNUSED_ATTR __attribute__((unused)) -+/* "soft" inline : -+ * The compiler is free to select if it's a good idea to inline or not. -+ * The main objective is to silence compiler warnings -+ * when a defined function in included but not used. -+ * -+ * Note : this macro is prefixed `MEM_` because it used to be provided by `mem.h` unit. -+ * Updating the prefix is probably preferable, but requires a fairly large codemod, -+ * since this name is used everywhere. 
-+ */ -+#ifndef MEM_STATIC /* already defined in Linux Kernel mem.h */ -+#define MEM_STATIC static __inline UNUSED_ATTR -+#endif - - /* force no inlining */ - #define FORCE_NOINLINE static __attribute__((__noinline__)) -@@ -86,23 +102,24 @@ - # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) - # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) - #elif defined(__aarch64__) --# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) --# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) -+# define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0) -+# define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0) - #else --# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ --# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ -+# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */ -+# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */ - #endif /* NO_PREFETCH */ - - #define CACHELINE_SIZE 64 - --#define PREFETCH_AREA(p, s) { \ -- const char* const _ptr = (const char*)(p); \ -- size_t const _size = (size_t)(s); \ -- size_t _pos; \ -- for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ -- PREFETCH_L2(_ptr + _pos); \ -- } \ --} -+#define PREFETCH_AREA(p, s) \ -+ do { \ -+ const char* const _ptr = (const char*)(p); \ -+ size_t const _size = (size_t)(s); \ -+ size_t _pos; \ -+ for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ -+ PREFETCH_L2(_ptr + _pos); \ -+ } \ -+ } while (0) - - /* vectorization - * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax, -@@ -126,9 +143,9 @@ - #define UNLIKELY(x) (__builtin_expect((x), 0)) - - #if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) --# define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); } 
-+# define ZSTD_UNREACHABLE do { assert(0), __builtin_unreachable(); } while (0) - #else --# define ZSTD_UNREACHABLE { assert(0); } -+# define ZSTD_UNREACHABLE do { assert(0); } while (0) - #endif - - /* disable warnings */ -@@ -179,6 +196,85 @@ - * Sanitizer - *****************************************************************/ - -+/* -+ * Zstd relies on pointer overflow in its decompressor. -+ * We add this attribute to functions that rely on pointer overflow. -+ */ -+#ifndef ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+# if __has_attribute(no_sanitize) -+# if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8 -+ /* gcc < 8 only has signed-integer-overlow which triggers on pointer overflow */ -+# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("signed-integer-overflow"))) -+# else -+ /* older versions of clang [3.7, 5.0) will warn that pointer-overflow is ignored. */ -+# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("pointer-overflow"))) -+# endif -+# else -+# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+# endif -+#endif -+ -+/* -+ * Helper function to perform a wrapped pointer difference without trigging -+ * UBSAN. -+ * -+ * @returns lhs - rhs with wrapping -+ */ -+MEM_STATIC -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+ptrdiff_t ZSTD_wrappedPtrDiff(unsigned char const* lhs, unsigned char const* rhs) -+{ -+ return lhs - rhs; -+} -+ -+/* -+ * Helper function to perform a wrapped pointer add without triggering UBSAN. -+ * -+ * @return ptr + add with wrapping -+ */ -+MEM_STATIC -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+unsigned char const* ZSTD_wrappedPtrAdd(unsigned char const* ptr, ptrdiff_t add) -+{ -+ return ptr + add; -+} -+ -+/* -+ * Helper function to perform a wrapped pointer subtraction without triggering -+ * UBSAN. 
-+ * -+ * @return ptr - sub with wrapping -+ */ -+MEM_STATIC -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+unsigned char const* ZSTD_wrappedPtrSub(unsigned char const* ptr, ptrdiff_t sub) -+{ -+ return ptr - sub; -+} -+ -+/* -+ * Helper function to add to a pointer that works around C's undefined behavior -+ * of adding 0 to NULL. -+ * -+ * @returns `ptr + add` except it defines `NULL + 0 == NULL`. -+ */ -+MEM_STATIC -+unsigned char* ZSTD_maybeNullPtrAdd(unsigned char* ptr, ptrdiff_t add) -+{ -+ return add > 0 ? ptr + add : ptr; -+} -+ -+/* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an -+ * abundance of caution, disable our custom poisoning on mingw. */ -+#ifdef __MINGW32__ -+#ifndef ZSTD_ASAN_DONT_POISON_WORKSPACE -+#define ZSTD_ASAN_DONT_POISON_WORKSPACE 1 -+#endif -+#ifndef ZSTD_MSAN_DONT_POISON_WORKSPACE -+#define ZSTD_MSAN_DONT_POISON_WORKSPACE 1 -+#endif -+#endif -+ - - - #endif /* ZSTD_COMPILER_H */ -diff --git a/lib/zstd/common/cpu.h b/lib/zstd/common/cpu.h -index 0db7b42407ee..d8319a2bef4c 100644 ---- a/lib/zstd/common/cpu.h -+++ b/lib/zstd/common/cpu.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -diff --git a/lib/zstd/common/debug.c b/lib/zstd/common/debug.c -index bb863c9ea616..8eb6aa9a3b20 100644 ---- a/lib/zstd/common/debug.c -+++ b/lib/zstd/common/debug.c -@@ -1,7 +1,8 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* ****************************************************************** - * debug - * Part of FSE library -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. 
- * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy -@@ -21,4 +22,10 @@ - - #include "debug.h" - -+#if (DEBUGLEVEL>=2) -+/* We only use this when DEBUGLEVEL>=2, but we get -Werror=pedantic errors if a -+ * translation unit is empty. So remove this from Linux kernel builds, but -+ * otherwise just leave it in. -+ */ - int g_debuglevel = DEBUGLEVEL; -+#endif -diff --git a/lib/zstd/common/debug.h b/lib/zstd/common/debug.h -index 6dd88d1fbd02..226ba3c57ec3 100644 ---- a/lib/zstd/common/debug.h -+++ b/lib/zstd/common/debug.h -@@ -1,7 +1,8 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* ****************************************************************** - * debug - * Part of FSE library -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy -@@ -82,18 +83,27 @@ extern int g_debuglevel; /* the variable is only declared, - It's useful when enabling very verbose levels - on selective conditions (such as position in src) */ - --# define RAWLOG(l, ...) { \ -- if (l<=g_debuglevel) { \ -- ZSTD_DEBUG_PRINT(__VA_ARGS__); \ -- } } --# define DEBUGLOG(l, ...) { \ -- if (l<=g_debuglevel) { \ -- ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \ -- ZSTD_DEBUG_PRINT(" \n"); \ -- } } -+# define RAWLOG(l, ...) \ -+ do { \ -+ if (l<=g_debuglevel) { \ -+ ZSTD_DEBUG_PRINT(__VA_ARGS__); \ -+ } \ -+ } while (0) -+ -+#define STRINGIFY(x) #x -+#define TOSTRING(x) STRINGIFY(x) -+#define LINE_AS_STRING TOSTRING(__LINE__) -+ -+# define DEBUGLOG(l, ...) \ -+ do { \ -+ if (l<=g_debuglevel) { \ -+ ZSTD_DEBUG_PRINT(__FILE__ ":" LINE_AS_STRING ": " __VA_ARGS__); \ -+ ZSTD_DEBUG_PRINT(" \n"); \ -+ } \ -+ } while (0) - #else --# define RAWLOG(l, ...) {} /* disabled */ --# define DEBUGLOG(l, ...) {} /* disabled */ -+# define RAWLOG(l, ...) 
do { } while (0) /* disabled */ -+# define DEBUGLOG(l, ...) do { } while (0) /* disabled */ - #endif - - -diff --git a/lib/zstd/common/entropy_common.c b/lib/zstd/common/entropy_common.c -index fef67056f052..6cdd82233fb5 100644 ---- a/lib/zstd/common/entropy_common.c -+++ b/lib/zstd/common/entropy_common.c -@@ -1,6 +1,7 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* ****************************************************************** - * Common functions of New Generation Entropy library -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * You can contact the author at : - * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy -@@ -19,8 +20,8 @@ - #include "error_private.h" /* ERR_*, ERROR */ - #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ - #include "fse.h" --#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ - #include "huf.h" -+#include "bits.h" /* ZSDT_highbit32, ZSTD_countTrailingZeros32 */ - - - /*=== Version ===*/ -@@ -38,23 +39,6 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } - /*-************************************************************** - * FSE NCount encoding-decoding - ****************************************************************/ --static U32 FSE_ctz(U32 val) --{ -- assert(val != 0); -- { --# if (__GNUC__ >= 3) /* GCC Intrinsic */ -- return __builtin_ctz(val); --# else /* Software version */ -- U32 count = 0; -- while ((val & 1) == 0) { -- val >>= 1; -- ++count; -- } -- return count; --# endif -- } --} -- - FORCE_INLINE_TEMPLATE - size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, - const void* headerBuffer, size_t hbSize) -@@ -102,7 +86,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne - * repeat. - * Avoid UB by setting the high bit to 1. 
- */ -- int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; -+ int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1; - while (repeats >= 12) { - charnum += 3 * 12; - if (LIKELY(ip <= iend-7)) { -@@ -113,7 +97,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne - ip = iend - 4; - } - bitStream = MEM_readLE32(ip) >> bitCount; -- repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; -+ repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1; - } - charnum += 3 * repeats; - bitStream >>= 2 * repeats; -@@ -178,7 +162,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne - * know that threshold > 1. - */ - if (remaining <= 1) break; -- nbBits = BIT_highbit32(remaining) + 1; -+ nbBits = ZSTD_highbit32(remaining) + 1; - threshold = 1 << (nbBits - 1); - } - if (charnum >= maxSV1) break; -@@ -253,7 +237,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, - const void* src, size_t srcSize) - { - U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; -- return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0); -+ return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* flags */ 0); - } - - FORCE_INLINE_TEMPLATE size_t -@@ -301,14 +285,14 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats, - if (weightTotal == 0) return ERROR(corruption_detected); - - /* get last non-null symbol weight (implied, total must be 2^n) */ -- { U32 const tableLog = BIT_highbit32(weightTotal) + 1; -+ { U32 const tableLog = ZSTD_highbit32(weightTotal) + 1; - if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); - *tableLogPtr = tableLog; - /* determine last weight */ - { U32 const total = 1 << tableLog; - U32 const rest = total - weightTotal; -- U32 const verif = 1 << BIT_highbit32(rest); -- U32 const lastWeight = BIT_highbit32(rest) 
+ 1; -+ U32 const verif = 1 << ZSTD_highbit32(rest); -+ U32 const lastWeight = ZSTD_highbit32(rest) + 1; - if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ - huffWeight[oSize] = (BYTE)lastWeight; - rankStats[lastWeight]++; -@@ -345,13 +329,13 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, - U32* nbSymbolsPtr, U32* tableLogPtr, - const void* src, size_t srcSize, - void* workSpace, size_t wkspSize, -- int bmi2) -+ int flags) - { - #if DYNAMIC_BMI2 -- if (bmi2) { -+ if (flags & HUF_flags_bmi2) { - return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); - } - #endif -- (void)bmi2; -+ (void)flags; - return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); - } -diff --git a/lib/zstd/common/error_private.c b/lib/zstd/common/error_private.c -index 6d1135f8c373..a4062d30d170 100644 ---- a/lib/zstd/common/error_private.c -+++ b/lib/zstd/common/error_private.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -27,9 +28,11 @@ const char* ERR_getErrorString(ERR_enum code) - case PREFIX(version_unsupported): return "Version not supported"; - case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; - case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; -- case PREFIX(corruption_detected): return "Corrupted block detected"; -+ case PREFIX(corruption_detected): return "Data corruption detected"; - case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; -+ case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification"; - case PREFIX(parameter_unsupported): return "Unsupported parameter"; -+ case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters"; - case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; - case PREFIX(init_missing): return "Context should be init first"; - case PREFIX(memory_allocation): return "Allocation error : not enough memory"; -@@ -38,17 +41,22 @@ const char* ERR_getErrorString(ERR_enum code) - case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; - case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; - case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; -+ case PREFIX(stabilityCondition_notRespected): return "pledged buffer stability condition is not respected"; - case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; - case PREFIX(dictionary_wrong): return "Dictionary mismatch"; - case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; - case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; - case PREFIX(srcSize_wrong): return "Src size is incorrect"; - case PREFIX(dstBuffer_null): return "Operation on NULL 
destination buffer"; -+ case PREFIX(noForwardProgress_destFull): return "Operation made no progress over multiple calls, due to output buffer being full"; -+ case PREFIX(noForwardProgress_inputEmpty): return "Operation made no progress over multiple calls, due to input being empty"; - /* following error codes are not stable and may be removed or changed in a future version */ - case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; - case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; - case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; - case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; -+ case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code"; -+ case PREFIX(externalSequences_invalid): return "External sequences are not valid"; - case PREFIX(maxCode): - default: return notErrorCode; - } -diff --git a/lib/zstd/common/error_private.h b/lib/zstd/common/error_private.h -index ca5101e542fa..0410ca415b54 100644 ---- a/lib/zstd/common/error_private.h -+++ b/lib/zstd/common/error_private.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -49,8 +50,13 @@ ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } - ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } - - /* check and forward error code */ --#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e --#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } -+#define CHECK_V_F(e, f) \ -+ size_t const e = f; \ -+ do { \ -+ if (ERR_isError(e)) \ -+ return e; \ -+ } while (0) -+#define CHECK_F(f) do { CHECK_V_F(_var_err__, f); } while (0) - - - /*-**************************************** -@@ -84,10 +90,12 @@ void _force_has_format_string(const char *format, ...) { - * We want to force this function invocation to be syntactically correct, but - * we don't want to force runtime evaluation of its arguments. - */ --#define _FORCE_HAS_FORMAT_STRING(...) \ -- if (0) { \ -- _force_has_format_string(__VA_ARGS__); \ -- } -+#define _FORCE_HAS_FORMAT_STRING(...) \ -+ do { \ -+ if (0) { \ -+ _force_has_format_string(__VA_ARGS__); \ -+ } \ -+ } while (0) - - #define ERR_QUOTE(str) #str - -@@ -98,48 +106,50 @@ void _force_has_format_string(const char *format, ...) { - * In order to do that (particularly, printing the conditional that failed), - * this can't just wrap RETURN_ERROR(). - */ --#define RETURN_ERROR_IF(cond, err, ...) \ -- if (cond) { \ -- RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ -- __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \ -- _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ -- RAWLOG(3, ": " __VA_ARGS__); \ -- RAWLOG(3, "\n"); \ -- return ERROR(err); \ -- } -+#define RETURN_ERROR_IF(cond, err, ...) 
\ -+ do { \ -+ if (cond) { \ -+ RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ -+ __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \ -+ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ -+ RAWLOG(3, ": " __VA_ARGS__); \ -+ RAWLOG(3, "\n"); \ -+ return ERROR(err); \ -+ } \ -+ } while (0) - - /* - * Unconditionally return the specified error. - * - * In debug modes, prints additional information. - */ --#define RETURN_ERROR(err, ...) \ -- do { \ -- RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ -- __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \ -- _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ -- RAWLOG(3, ": " __VA_ARGS__); \ -- RAWLOG(3, "\n"); \ -- return ERROR(err); \ -- } while(0); -+#define RETURN_ERROR(err, ...) \ -+ do { \ -+ RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ -+ __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \ -+ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ -+ RAWLOG(3, ": " __VA_ARGS__); \ -+ RAWLOG(3, "\n"); \ -+ return ERROR(err); \ -+ } while(0) - - /* - * If the provided expression evaluates to an error code, returns that error code. - * - * In debug modes, prints additional information. - */ --#define FORWARD_IF_ERROR(err, ...) \ -- do { \ -- size_t const err_code = (err); \ -- if (ERR_isError(err_code)) { \ -- RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ -- __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \ -- _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ -- RAWLOG(3, ": " __VA_ARGS__); \ -- RAWLOG(3, "\n"); \ -- return err_code; \ -- } \ -- } while(0); -+#define FORWARD_IF_ERROR(err, ...) 
\ -+ do { \ -+ size_t const err_code = (err); \ -+ if (ERR_isError(err_code)) { \ -+ RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ -+ __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \ -+ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ -+ RAWLOG(3, ": " __VA_ARGS__); \ -+ RAWLOG(3, "\n"); \ -+ return err_code; \ -+ } \ -+ } while(0) - - - #endif /* ERROR_H_MODULE */ -diff --git a/lib/zstd/common/fse.h b/lib/zstd/common/fse.h -index 4507043b2287..2185a578617d 100644 ---- a/lib/zstd/common/fse.h -+++ b/lib/zstd/common/fse.h -@@ -1,7 +1,8 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* ****************************************************************** - * FSE : Finite State Entropy codec - * Public Prototypes declaration -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy -@@ -50,34 +51,6 @@ - FSE_PUBLIC_API unsigned FSE_versionNumber(void); /*< library version number; to be used when checking dll version */ - - --/*-**************************************** --* FSE simple functions --******************************************/ --/*! FSE_compress() : -- Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. -- 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). -- @return : size of compressed data (<= dstCapacity). -- Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! -- if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. -- if FSE_isError(return), compression failed (more details using FSE_getErrorName()) --*/ --FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, -- const void* src, size_t srcSize); -- --/*! 
FSE_decompress(): -- Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', -- into already allocated destination buffer 'dst', of size 'dstCapacity'. -- @return : size of regenerated data (<= maxDstSize), -- or an error code, which can be tested using FSE_isError() . -- -- ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! -- Why ? : making this distinction requires a header. -- Header management is intentionally delegated to the user layer, which can better manage special cases. --*/ --FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, -- const void* cSrc, size_t cSrcSize); -- -- - /*-***************************************** - * Tool functions - ******************************************/ -@@ -88,20 +61,6 @@ FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return - FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ - - --/*-***************************************** --* FSE advanced functions --******************************************/ --/*! FSE_compress2() : -- Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' -- Both parameters can be defined as '0' to mean : use default value -- @return : size of compressed data -- Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! -- if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. -- if FSE_isError(return), it's an error code. --*/ --FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); -- -- - /*-***************************************** - * FSE detailed API - ******************************************/ -@@ -161,8 +120,6 @@ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, - /*! Constructor and Destructor of FSE_CTable. 
- Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ - typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ --FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); --FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); - - /*! FSE_buildCTable(): - Builds `ct`, which must be already allocated, using FSE_createCTable(). -@@ -238,23 +195,7 @@ FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter, - unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, - const void* rBuffer, size_t rBuffSize, int bmi2); - --/*! Constructor and Destructor of FSE_DTable. -- Note that its size depends on 'tableLog' */ - typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ --FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); --FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); -- --/*! FSE_buildDTable(): -- Builds 'dt', which must be already allocated, using FSE_createDTable(). -- return : 0, or an errorCode, which can be tested using FSE_isError() */ --FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); -- --/*! FSE_decompress_usingDTable(): -- Decompress compressed source `cSrc` of size `cSrcSize` using `dt` -- into `dst` which must be already allocated. -- @return : size of regenerated data (necessarily <= `dstCapacity`), -- or an errorCode, which can be tested using FSE_isError() */ --FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); - - /*! 
- Tutorial : -@@ -286,6 +227,7 @@ If there is an error, the function will return an error code, which can be teste - - #endif /* FSE_H */ - -+ - #if !defined(FSE_H_FSE_STATIC_LINKING_ONLY) - #define FSE_H_FSE_STATIC_LINKING_ONLY - -@@ -317,16 +259,6 @@ If there is an error, the function will return an error code, which can be teste - unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); - /*< same as FSE_optimalTableLog(), which used `minus==2` */ - --/* FSE_compress_wksp() : -- * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). -- * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable. -- */ --#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) --size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); -- --size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); --/*< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ -- - size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); - /*< build a fake FSE_CTable, designed to compress always the same symbolValue */ - -@@ -344,19 +276,11 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi - FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); - /*< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ - --size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); --/*< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses 
nbBits */ -- --size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); --/*< build a fake FSE_DTable, designed to always generate the same symbolValue */ -- --#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) -+#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) - #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) --size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); --/*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ -- - size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2); --/*< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */ -+/*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`. 
-+ * Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */ - - typedef enum { - FSE_repeat_none, /*< Cannot use the previous table */ -@@ -539,20 +463,20 @@ MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, un - FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; - const U16* const stateTable = (const U16*)(statePtr->stateTable); - U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); -- BIT_addBits(bitC, statePtr->value, nbBitsOut); -+ BIT_addBits(bitC, (size_t)statePtr->value, nbBitsOut); - statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; - } - - MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) - { -- BIT_addBits(bitC, statePtr->value, statePtr->stateLog); -+ BIT_addBits(bitC, (size_t)statePtr->value, statePtr->stateLog); - BIT_flushBits(bitC); - } - - - /* FSE_getMaxNbBits() : - * Approximate maximum cost of a symbol, in bits. -- * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) -+ * Fractional get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) - * note 1 : assume symbolValue is valid (<= maxSymbolValue) - * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ - MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) -diff --git a/lib/zstd/common/fse_decompress.c b/lib/zstd/common/fse_decompress.c -index 8dcb8ca39767..3a17e84f27bf 100644 ---- a/lib/zstd/common/fse_decompress.c -+++ b/lib/zstd/common/fse_decompress.c -@@ -1,6 +1,7 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* ****************************************************************** - * FSE : Finite State Entropy decoder -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. 
- * - * You can contact the author at : - * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy -@@ -22,8 +23,8 @@ - #define FSE_STATIC_LINKING_ONLY - #include "fse.h" - #include "error_private.h" --#define ZSTD_DEPS_NEED_MALLOC --#include "zstd_deps.h" -+#include "zstd_deps.h" /* ZSTD_memcpy */ -+#include "bits.h" /* ZSTD_highbit32 */ - - - /* ************************************************************** -@@ -55,19 +56,6 @@ - #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) - #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) - -- --/* Function templates */ --FSE_DTable* FSE_createDTable (unsigned tableLog) --{ -- if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; -- return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); --} -- --void FSE_freeDTable (FSE_DTable* dt) --{ -- ZSTD_free(dt); --} -- - static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) - { - void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ -@@ -96,7 +84,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo - symbolNext[s] = 1; - } else { - if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; -- symbolNext[s] = normalizedCounter[s]; -+ symbolNext[s] = (U16)normalizedCounter[s]; - } } } - ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); - } -@@ -111,8 +99,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo - * all symbols have counts <= 8. We ensure we have 8 bytes at the end of - * our buffer to handle the over-write. 
- */ -- { -- U64 const add = 0x0101010101010101ull; -+ { U64 const add = 0x0101010101010101ull; - size_t pos = 0; - U64 sv = 0; - U32 s; -@@ -123,14 +110,13 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo - for (i = 8; i < n; i += 8) { - MEM_write64(spread + pos + i, sv); - } -- pos += n; -- } -- } -+ pos += (size_t)n; -+ } } - /* Now we spread those positions across the table. -- * The benefit of doing it in two stages is that we avoid the the -+ * The benefit of doing it in two stages is that we avoid the - * variable size inner loop, which caused lots of branch misses. - * Now we can run through all the positions without any branch misses. -- * We unroll the loop twice, since that is what emperically worked best. -+ * We unroll the loop twice, since that is what empirically worked best. - */ - { - size_t position = 0; -@@ -166,7 +152,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo - for (u=0; utableLog = 0; -- DTableH->fastMode = 0; -- -- cell->newState = 0; -- cell->symbol = symbolValue; -- cell->nbBits = 0; -- -- return 0; --} -- -- --size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) --{ -- void* ptr = dt; -- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; -- void* dPtr = dt + 1; -- FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; -- const unsigned tableSize = 1 << nbBits; -- const unsigned tableMask = tableSize - 1; -- const unsigned maxSV1 = tableMask+1; -- unsigned s; -- -- /* Sanity checks */ -- if (nbBits < 1) return ERROR(GENERIC); /* min size */ -- -- /* Build Decoding Table */ -- DTableH->tableLog = (U16)nbBits; -- DTableH->fastMode = 1; -- for (s=0; sfastMode; -- -- /* select fast mode (static) */ -- if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); -- return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); --} -- -- --size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const 
void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) --{ -- return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0); -+ assert(op >= ostart); -+ return (size_t)(op-ostart); - } - - typedef struct { - short ncount[FSE_MAX_SYMBOL_VALUE + 1]; -- FSE_DTable dtable[]; /* Dynamically sized */ - } FSE_DecompressWksp; - - -@@ -327,13 +250,18 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body( - unsigned tableLog; - unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; - FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace; -+ size_t const dtablePos = sizeof(FSE_DecompressWksp) / sizeof(FSE_DTable); -+ FSE_DTable* const dtable = (FSE_DTable*)workSpace + dtablePos; - -- DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0); -+ FSE_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0); - if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC); - -+ /* correct offset to dtable depends on this property */ -+ FSE_STATIC_ASSERT(sizeof(FSE_DecompressWksp) % sizeof(FSE_DTable) == 0); -+ - /* normal FSE decoding mode */ -- { -- size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); -+ { size_t const NCountLength = -+ FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); - if (FSE_isError(NCountLength)) return NCountLength; - if (tableLog > maxLog) return ERROR(tableLog_tooLarge); - assert(NCountLength <= cSrcSize); -@@ -342,19 +270,20 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body( - } - - if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); -- workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog); -+ assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize); -+ workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); - wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); - -- CHECK_F( FSE_buildDTable_internal(wksp->dtable, 
wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); -+ CHECK_F( FSE_buildDTable_internal(dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); - - { -- const void* ptr = wksp->dtable; -+ const void* ptr = dtable; - const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; - const U32 fastMode = DTableH->fastMode; - - /* select fast mode (static) */ -- if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1); -- return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0); -+ if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1); -+ return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0); - } - } - -@@ -382,9 +311,4 @@ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, - return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); - } - -- --typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; -- -- -- - #endif /* FSE_COMMONDEFS_ONLY */ -diff --git a/lib/zstd/common/huf.h b/lib/zstd/common/huf.h -index 5042ff870308..57462466e188 100644 ---- a/lib/zstd/common/huf.h -+++ b/lib/zstd/common/huf.h -@@ -1,7 +1,8 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* ****************************************************************** - * huff0 huffman codec, - * part of Finite State Entropy library -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. 
- * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy -@@ -18,99 +19,22 @@ - - /* *** Dependencies *** */ - #include "zstd_deps.h" /* size_t */ -- -- --/* *** library symbols visibility *** */ --/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, -- * HUF symbols remain "private" (internal symbols for library only). -- * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ --#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) --# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) --#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ --# define HUF_PUBLIC_API __declspec(dllexport) --#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) --# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ --#else --# define HUF_PUBLIC_API --#endif -- -- --/* ========================== */ --/* *** simple functions *** */ --/* ========================== */ -- --/* HUF_compress() : -- * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. -- * 'dst' buffer must be already allocated. -- * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). -- * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. -- * @return : size of compressed data (<= `dstCapacity`). -- * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! -- * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) -- */ --HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, -- const void* src, size_t srcSize); -- --/* HUF_decompress() : -- * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', -- * into already allocated buffer 'dst', of minimum size 'dstSize'. 
-- * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. -- * Note : in contrast with FSE, HUF_decompress can regenerate -- * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, -- * because it knows size to regenerate (originalSize). -- * @return : size of regenerated data (== originalSize), -- * or an error code, which can be tested using HUF_isError() -- */ --HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, -- const void* cSrc, size_t cSrcSize); -+#include "mem.h" /* U32 */ -+#define FSE_STATIC_LINKING_ONLY -+#include "fse.h" - - - /* *** Tool functions *** */ --#define HUF_BLOCKSIZE_MAX (128 * 1024) /*< maximum input size for a single block compressed with HUF_compress */ --HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /*< maximum compressed size (worst case) */ -+#define HUF_BLOCKSIZE_MAX (128 * 1024) /*< maximum input size for a single block compressed with HUF_compress */ -+size_t HUF_compressBound(size_t size); /*< maximum compressed size (worst case) */ - - /* Error Management */ --HUF_PUBLIC_API unsigned HUF_isError(size_t code); /*< tells if a return value is an error code */ --HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /*< provides error code string (useful for debugging) */ -- -+unsigned HUF_isError(size_t code); /*< tells if a return value is an error code */ -+const char* HUF_getErrorName(size_t code); /*< provides error code string (useful for debugging) */ - --/* *** Advanced function *** */ - --/* HUF_compress2() : -- * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. -- * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . -- * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ --HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, -- const void* src, size_t srcSize, -- unsigned maxSymbolValue, unsigned tableLog); -- --/* HUF_compress4X_wksp() : -- * Same as HUF_compress2(), but uses externally allocated `workSpace`. 
-- * `workspace` must be at least as large as HUF_WORKSPACE_SIZE */ - #define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */) - #define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64)) --HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, -- const void* src, size_t srcSize, -- unsigned maxSymbolValue, unsigned tableLog, -- void* workSpace, size_t wkspSize); -- --#endif /* HUF_H_298734234 */ -- --/* ****************************************************************** -- * WARNING !! -- * The following section contains advanced and experimental definitions -- * which shall never be used in the context of a dynamic library, -- * because they are not guaranteed to remain stable in the future. -- * Only consider them in association with static linking. -- * *****************************************************************/ --#if !defined(HUF_H_HUF_STATIC_LINKING_ONLY) --#define HUF_H_HUF_STATIC_LINKING_ONLY -- --/* *** Dependencies *** */ --#include "mem.h" /* U32 */ --#define FSE_STATIC_LINKING_ONLY --#include "fse.h" -- - - /* *** Constants *** */ - #define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */ -@@ -151,25 +75,49 @@ typedef U32 HUF_DTable; - /* **************************************** - * Advanced decompression functions - ******************************************/ --size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */ --#ifndef HUF_FORCE_DECOMPRESS_X1 --size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */ --#endif - --size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< decodes RLE and uncompressed */ --size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< considers RLE and 
uncompressed as errors */ --size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< considers RLE and uncompressed as errors */ --size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */ --size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< single-symbol decoder */ --#ifndef HUF_FORCE_DECOMPRESS_X1 --size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */ --size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< double-symbols decoder */ --#endif -+/* -+ * Huffman flags bitset. -+ * For all flags, 0 is the default value. -+ */ -+typedef enum { -+ /* -+ * If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime. -+ * Otherwise: Ignored. -+ */ -+ HUF_flags_bmi2 = (1 << 0), -+ /* -+ * If set: Test possible table depths to find the one that produces the smallest header + encoded size. -+ * If unset: Use heuristic to find the table depth. -+ */ -+ HUF_flags_optimalDepth = (1 << 1), -+ /* -+ * If set: If the previous table can encode the input, always reuse the previous table. -+ * If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output. -+ */ -+ HUF_flags_preferRepeat = (1 << 2), -+ /* -+ * If set: Sample the input and check if the sample is uncompressible, if it is then don't attempt to compress. -+ * If unset: Always histogram the entire input. 
-+ */ -+ HUF_flags_suspectUncompressible = (1 << 3), -+ /* -+ * If set: Don't use assembly implementations -+ * If unset: Allow using assembly implementations -+ */ -+ HUF_flags_disableAsm = (1 << 4), -+ /* -+ * If set: Don't use the fast decoding loop, always use the fallback decoding loop. -+ * If unset: Use the fast decoding loop when possible. -+ */ -+ HUF_flags_disableFast = (1 << 5) -+} HUF_flags_e; - - - /* **************************************** - * HUF detailed API - * ****************************************/ -+#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra - - /*! HUF_compress() does the following: - * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") -@@ -182,12 +130,12 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - * For example, it's possible to compress several blocks using the same 'CTable', - * or to save and regenerate 'CTable' using external methods. - */ --unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); --size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. 
In which case, CTable will overwrite count content */ --size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); -+unsigned HUF_minTableLog(unsigned symbolCardinality); -+unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue); -+unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace, -+ size_t wkspSize, HUF_CElt* table, const unsigned* count, int flags); /* table is used as scratch space for building and testing tables, not a return value */ - size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); --size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); --size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2); -+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags); - size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); - int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); - -@@ -196,6 +144,7 @@ typedef enum { - HUF_repeat_check, /*< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ - HUF_repeat_valid /*< Can use the previous table and it is assumed to be valid */ - } HUF_repeat; -+ - /* HUF_compress4X_repeat() : - * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. - * If it uses hufTable it does not modify hufTable or repeat. 
-@@ -206,13 +155,13 @@ size_t HUF_compress4X_repeat(void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned tableLog, - void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ -- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible); -+ HUF_CElt* hufTable, HUF_repeat* repeat, int flags); - - /* HUF_buildCTable_wksp() : - * Same as HUF_buildCTable(), but using externally allocated scratch buffer. - * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. - */ --#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) -+#define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192) - #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) - size_t HUF_buildCTable_wksp (HUF_CElt* tree, - const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, -@@ -238,7 +187,7 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, - U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, - const void* src, size_t srcSize, - void* workspace, size_t wkspSize, -- int bmi2); -+ int flags); - - /* HUF_readCTable() : - * Loading a CTable saved with HUF_writeCTable() */ -@@ -246,9 +195,22 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void - - /* HUF_getNbBitsFromCTable() : - * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX -- * Note 1 : is not inlined, as HUF_CElt definition is private */ -+ * Note 1 : If symbolValue > HUF_readCTableHeader(symbolTable).maxSymbolValue, returns 0 -+ * Note 2 : is not inlined, as HUF_CElt definition is private -+ */ - U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue); - -+typedef struct { -+ BYTE tableLog; -+ BYTE maxSymbolValue; -+ BYTE unused[sizeof(size_t) - 2]; -+} HUF_CTableHeader; -+ 
-+/* HUF_readCTableHeader() : -+ * @returns The header from the CTable specifying the tableLog and the maxSymbolValue. -+ */ -+HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable); -+ - /* - * HUF_decompress() does the following: - * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics -@@ -276,32 +238,12 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); - #define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9)) - #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) - --#ifndef HUF_FORCE_DECOMPRESS_X2 --size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); --size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); --#endif --#ifndef HUF_FORCE_DECOMPRESS_X1 --size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); --size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); --#endif -- --size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); --#ifndef HUF_FORCE_DECOMPRESS_X2 --size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); --#endif --#ifndef HUF_FORCE_DECOMPRESS_X1 --size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); --#endif -- - - /* ====================== */ - /* single stream variants */ - /* ====================== */ - --size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); --size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */ 
--size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); --size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2); -+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags); - /* HUF_compress1X_repeat() : - * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. - * If it uses hufTable it does not modify hufTable or repeat. -@@ -312,47 +254,28 @@ size_t HUF_compress1X_repeat(void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned tableLog, - void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ -- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible); -+ HUF_CElt* hufTable, HUF_repeat* repeat, int flags); - --size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ --#ifndef HUF_FORCE_DECOMPRESS_X1 --size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ --#endif -- --size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); --size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); --#ifndef HUF_FORCE_DECOMPRESS_X2 --size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */ --size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< single-symbol decoder */ --#endif -+size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* 
dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); - #ifndef HUF_FORCE_DECOMPRESS_X1 --size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */ --size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< double-symbols decoder */ --#endif -- --size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /*< automatic selection of sing or double symbol decoder, based on DTable */ --#ifndef HUF_FORCE_DECOMPRESS_X2 --size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); --#endif --#ifndef HUF_FORCE_DECOMPRESS_X1 --size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); /*< double-symbols decoder */ - #endif - - /* BMI2 variants. - * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 
- */ --size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); -+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags); - #ifndef HUF_FORCE_DECOMPRESS_X2 --size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); -+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); - #endif --size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); --size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); -+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags); -+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); - #ifndef HUF_FORCE_DECOMPRESS_X2 --size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); -+size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags); - #endif - #ifndef HUF_FORCE_DECOMPRESS_X1 --size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); -+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags); - #endif - --#endif /* HUF_STATIC_LINKING_ONLY */ -+#endif /* HUF_H_298734234 */ - -diff --git 
a/lib/zstd/common/mem.h b/lib/zstd/common/mem.h -index 1d9cc03924ca..2e91e7780c1f 100644 ---- a/lib/zstd/common/mem.h -+++ b/lib/zstd/common/mem.h -@@ -1,6 +1,6 @@ - /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -24,6 +24,7 @@ - /*-**************************************** - * Compiler specifics - ******************************************/ -+#undef MEM_STATIC /* may be already defined from common/compiler.h */ - #define MEM_STATIC static inline - - /*-************************************************************** -diff --git a/lib/zstd/common/portability_macros.h b/lib/zstd/common/portability_macros.h -index 0e3b2c0a527d..f08638cced6c 100644 ---- a/lib/zstd/common/portability_macros.h -+++ b/lib/zstd/common/portability_macros.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -12,7 +13,7 @@ - #define ZSTD_PORTABILITY_MACROS_H - - /* -- * This header file contains macro defintions to support portability. -+ * This header file contains macro definitions to support portability. - * This header is shared between C and ASM code, so it MUST only - * contain macro definitions. It MUST not contain any C code. 
- * -@@ -45,6 +46,8 @@ - /* Mark the internal assembly functions as hidden */ - #ifdef __ELF__ - # define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func -+#elif defined(__APPLE__) -+# define ZSTD_HIDE_ASM_FUNCTION(func) .private_extern func - #else - # define ZSTD_HIDE_ASM_FUNCTION(func) - #endif -@@ -65,7 +68,7 @@ - #endif - - /* -- * Only enable assembly for GNUC comptabile compilers, -+ * Only enable assembly for GNUC compatible compilers, - * because other platforms may not support GAS assembly syntax. - * - * Only enable assembly for Linux / MacOS, other platforms may -@@ -90,4 +93,23 @@ - */ - #define ZSTD_ENABLE_ASM_X86_64_BMI2 0 - -+/* -+ * For x86 ELF targets, add .note.gnu.property section for Intel CET in -+ * assembly sources when CET is enabled. -+ * -+ * Additionally, any function that may be called indirectly must begin -+ * with ZSTD_CET_ENDBRANCH. -+ */ -+#if defined(__ELF__) && (defined(__x86_64__) || defined(__i386__)) \ -+ && defined(__has_include) -+# if __has_include() -+# include -+# define ZSTD_CET_ENDBRANCH _CET_ENDBR -+# endif -+#endif -+ -+#ifndef ZSTD_CET_ENDBRANCH -+# define ZSTD_CET_ENDBRANCH -+#endif -+ - #endif /* ZSTD_PORTABILITY_MACROS_H */ -diff --git a/lib/zstd/common/zstd_common.c b/lib/zstd/common/zstd_common.c -index 3d7e35b309b5..44b95b25344a 100644 ---- a/lib/zstd/common/zstd_common.c -+++ b/lib/zstd/common/zstd_common.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -14,7 +15,6 @@ - * Dependencies - ***************************************/ - #define ZSTD_DEPS_NEED_MALLOC --#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */ - #include "error_private.h" - #include "zstd_internal.h" - -@@ -47,37 +47,3 @@ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } - /*! ZSTD_getErrorString() : - * provides error code string from enum */ - const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } -- -- -- --/*=************************************************************** --* Custom allocator --****************************************************************/ --void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) --{ -- if (customMem.customAlloc) -- return customMem.customAlloc(customMem.opaque, size); -- return ZSTD_malloc(size); --} -- --void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) --{ -- if (customMem.customAlloc) { -- /* calloc implemented as malloc+memset; -- * not as efficient as calloc, but next best guess for custom malloc */ -- void* const ptr = customMem.customAlloc(customMem.opaque, size); -- ZSTD_memset(ptr, 0, size); -- return ptr; -- } -- return ZSTD_calloc(1, size); --} -- --void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) --{ -- if (ptr!=NULL) { -- if (customMem.customFree) -- customMem.customFree(customMem.opaque, ptr); -- else -- ZSTD_free(ptr); -- } --} -diff --git a/lib/zstd/common/zstd_deps.h b/lib/zstd/common/zstd_deps.h -index 2c34e8a33a1c..f931f7d0e294 100644 ---- a/lib/zstd/common/zstd_deps.h -+++ b/lib/zstd/common/zstd_deps.h -@@ -1,6 +1,6 @@ - /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -105,3 +105,17 @@ static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) { - - #endif /* ZSTD_DEPS_IO */ - #endif /* ZSTD_DEPS_NEED_IO */ -+ -+/* -+ * Only requested when MSAN is enabled. -+ * Need: -+ * intptr_t -+ */ -+#ifdef ZSTD_DEPS_NEED_STDINT -+#ifndef ZSTD_DEPS_STDINT -+#define ZSTD_DEPS_STDINT -+ -+/* intptr_t already provided by ZSTD_DEPS_COMMON */ -+ -+#endif /* ZSTD_DEPS_STDINT */ -+#endif /* ZSTD_DEPS_NEED_STDINT */ -diff --git a/lib/zstd/common/zstd_internal.h b/lib/zstd/common/zstd_internal.h -index 93305d9b41bb..11da1233e890 100644 ---- a/lib/zstd/common/zstd_internal.h -+++ b/lib/zstd/common/zstd_internal.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -28,7 +29,6 @@ - #include - #define FSE_STATIC_LINKING_ONLY - #include "fse.h" --#define HUF_STATIC_LINKING_ONLY - #include "huf.h" - #include /* XXH_reset, update, digest */ - #define ZSTD_TRACE 0 -@@ -83,9 +83,9 @@ typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; - #define ZSTD_FRAMECHECKSUMSIZE 4 - - #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ --#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ -+#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */ -+#define MIN_LITERALS_FOR_4_STREAMS 6 - --#define HufLog 12 - typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; - - #define LONGNBSEQ 0x7F00 -@@ -93,6 +93,7 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy - #define MINMATCH 3 - - #define Litbits 8 -+#define LitHufLog 11 - #define MaxLit ((1<= WILDCOPY_VECLEN 
|| diff <= -WILDCOPY_VECLEN); -@@ -225,12 +228,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e - * one COPY16() in the first call. Then, do two calls per loop since - * at that point it is more likely to have a high trip count. - */ --#ifdef __aarch64__ -- do { -- COPY16(op, ip); -- } -- while (op < oend); --#else - ZSTD_copy16(op, ip); - if (16 >= length) return; - op += 16; -@@ -240,7 +237,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e - COPY16(op, ip); - } - while (op < oend); --#endif - } - } - -@@ -289,11 +285,11 @@ typedef enum { - typedef struct { - seqDef* sequencesStart; - seqDef* sequences; /* ptr to end of sequences */ -- BYTE* litStart; -- BYTE* lit; /* ptr to end of literals */ -- BYTE* llCode; -- BYTE* mlCode; -- BYTE* ofCode; -+ BYTE* litStart; -+ BYTE* lit; /* ptr to end of literals */ -+ BYTE* llCode; -+ BYTE* mlCode; -+ BYTE* ofCode; - size_t maxNbSeq; - size_t maxNbLit; - -@@ -301,8 +297,8 @@ typedef struct { - * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment - * the existing value of the litLength or matchLength by 0x10000. 
- */ -- ZSTD_longLengthType_e longLengthType; -- U32 longLengthPos; /* Index of the sequence to apply long length modification to */ -+ ZSTD_longLengthType_e longLengthType; -+ U32 longLengthPos; /* Index of the sequence to apply long length modification to */ - } seqStore_t; - - typedef struct { -@@ -321,10 +317,10 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore - seqLen.matchLength = seq->mlBase + MINMATCH; - if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { - if (seqStore->longLengthType == ZSTD_llt_literalLength) { -- seqLen.litLength += 0xFFFF; -+ seqLen.litLength += 0x10000; - } - if (seqStore->longLengthType == ZSTD_llt_matchLength) { -- seqLen.matchLength += 0xFFFF; -+ seqLen.matchLength += 0x10000; - } - } - return seqLen; -@@ -337,72 +333,13 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore - * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` - */ - typedef struct { -+ size_t nbBlocks; - size_t compressedSize; - unsigned long long decompressedBound; - } ZSTD_frameSizeInfo; /* decompress & legacy */ - - const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ --void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ -- --/* custom memory allocation functions */ --void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem); --void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem); --void ZSTD_customFree(void* ptr, ZSTD_customMem customMem); -- -- --MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ --{ -- assert(val != 0); -- { --# if (__GNUC__ >= 3) /* GCC Intrinsic */ -- return __builtin_clz (val) ^ 31; --# else /* Software version */ -- static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; -- U32 v = val; -- v |= v >> 
1; -- v |= v >> 2; -- v |= v >> 4; -- v |= v >> 8; -- v |= v >> 16; -- return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; --# endif -- } --} -- --/* -- * Counts the number of trailing zeros of a `size_t`. -- * Most compilers should support CTZ as a builtin. A backup -- * implementation is provided if the builtin isn't supported, but -- * it may not be terribly efficient. -- */ --MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val) --{ -- if (MEM_64bits()) { --# if (__GNUC__ >= 4) -- return __builtin_ctzll((U64)val); --# else -- static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19, -- 4, 25, 14, 28, 9, 34, 20, 56, -- 5, 17, 26, 54, 15, 41, 29, 43, -- 10, 31, 38, 35, 21, 45, 49, 57, -- 63, 6, 12, 18, 24, 27, 33, 55, -- 16, 53, 40, 42, 30, 37, 44, 48, -- 62, 11, 23, 32, 52, 39, 36, 47, -- 61, 22, 51, 46, 60, 50, 59, 58 }; -- return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; --# endif -- } else { /* 32 bits */ --# if (__GNUC__ >= 3) -- return __builtin_ctz((U32)val); --# else -- static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3, -- 30, 22, 20, 15, 25, 17, 4, 8, -- 31, 27, 13, 23, 21, 19, 16, 7, -- 26, 12, 18, 6, 11, 5, 10, 9 }; -- return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; --# endif -- } --} -+int ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ - - - /* ZSTD_invalidateRepCodes() : -@@ -420,13 +357,13 @@ typedef struct { - - /*! ZSTD_getcBlockSize() : - * Provides the size of compressed block from block header `src` */ --/* Used by: decompress, fullbench (does not get its definition from here) */ -+/* Used by: decompress, fullbench */ - size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, - blockProperties_t* bpPtr); - - /*! 
ZSTD_decodeSeqHeaders() : - * decode sequence header from src */ --/* Used by: decompress, fullbench (does not get its definition from here) */ -+/* Used by: zstd_decompress_block, fullbench */ - size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, - const void* src, size_t srcSize); - -diff --git a/lib/zstd/compress/clevels.h b/lib/zstd/compress/clevels.h -index d9a76112ec3a..6ab8be6532ef 100644 ---- a/lib/zstd/compress/clevels.h -+++ b/lib/zstd/compress/clevels.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -diff --git a/lib/zstd/compress/fse_compress.c b/lib/zstd/compress/fse_compress.c -index ec5b1ca6d71a..44a3c10becf2 100644 ---- a/lib/zstd/compress/fse_compress.c -+++ b/lib/zstd/compress/fse_compress.c -@@ -1,6 +1,7 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* ****************************************************************** - * FSE : Finite State Entropy encoder -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. 
- * - * You can contact the author at : - * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy -@@ -25,7 +26,8 @@ - #include "../common/error_private.h" - #define ZSTD_DEPS_NEED_MALLOC - #define ZSTD_DEPS_NEED_MATH64 --#include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */ -+#include "../common/zstd_deps.h" /* ZSTD_memset */ -+#include "../common/bits.h" /* ZSTD_highbit32 */ - - - /* ************************************************************** -@@ -90,7 +92,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, - assert(tableLog < 16); /* required for threshold strategy to work */ - - /* For explanations on how to distribute symbol values over the table : -- * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ -+ * https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ - - #ifdef __clang_analyzer__ - ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ -@@ -191,7 +193,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, - break; - default : - assert(normalizedCounter[s] > 1); -- { U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1); -+ { U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1); - U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut; - symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; - symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]); -@@ -224,8 +226,8 @@ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) - size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog - + 4 /* bitCount initialized at 4 */ - + 2 /* first two symbols may use one additional bit each */) / 8) -- + 1 /* round up to whole nb bytes */ -- + 2 /* additional two bytes for bitstream flush */; -+ + 1 /* round up to whole nb bytes */ -+ + 2 /* additional two bytes for bitstream flush */; - 
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ - } - -@@ -254,7 +256,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize, - /* Init */ - remaining = tableSize+1; /* +1 for extra accuracy */ - threshold = tableSize; -- nbBits = tableLog+1; -+ nbBits = (int)tableLog+1; - - while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ - if (previousIs0) { -@@ -273,7 +275,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize, - } - while (symbol >= start+3) { - start+=3; -- bitStream += 3 << bitCount; -+ bitStream += 3U << bitCount; - bitCount += 2; - } - bitStream += (symbol-start) << bitCount; -@@ -293,7 +295,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize, - count++; /* +1 for extra accuracy */ - if (count>=threshold) - count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ -- bitStream += count << bitCount; -+ bitStream += (U32)count << bitCount; - bitCount += nbBits; - bitCount -= (count>8); - out+= (bitCount+7) /8; - -- return (out-ostart); -+ assert(out >= ostart); -+ return (size_t)(out-ostart); - } - - -@@ -342,21 +345,11 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, - * FSE Compression Code - ****************************************************************/ - --FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) --{ -- size_t size; -- if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; -- size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); -- return (FSE_CTable*)ZSTD_malloc(size); --} -- --void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); } -- - /* provides the minimum logSize to safely represent a distribution */ - static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) - { -- U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; -- U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; -+ U32 minBitsSrc = 
ZSTD_highbit32((U32)(srcSize)) + 1; -+ U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2; - U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; - assert(srcSize > 1); /* Not supported, RLE should be used instead */ - return minBits; -@@ -364,7 +357,7 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) - - unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) - { -- U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; -+ U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus; - U32 tableLog = maxTableLog; - U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); - assert(srcSize > 1); /* Not supported, RLE should be used instead */ -@@ -532,40 +525,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, - return tableLog; - } - -- --/* fake FSE_CTable, for raw (uncompressed) input */ --size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits) --{ -- const unsigned tableSize = 1 << nbBits; -- const unsigned tableMask = tableSize - 1; -- const unsigned maxSymbolValue = tableMask; -- void* const ptr = ct; -- U16* const tableU16 = ( (U16*) ptr) + 2; -- void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */ -- FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); -- unsigned s; -- -- /* Sanity checks */ -- if (nbBits < 1) return ERROR(GENERIC); /* min size */ -- -- /* header */ -- tableU16[-2] = (U16) nbBits; -- tableU16[-1] = (U16) maxSymbolValue; -- -- /* Build table */ -- for (s=0; s= 2 -+ -+static size_t showU32(const U32* arr, size_t size) - { -- return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); -+ size_t u; -+ for (u=0; u= sizeof(HUF_WriteCTableWksp)); -+ -+ assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue); -+ assert(HUF_readCTableHeader(CTable).tableLog == huffLog); -+ - /* check 
conditions */ - if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC); - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); -@@ -204,16 +286,6 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, - return ((maxSymbolValue+1)/2) + 1; - } - --/*! HUF_writeCTable() : -- `CTable` : Huffman tree to save, using huf representation. -- @return : size of saved CTable */ --size_t HUF_writeCTable (void* dst, size_t maxDstSize, -- const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) --{ -- HUF_WriteCTableWksp wksp; -- return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp)); --} -- - - size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights) - { -@@ -231,7 +303,9 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void - if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); - if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); - -- CTable[0] = tableLog; -+ *maxSymbolValuePtr = nbSymbols - 1; -+ -+ HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr); - - /* Prepare base value per rank */ - { U32 n, nextRankStart = 0; -@@ -263,74 +337,71 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void - { U32 n; for (n=0; n HUF_readCTableHeader(CTable).maxSymbolValue) -+ return 0; - return (U32)HUF_getNbBits(ct[symbolValue]); - } - - --typedef struct nodeElt_s { -- U32 count; -- U16 parent; -- BYTE byte; -- BYTE nbBits; --} nodeElt; -- - /* - * HUF_setMaxHeight(): -- * Enforces maxNbBits on the Huffman tree described in huffNode. -+ * Try to enforce @targetNbBits on the Huffman tree described in @huffNode. - * -- * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts -- * the tree to so that it is a valid canonical Huffman tree. 
-+ * It attempts to convert all nodes with nbBits > @targetNbBits -+ * to employ @targetNbBits instead. Then it adjusts the tree -+ * so that it remains a valid canonical Huffman tree. - * - * @pre The sum of the ranks of each symbol == 2^largestBits, - * where largestBits == huffNode[lastNonNull].nbBits. - * @post The sum of the ranks of each symbol == 2^largestBits, -- * where largestBits is the return value <= maxNbBits. -+ * where largestBits is the return value (expected <= targetNbBits). - * -- * @param huffNode The Huffman tree modified in place to enforce maxNbBits. -+ * @param huffNode The Huffman tree modified in place to enforce targetNbBits. -+ * It's presumed sorted, from most frequent to rarest symbol. - * @param lastNonNull The symbol with the lowest count in the Huffman tree. -- * @param maxNbBits The maximum allowed number of bits, which the Huffman tree -+ * @param targetNbBits The allowed number of bits, which the Huffman tree - * may not respect. After this function the Huffman tree will -- * respect maxNbBits. -- * @return The maximum number of bits of the Huffman tree after adjustment, -- * necessarily no more than maxNbBits. -+ * respect targetNbBits. -+ * @return The maximum number of bits of the Huffman tree after adjustment. - */ --static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) -+static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits) - { - const U32 largestBits = huffNode[lastNonNull].nbBits; -- /* early exit : no elt > maxNbBits, so the tree is already valid. */ -- if (largestBits <= maxNbBits) return largestBits; -+ /* early exit : no elt > targetNbBits, so the tree is already valid. 
*/ -+ if (largestBits <= targetNbBits) return largestBits; -+ -+ DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits); - - /* there are several too large elements (at least >= 2) */ - { int totalCost = 0; -- const U32 baseCost = 1 << (largestBits - maxNbBits); -+ const U32 baseCost = 1 << (largestBits - targetNbBits); - int n = (int)lastNonNull; - -- /* Adjust any ranks > maxNbBits to maxNbBits. -+ /* Adjust any ranks > targetNbBits to targetNbBits. - * Compute totalCost, which is how far the sum of the ranks is - * we are over 2^largestBits after adjust the offending ranks. - */ -- while (huffNode[n].nbBits > maxNbBits) { -+ while (huffNode[n].nbBits > targetNbBits) { - totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); -- huffNode[n].nbBits = (BYTE)maxNbBits; -+ huffNode[n].nbBits = (BYTE)targetNbBits; - n--; - } -- /* n stops at huffNode[n].nbBits <= maxNbBits */ -- assert(huffNode[n].nbBits <= maxNbBits); -- /* n end at index of smallest symbol using < maxNbBits */ -- while (huffNode[n].nbBits == maxNbBits) --n; -+ /* n stops at huffNode[n].nbBits <= targetNbBits */ -+ assert(huffNode[n].nbBits <= targetNbBits); -+ /* n end at index of smallest symbol using < targetNbBits */ -+ while (huffNode[n].nbBits == targetNbBits) --n; - -- /* renorm totalCost from 2^largestBits to 2^maxNbBits -+ /* renorm totalCost from 2^largestBits to 2^targetNbBits - * note : totalCost is necessarily a multiple of baseCost */ -- assert((totalCost & (baseCost - 1)) == 0); -- totalCost >>= (largestBits - maxNbBits); -+ assert(((U32)totalCost & (baseCost - 1)) == 0); -+ totalCost >>= (largestBits - targetNbBits); - assert(totalCost > 0); - - /* repay normalized cost */ -@@ -339,19 +410,19 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) - - /* Get pos of last (smallest = lowest cum. 
count) symbol per rank */ - ZSTD_memset(rankLast, 0xF0, sizeof(rankLast)); -- { U32 currentNbBits = maxNbBits; -+ { U32 currentNbBits = targetNbBits; - int pos; - for (pos=n ; pos >= 0; pos--) { - if (huffNode[pos].nbBits >= currentNbBits) continue; -- currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ -- rankLast[maxNbBits-currentNbBits] = (U32)pos; -+ currentNbBits = huffNode[pos].nbBits; /* < targetNbBits */ -+ rankLast[targetNbBits-currentNbBits] = (U32)pos; - } } - - while (totalCost > 0) { - /* Try to reduce the next power of 2 above totalCost because we - * gain back half the rank. - */ -- U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; -+ U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1; - for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { - U32 const highPos = rankLast[nBitsToDecrease]; - U32 const lowPos = rankLast[nBitsToDecrease-1]; -@@ -391,7 +462,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) - rankLast[nBitsToDecrease] = noSymbol; - else { - rankLast[nBitsToDecrease]--; -- if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) -+ if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease) - rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ - } - } /* while (totalCost > 0) */ -@@ -403,11 +474,11 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) - * TODO. - */ - while (totalCost < 0) { /* Sometimes, cost correction overshoot */ -- /* special case : no rank 1 symbol (using maxNbBits-1); -- * let's create one from largest rank 0 (using maxNbBits). -+ /* special case : no rank 1 symbol (using targetNbBits-1); -+ * let's create one from largest rank 0 (using targetNbBits). 
- */ - if (rankLast[1] == noSymbol) { -- while (huffNode[n].nbBits == maxNbBits) n--; -+ while (huffNode[n].nbBits == targetNbBits) n--; - huffNode[n+1].nbBits--; - assert(n >= 0); - rankLast[1] = (U32)(n+1); -@@ -421,7 +492,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) - } /* repay normalized cost */ - } /* there are several too large elements (at least >= 2) */ - -- return maxNbBits; -+ return targetNbBits; - } - - typedef struct { -@@ -429,7 +500,7 @@ typedef struct { - U16 curr; - } rankPos; - --typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; -+typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)]; - - /* Number of buckets available for HUF_sort() */ - #define RANK_POSITION_TABLE_SIZE 192 -@@ -448,8 +519,8 @@ typedef struct { - * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing. - */ - #define RANK_POSITION_MAX_COUNT_LOG 32 --#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */ --#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */ -+#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */) -+#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */) - - /* Return the appropriate bucket index for a given count. See definition of - * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy. -@@ -457,7 +528,7 @@ typedef struct { - static U32 HUF_getIndex(U32 const count) { - return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF) - ? 
count -- : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN; -+ : ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN; - } - - /* Helper swap function for HUF_quickSortPartition() */ -@@ -580,7 +651,7 @@ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSy - - /* Sort each bucket. */ - for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) { -- U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base; -+ int const bucketSize = rankPosition[n].curr - rankPosition[n].base; - U32 const bucketStartIdx = rankPosition[n].base; - if (bucketSize > 1) { - assert(bucketStartIdx < maxSymbolValue1); -@@ -591,6 +662,7 @@ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSy - assert(HUF_isSorted(huffNode, maxSymbolValue1)); - } - -+ - /* HUF_buildCTable_wksp() : - * Same as HUF_buildCTable(), but using externally allocated scratch buffer. - * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). 
-@@ -611,6 +683,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue) - int lowS, lowN; - int nodeNb = STARTNODE; - int n, nodeRoot; -+ DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1); - /* init for parents */ - nonNullRank = (int)maxSymbolValue; - while(huffNode[nonNullRank].count == 0) nonNullRank--; -@@ -637,6 +710,8 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue) - for (n=0; n<=nonNullRank; n++) - huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; - -+ DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1)); -+ - return nonNullRank; - } - -@@ -671,31 +746,40 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i - HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */ - for (n=0; nhuffNodeTbl; - nodeElt* const huffNode = huffNode0+1; - int nonNullRank; - -+ HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables)); -+ -+ DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1); -+ - /* safety checks */ - if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) -- return ERROR(workSpace_tooSmall); -+ return ERROR(workSpace_tooSmall); - if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) -- return ERROR(maxSymbolValue_tooLarge); -+ return ERROR(maxSymbolValue_tooLarge); - ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable)); - - /* sort, decreasing order */ - HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition); -+ DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1)); - - /* build tree */ - nonNullRank = HUF_buildTree(huffNode, maxSymbolValue); - -- /* enforce maxTableLog */ -+ /* determine and enforce maxTableLog */ - maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); - if (maxNbBits > 
HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ - -@@ -716,13 +800,20 @@ size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, - } - - int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { -- HUF_CElt const* ct = CTable + 1; -- int bad = 0; -- int s; -- for (s = 0; s <= (int)maxSymbolValue; ++s) { -- bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0); -- } -- return !bad; -+ HUF_CTableHeader header = HUF_readCTableHeader(CTable); -+ HUF_CElt const* ct = CTable + 1; -+ int bad = 0; -+ int s; -+ -+ assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX); -+ -+ if (header.maxSymbolValue < maxSymbolValue) -+ return 0; -+ -+ for (s = 0; s <= (int)maxSymbolValue; ++s) { -+ bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0); -+ } -+ return !bad; - } - - size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } -@@ -804,7 +895,7 @@ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int id - #if DEBUGLEVEL >= 1 - { - size_t const nbBits = HUF_getNbBits(elt); -- size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1; -+ size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1; - (void)dirtyBits; - /* Middle bits are 0. 
*/ - assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0); -@@ -884,7 +975,7 @@ static size_t HUF_closeCStream(HUF_CStream_t* bitC) - { - size_t const nbBits = bitC->bitPos[0] & 0xFF; - if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ -- return (bitC->ptr - bitC->startPtr) + (nbBits > 0); -+ return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0); - } - } - -@@ -964,17 +1055,17 @@ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable) - { -- U32 const tableLog = (U32)CTable[0]; -+ U32 const tableLog = HUF_readCTableHeader(CTable).tableLog; - HUF_CElt const* ct = CTable + 1; - const BYTE* ip = (const BYTE*) src; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstSize; -- BYTE* op = ostart; - HUF_CStream_t bitC; - - /* init */ - if (dstSize < 8) return 0; /* not enough space to compress */ -- { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op)); -+ { BYTE* op = ostart; -+ size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op)); - if (HUF_isError(initErr)) return 0; } - - if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11) -@@ -1045,9 +1136,9 @@ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, - static size_t - HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, - const void* src, size_t srcSize, -- const HUF_CElt* CTable, const int bmi2) -+ const HUF_CElt* CTable, const int flags) - { -- if (bmi2) { -+ if (flags & HUF_flags_bmi2) { - return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); - } - return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); -@@ -1058,28 +1149,23 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, - static size_t - HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, - const void* src, size_t srcSize, -- const HUF_CElt* CTable, const int bmi2) -+ const 
HUF_CElt* CTable, const int flags) - { -- (void)bmi2; -+ (void)flags; - return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); - } - - #endif - --size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) -+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags) - { -- return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); --} -- --size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2) --{ -- return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2); -+ return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags); - } - - static size_t - HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, - const void* src, size_t srcSize, -- const HUF_CElt* CTable, int bmi2) -+ const HUF_CElt* CTable, int flags) - { - size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ - const BYTE* ip = (const BYTE*) src; -@@ -1093,7 +1179,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, - op += 6; /* jumpTable */ - - assert(op <= oend); -- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); -+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) ); - if (cSize == 0 || cSize > 65535) return 0; - MEM_writeLE16(ostart, (U16)cSize); - op += cSize; -@@ -1101,7 +1187,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, - - ip += segmentSize; - assert(op <= oend); -- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); -+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) ); - if (cSize == 0 || 
cSize > 65535) return 0; - MEM_writeLE16(ostart+2, (U16)cSize); - op += cSize; -@@ -1109,7 +1195,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, - - ip += segmentSize; - assert(op <= oend); -- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); -+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) ); - if (cSize == 0 || cSize > 65535) return 0; - MEM_writeLE16(ostart+4, (U16)cSize); - op += cSize; -@@ -1118,7 +1204,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, - ip += segmentSize; - assert(op <= oend); - assert(ip <= iend); -- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); -+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) ); - if (cSize == 0 || cSize > 65535) return 0; - op += cSize; - } -@@ -1126,14 +1212,9 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, - return (size_t)(op-ostart); - } - --size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) --{ -- return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); --} -- --size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2) -+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags) - { -- return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2); -+ return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags); - } - - typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; -@@ -1141,11 +1222,11 @@ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; - static size_t 
HUF_compressCTable_internal( - BYTE* const ostart, BYTE* op, BYTE* const oend, - const void* src, size_t srcSize, -- HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) -+ HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags) - { - size_t const cSize = (nbStreams==HUF_singleStream) ? -- HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : -- HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); -+ HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) : -+ HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags); - if (HUF_isError(cSize)) { return cSize; } - if (cSize==0) { return 0; } /* uncompressible */ - op += cSize; -@@ -1168,6 +1249,81 @@ typedef struct { - #define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096 - #define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */ - -+unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue) -+{ -+ unsigned cardinality = 0; -+ unsigned i; -+ -+ for (i = 0; i < maxSymbolValue + 1; i++) { -+ if (count[i] != 0) cardinality += 1; -+ } -+ -+ return cardinality; -+} -+ -+unsigned HUF_minTableLog(unsigned symbolCardinality) -+{ -+ U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1; -+ return minBitsSymbols; -+} -+ -+unsigned HUF_optimalTableLog( -+ unsigned maxTableLog, -+ size_t srcSize, -+ unsigned maxSymbolValue, -+ void* workSpace, size_t wkspSize, -+ HUF_CElt* table, -+ const unsigned* count, -+ int flags) -+{ -+ assert(srcSize > 1); /* Not supported, RLE should be used instead */ -+ assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables)); -+ -+ if (!(flags & HUF_flags_optimalDepth)) { -+ /* cheap evaluation, based on FSE */ -+ return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); -+ } -+ -+ { BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp); -+ size_t dstSize = wkspSize - 
sizeof(HUF_WriteCTableWksp); -+ size_t hSize, newSize; -+ const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue); -+ const unsigned minTableLog = HUF_minTableLog(symbolCardinality); -+ size_t optSize = ((size_t) ~0) - 1; -+ unsigned optLog = maxTableLog, optLogGuess; -+ -+ DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize); -+ -+ /* Search until size increases */ -+ for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) { -+ DEBUGLOG(7, "checking for huffLog=%u", optLogGuess); -+ -+ { size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize); -+ if (ERR_isError(maxBits)) continue; -+ -+ if (maxBits < optLogGuess && optLogGuess > minTableLog) break; -+ -+ hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize); -+ } -+ -+ if (ERR_isError(hSize)) continue; -+ -+ newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize; -+ -+ if (newSize > optSize + 1) { -+ break; -+ } -+ -+ if (newSize < optSize) { -+ optSize = newSize; -+ optLog = optLogGuess; -+ } -+ } -+ assert(optLog <= HUF_TABLELOG_MAX); -+ return optLog; -+ } -+} -+ - /* HUF_compress_internal() : - * `workSpace_align4` must be aligned on 4-bytes boundaries, - * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */ -@@ -1177,14 +1333,14 @@ HUF_compress_internal (void* dst, size_t dstSize, - unsigned maxSymbolValue, unsigned huffLog, - HUF_nbStreams_e nbStreams, - void* workSpace, size_t wkspSize, -- HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, -- const int bmi2, unsigned suspectUncompressible) -+ HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags) - { - HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t)); - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstSize; - BYTE* op = ostart; - -+ DEBUGLOG(5, 
"HUF_compress_internal (srcSize=%zu)", srcSize); - HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE); - - /* checks & inits */ -@@ -1198,16 +1354,17 @@ HUF_compress_internal (void* dst, size_t dstSize, - if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; - - /* Heuristic : If old table is valid, use it for small inputs */ -- if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { -+ if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) { - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, -- nbStreams, oldHufTable, bmi2); -+ nbStreams, oldHufTable, flags); - } - - /* If uncompressible data is suspected, do a smaller sampling first */ - DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2); -- if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) { -+ if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) { - size_t largestTotal = 0; -+ DEBUGLOG(5, "input suspected incompressible : sampling to check"); - { unsigned maxSymbolValueBegin = maxSymbolValue; - CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) ); - largestTotal += largestBegin; -@@ -1224,6 +1381,7 @@ HUF_compress_internal (void* dst, size_t dstSize, - if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ - if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ - } -+ DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1)); - - /* Check validity of previous table */ - if ( repeat -@@ -1232,25 +1390,20 @@ HUF_compress_internal (void* dst, size_t dstSize, - *repeat = HUF_repeat_none; - } - /* Heuristic : use existing table for small inputs */ -- if (preferRepeat && repeat && 
*repeat != HUF_repeat_none) { -+ if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) { - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, -- nbStreams, oldHufTable, bmi2); -+ nbStreams, oldHufTable, flags); - } - - /* Build Huffman Tree */ -- huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); -+ huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags); - { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, - maxSymbolValue, huffLog, - &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp)); - CHECK_F(maxBits); - huffLog = (U32)maxBits; -- } -- /* Zero unused symbols in CTable, so we can check it for validity */ -- { -- size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue); -- size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt); -- ZSTD_memset(table->CTable + ctableSize, 0, unusedSize); -+ DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1)); - } - - /* Write table description header */ -@@ -1263,7 +1416,7 @@ HUF_compress_internal (void* dst, size_t dstSize, - if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, -- nbStreams, oldHufTable, bmi2); -+ nbStreams, oldHufTable, flags); - } } - - /* Use the new huffman table */ -@@ -1275,61 +1428,35 @@ HUF_compress_internal (void* dst, size_t dstSize, - } - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, -- nbStreams, table->CTable, bmi2); --} -- -- --size_t HUF_compress1X_wksp (void* dst, size_t dstSize, -- const void* src, size_t srcSize, -- unsigned maxSymbolValue, unsigned huffLog, -- void* workSpace, size_t wkspSize) --{ -- return HUF_compress_internal(dst, dstSize, src, srcSize, -- maxSymbolValue, huffLog, HUF_singleStream, -- workSpace, wkspSize, -- NULL, 
NULL, 0, 0 /*bmi2*/, 0); -+ nbStreams, table->CTable, flags); - } - - size_t HUF_compress1X_repeat (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - void* workSpace, size_t wkspSize, -- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, -- int bmi2, unsigned suspectUncompressible) -+ HUF_CElt* hufTable, HUF_repeat* repeat, int flags) - { -+ DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize); - return HUF_compress_internal(dst, dstSize, src, srcSize, - maxSymbolValue, huffLog, HUF_singleStream, - workSpace, wkspSize, hufTable, -- repeat, preferRepeat, bmi2, suspectUncompressible); --} -- --/* HUF_compress4X_repeat(): -- * compress input using 4 streams. -- * provide workspace to generate compression tables */ --size_t HUF_compress4X_wksp (void* dst, size_t dstSize, -- const void* src, size_t srcSize, -- unsigned maxSymbolValue, unsigned huffLog, -- void* workSpace, size_t wkspSize) --{ -- return HUF_compress_internal(dst, dstSize, src, srcSize, -- maxSymbolValue, huffLog, HUF_fourStreams, -- workSpace, wkspSize, -- NULL, NULL, 0, 0 /*bmi2*/, 0); -+ repeat, flags); - } - - /* HUF_compress4X_repeat(): - * compress input using 4 streams. 
- * consider skipping quickly -- * re-use an existing huffman compression table */ -+ * reuse an existing huffman compression table */ - size_t HUF_compress4X_repeat (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - void* workSpace, size_t wkspSize, -- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible) -+ HUF_CElt* hufTable, HUF_repeat* repeat, int flags) - { -+ DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize); - return HUF_compress_internal(dst, dstSize, src, srcSize, - maxSymbolValue, huffLog, HUF_fourStreams, - workSpace, wkspSize, -- hufTable, repeat, preferRepeat, bmi2, suspectUncompressible); -+ hufTable, repeat, flags); - } -- -diff --git a/lib/zstd/compress/zstd_compress.c b/lib/zstd/compress/zstd_compress.c -index f620cafca633..0d139727cd39 100644 ---- a/lib/zstd/compress/zstd_compress.c -+++ b/lib/zstd/compress/zstd_compress.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -11,12 +12,12 @@ - /*-************************************* - * Dependencies - ***************************************/ -+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */ - #include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */ - #include "../common/mem.h" - #include "hist.h" /* HIST_countFast_wksp */ - #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ - #include "../common/fse.h" --#define HUF_STATIC_LINKING_ONLY - #include "../common/huf.h" - #include "zstd_compress_internal.h" - #include "zstd_compress_sequences.h" -@@ -27,6 +28,7 @@ - #include "zstd_opt.h" - #include "zstd_ldm.h" - #include "zstd_compress_superblock.h" -+#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_rotateRight_U64 */ - - /* *************************************************************** - * Tuning parameters -@@ -55,14 +57,17 @@ - * Helper functions - ***************************************/ - /* ZSTD_compressBound() -- * Note that the result from this function is only compatible with the "normal" -- * full-block strategy. -- * When there are a lot of small blocks due to frequent flush in streaming mode -- * the overhead of headers can make the compressed data to be larger than the -- * return value of ZSTD_compressBound(). -+ * Note that the result from this function is only valid for -+ * the one-pass compression functions. -+ * When employing the streaming mode, -+ * if flushes are frequently altering the size of blocks, -+ * the overhead from block headers can make the compressed data larger -+ * than the return value of ZSTD_compressBound(). 
- */ - size_t ZSTD_compressBound(size_t srcSize) { -- return ZSTD_COMPRESSBOUND(srcSize); -+ size_t const r = ZSTD_COMPRESSBOUND(srcSize); -+ if (r==0) return ERROR(srcSize_wrong); -+ return r; - } - - -@@ -168,15 +173,13 @@ static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) - - size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) - { -+ DEBUGLOG(3, "ZSTD_freeCCtx (address: %p)", (void*)cctx); - if (cctx==NULL) return 0; /* support free on NULL */ - RETURN_ERROR_IF(cctx->staticSize, memory_allocation, - "not compatible with static CCtx"); -- { -- int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); -+ { int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); - ZSTD_freeCCtxContent(cctx); -- if (!cctxInWorkspace) { -- ZSTD_customFree(cctx, cctx->customMem); -- } -+ if (!cctxInWorkspace) ZSTD_customFree(cctx, cctx->customMem); - } - return 0; - } -@@ -257,9 +260,9 @@ static int ZSTD_allocateChainTable(const ZSTD_strategy strategy, - return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder)); - } - --/* Returns 1 if compression parameters are such that we should -+/* Returns ZSTD_ps_enable if compression parameters are such that we should - * enable long distance matching (wlog >= 27, strategy >= btopt). -- * Returns 0 otherwise. -+ * Returns ZSTD_ps_disable otherwise. - */ - static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode, - const ZSTD_compressionParameters* const cParams) { -@@ -267,6 +270,34 @@ static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode, - return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable; - } - -+static int ZSTD_resolveExternalSequenceValidation(int mode) { -+ return mode; -+} -+ -+/* Resolves maxBlockSize to the default if no value is present. 
*/ -+static size_t ZSTD_resolveMaxBlockSize(size_t maxBlockSize) { -+ if (maxBlockSize == 0) { -+ return ZSTD_BLOCKSIZE_MAX; -+ } else { -+ return maxBlockSize; -+ } -+} -+ -+static ZSTD_paramSwitch_e ZSTD_resolveExternalRepcodeSearch(ZSTD_paramSwitch_e value, int cLevel) { -+ if (value != ZSTD_ps_auto) return value; -+ if (cLevel < 10) { -+ return ZSTD_ps_disable; -+ } else { -+ return ZSTD_ps_enable; -+ } -+} -+ -+/* Returns 1 if compression parameters are such that CDict hashtable and chaintable indices are tagged. -+ * If so, the tags need to be removed in ZSTD_resetCCtx_byCopyingCDict. */ -+static int ZSTD_CDictIndicesAreTagged(const ZSTD_compressionParameters* const cParams) { -+ return cParams->strategy == ZSTD_fast || cParams->strategy == ZSTD_dfast; -+} -+ - static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( - ZSTD_compressionParameters cParams) - { -@@ -284,6 +315,10 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( - } - cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams); - cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); -+ cctxParams.validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams.validateSequences); -+ cctxParams.maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams.maxBlockSize); -+ cctxParams.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams.searchForExternalRepcodes, -+ cctxParams.compressionLevel); - assert(!ZSTD_checkCParams(cParams)); - return cctxParams; - } -@@ -329,10 +364,13 @@ size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) - #define ZSTD_NO_CLEVEL 0 - - /* -- * Initializes the cctxParams from params and compressionLevel. -+ * Initializes `cctxParams` from `params` and `compressionLevel`. - * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL. 
- */ --static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel) -+static void -+ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, -+ const ZSTD_parameters* params, -+ int compressionLevel) - { - assert(!ZSTD_checkCParams(params->cParams)); - ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); -@@ -345,6 +383,9 @@ static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_par - cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, ¶ms->cParams); - cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, ¶ms->cParams); - cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, ¶ms->cParams); -+ cctxParams->validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams->validateSequences); -+ cctxParams->maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams->maxBlockSize); -+ cctxParams->searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams->searchForExternalRepcodes, compressionLevel); - DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d", - cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm); - } -@@ -359,7 +400,7 @@ size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_paramete - - /* - * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone. -- * @param param Validated zstd parameters. -+ * @param params Validated zstd parameters. 
- */ - static void ZSTD_CCtxParams_setZstdParams( - ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) -@@ -455,8 +496,8 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) - return bounds; - - case ZSTD_c_enableLongDistanceMatching: -- bounds.lowerBound = 0; -- bounds.upperBound = 1; -+ bounds.lowerBound = (int)ZSTD_ps_auto; -+ bounds.upperBound = (int)ZSTD_ps_disable; - return bounds; - - case ZSTD_c_ldmHashLog: -@@ -549,6 +590,26 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) - bounds.upperBound = 1; - return bounds; - -+ case ZSTD_c_prefetchCDictTables: -+ bounds.lowerBound = (int)ZSTD_ps_auto; -+ bounds.upperBound = (int)ZSTD_ps_disable; -+ return bounds; -+ -+ case ZSTD_c_enableSeqProducerFallback: -+ bounds.lowerBound = 0; -+ bounds.upperBound = 1; -+ return bounds; -+ -+ case ZSTD_c_maxBlockSize: -+ bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN; -+ bounds.upperBound = ZSTD_BLOCKSIZE_MAX; -+ return bounds; -+ -+ case ZSTD_c_searchForExternalRepcodes: -+ bounds.lowerBound = (int)ZSTD_ps_auto; -+ bounds.upperBound = (int)ZSTD_ps_disable; -+ return bounds; -+ - default: - bounds.error = ERROR(parameter_unsupported); - return bounds; -@@ -567,10 +628,11 @@ static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) - return 0; - } - --#define BOUNDCHECK(cParam, val) { \ -- RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ -- parameter_outOfBound, "Param out of bounds"); \ --} -+#define BOUNDCHECK(cParam, val) \ -+ do { \ -+ RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ -+ parameter_outOfBound, "Param out of bounds"); \ -+ } while (0) - - - static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) -@@ -613,6 +675,10 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) - case ZSTD_c_useBlockSplitter: - case ZSTD_c_useRowMatchFinder: - case ZSTD_c_deterministicRefPrefix: -+ case ZSTD_c_prefetchCDictTables: -+ case ZSTD_c_enableSeqProducerFallback: -+ case ZSTD_c_maxBlockSize: -+ case 
ZSTD_c_searchForExternalRepcodes: - default: - return 0; - } -@@ -625,7 +691,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) - if (ZSTD_isUpdateAuthorized(param)) { - cctx->cParamsChanged = 1; - } else { -- RETURN_ERROR(stage_wrong, "can only set params in ctx init stage"); -+ RETURN_ERROR(stage_wrong, "can only set params in cctx init stage"); - } } - - switch(param) -@@ -668,6 +734,10 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) - case ZSTD_c_useBlockSplitter: - case ZSTD_c_useRowMatchFinder: - case ZSTD_c_deterministicRefPrefix: -+ case ZSTD_c_prefetchCDictTables: -+ case ZSTD_c_enableSeqProducerFallback: -+ case ZSTD_c_maxBlockSize: -+ case ZSTD_c_searchForExternalRepcodes: - break; - - default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); -@@ -723,12 +793,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, - case ZSTD_c_minMatch : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_minMatch, value); -- CCtxParams->cParams.minMatch = value; -+ CCtxParams->cParams.minMatch = (U32)value; - return CCtxParams->cParams.minMatch; - - case ZSTD_c_targetLength : - BOUNDCHECK(ZSTD_c_targetLength, value); -- CCtxParams->cParams.targetLength = value; -+ CCtxParams->cParams.targetLength = (U32)value; - return CCtxParams->cParams.targetLength; - - case ZSTD_c_strategy : -@@ -741,12 +811,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, - /* Content size written in frame header _when known_ (default:1) */ - DEBUGLOG(4, "set content size flag = %u", (value!=0)); - CCtxParams->fParams.contentSizeFlag = value != 0; -- return CCtxParams->fParams.contentSizeFlag; -+ return (size_t)CCtxParams->fParams.contentSizeFlag; - - case ZSTD_c_checksumFlag : - /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ - CCtxParams->fParams.checksumFlag = value != 0; -- return CCtxParams->fParams.checksumFlag; -+ return 
(size_t)CCtxParams->fParams.checksumFlag; - - case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ - DEBUGLOG(4, "set dictIDFlag = %u", (value!=0)); -@@ -755,18 +825,18 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, - - case ZSTD_c_forceMaxWindow : - CCtxParams->forceWindow = (value != 0); -- return CCtxParams->forceWindow; -+ return (size_t)CCtxParams->forceWindow; - - case ZSTD_c_forceAttachDict : { - const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; -- BOUNDCHECK(ZSTD_c_forceAttachDict, pref); -+ BOUNDCHECK(ZSTD_c_forceAttachDict, (int)pref); - CCtxParams->attachDictPref = pref; - return CCtxParams->attachDictPref; - } - - case ZSTD_c_literalCompressionMode : { - const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value; -- BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); -+ BOUNDCHECK(ZSTD_c_literalCompressionMode, (int)lcm); - CCtxParams->literalCompressionMode = lcm; - return CCtxParams->literalCompressionMode; - } -@@ -789,47 +859,50 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, - - case ZSTD_c_enableDedicatedDictSearch : - CCtxParams->enableDedicatedDictSearch = (value!=0); -- return CCtxParams->enableDedicatedDictSearch; -+ return (size_t)CCtxParams->enableDedicatedDictSearch; - - case ZSTD_c_enableLongDistanceMatching : -+ BOUNDCHECK(ZSTD_c_enableLongDistanceMatching, value); - CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value; - return CCtxParams->ldmParams.enableLdm; - - case ZSTD_c_ldmHashLog : - if (value!=0) /* 0 ==> auto */ - BOUNDCHECK(ZSTD_c_ldmHashLog, value); -- CCtxParams->ldmParams.hashLog = value; -+ CCtxParams->ldmParams.hashLog = (U32)value; - return CCtxParams->ldmParams.hashLog; - - case ZSTD_c_ldmMinMatch : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_ldmMinMatch, value); -- CCtxParams->ldmParams.minMatchLength = value; -+ CCtxParams->ldmParams.minMatchLength = (U32)value; - return 
CCtxParams->ldmParams.minMatchLength; - - case ZSTD_c_ldmBucketSizeLog : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value); -- CCtxParams->ldmParams.bucketSizeLog = value; -+ CCtxParams->ldmParams.bucketSizeLog = (U32)value; - return CCtxParams->ldmParams.bucketSizeLog; - - case ZSTD_c_ldmHashRateLog : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_ldmHashRateLog, value); -- CCtxParams->ldmParams.hashRateLog = value; -+ CCtxParams->ldmParams.hashRateLog = (U32)value; - return CCtxParams->ldmParams.hashRateLog; - - case ZSTD_c_targetCBlockSize : -- if (value!=0) /* 0 ==> default */ -+ if (value!=0) { /* 0 ==> default */ -+ value = MAX(value, ZSTD_TARGETCBLOCKSIZE_MIN); - BOUNDCHECK(ZSTD_c_targetCBlockSize, value); -- CCtxParams->targetCBlockSize = value; -+ } -+ CCtxParams->targetCBlockSize = (U32)value; - return CCtxParams->targetCBlockSize; - - case ZSTD_c_srcSizeHint : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_srcSizeHint, value); - CCtxParams->srcSizeHint = value; -- return CCtxParams->srcSizeHint; -+ return (size_t)CCtxParams->srcSizeHint; - - case ZSTD_c_stableInBuffer: - BOUNDCHECK(ZSTD_c_stableInBuffer, value); -@@ -849,7 +922,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, - case ZSTD_c_validateSequences: - BOUNDCHECK(ZSTD_c_validateSequences, value); - CCtxParams->validateSequences = value; -- return CCtxParams->validateSequences; -+ return (size_t)CCtxParams->validateSequences; - - case ZSTD_c_useBlockSplitter: - BOUNDCHECK(ZSTD_c_useBlockSplitter, value); -@@ -864,7 +937,28 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, - case ZSTD_c_deterministicRefPrefix: - BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value); - CCtxParams->deterministicRefPrefix = !!value; -- return CCtxParams->deterministicRefPrefix; -+ return (size_t)CCtxParams->deterministicRefPrefix; -+ -+ case ZSTD_c_prefetchCDictTables: -+ BOUNDCHECK(ZSTD_c_prefetchCDictTables, value); -+ 
CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value; -+ return CCtxParams->prefetchCDictTables; -+ -+ case ZSTD_c_enableSeqProducerFallback: -+ BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value); -+ CCtxParams->enableMatchFinderFallback = value; -+ return (size_t)CCtxParams->enableMatchFinderFallback; -+ -+ case ZSTD_c_maxBlockSize: -+ if (value!=0) /* 0 ==> default */ -+ BOUNDCHECK(ZSTD_c_maxBlockSize, value); -+ CCtxParams->maxBlockSize = value; -+ return CCtxParams->maxBlockSize; -+ -+ case ZSTD_c_searchForExternalRepcodes: -+ BOUNDCHECK(ZSTD_c_searchForExternalRepcodes, value); -+ CCtxParams->searchForExternalRepcodes = (ZSTD_paramSwitch_e)value; -+ return CCtxParams->searchForExternalRepcodes; - - default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); - } -@@ -980,6 +1074,18 @@ size_t ZSTD_CCtxParams_getParameter( - case ZSTD_c_deterministicRefPrefix: - *value = (int)CCtxParams->deterministicRefPrefix; - break; -+ case ZSTD_c_prefetchCDictTables: -+ *value = (int)CCtxParams->prefetchCDictTables; -+ break; -+ case ZSTD_c_enableSeqProducerFallback: -+ *value = CCtxParams->enableMatchFinderFallback; -+ break; -+ case ZSTD_c_maxBlockSize: -+ *value = (int)CCtxParams->maxBlockSize; -+ break; -+ case ZSTD_c_searchForExternalRepcodes: -+ *value = (int)CCtxParams->searchForExternalRepcodes; -+ break; - default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); - } - return 0; -@@ -1006,9 +1112,47 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams( - return 0; - } - -+size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams) -+{ -+ ZSTD_STATIC_ASSERT(sizeof(cparams) == 7 * 4 /* all params are listed below */); -+ DEBUGLOG(4, "ZSTD_CCtx_setCParams"); -+ /* only update if all parameters are valid */ -+ FORWARD_IF_ERROR(ZSTD_checkCParams(cparams), ""); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, cparams.windowLog), ""); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_chainLog, 
cparams.chainLog), ""); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, cparams.hashLog), ""); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_searchLog, cparams.searchLog), ""); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, cparams.minMatch), ""); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetLength, cparams.targetLength), ""); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, cparams.strategy), ""); -+ return 0; -+} -+ -+size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams) -+{ -+ ZSTD_STATIC_ASSERT(sizeof(fparams) == 3 * 4 /* all params are listed below */); -+ DEBUGLOG(4, "ZSTD_CCtx_setFParams"); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, fparams.contentSizeFlag != 0), ""); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, fparams.checksumFlag != 0), ""); -+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_dictIDFlag, fparams.noDictIDFlag == 0), ""); -+ return 0; -+} -+ -+size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params) -+{ -+ DEBUGLOG(4, "ZSTD_CCtx_setParams"); -+ /* First check cParams, because we want to update all or none. */ -+ FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); -+ /* Next set fParams, because this could fail if the cctx isn't in init stage. */ -+ FORWARD_IF_ERROR(ZSTD_CCtx_setFParams(cctx, params.fParams), ""); -+ /* Finally set cParams, which should succeed. 
*/ -+ FORWARD_IF_ERROR(ZSTD_CCtx_setCParams(cctx, params.cParams), ""); -+ return 0; -+} -+ - size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) - { -- DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); -+ DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %llu bytes", pledgedSrcSize); - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "Can't set pledgedSrcSize when not in init stage."); - cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; -@@ -1024,9 +1168,9 @@ static void ZSTD_dedicatedDictSearch_revertCParams( - ZSTD_compressionParameters* cParams); - - /* -- * Initializes the local dict using the requested parameters. -- * NOTE: This does not use the pledged src size, because it may be used for more -- * than one compression. -+ * Initializes the local dictionary using requested parameters. -+ * NOTE: Initialization does not employ the pledged src size, -+ * because the dictionary may be used for multiple compressions. - */ - static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) - { -@@ -1039,8 +1183,8 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) - return 0; - } - if (dl->cdict != NULL) { -- assert(cctx->cdict == dl->cdict); - /* Local dictionary already initialized. 
*/ -+ assert(cctx->cdict == dl->cdict); - return 0; - } - assert(dl->dictSize > 0); -@@ -1060,26 +1204,30 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) - } - - size_t ZSTD_CCtx_loadDictionary_advanced( -- ZSTD_CCtx* cctx, const void* dict, size_t dictSize, -- ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) -+ ZSTD_CCtx* cctx, -+ const void* dict, size_t dictSize, -+ ZSTD_dictLoadMethod_e dictLoadMethod, -+ ZSTD_dictContentType_e dictContentType) - { -- RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, -- "Can't load a dictionary when ctx is not in init stage."); - DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); -- ZSTD_clearAllDicts(cctx); /* in case one already exists */ -- if (dict == NULL || dictSize == 0) /* no dictionary mode */ -+ RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, -+ "Can't load a dictionary when cctx is not in init stage."); -+ ZSTD_clearAllDicts(cctx); /* erase any previously set dictionary */ -+ if (dict == NULL || dictSize == 0) /* no dictionary */ - return 0; - if (dictLoadMethod == ZSTD_dlm_byRef) { - cctx->localDict.dict = dict; - } else { -+ /* copy dictionary content inside CCtx to own its lifetime */ - void* dictBuffer; - RETURN_ERROR_IF(cctx->staticSize, memory_allocation, -- "no malloc for static CCtx"); -+ "static CCtx can't allocate for an internal copy of dictionary"); - dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem); -- RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!"); -+ RETURN_ERROR_IF(dictBuffer==NULL, memory_allocation, -+ "allocation failed for dictionary content"); - ZSTD_memcpy(dictBuffer, dict, dictSize); -- cctx->localDict.dictBuffer = dictBuffer; -- cctx->localDict.dict = dictBuffer; -+ cctx->localDict.dictBuffer = dictBuffer; /* owned ptr to free */ -+ cctx->localDict.dict = dictBuffer; /* read-only reference */ - } - cctx->localDict.dictSize = dictSize; - cctx->localDict.dictContentType = 
dictContentType; -@@ -1149,7 +1297,7 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) - if ( (reset == ZSTD_reset_parameters) - || (reset == ZSTD_reset_session_and_parameters) ) { - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, -- "Can't reset parameters only when not in init stage."); -+ "Reset parameters is only possible during init stage."); - ZSTD_clearAllDicts(cctx); - return ZSTD_CCtxParams_reset(&cctx->requestedParams); - } -@@ -1178,11 +1326,12 @@ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) - static ZSTD_compressionParameters - ZSTD_clampCParams(ZSTD_compressionParameters cParams) - { --# define CLAMP_TYPE(cParam, val, type) { \ -- ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ -- if ((int)valbounds.upperBound) val=(type)bounds.upperBound; \ -- } -+# define CLAMP_TYPE(cParam, val, type) \ -+ do { \ -+ ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ -+ if ((int)valbounds.upperBound) val=(type)bounds.upperBound; \ -+ } while (0) - # define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) - CLAMP(ZSTD_c_windowLog, cParams.windowLog); - CLAMP(ZSTD_c_chainLog, cParams.chainLog); -@@ -1247,12 +1396,55 @@ static ZSTD_compressionParameters - ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, - unsigned long long srcSize, - size_t dictSize, -- ZSTD_cParamMode_e mode) -+ ZSTD_cParamMode_e mode, -+ ZSTD_paramSwitch_e useRowMatchFinder) - { - const U64 minSrcSize = 513; /* (1<<9) + 1 */ - const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); - assert(ZSTD_checkCParams(cPar)==0); - -+ /* Cascade the selected strategy down to the next-highest one built into -+ * this binary. 
*/ -+#ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR -+ if (cPar.strategy == ZSTD_btultra2) { -+ cPar.strategy = ZSTD_btultra; -+ } -+ if (cPar.strategy == ZSTD_btultra) { -+ cPar.strategy = ZSTD_btopt; -+ } -+#endif -+#ifdef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR -+ if (cPar.strategy == ZSTD_btopt) { -+ cPar.strategy = ZSTD_btlazy2; -+ } -+#endif -+#ifdef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR -+ if (cPar.strategy == ZSTD_btlazy2) { -+ cPar.strategy = ZSTD_lazy2; -+ } -+#endif -+#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -+ if (cPar.strategy == ZSTD_lazy2) { -+ cPar.strategy = ZSTD_lazy; -+ } -+#endif -+#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -+ if (cPar.strategy == ZSTD_lazy) { -+ cPar.strategy = ZSTD_greedy; -+ } -+#endif -+#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -+ if (cPar.strategy == ZSTD_greedy) { -+ cPar.strategy = ZSTD_dfast; -+ } -+#endif -+#ifdef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR -+ if (cPar.strategy == ZSTD_dfast) { -+ cPar.strategy = ZSTD_fast; -+ cPar.targetLength = 0; -+ } -+#endif -+ - switch (mode) { - case ZSTD_cpm_unknown: - case ZSTD_cpm_noAttachDict: -@@ -1281,8 +1473,8 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, - } - - /* resize windowLog if input is small enough, to use less memory */ -- if ( (srcSize < maxWindowResize) -- && (dictSize < maxWindowResize) ) { -+ if ( (srcSize <= maxWindowResize) -+ && (dictSize <= maxWindowResize) ) { - U32 const tSize = (U32)(srcSize + dictSize); - static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; - U32 const srcLog = (tSize < hashSizeMin) ? 
ZSTD_HASHLOG_MIN : -@@ -1300,6 +1492,42 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, - if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) - cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ - -+ /* We can't use more than 32 bits of hash in total, so that means that we require: -+ * (hashLog + 8) <= 32 && (chainLog + 8) <= 32 -+ */ -+ if (mode == ZSTD_cpm_createCDict && ZSTD_CDictIndicesAreTagged(&cPar)) { -+ U32 const maxShortCacheHashLog = 32 - ZSTD_SHORT_CACHE_TAG_BITS; -+ if (cPar.hashLog > maxShortCacheHashLog) { -+ cPar.hashLog = maxShortCacheHashLog; -+ } -+ if (cPar.chainLog > maxShortCacheHashLog) { -+ cPar.chainLog = maxShortCacheHashLog; -+ } -+ } -+ -+ -+ /* At this point, we aren't 100% sure if we are using the row match finder. -+ * Unless it is explicitly disabled, conservatively assume that it is enabled. -+ * In this case it will only be disabled for small sources, so shrinking the -+ * hash log a little bit shouldn't result in any ratio loss. -+ */ -+ if (useRowMatchFinder == ZSTD_ps_auto) -+ useRowMatchFinder = ZSTD_ps_enable; -+ -+ /* We can't hash more than 32-bits in total. 
So that means that we require: -+ * (hashLog - rowLog + 8) <= 32 -+ */ -+ if (ZSTD_rowMatchFinderUsed(cPar.strategy, useRowMatchFinder)) { -+ /* Switch to 32-entry rows if searchLog is 5 (or more) */ -+ U32 const rowLog = BOUNDED(4, cPar.searchLog, 6); -+ U32 const maxRowHashLog = 32 - ZSTD_ROW_HASH_TAG_BITS; -+ U32 const maxHashLog = maxRowHashLog + rowLog; -+ assert(cPar.hashLog >= rowLog); -+ if (cPar.hashLog > maxHashLog) { -+ cPar.hashLog = maxHashLog; -+ } -+ } -+ - return cPar; - } - -@@ -1310,7 +1538,7 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar, - { - cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ - if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; -- return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown); -+ return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown, ZSTD_ps_auto); - } - - static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); -@@ -1341,7 +1569,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( - ZSTD_overrideCParams(&cParams, &CCtxParams->cParams); - assert(!ZSTD_checkCParams(cParams)); - /* srcSizeHint == 0 means 0 */ -- return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode); -+ return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode, CCtxParams->useRowMatchFinder); - } - - static size_t -@@ -1367,10 +1595,10 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, - + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32)) - + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32)) - + ZSTD_cwksp_aligned_alloc_size((1<strategy, useRowMatchFinder) -- ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16)) -+ ? ZSTD_cwksp_aligned_alloc_size(hSize) - : 0; - size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt)) - ? 
optPotentialSpace -@@ -1386,6 +1614,13 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, - return tableSpace + optSpace + slackSpace + lazyAdditionalSpace; - } - -+/* Helper function for calculating memory requirements. -+ * Gives a tighter bound than ZSTD_sequenceBound() by taking minMatch into account. */ -+static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useSequenceProducer) { -+ U32 const divider = (minMatch==3 || useSequenceProducer) ? 3 : 4; -+ return blockSize / divider; -+} -+ - static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( - const ZSTD_compressionParameters* cParams, - const ldmParams_t* ldmParams, -@@ -1393,12 +1628,13 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( - const ZSTD_paramSwitch_e useRowMatchFinder, - const size_t buffInSize, - const size_t buffOutSize, -- const U64 pledgedSrcSize) -+ const U64 pledgedSrcSize, -+ int useSequenceProducer, -+ size_t maxBlockSize) - { - size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize); -- size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); -- U32 const divider = (cParams->minMatch==3) ? 3 : 4; -- size_t const maxNbSeq = blockSize / divider; -+ size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(maxBlockSize), windowSize); -+ size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useSequenceProducer); - size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) - + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef)) - + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); -@@ -1417,6 +1653,11 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( - - size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; - -+ size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize); -+ size_t const externalSeqSpace = useSequenceProducer -+ ? 
ZSTD_cwksp_aligned_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence)) -+ : 0; -+ - size_t const neededSpace = - cctxSpace + - entropySpace + -@@ -1425,7 +1666,8 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( - ldmSeqSpace + - matchStateSize + - tokenSpace + -- bufferSpace; -+ bufferSpace + -+ externalSeqSpace; - - DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); - return neededSpace; -@@ -1443,7 +1685,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) - * be needed. However, we still allocate two 0-sized buffers, which can - * take space under ASAN. */ - return ZSTD_estimateCCtxSize_usingCCtxParams_internal( -- &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); -+ &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize); - } - - size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) -@@ -1493,7 +1735,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) - RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); - { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); -- size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); -+ size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(params->maxBlockSize), (size_t)1 << cParams.windowLog); - size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered) - ? 
((size_t)1 << cParams.windowLog) + blockSize - : 0; -@@ -1504,7 +1746,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) - - return ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize, -- ZSTD_CONTENTSIZE_UNKNOWN); -+ ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize); - } - } - -@@ -1637,6 +1879,19 @@ typedef enum { - ZSTD_resetTarget_CCtx - } ZSTD_resetTarget_e; - -+/* Mixes bits in a 64 bits in a value, based on XXH3_rrmxmx */ -+static U64 ZSTD_bitmix(U64 val, U64 len) { -+ val ^= ZSTD_rotateRight_U64(val, 49) ^ ZSTD_rotateRight_U64(val, 24); -+ val *= 0x9FB21C651E98DF25ULL; -+ val ^= (val >> 35) + len ; -+ val *= 0x9FB21C651E98DF25ULL; -+ return val ^ (val >> 28); -+} -+ -+/* Mixes in the hashSalt and hashSaltEntropy to create a new hashSalt */ -+static void ZSTD_advanceHashSalt(ZSTD_matchState_t* ms) { -+ ms->hashSalt = ZSTD_bitmix(ms->hashSalt, 8) ^ ZSTD_bitmix((U64) ms->hashSaltEntropy, 4); -+} - - static size_t - ZSTD_reset_matchState(ZSTD_matchState_t* ms, -@@ -1664,6 +1919,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms, - } - - ms->hashLog3 = hashLog3; -+ ms->lazySkipping = 0; - - ZSTD_invalidateMatchState(ms); - -@@ -1685,22 +1941,19 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms, - ZSTD_cwksp_clean_tables(ws); - } - -- /* opt parser space */ -- if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { -- DEBUGLOG(4, "reserving optimal parser space"); -- ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); -- ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); -- ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); -- ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * 
sizeof(ZSTD_match_t)); -- ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); -- } -- - if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) { -- { /* Row match finder needs an additional table of hashes ("tags") */ -- size_t const tagTableSize = hSize*sizeof(U16); -- ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize); -- if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize); -+ /* Row match finder needs an additional table of hashes ("tags") */ -+ size_t const tagTableSize = hSize; -+ /* We want to generate a new salt in case we reset a Cctx, but we always want to use -+ * 0 when we reset a Cdict */ -+ if(forWho == ZSTD_resetTarget_CCtx) { -+ ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned_init_once(ws, tagTableSize); -+ ZSTD_advanceHashSalt(ms); -+ } else { -+ /* When we are not salting we want to always memset the memory */ -+ ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned(ws, tagTableSize); -+ ZSTD_memset(ms->tagTable, 0, tagTableSize); -+ ms->hashSalt = 0; - } - { /* Switch to 32-entry rows if searchLog is 5 (or more) */ - U32 const rowLog = BOUNDED(4, cParams->searchLog, 6); -@@ -1709,6 +1962,17 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms, - } - } - -+ /* opt parser space */ -+ if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { -+ DEBUGLOG(4, "reserving optimal parser space"); -+ ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); -+ ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); -+ ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); -+ ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_match_t)); -+ ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * 
sizeof(ZSTD_optimal_t)); -+ } -+ - ms->cParams = *cParams; - - RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, -@@ -1768,6 +2032,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, - assert(params->useRowMatchFinder != ZSTD_ps_auto); - assert(params->useBlockSplitter != ZSTD_ps_auto); - assert(params->ldmParams.enableLdm != ZSTD_ps_auto); -+ assert(params->maxBlockSize != 0); - if (params->ldmParams.enableLdm == ZSTD_ps_enable) { - /* Adjust long distance matching parameters */ - ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, ¶ms->cParams); -@@ -1776,9 +2041,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, - } - - { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize)); -- size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); -- U32 const divider = (params->cParams.minMatch==3) ? 3 : 4; -- size_t const maxNbSeq = blockSize / divider; -+ size_t const blockSize = MIN(params->maxBlockSize, windowSize); -+ size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, ZSTD_hasExtSeqProd(params)); - size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered) - ? 
ZSTD_compressBound(blockSize) + 1 - : 0; -@@ -1795,8 +2059,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, - size_t const neededSpace = - ZSTD_estimateCCtxSize_usingCCtxParams_internal( - ¶ms->cParams, ¶ms->ldmParams, zc->staticSize != 0, params->useRowMatchFinder, -- buffInSize, buffOutSize, pledgedSrcSize); -- int resizeWorkspace; -+ buffInSize, buffOutSize, pledgedSrcSize, ZSTD_hasExtSeqProd(params), params->maxBlockSize); - - FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); - -@@ -1805,7 +2068,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, - { /* Check if workspace is large enough, alloc a new one if needed */ - int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; - int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); -- resizeWorkspace = workspaceTooSmall || workspaceWasteful; -+ int resizeWorkspace = workspaceTooSmall || workspaceWasteful; - DEBUGLOG(4, "Need %zu B workspace", neededSpace); - DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); - -@@ -1838,6 +2101,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, - - /* init params */ - zc->blockState.matchState.cParams = params->cParams; -+ zc->blockState.matchState.prefetchCDictTables = params->prefetchCDictTables == ZSTD_ps_enable; - zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; - zc->consumedSrcSize = 0; - zc->producedCSize = 0; -@@ -1854,13 +2118,46 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, - - ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); - -+ FORWARD_IF_ERROR(ZSTD_reset_matchState( -+ &zc->blockState.matchState, -+ ws, -+ ¶ms->cParams, -+ params->useRowMatchFinder, -+ crp, -+ needsIndexReset, -+ ZSTD_resetTarget_CCtx), ""); -+ -+ zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); -+ -+ /* ldm hash table */ -+ if (params->ldmParams.enableLdm == ZSTD_ps_enable) { -+ /* TODO: avoid memset? 
*/ -+ size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog; -+ zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); -+ ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); -+ zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); -+ zc->maxNbLdmSequences = maxNbLdmSeq; -+ -+ ZSTD_window_init(&zc->ldmState.window); -+ zc->ldmState.loadedDictEnd = 0; -+ } -+ -+ /* reserve space for block-level external sequences */ -+ if (ZSTD_hasExtSeqProd(params)) { -+ size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize); -+ zc->extSeqBufCapacity = maxNbExternalSeq; -+ zc->extSeqBuf = -+ (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence)); -+ } -+ -+ /* buffers */ -+ - /* ZSTD_wildcopy() is used to copy into the literals buffer, - * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. - */ - zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH); - zc->seqStore.maxNbLit = blockSize; - -- /* buffers */ - zc->bufferedPolicy = zbuff; - zc->inBuffSize = buffInSize; - zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize); -@@ -1883,32 +2180,9 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, - zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); - zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); - zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); -- zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); -- -- FORWARD_IF_ERROR(ZSTD_reset_matchState( -- &zc->blockState.matchState, -- ws, -- ¶ms->cParams, -- params->useRowMatchFinder, -- crp, -- needsIndexReset, -- ZSTD_resetTarget_CCtx), ""); -- -- /* ldm hash table */ -- if (params->ldmParams.enableLdm == ZSTD_ps_enable) { -- /* TODO: avoid memset? 
*/ -- size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog; -- zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); -- ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); -- zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); -- zc->maxNbLdmSequences = maxNbLdmSeq; -- -- ZSTD_window_init(&zc->ldmState.window); -- zc->ldmState.loadedDictEnd = 0; -- } - - DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); -- assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace)); -+ assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace)); - - zc->initialized = 1; - -@@ -1980,7 +2254,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, - } - - params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, -- cdict->dictContentSize, ZSTD_cpm_attachDict); -+ cdict->dictContentSize, ZSTD_cpm_attachDict, -+ params.useRowMatchFinder); - params.cParams.windowLog = windowLog; - params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */ - FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, ¶ms, pledgedSrcSize, -@@ -2019,6 +2294,22 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, - return 0; - } - -+static void ZSTD_copyCDictTableIntoCCtx(U32* dst, U32 const* src, size_t tableSize, -+ ZSTD_compressionParameters const* cParams) { -+ if (ZSTD_CDictIndicesAreTagged(cParams)){ -+ /* Remove tags from the CDict table if they are present. -+ * See docs on "short cache" in zstd_compress_internal.h for context. 
*/ -+ size_t i; -+ for (i = 0; i < tableSize; i++) { -+ U32 const taggedIndex = src[i]; -+ U32 const index = taggedIndex >> ZSTD_SHORT_CACHE_TAG_BITS; -+ dst[i] = index; -+ } -+ } else { -+ ZSTD_memcpy(dst, src, tableSize * sizeof(U32)); -+ } -+} -+ - static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, - const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, -@@ -2054,21 +2345,23 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, - : 0; - size_t const hSize = (size_t)1 << cdict_cParams->hashLog; - -- ZSTD_memcpy(cctx->blockState.matchState.hashTable, -- cdict->matchState.hashTable, -- hSize * sizeof(U32)); -+ ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.hashTable, -+ cdict->matchState.hashTable, -+ hSize, cdict_cParams); -+ - /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */ - if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) { -- ZSTD_memcpy(cctx->blockState.matchState.chainTable, -- cdict->matchState.chainTable, -- chainSize * sizeof(U32)); -+ ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.chainTable, -+ cdict->matchState.chainTable, -+ chainSize, cdict_cParams); - } - /* copy tag table */ - if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) { -- size_t const tagTableSize = hSize*sizeof(U16); -+ size_t const tagTableSize = hSize; - ZSTD_memcpy(cctx->blockState.matchState.tagTable, -- cdict->matchState.tagTable, -- tagTableSize); -+ cdict->matchState.tagTable, -+ tagTableSize); -+ cctx->blockState.matchState.hashSalt = cdict->matchState.hashSalt; - } - } - -@@ -2147,6 +2440,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, - params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter; - params.ldmParams = srcCCtx->appliedParams.ldmParams; - params.fParams = fParams; -+ params.maxBlockSize = srcCCtx->appliedParams.maxBlockSize; - 
ZSTD_resetCCtx_internal(dstCCtx, ¶ms, pledgedSrcSize, - /* loadedDictSize */ 0, - ZSTDcrp_leaveDirty, zbuff); -@@ -2294,7 +2588,7 @@ static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* par - - /* See doc/zstd_compression_format.md for detailed format description */ - --void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) -+int ZSTD_seqToCodes(const seqStore_t* seqStorePtr) - { - const seqDef* const sequences = seqStorePtr->sequencesStart; - BYTE* const llCodeTable = seqStorePtr->llCode; -@@ -2302,18 +2596,24 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) - BYTE* const mlCodeTable = seqStorePtr->mlCode; - U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - U32 u; -+ int longOffsets = 0; - assert(nbSeq <= seqStorePtr->maxNbSeq); - for (u=0; u= STREAM_ACCUMULATOR_MIN)); -+ if (MEM_32bits() && ofCode >= STREAM_ACCUMULATOR_MIN) -+ longOffsets = 1; - } - if (seqStorePtr->longLengthType==ZSTD_llt_literalLength) - llCodeTable[seqStorePtr->longLengthPos] = MaxLL; - if (seqStorePtr->longLengthType==ZSTD_llt_matchLength) - mlCodeTable[seqStorePtr->longLengthPos] = MaxML; -+ return longOffsets; - } - - /* ZSTD_useTargetCBlockSize(): -@@ -2347,6 +2647,7 @@ typedef struct { - U32 MLtype; - size_t size; - size_t lastCountSize; /* Accounts for bug in 1.3.4. 
More detail in ZSTD_entropyCompressSeqStore_internal() */ -+ int longOffsets; - } ZSTD_symbolEncodingTypeStats_t; - - /* ZSTD_buildSequencesStatistics(): -@@ -2357,11 +2658,13 @@ typedef struct { - * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32) - */ - static ZSTD_symbolEncodingTypeStats_t --ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, -- const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, -- BYTE* dst, const BYTE* const dstEnd, -- ZSTD_strategy strategy, unsigned* countWorkspace, -- void* entropyWorkspace, size_t entropyWkspSize) { -+ZSTD_buildSequencesStatistics( -+ const seqStore_t* seqStorePtr, size_t nbSeq, -+ const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, -+ BYTE* dst, const BYTE* const dstEnd, -+ ZSTD_strategy strategy, unsigned* countWorkspace, -+ void* entropyWorkspace, size_t entropyWkspSize) -+{ - BYTE* const ostart = dst; - const BYTE* const oend = dstEnd; - BYTE* op = ostart; -@@ -2375,7 +2678,7 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, - - stats.lastCountSize = 0; - /* convert length/distances into codes */ -- ZSTD_seqToCodes(seqStorePtr); -+ stats.longOffsets = ZSTD_seqToCodes(seqStorePtr); - assert(op <= oend); - assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */ - /* build CTable for Literal Lengths */ -@@ -2480,22 +2783,22 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, - */ - #define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20 - MEM_STATIC size_t --ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr, -- const ZSTD_entropyCTables_t* prevEntropy, -- ZSTD_entropyCTables_t* nextEntropy, -- const ZSTD_CCtx_params* cctxParams, -- void* dst, size_t dstCapacity, -- void* entropyWorkspace, size_t entropyWkspSize, -- const int bmi2) -+ZSTD_entropyCompressSeqStore_internal( -+ const seqStore_t* seqStorePtr, -+ const ZSTD_entropyCTables_t* prevEntropy, -+ 
ZSTD_entropyCTables_t* nextEntropy, -+ const ZSTD_CCtx_params* cctxParams, -+ void* dst, size_t dstCapacity, -+ void* entropyWorkspace, size_t entropyWkspSize, -+ const int bmi2) - { -- const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; - ZSTD_strategy const strategy = cctxParams->cParams.strategy; - unsigned* count = (unsigned*)entropyWorkspace; - FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; - FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; - FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; - const seqDef* const sequences = seqStorePtr->sequencesStart; -- const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; -+ const size_t nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - const BYTE* const ofCodeTable = seqStorePtr->ofCode; - const BYTE* const llCodeTable = seqStorePtr->llCode; - const BYTE* const mlCodeTable = seqStorePtr->mlCode; -@@ -2503,29 +2806,31 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr, - BYTE* const oend = ostart + dstCapacity; - BYTE* op = ostart; - size_t lastCountSize; -+ int longOffsets = 0; - - entropyWorkspace = count + (MaxSeq + 1); - entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); - -- DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq); -+ DEBUGLOG(5, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu, dstCapacity=%zu)", nbSeq, dstCapacity); - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= HUF_WORKSPACE_SIZE); - - /* Compress literals */ - { const BYTE* const literals = seqStorePtr->litStart; -- size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart; -- size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart; -+ size_t const numSequences = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); -+ size_t const numLiterals = (size_t)(seqStorePtr->lit - seqStorePtr->litStart); - /* Base suspicion of uncompressibility on ratio of 
literals to sequences */ - unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO); - size_t const litSize = (size_t)(seqStorePtr->lit - literals); -+ - size_t const cSize = ZSTD_compressLiterals( -- &prevEntropy->huf, &nextEntropy->huf, -- cctxParams->cParams.strategy, -- ZSTD_literalsCompressionIsDisabled(cctxParams), - op, dstCapacity, - literals, litSize, - entropyWorkspace, entropyWkspSize, -- bmi2, suspectUncompressible); -+ &prevEntropy->huf, &nextEntropy->huf, -+ cctxParams->cParams.strategy, -+ ZSTD_literalsCompressionIsDisabled(cctxParams), -+ suspectUncompressible, bmi2); - FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); - assert(cSize <= dstCapacity); - op += cSize; -@@ -2551,11 +2856,10 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr, - ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); - return (size_t)(op - ostart); - } -- { -- ZSTD_symbolEncodingTypeStats_t stats; -- BYTE* seqHead = op++; -+ { BYTE* const seqHead = op++; - /* build stats for sequences */ -- stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, -+ const ZSTD_symbolEncodingTypeStats_t stats = -+ ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, - &prevEntropy->fse, &nextEntropy->fse, - op, oend, - strategy, count, -@@ -2564,6 +2868,7 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr, - *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2)); - lastCountSize = stats.lastCountSize; - op += stats.size; -+ longOffsets = stats.longOffsets; - } - - { size_t const bitstreamSize = ZSTD_encodeSequences( -@@ -2598,14 +2903,15 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr, - } - - MEM_STATIC size_t --ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr, -- const ZSTD_entropyCTables_t* prevEntropy, -- ZSTD_entropyCTables_t* nextEntropy, -- const ZSTD_CCtx_params* cctxParams, -- void* dst, size_t dstCapacity, -- 
size_t srcSize, -- void* entropyWorkspace, size_t entropyWkspSize, -- int bmi2) -+ZSTD_entropyCompressSeqStore( -+ const seqStore_t* seqStorePtr, -+ const ZSTD_entropyCTables_t* prevEntropy, -+ ZSTD_entropyCTables_t* nextEntropy, -+ const ZSTD_CCtx_params* cctxParams, -+ void* dst, size_t dstCapacity, -+ size_t srcSize, -+ void* entropyWorkspace, size_t entropyWkspSize, -+ int bmi2) - { - size_t const cSize = ZSTD_entropyCompressSeqStore_internal( - seqStorePtr, prevEntropy, nextEntropy, cctxParams, -@@ -2615,15 +2921,21 @@ ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr, - /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. - * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. - */ -- if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) -+ if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) { -+ DEBUGLOG(4, "not enough dstCapacity (%zu) for ZSTD_entropyCompressSeqStore_internal()=> do not compress block", dstCapacity); - return 0; /* block not compressed */ -+ } - FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed"); - - /* Check compressibility */ - { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); - if (cSize >= maxCSize) return 0; /* block not compressed */ - } -- DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize); -+ DEBUGLOG(5, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize); -+ /* libzstd decoder before > v1.5.4 is not compatible with compressed blocks of size ZSTD_BLOCKSIZE_MAX exactly. -+ * This restriction is indirectly already fulfilled by respecting ZSTD_minGain() condition above. 
-+ */ -+ assert(cSize < ZSTD_BLOCKSIZE_MAX); - return cSize; - } - -@@ -2635,40 +2947,43 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramS - static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { - { ZSTD_compressBlock_fast /* default for 0 */, - ZSTD_compressBlock_fast, -- ZSTD_compressBlock_doubleFast, -- ZSTD_compressBlock_greedy, -- ZSTD_compressBlock_lazy, -- ZSTD_compressBlock_lazy2, -- ZSTD_compressBlock_btlazy2, -- ZSTD_compressBlock_btopt, -- ZSTD_compressBlock_btultra, -- ZSTD_compressBlock_btultra2 }, -+ ZSTD_COMPRESSBLOCK_DOUBLEFAST, -+ ZSTD_COMPRESSBLOCK_GREEDY, -+ ZSTD_COMPRESSBLOCK_LAZY, -+ ZSTD_COMPRESSBLOCK_LAZY2, -+ ZSTD_COMPRESSBLOCK_BTLAZY2, -+ ZSTD_COMPRESSBLOCK_BTOPT, -+ ZSTD_COMPRESSBLOCK_BTULTRA, -+ ZSTD_COMPRESSBLOCK_BTULTRA2 -+ }, - { ZSTD_compressBlock_fast_extDict /* default for 0 */, - ZSTD_compressBlock_fast_extDict, -- ZSTD_compressBlock_doubleFast_extDict, -- ZSTD_compressBlock_greedy_extDict, -- ZSTD_compressBlock_lazy_extDict, -- ZSTD_compressBlock_lazy2_extDict, -- ZSTD_compressBlock_btlazy2_extDict, -- ZSTD_compressBlock_btopt_extDict, -- ZSTD_compressBlock_btultra_extDict, -- ZSTD_compressBlock_btultra_extDict }, -+ ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT, -+ ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT, -+ ZSTD_COMPRESSBLOCK_LAZY_EXTDICT, -+ ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT, -+ ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT, -+ ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT, -+ ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT, -+ ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT -+ }, - { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, - ZSTD_compressBlock_fast_dictMatchState, -- ZSTD_compressBlock_doubleFast_dictMatchState, -- ZSTD_compressBlock_greedy_dictMatchState, -- ZSTD_compressBlock_lazy_dictMatchState, -- ZSTD_compressBlock_lazy2_dictMatchState, -- ZSTD_compressBlock_btlazy2_dictMatchState, -- ZSTD_compressBlock_btopt_dictMatchState, -- ZSTD_compressBlock_btultra_dictMatchState, -- 
ZSTD_compressBlock_btultra_dictMatchState }, -+ ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE, -+ ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE, -+ ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE, -+ ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE, -+ ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE, -+ ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE, -+ ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE, -+ ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE -+ }, - { NULL /* default for 0 */, - NULL, - NULL, -- ZSTD_compressBlock_greedy_dedicatedDictSearch, -- ZSTD_compressBlock_lazy_dedicatedDictSearch, -- ZSTD_compressBlock_lazy2_dedicatedDictSearch, -+ ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH, -+ ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH, -+ ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH, - NULL, - NULL, - NULL, -@@ -2681,18 +2996,26 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramS - DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder); - if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) { - static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = { -- { ZSTD_compressBlock_greedy_row, -- ZSTD_compressBlock_lazy_row, -- ZSTD_compressBlock_lazy2_row }, -- { ZSTD_compressBlock_greedy_extDict_row, -- ZSTD_compressBlock_lazy_extDict_row, -- ZSTD_compressBlock_lazy2_extDict_row }, -- { ZSTD_compressBlock_greedy_dictMatchState_row, -- ZSTD_compressBlock_lazy_dictMatchState_row, -- ZSTD_compressBlock_lazy2_dictMatchState_row }, -- { ZSTD_compressBlock_greedy_dedicatedDictSearch_row, -- ZSTD_compressBlock_lazy_dedicatedDictSearch_row, -- ZSTD_compressBlock_lazy2_dedicatedDictSearch_row } -+ { -+ ZSTD_COMPRESSBLOCK_GREEDY_ROW, -+ ZSTD_COMPRESSBLOCK_LAZY_ROW, -+ ZSTD_COMPRESSBLOCK_LAZY2_ROW -+ }, -+ { -+ ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW, -+ ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW, -+ ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW -+ }, -+ { -+ ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW, -+ 
ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW, -+ ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW -+ }, -+ { -+ ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW, -+ ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW, -+ ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW -+ } - }; - DEBUGLOG(4, "Selecting a row-based matchfinder"); - assert(useRowMatchFinder != ZSTD_ps_auto); -@@ -2718,6 +3041,72 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) - ssPtr->longLengthType = ZSTD_llt_none; - } - -+/* ZSTD_postProcessSequenceProducerResult() : -+ * Validates and post-processes sequences obtained through the external matchfinder API: -+ * - Checks whether nbExternalSeqs represents an error condition. -+ * - Appends a block delimiter to outSeqs if one is not already present. -+ * See zstd.h for context regarding block delimiters. -+ * Returns the number of sequences after post-processing, or an error code. */ -+static size_t ZSTD_postProcessSequenceProducerResult( -+ ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, size_t srcSize -+) { -+ RETURN_ERROR_IF( -+ nbExternalSeqs > outSeqsCapacity, -+ sequenceProducer_failed, -+ "External sequence producer returned error code %lu", -+ (unsigned long)nbExternalSeqs -+ ); -+ -+ RETURN_ERROR_IF( -+ nbExternalSeqs == 0 && srcSize > 0, -+ sequenceProducer_failed, -+ "Got zero sequences from external sequence producer for a non-empty src buffer!" -+ ); -+ -+ if (srcSize == 0) { -+ ZSTD_memset(&outSeqs[0], 0, sizeof(ZSTD_Sequence)); -+ return 1; -+ } -+ -+ { -+ ZSTD_Sequence const lastSeq = outSeqs[nbExternalSeqs - 1]; -+ -+ /* We can return early if lastSeq is already a block delimiter. */ -+ if (lastSeq.offset == 0 && lastSeq.matchLength == 0) { -+ return nbExternalSeqs; -+ } -+ -+ /* This error condition is only possible if the external matchfinder -+ * produced an invalid parse, by definition of ZSTD_sequenceBound(). 
*/ -+ RETURN_ERROR_IF( -+ nbExternalSeqs == outSeqsCapacity, -+ sequenceProducer_failed, -+ "nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!" -+ ); -+ -+ /* lastSeq is not a block delimiter, so we need to append one. */ -+ ZSTD_memset(&outSeqs[nbExternalSeqs], 0, sizeof(ZSTD_Sequence)); -+ return nbExternalSeqs + 1; -+ } -+} -+ -+/* ZSTD_fastSequenceLengthSum() : -+ * Returns sum(litLen) + sum(matchLen) + lastLits for *seqBuf*. -+ * Similar to another function in zstd_compress.c (determine_blockSize), -+ * except it doesn't check for a block delimiter to end summation. -+ * Removing the early exit allows the compiler to auto-vectorize (https://godbolt.org/z/cY1cajz9P). -+ * This function can be deleted and replaced by determine_blockSize after we resolve issue #3456. */ -+static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seqBufSize) { -+ size_t matchLenSum, litLenSum, i; -+ matchLenSum = 0; -+ litLenSum = 0; -+ for (i = 0; i < seqBufSize; i++) { -+ litLenSum += seqBuf[i].litLength; -+ matchLenSum += seqBuf[i].matchLength; -+ } -+ return litLenSum + matchLenSum; -+} -+ - typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; - - static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) -@@ -2727,7 +3116,9 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) - assert(srcSize <= ZSTD_BLOCKSIZE_MAX); - /* Assert that we have correctly flushed the ctx params into the ms's copy */ - ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); -- if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { -+ /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding -+ * additional 1. 
We need to revisit and change this logic to be more consistent */ -+ if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { - if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { - ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); - } else { -@@ -2763,6 +3154,15 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) - } - if (zc->externSeqStore.pos < zc->externSeqStore.size) { - assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable); -+ -+ /* External matchfinder + LDM is technically possible, just not implemented yet. -+ * We need to revisit soon and implement it. */ -+ RETURN_ERROR_IF( -+ ZSTD_hasExtSeqProd(&zc->appliedParams), -+ parameter_combination_unsupported, -+ "Long-distance matching with external sequence producer enabled is not currently supported." -+ ); -+ - /* Updates ldmSeqStore.pos */ - lastLLSize = - ZSTD_ldm_blockCompress(&zc->externSeqStore, -@@ -2774,6 +3174,14 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) - } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { - rawSeqStore_t ldmSeqStore = kNullRawSeqStore; - -+ /* External matchfinder + LDM is technically possible, just not implemented yet. -+ * We need to revisit soon and implement it. */ -+ RETURN_ERROR_IF( -+ ZSTD_hasExtSeqProd(&zc->appliedParams), -+ parameter_combination_unsupported, -+ "Long-distance matching with external sequence producer enabled is not currently supported." 
-+ ); -+ - ldmSeqStore.seq = zc->ldmSequences; - ldmSeqStore.capacity = zc->maxNbLdmSequences; - /* Updates ldmSeqStore.size */ -@@ -2788,10 +3196,74 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) - zc->appliedParams.useRowMatchFinder, - src, srcSize); - assert(ldmSeqStore.pos == ldmSeqStore.size); -- } else { /* not long range mode */ -- ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, -- zc->appliedParams.useRowMatchFinder, -- dictMode); -+ } else if (ZSTD_hasExtSeqProd(&zc->appliedParams)) { -+ assert( -+ zc->extSeqBufCapacity >= ZSTD_sequenceBound(srcSize) -+ ); -+ assert(zc->appliedParams.extSeqProdFunc != NULL); -+ -+ { U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog; -+ -+ size_t const nbExternalSeqs = (zc->appliedParams.extSeqProdFunc)( -+ zc->appliedParams.extSeqProdState, -+ zc->extSeqBuf, -+ zc->extSeqBufCapacity, -+ src, srcSize, -+ NULL, 0, /* dict and dictSize, currently not supported */ -+ zc->appliedParams.compressionLevel, -+ windowSize -+ ); -+ -+ size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult( -+ zc->extSeqBuf, -+ nbExternalSeqs, -+ zc->extSeqBufCapacity, -+ srcSize -+ ); -+ -+ /* Return early if there is no error, since we don't need to worry about last literals */ -+ if (!ZSTD_isError(nbPostProcessedSeqs)) { -+ ZSTD_sequencePosition seqPos = {0,0,0}; -+ size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->extSeqBuf, nbPostProcessedSeqs); -+ RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!"); -+ FORWARD_IF_ERROR( -+ ZSTD_copySequencesToSeqStoreExplicitBlockDelim( -+ zc, &seqPos, -+ zc->extSeqBuf, nbPostProcessedSeqs, -+ src, srcSize, -+ zc->appliedParams.searchForExternalRepcodes -+ ), -+ "Failed to copy external sequences to seqStore!" 
-+ ); -+ ms->ldmSeqStore = NULL; -+ DEBUGLOG(5, "Copied %lu sequences from external sequence producer to internal seqStore.", (unsigned long)nbExternalSeqs); -+ return ZSTDbss_compress; -+ } -+ -+ /* Propagate the error if fallback is disabled */ -+ if (!zc->appliedParams.enableMatchFinderFallback) { -+ return nbPostProcessedSeqs; -+ } -+ -+ /* Fallback to software matchfinder */ -+ { ZSTD_blockCompressor const blockCompressor = -+ ZSTD_selectBlockCompressor( -+ zc->appliedParams.cParams.strategy, -+ zc->appliedParams.useRowMatchFinder, -+ dictMode); -+ ms->ldmSeqStore = NULL; -+ DEBUGLOG( -+ 5, -+ "External sequence producer returned error code %lu. Falling back to internal parser.", -+ (unsigned long)nbExternalSeqs -+ ); -+ lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); -+ } } -+ } else { /* not long range mode and no external matchfinder */ -+ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor( -+ zc->appliedParams.cParams.strategy, -+ zc->appliedParams.useRowMatchFinder, -+ dictMode); - ms->ldmSeqStore = NULL; - lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); - } -@@ -2801,29 +3273,38 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) - return ZSTDbss_compress; - } - --static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) -+static size_t ZSTD_copyBlockSequences(SeqCollector* seqCollector, const seqStore_t* seqStore, const U32 prevRepcodes[ZSTD_REP_NUM]) - { -- const seqStore_t* seqStore = ZSTD_getSeqStore(zc); -- const seqDef* seqStoreSeqs = seqStore->sequencesStart; -- size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs; -- size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart); -- size_t literalsRead = 0; -- size_t lastLLSize; -+ const seqDef* inSeqs = seqStore->sequencesStart; -+ const size_t nbInSequences = seqStore->sequences - inSeqs; -+ const size_t nbInLiterals = 
(size_t)(seqStore->lit - seqStore->litStart); - -- ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; -+ ZSTD_Sequence* outSeqs = seqCollector->seqIndex == 0 ? seqCollector->seqStart : seqCollector->seqStart + seqCollector->seqIndex; -+ const size_t nbOutSequences = nbInSequences + 1; -+ size_t nbOutLiterals = 0; -+ repcodes_t repcodes; - size_t i; -- repcodes_t updatedRepcodes; - -- assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); -- /* Ensure we have enough space for last literals "sequence" */ -- assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1); -- ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); -- for (i = 0; i < seqStoreSeqSize; ++i) { -- U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM; -- outSeqs[i].litLength = seqStoreSeqs[i].litLength; -- outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH; -+ /* Bounds check that we have enough space for every input sequence -+ * and the block delimiter -+ */ -+ assert(seqCollector->seqIndex <= seqCollector->maxSequences); -+ RETURN_ERROR_IF( -+ nbOutSequences > (size_t)(seqCollector->maxSequences - seqCollector->seqIndex), -+ dstSize_tooSmall, -+ "Not enough space to copy sequences"); -+ -+ ZSTD_memcpy(&repcodes, prevRepcodes, sizeof(repcodes)); -+ for (i = 0; i < nbInSequences; ++i) { -+ U32 rawOffset; -+ outSeqs[i].litLength = inSeqs[i].litLength; -+ outSeqs[i].matchLength = inSeqs[i].mlBase + MINMATCH; - outSeqs[i].rep = 0; - -+ /* Handle the possible single length >= 64K -+ * There can only be one because we add MINMATCH to every match length, -+ * and blocks are at most 128K. 
-+ */ - if (i == seqStore->longLengthPos) { - if (seqStore->longLengthType == ZSTD_llt_literalLength) { - outSeqs[i].litLength += 0x10000; -@@ -2832,37 +3313,55 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) - } - } - -- if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) { -- /* Derive the correct offset corresponding to a repcode */ -- outSeqs[i].rep = seqStoreSeqs[i].offBase; -+ /* Determine the raw offset given the offBase, which may be a repcode. */ -+ if (OFFBASE_IS_REPCODE(inSeqs[i].offBase)) { -+ const U32 repcode = OFFBASE_TO_REPCODE(inSeqs[i].offBase); -+ assert(repcode > 0); -+ outSeqs[i].rep = repcode; - if (outSeqs[i].litLength != 0) { -- rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1]; -+ rawOffset = repcodes.rep[repcode - 1]; - } else { -- if (outSeqs[i].rep == 3) { -- rawOffset = updatedRepcodes.rep[0] - 1; -+ if (repcode == 3) { -+ assert(repcodes.rep[0] > 1); -+ rawOffset = repcodes.rep[0] - 1; - } else { -- rawOffset = updatedRepcodes.rep[outSeqs[i].rep]; -+ rawOffset = repcodes.rep[repcode]; - } - } -+ } else { -+ rawOffset = OFFBASE_TO_OFFSET(inSeqs[i].offBase); - } - outSeqs[i].offset = rawOffset; -- /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode -- so we provide seqStoreSeqs[i].offset - 1 */ -- ZSTD_updateRep(updatedRepcodes.rep, -- seqStoreSeqs[i].offBase - 1, -- seqStoreSeqs[i].litLength == 0); -- literalsRead += outSeqs[i].litLength; -+ -+ /* Update repcode history for the sequence */ -+ ZSTD_updateRep(repcodes.rep, -+ inSeqs[i].offBase, -+ inSeqs[i].litLength == 0); -+ -+ nbOutLiterals += outSeqs[i].litLength; - } - /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0. - * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker - * for the block boundary, according to the API. 
- */ -- assert(seqStoreLiteralsSize >= literalsRead); -- lastLLSize = seqStoreLiteralsSize - literalsRead; -- outSeqs[i].litLength = (U32)lastLLSize; -- outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0; -- seqStoreSeqSize++; -- zc->seqCollector.seqIndex += seqStoreSeqSize; -+ assert(nbInLiterals >= nbOutLiterals); -+ { -+ const size_t lastLLSize = nbInLiterals - nbOutLiterals; -+ outSeqs[nbInSequences].litLength = (U32)lastLLSize; -+ outSeqs[nbInSequences].matchLength = 0; -+ outSeqs[nbInSequences].offset = 0; -+ assert(nbOutSequences == nbInSequences + 1); -+ } -+ seqCollector->seqIndex += nbOutSequences; -+ assert(seqCollector->seqIndex <= seqCollector->maxSequences); -+ -+ return 0; -+} -+ -+size_t ZSTD_sequenceBound(size_t srcSize) { -+ const size_t maxNbSeq = (srcSize / ZSTD_MINMATCH_MIN) + 1; -+ const size_t maxNbDelims = (srcSize / ZSTD_BLOCKSIZE_MAX_MIN) + 1; -+ return maxNbSeq + maxNbDelims; - } - - size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, -@@ -2871,6 +3370,16 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, - const size_t dstCapacity = ZSTD_compressBound(srcSize); - void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem); - SeqCollector seqCollector; -+ { -+ int targetCBlockSize; -+ FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_targetCBlockSize, &targetCBlockSize), ""); -+ RETURN_ERROR_IF(targetCBlockSize != 0, parameter_unsupported, "targetCBlockSize != 0"); -+ } -+ { -+ int nbWorkers; -+ FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_nbWorkers, &nbWorkers), ""); -+ RETURN_ERROR_IF(nbWorkers != 0, parameter_unsupported, "nbWorkers != 0"); -+ } - - RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!"); - -@@ -2880,8 +3389,12 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, - seqCollector.maxSequences = outSeqsSize; - zc->seqCollector = seqCollector; - -- ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); -- ZSTD_customFree(dst, 
ZSTD_defaultCMem); -+ { -+ const size_t ret = ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); -+ ZSTD_customFree(dst, ZSTD_defaultCMem); -+ FORWARD_IF_ERROR(ret, "ZSTD_compress2 failed"); -+ } -+ assert(zc->seqCollector.seqIndex <= ZSTD_sequenceBound(srcSize)); - return zc->seqCollector.seqIndex; - } - -@@ -2910,19 +3423,17 @@ static int ZSTD_isRLE(const BYTE* src, size_t length) { - const size_t unrollMask = unrollSize - 1; - const size_t prefixLength = length & unrollMask; - size_t i; -- size_t u; - if (length == 1) return 1; - /* Check if prefix is RLE first before using unrolled loop */ - if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) { - return 0; - } - for (i = prefixLength; i != length; i += unrollSize) { -+ size_t u; - for (u = 0; u < unrollSize; u += sizeof(size_t)) { - if (MEM_readST(ip + i + u) != valueST) { - return 0; -- } -- } -- } -+ } } } - return 1; - } - -@@ -2938,7 +3449,8 @@ static int ZSTD_maybeRLE(seqStore_t const* seqStore) - return nbSeqs < 4 && nbLits < 10; - } - --static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs) -+static void -+ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs) - { - ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock; - bs->prevCBlock = bs->nextCBlock; -@@ -2946,7 +3458,9 @@ static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* c - } - - /* Writes the block header */ --static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) { -+static void -+writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) -+{ - U32 const cBlockHeader = cSize == 1 ? 
- lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : - lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); -@@ -2959,13 +3473,16 @@ static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastB - * Stores literals block type (raw, rle, compressed, repeat) and - * huffman description table to hufMetadata. - * Requires ENTROPY_WORKSPACE_SIZE workspace -- * @return : size of huffman description table or error code */ --static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, -- const ZSTD_hufCTables_t* prevHuf, -- ZSTD_hufCTables_t* nextHuf, -- ZSTD_hufCTablesMetadata_t* hufMetadata, -- const int literalsCompressionIsDisabled, -- void* workspace, size_t wkspSize) -+ * @return : size of huffman description table, or an error code -+ */ -+static size_t -+ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, -+ const ZSTD_hufCTables_t* prevHuf, -+ ZSTD_hufCTables_t* nextHuf, -+ ZSTD_hufCTablesMetadata_t* hufMetadata, -+ const int literalsCompressionIsDisabled, -+ void* workspace, size_t wkspSize, -+ int hufFlags) - { - BYTE* const wkspStart = (BYTE*)workspace; - BYTE* const wkspEnd = wkspStart + wkspSize; -@@ -2973,9 +3490,9 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi - unsigned* const countWksp = (unsigned*)workspace; - const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); - BYTE* const nodeWksp = countWkspStart + countWkspSize; -- const size_t nodeWkspSize = wkspEnd-nodeWksp; -+ const size_t nodeWkspSize = (size_t)(wkspEnd - nodeWksp); - unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; -- unsigned huffLog = HUF_TABLELOG_DEFAULT; -+ unsigned huffLog = LitHufLog; - HUF_repeat repeat = prevHuf->repeatMode; - DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize); - -@@ -2990,73 +3507,77 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi - - /* small ? 
don't even attempt compression (speed opt) */ - #ifndef COMPRESS_LITERALS_SIZE_MIN --#define COMPRESS_LITERALS_SIZE_MIN 63 -+# define COMPRESS_LITERALS_SIZE_MIN 63 /* heuristic */ - #endif - { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; - if (srcSize <= minLitSize) { - DEBUGLOG(5, "set_basic - too small"); - hufMetadata->hType = set_basic; - return 0; -- } -- } -+ } } - - /* Scan input and build symbol stats */ -- { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); -+ { size_t const largest = -+ HIST_count_wksp (countWksp, &maxSymbolValue, -+ (const BYTE*)src, srcSize, -+ workspace, wkspSize); - FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); - if (largest == srcSize) { -+ /* only one literal symbol */ - DEBUGLOG(5, "set_rle"); - hufMetadata->hType = set_rle; - return 0; - } - if (largest <= (srcSize >> 7)+4) { -+ /* heuristic: likely not compressible */ - DEBUGLOG(5, "set_basic - no gain"); - hufMetadata->hType = set_basic; - return 0; -- } -- } -+ } } - - /* Validate the previous Huffman table */ -- if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { -+ if (repeat == HUF_repeat_check -+ && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { - repeat = HUF_repeat_none; - } - - /* Build Huffman Tree */ - ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); -- huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); -+ huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, nodeWksp, nodeWkspSize, nextHuf->CTable, countWksp, hufFlags); -+ assert(huffLog <= LitHufLog); - { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, - maxSymbolValue, huffLog, - nodeWksp, nodeWkspSize); - FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); - huffLog = (U32)maxBits; -- { /* Build and write the CTable */ 
-- size_t const newCSize = HUF_estimateCompressedSize( -- (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); -- size_t const hSize = HUF_writeCTable_wksp( -- hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), -- (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, -- nodeWksp, nodeWkspSize); -- /* Check against repeating the previous CTable */ -- if (repeat != HUF_repeat_none) { -- size_t const oldCSize = HUF_estimateCompressedSize( -- (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); -- if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { -- DEBUGLOG(5, "set_repeat - smaller"); -- ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); -- hufMetadata->hType = set_repeat; -- return 0; -- } -- } -- if (newCSize + hSize >= srcSize) { -- DEBUGLOG(5, "set_basic - no gains"); -+ } -+ { /* Build and write the CTable */ -+ size_t const newCSize = HUF_estimateCompressedSize( -+ (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); -+ size_t const hSize = HUF_writeCTable_wksp( -+ hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), -+ (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, -+ nodeWksp, nodeWkspSize); -+ /* Check against repeating the previous CTable */ -+ if (repeat != HUF_repeat_none) { -+ size_t const oldCSize = HUF_estimateCompressedSize( -+ (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); -+ if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { -+ DEBUGLOG(5, "set_repeat - smaller"); - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); -- hufMetadata->hType = set_basic; -+ hufMetadata->hType = set_repeat; - return 0; -- } -- DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); -- hufMetadata->hType = set_compressed; -- nextHuf->repeatMode = HUF_repeat_check; -- return hSize; -+ } } -+ if (newCSize + hSize >= srcSize) { -+ DEBUGLOG(5, "set_basic - no gains"); -+ ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); -+ hufMetadata->hType = set_basic; -+ 
return 0; - } -+ DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); -+ hufMetadata->hType = set_compressed; -+ nextHuf->repeatMode = HUF_repeat_check; -+ return hSize; - } - } - -@@ -3066,8 +3587,9 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi - * and updates nextEntropy to the appropriate repeatMode. - */ - static ZSTD_symbolEncodingTypeStats_t --ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) { -- ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0}; -+ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) -+{ -+ ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0, 0}; - nextEntropy->litlength_repeatMode = FSE_repeat_none; - nextEntropy->offcode_repeatMode = FSE_repeat_none; - nextEntropy->matchlength_repeatMode = FSE_repeat_none; -@@ -3078,16 +3600,18 @@ ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) { - * Builds entropy for the sequences. - * Stores symbol compression modes and fse table to fseMetadata. - * Requires ENTROPY_WORKSPACE_SIZE wksp. 
-- * @return : size of fse tables or error code */ --static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, -- const ZSTD_fseCTables_t* prevEntropy, -- ZSTD_fseCTables_t* nextEntropy, -- const ZSTD_CCtx_params* cctxParams, -- ZSTD_fseCTablesMetadata_t* fseMetadata, -- void* workspace, size_t wkspSize) -+ * @return : size of fse tables or error code */ -+static size_t -+ZSTD_buildBlockEntropyStats_sequences( -+ const seqStore_t* seqStorePtr, -+ const ZSTD_fseCTables_t* prevEntropy, -+ ZSTD_fseCTables_t* nextEntropy, -+ const ZSTD_CCtx_params* cctxParams, -+ ZSTD_fseCTablesMetadata_t* fseMetadata, -+ void* workspace, size_t wkspSize) - { - ZSTD_strategy const strategy = cctxParams->cParams.strategy; -- size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; -+ size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - BYTE* const ostart = fseMetadata->fseTablesBuffer; - BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); - BYTE* op = ostart; -@@ -3114,23 +3638,28 @@ static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, - /* ZSTD_buildBlockEntropyStats() : - * Builds entropy for the block. 
- * Requires workspace size ENTROPY_WORKSPACE_SIZE -- * -- * @return : 0 on success or error code -+ * @return : 0 on success, or an error code -+ * Note : also employed in superblock - */ --size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, -- const ZSTD_entropyCTables_t* prevEntropy, -- ZSTD_entropyCTables_t* nextEntropy, -- const ZSTD_CCtx_params* cctxParams, -- ZSTD_entropyCTablesMetadata_t* entropyMetadata, -- void* workspace, size_t wkspSize) --{ -- size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; -+size_t ZSTD_buildBlockEntropyStats( -+ const seqStore_t* seqStorePtr, -+ const ZSTD_entropyCTables_t* prevEntropy, -+ ZSTD_entropyCTables_t* nextEntropy, -+ const ZSTD_CCtx_params* cctxParams, -+ ZSTD_entropyCTablesMetadata_t* entropyMetadata, -+ void* workspace, size_t wkspSize) -+{ -+ size_t const litSize = (size_t)(seqStorePtr->lit - seqStorePtr->litStart); -+ int const huf_useOptDepth = (cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD); -+ int const hufFlags = huf_useOptDepth ? 
HUF_flags_optimalDepth : 0; -+ - entropyMetadata->hufMetadata.hufDesSize = - ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize, - &prevEntropy->huf, &nextEntropy->huf, - &entropyMetadata->hufMetadata, - ZSTD_literalsCompressionIsDisabled(cctxParams), -- workspace, wkspSize); -+ workspace, wkspSize, hufFlags); -+ - FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed"); - entropyMetadata->fseMetadata.fseTablesSize = - ZSTD_buildBlockEntropyStats_sequences(seqStorePtr, -@@ -3143,11 +3672,12 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, - } - - /* Returns the size estimate for the literals section (header + content) of a block */ --static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize, -- const ZSTD_hufCTables_t* huf, -- const ZSTD_hufCTablesMetadata_t* hufMetadata, -- void* workspace, size_t wkspSize, -- int writeEntropy) -+static size_t -+ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize, -+ const ZSTD_hufCTables_t* huf, -+ const ZSTD_hufCTablesMetadata_t* hufMetadata, -+ void* workspace, size_t wkspSize, -+ int writeEntropy) - { - unsigned* const countWksp = (unsigned*)workspace; - unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; -@@ -3169,12 +3699,13 @@ static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSiz - } - - /* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */ --static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type, -- const BYTE* codeTable, size_t nbSeq, unsigned maxCode, -- const FSE_CTable* fseCTable, -- const U8* additionalBits, -- short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, -- void* workspace, size_t wkspSize) -+static size_t -+ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type, -+ const BYTE* codeTable, size_t nbSeq, unsigned maxCode, -+ const FSE_CTable* fseCTable, -+ const U8* additionalBits, -+ short const* 
defaultNorm, U32 defaultNormLog, U32 defaultMax, -+ void* workspace, size_t wkspSize) - { - unsigned* const countWksp = (unsigned*)workspace; - const BYTE* ctp = codeTable; -@@ -3206,99 +3737,107 @@ static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type, - } - - /* Returns the size estimate for the sequences section (header + content) of a block */ --static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable, -- const BYTE* llCodeTable, -- const BYTE* mlCodeTable, -- size_t nbSeq, -- const ZSTD_fseCTables_t* fseTables, -- const ZSTD_fseCTablesMetadata_t* fseMetadata, -- void* workspace, size_t wkspSize, -- int writeEntropy) -+static size_t -+ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable, -+ const BYTE* llCodeTable, -+ const BYTE* mlCodeTable, -+ size_t nbSeq, -+ const ZSTD_fseCTables_t* fseTables, -+ const ZSTD_fseCTablesMetadata_t* fseMetadata, -+ void* workspace, size_t wkspSize, -+ int writeEntropy) - { - size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ); - size_t cSeqSizeEstimate = 0; - cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff, -- fseTables->offcodeCTable, NULL, -- OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, -- workspace, wkspSize); -+ fseTables->offcodeCTable, NULL, -+ OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, -+ workspace, wkspSize); - cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL, -- fseTables->litlengthCTable, LL_bits, -- LL_defaultNorm, LL_defaultNormLog, MaxLL, -- workspace, wkspSize); -+ fseTables->litlengthCTable, LL_bits, -+ LL_defaultNorm, LL_defaultNormLog, MaxLL, -+ workspace, wkspSize); - cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML, -- fseTables->matchlengthCTable, ML_bits, -- ML_defaultNorm, ML_defaultNormLog, MaxML, -- workspace, wkspSize); -+ 
fseTables->matchlengthCTable, ML_bits, -+ ML_defaultNorm, ML_defaultNormLog, MaxML, -+ workspace, wkspSize); - if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; - return cSeqSizeEstimate + sequencesSectionHeaderSize; - } - - /* Returns the size estimate for a given stream of literals, of, ll, ml */ --static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize, -- const BYTE* ofCodeTable, -- const BYTE* llCodeTable, -- const BYTE* mlCodeTable, -- size_t nbSeq, -- const ZSTD_entropyCTables_t* entropy, -- const ZSTD_entropyCTablesMetadata_t* entropyMetadata, -- void* workspace, size_t wkspSize, -- int writeLitEntropy, int writeSeqEntropy) { -+static size_t -+ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize, -+ const BYTE* ofCodeTable, -+ const BYTE* llCodeTable, -+ const BYTE* mlCodeTable, -+ size_t nbSeq, -+ const ZSTD_entropyCTables_t* entropy, -+ const ZSTD_entropyCTablesMetadata_t* entropyMetadata, -+ void* workspace, size_t wkspSize, -+ int writeLitEntropy, int writeSeqEntropy) -+{ - size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize, -- &entropy->huf, &entropyMetadata->hufMetadata, -- workspace, wkspSize, writeLitEntropy); -+ &entropy->huf, &entropyMetadata->hufMetadata, -+ workspace, wkspSize, writeLitEntropy); - size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, -- nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, -- workspace, wkspSize, writeSeqEntropy); -+ nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, -+ workspace, wkspSize, writeSeqEntropy); - return seqSize + literalsSize + ZSTD_blockHeaderSize; - } - - /* Builds entropy statistics and uses them for blocksize estimation. - * -- * Returns the estimated compressed size of the seqStore, or a zstd error. -+ * @return: estimated compressed size of the seqStore, or a zstd error. 
- */ --static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) { -- ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata; -+static size_t -+ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) -+{ -+ ZSTD_entropyCTablesMetadata_t* const entropyMetadata = &zc->blockSplitCtx.entropyMetadata; - DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()"); - FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore, - &zc->blockState.prevCBlock->entropy, - &zc->blockState.nextCBlock->entropy, - &zc->appliedParams, - entropyMetadata, -- zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); -- return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart), -+ zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE), ""); -+ return ZSTD_estimateBlockSize( -+ seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart), - seqStore->ofCode, seqStore->llCode, seqStore->mlCode, - (size_t)(seqStore->sequences - seqStore->sequencesStart), -- &zc->blockState.nextCBlock->entropy, entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, -+ &zc->blockState.nextCBlock->entropy, -+ entropyMetadata, -+ zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, - (int)(entropyMetadata->hufMetadata.hType == set_compressed), 1); - } - - /* Returns literals bytes represented in a seqStore */ --static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) { -+static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) -+{ - size_t literalsBytes = 0; -- size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; -+ size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); - size_t i; - for (i = 0; i < nbSeqs; ++i) { -- seqDef seq = seqStore->sequencesStart[i]; -+ seqDef const seq = seqStore->sequencesStart[i]; - literalsBytes += seq.litLength; - if (i == 
seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) { - literalsBytes += 0x10000; -- } -- } -+ } } - return literalsBytes; - } - - /* Returns match bytes represented in a seqStore */ --static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) { -+static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) -+{ - size_t matchBytes = 0; -- size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; -+ size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); - size_t i; - for (i = 0; i < nbSeqs; ++i) { - seqDef seq = seqStore->sequencesStart[i]; - matchBytes += seq.mlBase + MINMATCH; - if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) { - matchBytes += 0x10000; -- } -- } -+ } } - return matchBytes; - } - -@@ -3307,15 +3846,12 @@ static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) { - */ - static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, - const seqStore_t* originalSeqStore, -- size_t startIdx, size_t endIdx) { -- BYTE* const litEnd = originalSeqStore->lit; -- size_t literalsBytes; -- size_t literalsBytesPreceding = 0; -- -+ size_t startIdx, size_t endIdx) -+{ - *resultSeqStore = *originalSeqStore; - if (startIdx > 0) { - resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx; -- literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); -+ resultSeqStore->litStart += ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); - } - - /* Move longLengthPos into the correct position if necessary */ -@@ -3328,13 +3864,12 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, - } - resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx; - resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx; -- literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); -- resultSeqStore->litStart += literalsBytesPreceding; - if (endIdx == 
(size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) { - /* This accounts for possible last literals if the derived chunk reaches the end of the block */ -- resultSeqStore->lit = litEnd; -+ assert(resultSeqStore->lit == originalSeqStore->lit); - } else { -- resultSeqStore->lit = resultSeqStore->litStart+literalsBytes; -+ size_t const literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); -+ resultSeqStore->lit = resultSeqStore->litStart + literalsBytes; - } - resultSeqStore->llCode += startIdx; - resultSeqStore->mlCode += startIdx; -@@ -3342,20 +3877,26 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, - } - - /* -- * Returns the raw offset represented by the combination of offCode, ll0, and repcode history. -- * offCode must represent a repcode in the numeric representation of ZSTD_storeSeq(). -+ * Returns the raw offset represented by the combination of offBase, ll0, and repcode history. -+ * offBase must represent a repcode in the numeric representation of ZSTD_storeSeq(). - */ - static U32 --ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0) --{ -- U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0; /* [ 0 - 3 ] */ -- assert(STORED_IS_REPCODE(offCode)); -- if (adjustedOffCode == ZSTD_REP_NUM) { -- /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */ -- assert(rep[0] > 0); -+ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offBase, const U32 ll0) -+{ -+ U32 const adjustedRepCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0; /* [ 0 - 3 ] */ -+ assert(OFFBASE_IS_REPCODE(offBase)); -+ if (adjustedRepCode == ZSTD_REP_NUM) { -+ assert(ll0); -+ /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 -+ * This is only valid if it results in a valid offset value, aka > 0. -+ * Note : it may happen that `rep[0]==1` in exceptional circumstances. 
-+ * In which case this function will return 0, which is an invalid offset. -+ * It's not an issue though, since this value will be -+ * compared and discarded within ZSTD_seqStore_resolveOffCodes(). -+ */ - return rep[0] - 1; - } -- return rep[adjustedOffCode]; -+ return rep[adjustedRepCode]; - } - - /* -@@ -3371,30 +3912,33 @@ ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, c - * 1-3 : repcode 1-3 - * 4+ : real_offset+3 - */ --static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes, -- seqStore_t* const seqStore, U32 const nbSeq) { -+static void -+ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes, -+ const seqStore_t* const seqStore, U32 const nbSeq) -+{ - U32 idx = 0; -+ U32 const longLitLenIdx = seqStore->longLengthType == ZSTD_llt_literalLength ? seqStore->longLengthPos : nbSeq; - for (; idx < nbSeq; ++idx) { - seqDef* const seq = seqStore->sequencesStart + idx; -- U32 const ll0 = (seq->litLength == 0); -- U32 const offCode = OFFBASE_TO_STORED(seq->offBase); -- assert(seq->offBase > 0); -- if (STORED_IS_REPCODE(offCode)) { -- U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0); -- U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0); -+ U32 const ll0 = (seq->litLength == 0) && (idx != longLitLenIdx); -+ U32 const offBase = seq->offBase; -+ assert(offBase > 0); -+ if (OFFBASE_IS_REPCODE(offBase)) { -+ U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offBase, ll0); -+ U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offBase, ll0); - /* Adjust simulated decompression repcode history if we come across a mismatch. Replace - * the repcode with the offset it actually references, determined by the compression - * repcode history. 
- */ - if (dRawOffset != cRawOffset) { -- seq->offBase = cRawOffset + ZSTD_REP_NUM; -+ seq->offBase = OFFSET_TO_OFFBASE(cRawOffset); - } - } - /* Compression repcode history is always updated with values directly from the unmodified seqStore. - * Decompression repcode history may use modified seq->offset value taken from compression repcode history. - */ -- ZSTD_updateRep(dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0); -- ZSTD_updateRep(cRepcodes->rep, offCode, ll0); -+ ZSTD_updateRep(dRepcodes->rep, seq->offBase, ll0); -+ ZSTD_updateRep(cRepcodes->rep, offBase, ll0); - } - } - -@@ -3404,10 +3948,11 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_ - * Returns the total size of that block (including header) or a ZSTD error code. - */ - static size_t --ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore, -+ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, -+ const seqStore_t* const seqStore, - repcodes_t* const dRep, repcodes_t* const cRep, - void* dst, size_t dstCapacity, -- const void* src, size_t srcSize, -+ const void* src, size_t srcSize, - U32 lastBlock, U32 isPartition) - { - const U32 rleMaxLength = 25; -@@ -3442,8 +3987,9 @@ ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore, - cSeqsSize = 1; - } - -+ /* Sequence collection not supported when block splitting */ - if (zc->seqCollector.collectSequences) { -- ZSTD_copyBlockSequences(zc); -+ FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, seqStore, dRepOriginal.rep), "copyBlockSequences failed"); - ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); - return 0; - } -@@ -3481,45 +4027,49 @@ typedef struct { - - /* Helper function to perform the recursive search for block splits. - * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half. -- * If advantageous to split, then we recurse down the two sub-blocks. 
If not, or if an error occurred in estimation, then -- * we do not recurse. -+ * If advantageous to split, then we recurse down the two sub-blocks. -+ * If not, or if an error occurred in estimation, then we do not recurse. - * -- * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING. -+ * Note: The recursion depth is capped by a heuristic minimum number of sequences, -+ * defined by MIN_SEQUENCES_BLOCK_SPLITTING. - * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING). - * In practice, recursion depth usually doesn't go beyond 4. - * -- * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize -+ * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. -+ * At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize - * maximum of 128 KB, this value is actually impossible to reach. 
- */ - static void - ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx, - ZSTD_CCtx* zc, const seqStore_t* origSeqStore) - { -- seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk; -- seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore; -- seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore; -+ seqStore_t* const fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk; -+ seqStore_t* const firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore; -+ seqStore_t* const secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore; - size_t estimatedOriginalSize; - size_t estimatedFirstHalfSize; - size_t estimatedSecondHalfSize; - size_t midIdx = (startIdx + endIdx)/2; - -+ DEBUGLOG(5, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx); -+ assert(endIdx >= startIdx); - if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) { -- DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences"); -+ DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences (%zu)", endIdx - startIdx); - return; - } -- DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx); - ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx); - ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx); - ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx); - estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc); - estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc); - estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc); -- DEBUGLOG(4, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu", -+ DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu", 
- estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize); - if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) { - return; - } - if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) { -+ DEBUGLOG(5, "split decided at seqNb:%zu", midIdx); - ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore); - splits->splitLocations[splits->idx] = (U32)midIdx; - splits->idx++; -@@ -3527,14 +4077,18 @@ ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t end - } - } - --/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio. -+/* Base recursive function. -+ * Populates a table with intra-block partition indices that can improve compression ratio. - * -- * Returns the number of splits made (which equals the size of the partition table - 1). -+ * @return: number of splits made (which equals the size of the partition table - 1). - */ --static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) { -- seqStoreSplits splits = {partitions, 0}; -+static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) -+{ -+ seqStoreSplits splits; -+ splits.splitLocations = partitions; -+ splits.idx = 0; - if (nbSeq <= 4) { -- DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split"); -+ DEBUGLOG(5, "ZSTD_deriveBlockSplits: Too few sequences to split (%u <= 4)", nbSeq); - /* Refuse to try and split anything with less than 4 sequences */ - return 0; - } -@@ -3550,18 +4104,20 @@ static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) - * Returns combined size of all blocks (which includes headers), or a ZSTD error code. 
- */ - static size_t --ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, -- const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) -+ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, -+ void* dst, size_t dstCapacity, -+ const void* src, size_t blockSize, -+ U32 lastBlock, U32 nbSeq) - { - size_t cSize = 0; - const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)dst; - size_t i = 0; - size_t srcBytesTotal = 0; -- U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */ -- seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore; -- seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore; -- size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq); -+ U32* const partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */ -+ seqStore_t* const nextSeqStore = &zc->blockSplitCtx.nextSeqStore; -+ seqStore_t* const currSeqStore = &zc->blockSplitCtx.currSeqStore; -+ size_t const numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq); - - /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history - * may become invalid. 
In order to reconcile potentially invalid repcodes, we keep track of two -@@ -3583,30 +4139,31 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapac - ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); - ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t)); - -- DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", -+ DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", - (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, - (unsigned)zc->blockState.matchState.nextToUpdate); - - if (numSplits == 0) { -- size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore, -- &dRep, &cRep, -- op, dstCapacity, -- ip, blockSize, -- lastBlock, 0 /* isPartition */); -+ size_t cSizeSingleBlock = -+ ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore, -+ &dRep, &cRep, -+ op, dstCapacity, -+ ip, blockSize, -+ lastBlock, 0 /* isPartition */); - FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!"); - DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits"); -- assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); -+ assert(zc->blockSize <= ZSTD_BLOCKSIZE_MAX); -+ assert(cSizeSingleBlock <= zc->blockSize + ZSTD_blockHeaderSize); - return cSizeSingleBlock; - } - - ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]); - for (i = 0; i <= numSplits; ++i) { -- size_t srcBytes; - size_t cSizeChunk; - U32 const lastPartition = (i == numSplits); - U32 lastBlockEntireSrc = 0; - -- srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore); -+ size_t srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore); - srcBytesTotal += srcBytes; - if (lastPartition) { - /* This is the final partition, need to account for possible last literals 
*/ -@@ -3621,7 +4178,8 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapac - op, dstCapacity, - ip, srcBytes, - lastBlockEntireSrc, 1 /* isPartition */); -- DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk); -+ DEBUGLOG(5, "Estimated size: %zu vs %zu : actual size", -+ ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk); - FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!"); - - ip += srcBytes; -@@ -3629,10 +4187,10 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapac - dstCapacity -= cSizeChunk; - cSize += cSizeChunk; - *currSeqStore = *nextSeqStore; -- assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); -+ assert(cSizeChunk <= zc->blockSize + ZSTD_blockHeaderSize); - } -- /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes -- * for the next block. -+ /* cRep and dRep may have diverged during the compression. -+ * If so, we use the dRep repcodes for the next block. 
- */ - ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t)); - return cSize; -@@ -3643,8 +4201,6 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, U32 lastBlock) - { -- const BYTE* ip = (const BYTE*)src; -- BYTE* op = (BYTE*)dst; - U32 nbSeq; - size_t cSize; - DEBUGLOG(4, "ZSTD_compressBlock_splitBlock"); -@@ -3655,7 +4211,8 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, - if (bss == ZSTDbss_noCompress) { - if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) - zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; -- cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); -+ RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block"); -+ cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); - FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); - DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block"); - return cSize; -@@ -3673,9 +4230,9 @@ ZSTD_compressBlock_internal(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, U32 frame) - { -- /* This the upper bound for the length of an rle block. -- * This isn't the actual upper bound. Finding the real threshold -- * needs further investigation. -+ /* This is an estimated upper bound for the length of an rle block. -+ * This isn't the actual upper bound. -+ * Finding the real threshold needs further investigation. 
- */ - const U32 rleMaxLength = 25; - size_t cSize; -@@ -3687,11 +4244,15 @@ ZSTD_compressBlock_internal(ZSTD_CCtx* zc, - - { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); - FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); -- if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } -+ if (bss == ZSTDbss_noCompress) { -+ RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block"); -+ cSize = 0; -+ goto out; -+ } - } - - if (zc->seqCollector.collectSequences) { -- ZSTD_copyBlockSequences(zc); -+ FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, ZSTD_getSeqStore(zc), zc->blockState.prevCBlock->rep), "copyBlockSequences failed"); - ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); - return 0; - } -@@ -3767,10 +4328,11 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, - * * cSize >= blockBound(srcSize): We have expanded the block too much so - * emit an uncompressed block. - */ -- { -- size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); -+ { size_t const cSize = -+ ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); - if (cSize != ERROR(dstSize_tooSmall)) { -- size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); -+ size_t const maxCSize = -+ srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); - FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); - if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { - ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); -@@ -3778,7 +4340,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, - } - } - } -- } -+ } /* if (bss == ZSTDbss_compress)*/ - - DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); - /* Superblock compression failed, attempt to emit a single no compress block. 
-@@ -3836,7 +4398,7 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, - * All blocks will be terminated, all input will be consumed. - * Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. - * Frame is supposed already started (header already produced) --* @return : compressed size, or an error code -+* @return : compressed size, or an error code - */ - static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, -@@ -3860,7 +4422,9 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, - ZSTD_matchState_t* const ms = &cctx->blockState.matchState; - U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); - -- RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, -+ /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding -+ * additional 1. We need to revisit and change this logic to be more consistent */ -+ RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE + 1, - dstSize_tooSmall, - "not enough space to store compressed block"); - if (remaining < blockSize) blockSize = remaining; -@@ -3899,7 +4463,7 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, - MEM_writeLE24(op, cBlockHeader); - cSize += ZSTD_blockHeaderSize; - } -- } -+ } /* if (ZSTD_useTargetCBlockSize(&cctx->appliedParams))*/ - - - ip += blockSize; -@@ -4001,19 +4565,15 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity) - } - } - --size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) -+void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) - { -- RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong, -- "wrong cctx stage"); -- RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable, -- parameter_unsupported, -- "incompatible with ldm"); -+ assert(cctx->stage == ZSTDcs_init); -+ assert(nbSeq == 0 || cctx->appliedParams.ldmParams.enableLdm != 
ZSTD_ps_enable); - cctx->externSeqStore.seq = seq; - cctx->externSeqStore.size = nbSeq; - cctx->externSeqStore.capacity = nbSeq; - cctx->externSeqStore.pos = 0; - cctx->externSeqStore.posInSequence = 0; -- return 0; - } - - -@@ -4078,31 +4638,51 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, - } - } - --size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, -- void* dst, size_t dstCapacity, -- const void* src, size_t srcSize) -+size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx, -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize) - { - DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize); - return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); - } - -+/* NOTE: Must just wrap ZSTD_compressContinue_public() */ -+size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize) -+{ -+ return ZSTD_compressContinue_public(cctx, dst, dstCapacity, src, srcSize); -+} - --size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) -+static size_t ZSTD_getBlockSize_deprecated(const ZSTD_CCtx* cctx) - { - ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; - assert(!ZSTD_checkCParams(cParams)); -- return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); -+ return MIN(cctx->appliedParams.maxBlockSize, (size_t)1 << cParams.windowLog); - } - --size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) -+/* NOTE: Must just wrap ZSTD_getBlockSize_deprecated() */ -+size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) -+{ -+ return ZSTD_getBlockSize_deprecated(cctx); -+} -+ -+/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */ -+size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) - { - DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); -- { size_t const blockSizeMax = 
ZSTD_getBlockSize(cctx); -+ { size_t const blockSizeMax = ZSTD_getBlockSize_deprecated(cctx); - RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); } - - return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); - } - -+/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */ -+size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) -+{ -+ return ZSTD_compressBlock_deprecated(cctx, dst, dstCapacity, src, srcSize); -+} -+ - /*! ZSTD_loadDictionaryContent() : - * @return : 0, or an error code - */ -@@ -4111,25 +4691,36 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, - ZSTD_cwksp* ws, - ZSTD_CCtx_params const* params, - const void* src, size_t srcSize, -- ZSTD_dictTableLoadMethod_e dtlm) -+ ZSTD_dictTableLoadMethod_e dtlm, -+ ZSTD_tableFillPurpose_e tfp) - { - const BYTE* ip = (const BYTE*) src; - const BYTE* const iend = ip + srcSize; - int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL; - -- /* Assert that we the ms params match the params we're being given */ -+ /* Assert that the ms params match the params we're being given */ - ZSTD_assertEqualCParams(params->cParams, ms->cParams); - -- if (srcSize > ZSTD_CHUNKSIZE_MAX) { -+ { /* Ensure large dictionaries can't cause index overflow */ -+ - /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX. - * Dictionaries right at the edge will immediately trigger overflow - * correction, but I don't want to insert extra constraints here. - */ -- U32 const maxDictSize = ZSTD_CURRENT_MAX - 1; -- /* We must have cleared our windows when our source is this large. 
*/ -- assert(ZSTD_window_isEmpty(ms->window)); -- if (loadLdmDict) -- assert(ZSTD_window_isEmpty(ls->window)); -+ U32 maxDictSize = ZSTD_CURRENT_MAX - ZSTD_WINDOW_START_INDEX; -+ -+ int const CDictTaggedIndices = ZSTD_CDictIndicesAreTagged(¶ms->cParams); -+ if (CDictTaggedIndices && tfp == ZSTD_tfp_forCDict) { -+ /* Some dictionary matchfinders in zstd use "short cache", -+ * which treats the lower ZSTD_SHORT_CACHE_TAG_BITS of each -+ * CDict hashtable entry as a tag rather than as part of an index. -+ * When short cache is used, we need to truncate the dictionary -+ * so that its indices don't overlap with the tag. */ -+ U32 const shortCacheMaxDictSize = (1u << (32 - ZSTD_SHORT_CACHE_TAG_BITS)) - ZSTD_WINDOW_START_INDEX; -+ maxDictSize = MIN(maxDictSize, shortCacheMaxDictSize); -+ assert(!loadLdmDict); -+ } -+ - /* If the dictionary is too large, only load the suffix of the dictionary. */ - if (srcSize > maxDictSize) { - ip = iend - maxDictSize; -@@ -4138,35 +4729,58 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, - } - } - -- DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder); -+ if (srcSize > ZSTD_CHUNKSIZE_MAX) { -+ /* We must have cleared our windows when our source is this large. */ -+ assert(ZSTD_window_isEmpty(ms->window)); -+ if (loadLdmDict) assert(ZSTD_window_isEmpty(ls->window)); -+ } - ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0); -- ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); -- ms->forceNonContiguous = params->deterministicRefPrefix; - -- if (loadLdmDict) { -+ DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder); -+ -+ if (loadLdmDict) { /* Load the entire dict into LDM matchfinders. */ - ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0); - ls->loadedDictEnd = params->forceWindow ? 
0 : (U32)(iend - ls->window.base); -+ ZSTD_ldm_fillHashTable(ls, ip, iend, ¶ms->ldmParams); - } - -+ /* If the dict is larger than we can reasonably index in our tables, only load the suffix. */ -+ if (params->cParams.strategy < ZSTD_btultra) { -+ U32 maxDictSize = 8U << MIN(MAX(params->cParams.hashLog, params->cParams.chainLog), 28); -+ if (srcSize > maxDictSize) { -+ ip = iend - maxDictSize; -+ src = ip; -+ srcSize = maxDictSize; -+ } -+ } -+ -+ ms->nextToUpdate = (U32)(ip - ms->window.base); -+ ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); -+ ms->forceNonContiguous = params->deterministicRefPrefix; -+ - if (srcSize <= HASH_READ_SIZE) return 0; - - ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend); - -- if (loadLdmDict) -- ZSTD_ldm_fillHashTable(ls, ip, iend, ¶ms->ldmParams); -- - switch(params->cParams.strategy) - { - case ZSTD_fast: -- ZSTD_fillHashTable(ms, iend, dtlm); -+ ZSTD_fillHashTable(ms, iend, dtlm, tfp); - break; - case ZSTD_dfast: -- ZSTD_fillDoubleHashTable(ms, iend, dtlm); -+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR -+ ZSTD_fillDoubleHashTable(ms, iend, dtlm, tfp); -+#else -+ assert(0); /* shouldn't be called: cparams should've been adjusted. 
*/ -+#endif - break; - - case ZSTD_greedy: - case ZSTD_lazy: - case ZSTD_lazy2: -+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) - assert(srcSize >= HASH_READ_SIZE); - if (ms->dedicatedDictSearch) { - assert(ms->chainTable != NULL); -@@ -4174,7 +4788,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, - } else { - assert(params->useRowMatchFinder != ZSTD_ps_auto); - if (params->useRowMatchFinder == ZSTD_ps_enable) { -- size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16); -+ size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog); - ZSTD_memset(ms->tagTable, 0, tagTableSize); - ZSTD_row_update(ms, iend-HASH_READ_SIZE); - DEBUGLOG(4, "Using row-based hash table for lazy dict"); -@@ -4183,14 +4797,23 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, - DEBUGLOG(4, "Using chain-based hash table for lazy dict"); - } - } -+#else -+ assert(0); /* shouldn't be called: cparams should've been adjusted. */ -+#endif - break; - - case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ - case ZSTD_btopt: - case ZSTD_btultra: - case ZSTD_btultra2: -+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) - assert(srcSize >= HASH_READ_SIZE); - ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); -+#else -+ assert(0); /* shouldn't be called: cparams should've been adjusted. */ -+#endif - break; - - default: -@@ -4237,11 +4860,10 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, - - /* We only set the loaded table as valid if it contains all non-zero - * weights. 
Otherwise, we set it to check */ -- if (!hasZeroWeights) -+ if (!hasZeroWeights && maxSymbolValue == 255) - bs->entropy.huf.repeatMode = HUF_repeat_valid; - - RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); -- RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, ""); - dictPtr += hufHeaderSize; - } - -@@ -4327,6 +4949,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, - ZSTD_CCtx_params const* params, - const void* dict, size_t dictSize, - ZSTD_dictTableLoadMethod_e dtlm, -+ ZSTD_tableFillPurpose_e tfp, - void* workspace) - { - const BYTE* dictPtr = (const BYTE*)dict; -@@ -4345,7 +4968,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, - { - size_t const dictContentSize = (size_t)(dictEnd - dictPtr); - FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( -- ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), ""); -+ ms, NULL, ws, params, dictPtr, dictContentSize, dtlm, tfp), ""); - } - return dictID; - } -@@ -4361,6 +4984,7 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, - const void* dict, size_t dictSize, - ZSTD_dictContentType_e dictContentType, - ZSTD_dictTableLoadMethod_e dtlm, -+ ZSTD_tableFillPurpose_e tfp, - void* workspace) - { - DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); -@@ -4373,13 +4997,13 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, - - /* dict restricted modes */ - if (dictContentType == ZSTD_dct_rawContent) -- return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm); -+ return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm, tfp); - - if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { - if (dictContentType == ZSTD_dct_auto) { - DEBUGLOG(4, "raw content dictionary detected"); - return ZSTD_loadDictionaryContent( -- ms, ls, ws, params, dict, dictSize, dtlm); -+ ms, ls, ws, params, dict, dictSize, dtlm, tfp); - } - RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, 
dictionary_wrong, ""); - assert(0); /* impossible */ -@@ -4387,13 +5011,14 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, - - /* dict as full zstd dictionary */ - return ZSTD_loadZstdDictionary( -- bs, ms, ws, params, dict, dictSize, dtlm, workspace); -+ bs, ms, ws, params, dict, dictSize, dtlm, tfp, workspace); - } - - #define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) - #define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL) - - /*! ZSTD_compressBegin_internal() : -+ * Assumption : either @dict OR @cdict (or none) is non-NULL, never both - * @return : 0, or an error code */ - static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, - const void* dict, size_t dictSize, -@@ -4426,11 +5051,11 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, - cctx->blockState.prevCBlock, &cctx->blockState.matchState, - &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent, - cdict->dictContentSize, cdict->dictContentType, dtlm, -- cctx->entropyWorkspace) -+ ZSTD_tfp_forCCtx, cctx->entropyWorkspace) - : ZSTD_compress_insertDictionary( - cctx->blockState.prevCBlock, &cctx->blockState.matchState, - &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize, -- dictContentType, dtlm, cctx->entropyWorkspace); -+ dictContentType, dtlm, ZSTD_tfp_forCCtx, cctx->entropyWorkspace); - FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); - assert(dictID <= UINT_MAX); - cctx->dictID = (U32)dictID; -@@ -4471,11 +5096,11 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, - &cctxParams, pledgedSrcSize); - } - --size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) -+static size_t -+ZSTD_compressBegin_usingDict_deprecated(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) - { - ZSTD_CCtx_params cctxParams; -- { -- ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, 
ZSTD_cpm_noAttachDict); -+ { ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); - ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel); - } - DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); -@@ -4483,9 +5108,15 @@ size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t di - &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); - } - -+size_t -+ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) -+{ -+ return ZSTD_compressBegin_usingDict_deprecated(cctx, dict, dictSize, compressionLevel); -+} -+ - size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) - { -- return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); -+ return ZSTD_compressBegin_usingDict_deprecated(cctx, NULL, 0, compressionLevel); - } - - -@@ -4496,14 +5127,13 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) - { - BYTE* const ostart = (BYTE*)dst; - BYTE* op = ostart; -- size_t fhSize = 0; - - DEBUGLOG(4, "ZSTD_writeEpilogue"); - RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); - - /* special case : empty frame */ - if (cctx->stage == ZSTDcs_init) { -- fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); -+ size_t fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); - FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); - dstCapacity -= fhSize; - op += fhSize; -@@ -4513,8 +5143,9 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) - if (cctx->stage != ZSTDcs_ending) { - /* write one last empty block, make it the "last" block */ - U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; -- RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue"); -- 
MEM_writeLE32(op, cBlockHeader24); -+ ZSTD_STATIC_ASSERT(ZSTD_BLOCKHEADERSIZE == 3); -+ RETURN_ERROR_IF(dstCapacity<3, dstSize_tooSmall, "no room for epilogue"); -+ MEM_writeLE24(op, cBlockHeader24); - op += ZSTD_blockHeaderSize; - dstCapacity -= ZSTD_blockHeaderSize; - } -@@ -4537,9 +5168,9 @@ void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize) - (void)extraCSize; - } - --size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, -- void* dst, size_t dstCapacity, -- const void* src, size_t srcSize) -+size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx, -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize) - { - size_t endResult; - size_t const cSize = ZSTD_compressContinue_internal(cctx, -@@ -4563,6 +5194,14 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, - return cSize + endResult; - } - -+/* NOTE: Must just wrap ZSTD_compressEnd_public() */ -+size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize) -+{ -+ return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize); -+} -+ - size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, -@@ -4591,7 +5230,7 @@ size_t ZSTD_compress_advanced_internal( - FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, - dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, - params, srcSize, ZSTDb_not_buffered) , ""); -- return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); -+ return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize); - } - - size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, -@@ -4709,7 +5348,7 @@ static size_t ZSTD_initCDict_internal( - { size_t const dictID = ZSTD_compress_insertDictionary( - &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace, - ¶ms, cdict->dictContent, cdict->dictContentSize, -- dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace); -+ dictContentType, ZSTD_dtlm_full, ZSTD_tfp_forCDict, cdict->entropyWorkspace); - FORWARD_IF_ERROR(dictID, 
"ZSTD_compress_insertDictionary failed"); - assert(dictID <= (size_t)(U32)-1); - cdict->dictID = (U32)dictID; -@@ -4811,7 +5450,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced2( - cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch, - customMem); - -- if (ZSTD_isError( ZSTD_initCDict_internal(cdict, -+ if (!cdict || ZSTD_isError( ZSTD_initCDict_internal(cdict, - dict, dictSize, - dictLoadMethod, dictContentType, - cctxParams) )) { -@@ -4906,6 +5545,7 @@ const ZSTD_CDict* ZSTD_initStaticCDict( - params.cParams = cParams; - params.useRowMatchFinder = useRowMatchFinder; - cdict->useRowMatchFinder = useRowMatchFinder; -+ cdict->compressionLevel = ZSTD_NO_CLEVEL; - - if (ZSTD_isError( ZSTD_initCDict_internal(cdict, - dict, dictSize, -@@ -4985,12 +5625,17 @@ size_t ZSTD_compressBegin_usingCDict_advanced( - - /* ZSTD_compressBegin_usingCDict() : - * cdict must be != NULL */ --size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) -+size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) - { - ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); - } - -+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) -+{ -+ return ZSTD_compressBegin_usingCDict_deprecated(cctx, cdict); -+} -+ - /*! ZSTD_compress_usingCDict_internal(): - * Implementation of various ZSTD_compress_usingCDict* functions. - */ -@@ -5000,7 +5645,7 @@ static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx, - const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) - { - FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ -- return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); -+ return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize); - } - - /*! 
ZSTD_compress_usingCDict_advanced(): -@@ -5197,30 +5842,41 @@ size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) - - static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) - { -- size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; -- if (hintInSize==0) hintInSize = cctx->blockSize; -- return hintInSize; -+ if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { -+ return cctx->blockSize - cctx->stableIn_notConsumed; -+ } -+ assert(cctx->appliedParams.inBufferMode == ZSTD_bm_buffered); -+ { size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; -+ if (hintInSize==0) hintInSize = cctx->blockSize; -+ return hintInSize; -+ } - } - - /* ZSTD_compressStream_generic(): - * internal function for all *compressStream*() variants -- * non-static, because can be called from zstdmt_compress.c -- * @return : hint size for next input */ -+ * @return : hint size for next input to complete ongoing block */ - static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, - ZSTD_EndDirective const flushMode) - { -- const char* const istart = (const char*)input->src; -- const char* const iend = input->size != 0 ? istart + input->size : istart; -- const char* ip = input->pos != 0 ? istart + input->pos : istart; -- char* const ostart = (char*)output->dst; -- char* const oend = output->size != 0 ? ostart + output->size : ostart; -- char* op = output->pos != 0 ? ostart + output->pos : ostart; -+ const char* const istart = (assert(input != NULL), (const char*)input->src); -+ const char* const iend = (istart != NULL) ? istart + input->size : istart; -+ const char* ip = (istart != NULL) ? istart + input->pos : istart; -+ char* const ostart = (assert(output != NULL), (char*)output->dst); -+ char* const oend = (ostart != NULL) ? ostart + output->size : ostart; -+ char* op = (ostart != NULL) ? 
ostart + output->pos : ostart; - U32 someMoreWork = 1; - - /* check expectations */ -- DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode); -+ DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%i, srcSize = %zu", (int)flushMode, input->size - input->pos); -+ assert(zcs != NULL); -+ if (zcs->appliedParams.inBufferMode == ZSTD_bm_stable) { -+ assert(input->pos >= zcs->stableIn_notConsumed); -+ input->pos -= zcs->stableIn_notConsumed; -+ if (ip) ip -= zcs->stableIn_notConsumed; -+ zcs->stableIn_notConsumed = 0; -+ } - if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { - assert(zcs->inBuff != NULL); - assert(zcs->inBuffSize > 0); -@@ -5229,8 +5885,10 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, - assert(zcs->outBuff != NULL); - assert(zcs->outBuffSize > 0); - } -- assert(output->pos <= output->size); -+ if (input->src == NULL) assert(input->size == 0); - assert(input->pos <= input->size); -+ if (output->dst == NULL) assert(output->size == 0); -+ assert(output->pos <= output->size); - assert((U32)flushMode <= (U32)ZSTD_e_end); - - while (someMoreWork) { -@@ -5245,7 +5903,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, - || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */ - && (zcs->inBuffPos == 0) ) { - /* shortcut to compression pass directly into output buffer */ -- size_t const cSize = ZSTD_compressEnd(zcs, -+ size_t const cSize = ZSTD_compressEnd_public(zcs, - op, oend-op, ip, iend-ip); - DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); - FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed"); -@@ -5262,8 +5920,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, - zcs->inBuff + zcs->inBuffPos, toLoad, - ip, iend-ip); - zcs->inBuffPos += loaded; -- if (loaded != 0) -- ip += loaded; -+ if (ip) ip += loaded; - if ( (flushMode == ZSTD_e_continue) - && (zcs->inBuffPos < zcs->inBuffTarget) ) { - /* not enough input to fill full block 
: stop here */ -@@ -5274,6 +5931,20 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, - /* empty */ - someMoreWork = 0; break; - } -+ } else { -+ assert(zcs->appliedParams.inBufferMode == ZSTD_bm_stable); -+ if ( (flushMode == ZSTD_e_continue) -+ && ( (size_t)(iend - ip) < zcs->blockSize) ) { -+ /* can't compress a full block : stop here */ -+ zcs->stableIn_notConsumed = (size_t)(iend - ip); -+ ip = iend; /* pretend to have consumed input */ -+ someMoreWork = 0; break; -+ } -+ if ( (flushMode == ZSTD_e_flush) -+ && (ip == iend) ) { -+ /* empty */ -+ someMoreWork = 0; break; -+ } - } - /* compress current block (note : this stage cannot be stopped in the middle) */ - DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); -@@ -5281,9 +5952,8 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, - void* cDst; - size_t cSize; - size_t oSize = oend-op; -- size_t const iSize = inputBuffered -- ? zcs->inBuffPos - zcs->inToCompress -- : MIN((size_t)(iend - ip), zcs->blockSize); -+ size_t const iSize = inputBuffered ? zcs->inBuffPos - zcs->inToCompress -+ : MIN((size_t)(iend - ip), zcs->blockSize); - if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) - cDst = op; /* compress into output buffer, to skip flush stage */ - else -@@ -5291,9 +5961,9 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, - if (inputBuffered) { - unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); - cSize = lastBlock ? -- ZSTD_compressEnd(zcs, cDst, oSize, -+ ZSTD_compressEnd_public(zcs, cDst, oSize, - zcs->inBuff + zcs->inToCompress, iSize) : -- ZSTD_compressContinue(zcs, cDst, oSize, -+ ZSTD_compressContinue_public(zcs, cDst, oSize, - zcs->inBuff + zcs->inToCompress, iSize); - FORWARD_IF_ERROR(cSize, "%s", lastBlock ? 
"ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); - zcs->frameEnded = lastBlock; -@@ -5306,19 +5976,16 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, - if (!lastBlock) - assert(zcs->inBuffTarget <= zcs->inBuffSize); - zcs->inToCompress = zcs->inBuffPos; -- } else { -- unsigned const lastBlock = (ip + iSize == iend); -- assert(flushMode == ZSTD_e_end /* Already validated */); -+ } else { /* !inputBuffered, hence ZSTD_bm_stable */ -+ unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip + iSize == iend); - cSize = lastBlock ? -- ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) : -- ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize); -+ ZSTD_compressEnd_public(zcs, cDst, oSize, ip, iSize) : -+ ZSTD_compressContinue_public(zcs, cDst, oSize, ip, iSize); - /* Consume the input prior to error checking to mirror buffered mode. */ -- if (iSize > 0) -- ip += iSize; -+ if (ip) ip += iSize; - FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); - zcs->frameEnded = lastBlock; -- if (lastBlock) -- assert(ip == iend); -+ if (lastBlock) assert(ip == iend); - } - if (cDst == op) { /* no need to flush */ - op += cSize; -@@ -5388,8 +6055,10 @@ size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuf - /* After a compression call set the expected input/output buffer. - * This is validated at the start of the next compression call. 
- */ --static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input) -+static void -+ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, const ZSTD_outBuffer* output, const ZSTD_inBuffer* input) - { -+ DEBUGLOG(5, "ZSTD_setBufferExpectations (for advanced stable in/out modes)"); - if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { - cctx->expectedInBuffer = *input; - } -@@ -5408,22 +6077,22 @@ static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx, - { - if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { - ZSTD_inBuffer const expect = cctx->expectedInBuffer; -- if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size) -- RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!"); -- if (endOp != ZSTD_e_end) -- RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!"); -+ if (expect.src != input->src || expect.pos != input->pos) -+ RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableInBuffer enabled but input differs!"); - } -+ (void)endOp; - if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { - size_t const outBufferSize = output->size - output->pos; - if (cctx->expectedOutBufferSize != outBufferSize) -- RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!"); -+ RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableOutBuffer enabled but output size differs!"); - } - return 0; - } - - static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, - ZSTD_EndDirective endOp, -- size_t inSize) { -+ size_t inSize) -+{ - ZSTD_CCtx_params params = cctx->requestedParams; - ZSTD_prefixDict const prefixDict = cctx->prefixDict; - FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. 
*/ -@@ -5437,9 +6106,9 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, - params.compressionLevel = cctx->cdict->compressionLevel; - } - DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); -- if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-fix pledgedSrcSize */ -- { -- size_t const dictSize = prefixDict.dict -+ if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-determine pledgedSrcSize */ -+ -+ { size_t const dictSize = prefixDict.dict - ? prefixDict.dictSize - : (cctx->cdict ? cctx->cdict->dictContentSize : 0); - ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, ¶ms, cctx->pledgedSrcSizePlusOne - 1); -@@ -5451,6 +6120,9 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, - params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, ¶ms.cParams); - params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, ¶ms.cParams); - params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, ¶ms.cParams); -+ params.validateSequences = ZSTD_resolveExternalSequenceValidation(params.validateSequences); -+ params.maxBlockSize = ZSTD_resolveMaxBlockSize(params.maxBlockSize); -+ params.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(params.searchForExternalRepcodes, params.compressionLevel); - - { U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1; - assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); -@@ -5477,6 +6149,8 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, - return 0; - } - -+/* @return provides a minimum amount of data remaining to be flushed from internal buffers -+ */ - size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, -@@ -5491,8 +6165,27 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, - - /* transparent initialization stage */ - if (cctx->streamStage == zcss_init) { -- 
FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed"); -- ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */ -+ size_t const inputSize = input->size - input->pos; /* no obligation to start from pos==0 */ -+ size_t const totalInputSize = inputSize + cctx->stableIn_notConsumed; -+ if ( (cctx->requestedParams.inBufferMode == ZSTD_bm_stable) /* input is presumed stable, across invocations */ -+ && (endOp == ZSTD_e_continue) /* no flush requested, more input to come */ -+ && (totalInputSize < ZSTD_BLOCKSIZE_MAX) ) { /* not even reached one block yet */ -+ if (cctx->stableIn_notConsumed) { /* not the first time */ -+ /* check stable source guarantees */ -+ RETURN_ERROR_IF(input->src != cctx->expectedInBuffer.src, stabilityCondition_notRespected, "stableInBuffer condition not respected: wrong src pointer"); -+ RETURN_ERROR_IF(input->pos != cctx->expectedInBuffer.size, stabilityCondition_notRespected, "stableInBuffer condition not respected: externally modified pos"); -+ } -+ /* pretend input was consumed, to give a sense forward progress */ -+ input->pos = input->size; -+ /* save stable inBuffer, for later control, and flush/end */ -+ cctx->expectedInBuffer = *input; -+ /* but actually input wasn't consumed, so keep track of position from where compression shall resume */ -+ cctx->stableIn_notConsumed += inputSize; -+ /* don't initialize yet, wait for the first block of flush() order, for better parameters adaptation */ -+ return ZSTD_FRAMEHEADERSIZE_MIN(cctx->requestedParams.format); /* at least some header to produce */ -+ } -+ FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, totalInputSize), "compressStream2 initialization failed"); -+ ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */ - } - /* end of transparent initialization stage */ - -@@ -5510,13 +6203,20 @@ size_t 
ZSTD_compressStream2_simpleArgs ( - const void* src, size_t srcSize, size_t* srcPos, - ZSTD_EndDirective endOp) - { -- ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; -- ZSTD_inBuffer input = { src, srcSize, *srcPos }; -+ ZSTD_outBuffer output; -+ ZSTD_inBuffer input; -+ output.dst = dst; -+ output.size = dstCapacity; -+ output.pos = *dstPos; -+ input.src = src; -+ input.size = srcSize; -+ input.pos = *srcPos; - /* ZSTD_compressStream2() will check validity of dstPos and srcPos */ -- size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); -- *dstPos = output.pos; -- *srcPos = input.pos; -- return cErr; -+ { size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); -+ *dstPos = output.pos; -+ *srcPos = input.pos; -+ return cErr; -+ } - } - - size_t ZSTD_compress2(ZSTD_CCtx* cctx, -@@ -5539,6 +6239,7 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx, - /* Reset to the original values. */ - cctx->requestedParams.inBufferMode = originalInBufferMode; - cctx->requestedParams.outBufferMode = originalOutBufferMode; -+ - FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed"); - if (result != 0) { /* compression not completed, due to lack of output space */ - assert(oPos == dstCapacity); -@@ -5549,64 +6250,61 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx, - } - } - --typedef struct { -- U32 idx; /* Index in array of ZSTD_Sequence */ -- U32 posInSequence; /* Position within sequence at idx */ -- size_t posInSrc; /* Number of bytes given by sequences provided so far */ --} ZSTD_sequencePosition; -- - /* ZSTD_validateSequence() : - * @offCode : is presumed to follow format required by ZSTD_storeSeq() - * @returns a ZSTD error code if sequence is not valid - */ - static size_t --ZSTD_validateSequence(U32 offCode, U32 matchLength, -- size_t posInSrc, U32 windowLog, size_t dictSize) -+ZSTD_validateSequence(U32 offCode, U32 matchLength, U32 minMatch, -+ size_t posInSrc, U32 windowLog, size_t dictSize, int useSequenceProducer) - { -- U32 const 
windowSize = 1 << windowLog; -+ U32 const windowSize = 1u << windowLog; - /* posInSrc represents the amount of data the decoder would decode up to this point. - * As long as the amount of data decoded is less than or equal to window size, offsets may be - * larger than the total length of output decoded in order to reference the dict, even larger than - * window size. After output surpasses windowSize, we're limited to windowSize offsets again. - */ - size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize; -- RETURN_ERROR_IF(offCode > STORE_OFFSET(offsetBound), corruption_detected, "Offset too large!"); -- RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small"); -+ size_t const matchLenLowerBound = (minMatch == 3 || useSequenceProducer) ? 3 : 4; -+ RETURN_ERROR_IF(offCode > OFFSET_TO_OFFBASE(offsetBound), externalSequences_invalid, "Offset too large!"); -+ /* Validate maxNbSeq is large enough for the given matchLength and minMatch */ -+ RETURN_ERROR_IF(matchLength < matchLenLowerBound, externalSequences_invalid, "Matchlength too small for the minMatch"); - return 0; - } - - /* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */ --static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) -+static U32 ZSTD_finalizeOffBase(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) - { -- U32 offCode = STORE_OFFSET(rawOffset); -+ U32 offBase = OFFSET_TO_OFFBASE(rawOffset); - - if (!ll0 && rawOffset == rep[0]) { -- offCode = STORE_REPCODE_1; -+ offBase = REPCODE1_TO_OFFBASE; - } else if (rawOffset == rep[1]) { -- offCode = STORE_REPCODE(2 - ll0); -+ offBase = REPCODE_TO_OFFBASE(2 - ll0); - } else if (rawOffset == rep[2]) { -- offCode = STORE_REPCODE(3 - ll0); -+ offBase = REPCODE_TO_OFFBASE(3 - ll0); - } else if (ll0 && rawOffset == rep[0] - 1) { -- offCode = STORE_REPCODE_3; -+ offBase = REPCODE3_TO_OFFBASE; - } -- 
return offCode; -+ return offBase; - } - --/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of -- * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. -- */ --static size_t -+size_t - ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, - ZSTD_sequencePosition* seqPos, - const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, -- const void* src, size_t blockSize) -+ const void* src, size_t blockSize, -+ ZSTD_paramSwitch_e externalRepSearch) - { - U32 idx = seqPos->idx; -+ U32 const startIdx = idx; - BYTE const* ip = (BYTE const*)(src); - const BYTE* const iend = ip + blockSize; - repcodes_t updatedRepcodes; - U32 dictSize; - -+ DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreExplicitBlockDelim (blockSize = %zu)", blockSize); -+ - if (cctx->cdict) { - dictSize = (U32)cctx->cdict->dictContentSize; - } else if (cctx->prefixDict.dict) { -@@ -5615,25 +6313,55 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, - dictSize = 0; - } - ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); -- for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) { -+ for (; idx < inSeqsSize && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) { - U32 const litLength = inSeqs[idx].litLength; -- U32 const ll0 = (litLength == 0); - U32 const matchLength = inSeqs[idx].matchLength; -- U32 const offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0); -- ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); -+ U32 offBase; -+ -+ if (externalRepSearch == ZSTD_ps_disable) { -+ offBase = OFFSET_TO_OFFBASE(inSeqs[idx].offset); -+ } else { -+ U32 const ll0 = (litLength == 0); -+ offBase = ZSTD_finalizeOffBase(inSeqs[idx].offset, updatedRepcodes.rep, ll0); -+ ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0); -+ } - -- DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); 
-+ DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength); - if (cctx->appliedParams.validateSequences) { - seqPos->posInSrc += litLength + matchLength; -- FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, -- cctx->appliedParams.cParams.windowLog, dictSize), -+ FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc, -+ cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)), - "Sequence validation failed"); - } -- RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, -+ RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid, - "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); -- ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength); -+ ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength); - ip += matchLength + litLength; - } -+ -+ /* If we skipped repcode search while parsing, we need to update repcodes now */ -+ assert(externalRepSearch != ZSTD_ps_auto); -+ assert(idx >= startIdx); -+ if (externalRepSearch == ZSTD_ps_disable && idx != startIdx) { -+ U32* const rep = updatedRepcodes.rep; -+ U32 lastSeqIdx = idx - 1; /* index of last non-block-delimiter sequence */ -+ -+ if (lastSeqIdx >= startIdx + 2) { -+ rep[2] = inSeqs[lastSeqIdx - 2].offset; -+ rep[1] = inSeqs[lastSeqIdx - 1].offset; -+ rep[0] = inSeqs[lastSeqIdx].offset; -+ } else if (lastSeqIdx == startIdx + 1) { -+ rep[2] = rep[0]; -+ rep[1] = inSeqs[lastSeqIdx - 1].offset; -+ rep[0] = inSeqs[lastSeqIdx].offset; -+ } else { -+ assert(lastSeqIdx == startIdx); -+ rep[2] = rep[1]; -+ rep[1] = rep[0]; -+ rep[0] = inSeqs[lastSeqIdx].offset; -+ } -+ } -+ - ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); - - if (inSeqs[idx].litLength) { -@@ -5642,26 +6370,15 @@ 
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, - ip += inSeqs[idx].litLength; - seqPos->posInSrc += inSeqs[idx].litLength; - } -- RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!"); -+ RETURN_ERROR_IF(ip != iend, externalSequences_invalid, "Blocksize doesn't agree with block delimiter!"); - seqPos->idx = idx+1; - return 0; - } - --/* Returns the number of bytes to move the current read position back by. Only non-zero -- * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something -- * went wrong. -- * -- * This function will attempt to scan through blockSize bytes represented by the sequences -- * in inSeqs, storing any (partial) sequences. -- * -- * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to -- * avoid splitting a match, or to avoid splitting a match such that it would produce a match -- * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. 
-- */ --static size_t -+size_t - ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, - const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, -- const void* src, size_t blockSize) -+ const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch) - { - U32 idx = seqPos->idx; - U32 startPosInSequence = seqPos->posInSequence; -@@ -5673,6 +6390,9 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* - U32 bytesAdjustment = 0; - U32 finalMatchSplit = 0; - -+ /* TODO(embg) support fast parsing mode in noBlockDelim mode */ -+ (void)externalRepSearch; -+ - if (cctx->cdict) { - dictSize = cctx->cdict->dictContentSize; - } else if (cctx->prefixDict.dict) { -@@ -5680,7 +6400,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* - } else { - dictSize = 0; - } -- DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize); -+ DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreNoBlockDelim: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize); - DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); - ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); - while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) { -@@ -5688,7 +6408,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* - U32 litLength = currSeq.litLength; - U32 matchLength = currSeq.matchLength; - U32 const rawOffset = currSeq.offset; -- U32 offCode; -+ U32 offBase; - - /* Modify the sequence depending on where endPosInSequence lies */ - if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) { -@@ -5702,7 +6422,6 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* - /* Move to the next sequence */ - endPosInSequence -= currSeq.litLength + currSeq.matchLength; - 
startPosInSequence = 0; -- idx++; - } else { - /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence - does not reach the end of the match. So, we have to split the sequence */ -@@ -5742,21 +6461,23 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* - } - /* Check if this offset can be represented with a repcode */ - { U32 const ll0 = (litLength == 0); -- offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0); -- ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); -+ offBase = ZSTD_finalizeOffBase(rawOffset, updatedRepcodes.rep, ll0); -+ ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0); - } - - if (cctx->appliedParams.validateSequences) { - seqPos->posInSrc += litLength + matchLength; -- FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, -- cctx->appliedParams.cParams.windowLog, dictSize), -+ FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc, -+ cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)), - "Sequence validation failed"); - } -- DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); -- RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, -+ DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength); -+ RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid, - "Not enough memory allocated. 
Try adjusting ZSTD_c_minMatch."); -- ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength); -+ ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength); - ip += matchLength + litLength; -+ if (!finalMatchSplit) -+ idx++; /* Next Sequence */ - } - DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); - assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength); -@@ -5779,7 +6500,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* - - typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, - const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, -- const void* src, size_t blockSize); -+ const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch); - static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) - { - ZSTD_sequenceCopier sequenceCopier = NULL; -@@ -5793,6 +6514,57 @@ static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) - return sequenceCopier; - } - -+/* Discover the size of next block by searching for the delimiter. -+ * Note that a block delimiter **must** exist in this mode, -+ * otherwise it's an input error. 
-+ * The block size retrieved will be later compared to ensure it remains within bounds */ -+static size_t -+blockSize_explicitDelimiter(const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos) -+{ -+ int end = 0; -+ size_t blockSize = 0; -+ size_t spos = seqPos.idx; -+ DEBUGLOG(6, "blockSize_explicitDelimiter : seq %zu / %zu", spos, inSeqsSize); -+ assert(spos <= inSeqsSize); -+ while (spos < inSeqsSize) { -+ end = (inSeqs[spos].offset == 0); -+ blockSize += inSeqs[spos].litLength + inSeqs[spos].matchLength; -+ if (end) { -+ if (inSeqs[spos].matchLength != 0) -+ RETURN_ERROR(externalSequences_invalid, "delimiter format error : both matchlength and offset must be == 0"); -+ break; -+ } -+ spos++; -+ } -+ if (!end) -+ RETURN_ERROR(externalSequences_invalid, "Reached end of sequences without finding a block delimiter"); -+ return blockSize; -+} -+ -+/* More a "target" block size */ -+static size_t blockSize_noDelimiter(size_t blockSize, size_t remaining) -+{ -+ int const lastBlock = (remaining <= blockSize); -+ return lastBlock ? 
remaining : blockSize; -+} -+ -+static size_t determine_blockSize(ZSTD_sequenceFormat_e mode, -+ size_t blockSize, size_t remaining, -+ const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos) -+{ -+ DEBUGLOG(6, "determine_blockSize : remainingSize = %zu", remaining); -+ if (mode == ZSTD_sf_noBlockDelimiters) -+ return blockSize_noDelimiter(blockSize, remaining); -+ { size_t const explicitBlockSize = blockSize_explicitDelimiter(inSeqs, inSeqsSize, seqPos); -+ FORWARD_IF_ERROR(explicitBlockSize, "Error while determining block size with explicit delimiters"); -+ if (explicitBlockSize > blockSize) -+ RETURN_ERROR(externalSequences_invalid, "sequences incorrectly define a too large block"); -+ if (explicitBlockSize > remaining) -+ RETURN_ERROR(externalSequences_invalid, "sequences define a frame longer than source"); -+ return explicitBlockSize; -+ } -+} -+ - /* Compress, block-by-block, all of the sequences given. - * - * Returns the cumulative size of all compressed blocks (including their headers), -@@ -5805,9 +6577,6 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, - const void* src, size_t srcSize) - { - size_t cSize = 0; -- U32 lastBlock; -- size_t blockSize; -- size_t compressedSeqsSize; - size_t remaining = srcSize; - ZSTD_sequencePosition seqPos = {0, 0, 0}; - -@@ -5827,22 +6596,29 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, - } - - while (remaining) { -+ size_t compressedSeqsSize; - size_t cBlockSize; - size_t additionalByteAdjustment; -- lastBlock = remaining <= cctx->blockSize; -- blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize; -+ size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters, -+ cctx->blockSize, remaining, -+ inSeqs, inSeqsSize, seqPos); -+ U32 const lastBlock = (blockSize == remaining); -+ FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size"); -+ assert(blockSize <= remaining); - ZSTD_resetSeqStore(&cctx->seqStore); -- DEBUGLOG(4, "Working on new block. 
Blocksize: %zu", blockSize); -+ DEBUGLOG(5, "Working on new block. Blocksize: %zu (total:%zu)", blockSize, (ip - (const BYTE*)src) + blockSize); - -- additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize); -+ additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize, cctx->appliedParams.searchForExternalRepcodes); - FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy"); - blockSize -= additionalByteAdjustment; - - /* If blocks are too small, emit as a nocompress block */ -- if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { -+ /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding -+ * additional 1. We need to revisit and change this logic to be more consistent */ -+ if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { - cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); - FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); -- DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize); -+ DEBUGLOG(5, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize); - cSize += cBlockSize; - ip += blockSize; - op += cBlockSize; -@@ -5851,6 +6627,7 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, - continue; - } - -+ RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block"); - compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore, - &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, - &cctx->appliedParams, -@@ -5859,11 +6636,11 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, - cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, - cctx->bmi2); - FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed"); -- DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize); -+ DEBUGLOG(5, "Compressed sequences 
size: %zu", compressedSeqsSize); - - if (!cctx->isFirstBlock && - ZSTD_maybeRLE(&cctx->seqStore) && -- ZSTD_isRLE((BYTE const*)src, srcSize)) { -+ ZSTD_isRLE(ip, blockSize)) { - /* We don't want to emit our first block as a RLE even if it qualifies because - * doing so will cause the decoder (cli only) to throw a "should consume all input error." - * This is only an issue for zstd <= v1.4.3 -@@ -5874,12 +6651,12 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, - if (compressedSeqsSize == 0) { - /* ZSTD_noCompressBlock writes the block header as well */ - cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); -- FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); -- DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize); -+ FORWARD_IF_ERROR(cBlockSize, "ZSTD_noCompressBlock failed"); -+ DEBUGLOG(5, "Writing out nocompress block, size: %zu", cBlockSize); - } else if (compressedSeqsSize == 1) { - cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock); -- FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed"); -- DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize); -+ FORWARD_IF_ERROR(cBlockSize, "ZSTD_rleCompressBlock failed"); -+ DEBUGLOG(5, "Writing out RLE block, size: %zu", cBlockSize); - } else { - U32 cBlockHeader; - /* Error checking and repcodes update */ -@@ -5891,11 +6668,10 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, - cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3); - MEM_writeLE24(op, cBlockHeader); - cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize; -- DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize); -+ DEBUGLOG(5, "Writing out compressed block, size: %zu", cBlockSize); - } - - cSize += cBlockSize; -- DEBUGLOG(4, "cSize running total: %zu", cSize); - - if (lastBlock) { - break; -@@ -5906,12 +6682,15 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, - dstCapacity -= cBlockSize; - cctx->isFirstBlock = 0; - } 
-+ DEBUGLOG(5, "cSize running total: %zu (remaining dstCapacity=%zu)", cSize, dstCapacity); - } - -+ DEBUGLOG(4, "cSize final total: %zu", cSize); - return cSize; - } - --size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity, -+size_t ZSTD_compressSequences(ZSTD_CCtx* cctx, -+ void* dst, size_t dstCapacity, - const ZSTD_Sequence* inSeqs, size_t inSeqsSize, - const void* src, size_t srcSize) - { -@@ -5921,7 +6700,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapaci - size_t frameHeaderSize = 0; - - /* Transparent initialization stage, same as compressStream2() */ -- DEBUGLOG(3, "ZSTD_compressSequences()"); -+ DEBUGLOG(4, "ZSTD_compressSequences (dstCapacity=%zu)", dstCapacity); - assert(cctx != NULL); - FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed"); - /* Begin writing output, starting with frame header */ -@@ -5949,26 +6728,34 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapaci - cSize += 4; - } - -- DEBUGLOG(3, "Final compressed size: %zu", cSize); -+ DEBUGLOG(4, "Final compressed size: %zu", cSize); - return cSize; - } - - /*====== Finalize ======*/ - -+static ZSTD_inBuffer inBuffer_forEndFlush(const ZSTD_CStream* zcs) -+{ -+ const ZSTD_inBuffer nullInput = { NULL, 0, 0 }; -+ const int stableInput = (zcs->appliedParams.inBufferMode == ZSTD_bm_stable); -+ return stableInput ? zcs->expectedInBuffer : nullInput; -+} -+ - /*! 
ZSTD_flushStream() : - * @return : amount of data remaining to flush */ - size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) - { -- ZSTD_inBuffer input = { NULL, 0, 0 }; -+ ZSTD_inBuffer input = inBuffer_forEndFlush(zcs); -+ input.size = input.pos; /* do not ingest more input during flush */ - return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush); - } - - - size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) - { -- ZSTD_inBuffer input = { NULL, 0, 0 }; -+ ZSTD_inBuffer input = inBuffer_forEndFlush(zcs); - size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); -- FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed"); -+ FORWARD_IF_ERROR(remainingToFlush , "ZSTD_compressStream2(,,ZSTD_e_end) failed"); - if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ - /* single thread mode : attempt to calculate remaining to flush more precisely */ - { size_t const lastBlockSize = zcs->frameEnded ? 
0 : ZSTD_BLOCKHEADERSIZE; -@@ -6090,7 +6877,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, - cp.targetLength = (unsigned)(-clampedCompressionLevel); - } - /* refine parameters based on srcSize & dictSize */ -- return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode); -+ return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode, ZSTD_ps_auto); - } - } - -@@ -6125,3 +6912,29 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH - if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); - } -+ -+void ZSTD_registerSequenceProducer( -+ ZSTD_CCtx* zc, -+ void* extSeqProdState, -+ ZSTD_sequenceProducer_F extSeqProdFunc -+) { -+ assert(zc != NULL); -+ ZSTD_CCtxParams_registerSequenceProducer( -+ &zc->requestedParams, extSeqProdState, extSeqProdFunc -+ ); -+} -+ -+void ZSTD_CCtxParams_registerSequenceProducer( -+ ZSTD_CCtx_params* params, -+ void* extSeqProdState, -+ ZSTD_sequenceProducer_F extSeqProdFunc -+) { -+ assert(params != NULL); -+ if (extSeqProdFunc != NULL) { -+ params->extSeqProdFunc = extSeqProdFunc; -+ params->extSeqProdState = extSeqProdState; -+ } else { -+ params->extSeqProdFunc = NULL; -+ params->extSeqProdState = NULL; -+ } -+} -diff --git a/lib/zstd/compress/zstd_compress_internal.h b/lib/zstd/compress/zstd_compress_internal.h -index 71697a11ae30..53cb582a8d2b 100644 ---- a/lib/zstd/compress/zstd_compress_internal.h -+++ b/lib/zstd/compress/zstd_compress_internal.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -20,6 +21,7 @@ - ***************************************/ - #include "../common/zstd_internal.h" - #include "zstd_cwksp.h" -+#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */ - - - /*-************************************* -@@ -32,7 +34,7 @@ - It's not a big deal though : candidate will just be sorted again. - Additionally, candidate position 1 will be lost. - But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. -- The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. -+ The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy. - This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ - - -@@ -111,12 +113,13 @@ typedef struct { - /* ZSTD_buildBlockEntropyStats() : - * Builds entropy for the block. - * @return : 0 on success or error code */ --size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, -- const ZSTD_entropyCTables_t* prevEntropy, -- ZSTD_entropyCTables_t* nextEntropy, -- const ZSTD_CCtx_params* cctxParams, -- ZSTD_entropyCTablesMetadata_t* entropyMetadata, -- void* workspace, size_t wkspSize); -+size_t ZSTD_buildBlockEntropyStats( -+ const seqStore_t* seqStorePtr, -+ const ZSTD_entropyCTables_t* prevEntropy, -+ ZSTD_entropyCTables_t* nextEntropy, -+ const ZSTD_CCtx_params* cctxParams, -+ ZSTD_entropyCTablesMetadata_t* entropyMetadata, -+ void* workspace, size_t wkspSize); - - /* ******************************* - * Compression internals structs * -@@ -142,26 +145,33 @@ typedef struct { - size_t capacity; /* The capacity starting from `seq` pointer */ - } rawSeqStore_t; - -+typedef struct { -+ U32 idx; /* Index in array of ZSTD_Sequence */ -+ U32 posInSequence; /* Position within sequence at idx */ -+ size_t posInSrc; /* Number of bytes given by sequences provided so far */ -+} 
ZSTD_sequencePosition; -+ - UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; - - typedef struct { -- int price; -- U32 off; -- U32 mlen; -- U32 litlen; -- U32 rep[ZSTD_REP_NUM]; -+ int price; /* price from beginning of segment to this position */ -+ U32 off; /* offset of previous match */ -+ U32 mlen; /* length of previous match */ -+ U32 litlen; /* nb of literals since previous match */ -+ U32 rep[ZSTD_REP_NUM]; /* offset history after previous match */ - } ZSTD_optimal_t; - - typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; - -+#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3) - typedef struct { - /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ - unsigned* litFreq; /* table of literals statistics, of size 256 */ - unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ - unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ - unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ -- ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ -- ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ -+ ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_SIZE */ -+ ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */ - - U32 litSum; /* nb of literals */ - U32 litLengthSum; /* nb of litLength codes */ -@@ -212,8 +222,10 @@ struct ZSTD_matchState_t { - U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ - - U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/ -- U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ -+ BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. 
*/ - U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */ -+ U64 hashSalt; /* For row-based matchFinder: salts the hash for reuse of tag table */ -+ U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */ - - U32* hashTable; - U32* hashTable3; -@@ -228,6 +240,18 @@ struct ZSTD_matchState_t { - const ZSTD_matchState_t* dictMatchState; - ZSTD_compressionParameters cParams; - const rawSeqStore_t* ldmSeqStore; -+ -+ /* Controls prefetching in some dictMatchState matchfinders. -+ * This behavior is controlled from the cctx ms. -+ * This parameter has no effect in the cdict ms. */ -+ int prefetchCDictTables; -+ -+ /* When == 0, lazy match finders insert every position. -+ * When != 0, lazy match finders only insert positions they search. -+ * This allows them to skip much faster over incompressible data, -+ * at a small cost to compression ratio. -+ */ -+ int lazySkipping; - }; - - typedef struct { -@@ -324,6 +348,25 @@ struct ZSTD_CCtx_params_s { - - /* Internal use, for createCCtxParams() and freeCCtxParams() only */ - ZSTD_customMem customMem; -+ -+ /* Controls prefetching in some dictMatchState matchfinders */ -+ ZSTD_paramSwitch_e prefetchCDictTables; -+ -+ /* Controls whether zstd will fall back to an internal matchfinder -+ * if the external matchfinder returns an error code. */ -+ int enableMatchFinderFallback; -+ -+ /* Parameters for the external sequence producer API. -+ * Users set these parameters through ZSTD_registerSequenceProducer(). -+ * It is not possible to set these parameters individually through the public API. 
*/ -+ void* extSeqProdState; -+ ZSTD_sequenceProducer_F extSeqProdFunc; -+ -+ /* Adjust the max block size*/ -+ size_t maxBlockSize; -+ -+ /* Controls repcode search in external sequence parsing */ -+ ZSTD_paramSwitch_e searchForExternalRepcodes; - }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ - - #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) -@@ -404,6 +447,7 @@ struct ZSTD_CCtx_s { - - /* Stable in/out buffer verification */ - ZSTD_inBuffer expectedInBuffer; -+ size_t stableIn_notConsumed; /* nb bytes within stable input buffer that are said to be consumed but are not */ - size_t expectedOutBufferSize; - - /* Dictionary */ -@@ -417,9 +461,14 @@ struct ZSTD_CCtx_s { - - /* Workspace for block splitter */ - ZSTD_blockSplitCtx blockSplitCtx; -+ -+ /* Buffer for output from external sequence producer */ -+ ZSTD_Sequence* extSeqBuf; -+ size_t extSeqBufCapacity; - }; - - typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; -+typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e; - - typedef enum { - ZSTD_noDict = 0, -@@ -441,7 +490,7 @@ typedef enum { - * In this mode we take both the source size and the dictionary size - * into account when selecting and adjusting the parameters. - */ -- ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams. -+ ZSTD_cpm_unknown = 3 /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams. - * We don't know what these parameters are for. We default to the legacy - * behavior of taking both the source size and the dict size into account - * when selecting and adjusting parameters. -@@ -500,9 +549,11 @@ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) - /* ZSTD_noCompressBlock() : - * Writes uncompressed block to dst buffer from given src. 
- * Returns the size of the block */ --MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) -+MEM_STATIC size_t -+ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) - { - U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); -+ DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity); - RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, - dstSize_tooSmall, "dst buf too small for uncompressed block"); - MEM_writeLE24(dst, cBlockHeader24); -@@ -510,7 +561,8 @@ MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const voi - return ZSTD_blockHeaderSize + srcSize; - } - --MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) -+MEM_STATIC size_t -+ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) - { - BYTE* const op = (BYTE*)dst; - U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); -@@ -529,7 +581,7 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) - { - U32 const minlog = (strat>=ZSTD_btultra) ? 
(U32)(strat) - 1 : 6; - ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); -- assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); -+ assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat)); - return (srcSize >> minlog) + 2; - } - -@@ -565,29 +617,27 @@ ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE con - while (ip < iend) *op++ = *ip++; - } - --#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) --#define STORE_REPCODE_1 STORE_REPCODE(1) --#define STORE_REPCODE_2 STORE_REPCODE(2) --#define STORE_REPCODE_3 STORE_REPCODE(3) --#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1) --#define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE) --#define STORED_IS_OFFSET(o) ((o) > ZSTD_REP_MOVE) --#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE) --#define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE) --#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */ --#define STORED_TO_OFFBASE(o) ((o)+1) --#define OFFBASE_TO_STORED(o) ((o)-1) -+ -+#define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1) -+#define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2) -+#define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3) -+#define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */ -+#define OFFSET_TO_OFFBASE(o) (assert((o)>0), o + ZSTD_REP_NUM) -+#define OFFBASE_IS_OFFSET(o) ((o) > ZSTD_REP_NUM) -+#define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM) -+#define OFFBASE_TO_OFFSET(o) (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM) -+#define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o)) /* returns ID 1,2,3 */ - - /*! ZSTD_storeSeq() : -- * Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t. -- * @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET(). -+ * Store a sequence (litlen, litPtr, offBase and matchLength) into seqStore_t. 
-+ * @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE(). - * @matchLength : must be >= MINMATCH -- * Allowed to overread literals up to litLimit. -+ * Allowed to over-read literals up to litLimit. - */ - HINT_INLINE UNUSED_ATTR void - ZSTD_storeSeq(seqStore_t* seqStorePtr, - size_t litLength, const BYTE* literals, const BYTE* litLimit, -- U32 offBase_minus1, -+ U32 offBase, - size_t matchLength) - { - BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; -@@ -596,8 +646,8 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr, - static const BYTE* g_start = NULL; - if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ - { U32 const pos = (U32)((const BYTE*)literals - g_start); -- DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", -- pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1); -+ DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u", -+ pos, (U32)litLength, (U32)matchLength, (U32)offBase); - } - #endif - assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); -@@ -607,9 +657,9 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr, - assert(literals + litLength <= litLimit); - if (litEnd <= litLimit_w) { - /* Common case we can use wildcopy. -- * First copy 16 bytes, because literals are likely short. -- */ -- assert(WILDCOPY_OVERLENGTH >= 16); -+ * First copy 16 bytes, because literals are likely short. 
-+ */ -+ ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16); - ZSTD_copy16(seqStorePtr->lit, literals); - if (litLength > 16) { - ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); -@@ -628,7 +678,7 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr, - seqStorePtr->sequences[0].litLength = (U16)litLength; - - /* match offset */ -- seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1); -+ seqStorePtr->sequences[0].offBase = offBase; - - /* match Length */ - assert(matchLength >= MINMATCH); -@@ -646,17 +696,17 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr, - - /* ZSTD_updateRep() : - * updates in-place @rep (array of repeat offsets) -- * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq() -+ * @offBase : sum-type, using numeric representation of ZSTD_storeSeq() - */ - MEM_STATIC void --ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) -+ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0) - { -- if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */ -+ if (OFFBASE_IS_OFFSET(offBase)) { /* full offset */ - rep[2] = rep[1]; - rep[1] = rep[0]; -- rep[0] = STORED_OFFSET(offBase_minus1); -+ rep[0] = OFFBASE_TO_OFFSET(offBase); - } else { /* repcode */ -- U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0; -+ U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0; - if (repCode > 0) { /* note : if repCode==0, no change */ - U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; - rep[2] = (repCode >= 2) ? 
rep[1] : rep[2]; -@@ -673,11 +723,11 @@ typedef struct repcodes_s { - } repcodes_t; - - MEM_STATIC repcodes_t --ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) -+ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0) - { - repcodes_t newReps; - ZSTD_memcpy(&newReps, rep, sizeof(newReps)); -- ZSTD_updateRep(newReps.rep, offBase_minus1, ll0); -+ ZSTD_updateRep(newReps.rep, offBase, ll0); - return newReps; - } - -@@ -685,59 +735,6 @@ ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0 - /*-************************************* - * Match length counter - ***************************************/ --static unsigned ZSTD_NbCommonBytes (size_t val) --{ -- if (MEM_isLittleEndian()) { -- if (MEM_64bits()) { --# if (__GNUC__ >= 4) -- return (__builtin_ctzll((U64)val) >> 3); --# else -- static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, -- 0, 3, 1, 3, 1, 4, 2, 7, -- 0, 2, 3, 6, 1, 5, 3, 5, -- 1, 3, 4, 4, 2, 5, 6, 7, -- 7, 0, 1, 2, 3, 3, 4, 6, -- 2, 6, 5, 5, 3, 4, 5, 6, -- 7, 1, 2, 4, 6, 4, 4, 5, -- 7, 2, 6, 5, 7, 6, 7, 7 }; -- return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; --# endif -- } else { /* 32 bits */ --# if (__GNUC__ >= 3) -- return (__builtin_ctz((U32)val) >> 3); --# else -- static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, -- 3, 2, 2, 1, 3, 2, 0, 1, -- 3, 3, 1, 2, 2, 2, 2, 0, -- 3, 1, 2, 0, 1, 0, 1, 1 }; -- return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; --# endif -- } -- } else { /* Big Endian CPU */ -- if (MEM_64bits()) { --# if (__GNUC__ >= 4) -- return (__builtin_clzll(val) >> 3); --# else -- unsigned r; -- const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ -- if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } -- if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } -- r += (!val); -- return r; --# endif -- } else { /* 32 bits */ --# 
if (__GNUC__ >= 3) -- return (__builtin_clz((U32)val) >> 3); --# else -- unsigned r; -- if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } -- r += (!val); -- return r; --# endif -- } } --} -- -- - MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) - { - const BYTE* const pStart = pIn; -@@ -783,32 +780,43 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match, - * Hashes - ***************************************/ - static const U32 prime3bytes = 506832829U; --static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; } --MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ -+static U32 ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s) >> (32-h) ; } -+MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */ -+MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); } - - static const U32 prime4bytes = 2654435761U; --static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } --static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } -+static U32 ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; } -+static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); } -+static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); } - - static const U64 prime5bytes = 889523592379ULL; --static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } --static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } -+static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-40)) * 
prime5bytes) ^ s) >> (64-h)) ; } -+static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); } -+static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); } - - static const U64 prime6bytes = 227718039650203ULL; --static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } --static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } -+static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; } -+static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); } -+static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); } - - static const U64 prime7bytes = 58295818150454627ULL; --static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } --static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } -+static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; } -+static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); } -+static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); } - - static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; --static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } --static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } -+static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes) ^ s) >> (64-h)) ; } -+static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); } -+static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); 
} -+ - - MEM_STATIC FORCE_INLINE_ATTR - size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) - { -+ /* Although some of these hashes do support hBits up to 64, some do not. -+ * To be on the safe side, always avoid hBits > 32. */ -+ assert(hBits <= 32); -+ - switch(mls) - { - default: -@@ -820,6 +828,24 @@ size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) - } - } - -+MEM_STATIC FORCE_INLINE_ATTR -+size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) { -+ /* Although some of these hashes do support hBits up to 64, some do not. -+ * To be on the safe side, always avoid hBits > 32. */ -+ assert(hBits <= 32); -+ -+ switch(mls) -+ { -+ default: -+ case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt); -+ case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt); -+ case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt); -+ case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt); -+ case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt); -+ } -+} -+ -+ - /* ZSTD_ipow() : - * Return base^exponent. - */ -@@ -1011,7 +1037,9 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, - * The least significant cycleLog bits of the indices must remain the same, - * which may be 0. Every index up to maxDist in the past must be valid. - */ --MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, -+MEM_STATIC -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, - U32 maxDist, void const* src) - { - /* preemptive overflow correction: -@@ -1167,10 +1195,15 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window, - (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); - assert(blockEndIdx >= loadedDictEnd); - -- if (blockEndIdx > loadedDictEnd + maxDist) { -+ if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) { - /* On reaching window size, dictionaries are invalidated. 
- * For simplification, if window size is reached anywhere within next block, - * the dictionary is invalidated for the full block. -+ * -+ * We also have to invalidate the dictionary if ZSTD_window_update() has detected -+ * non-contiguous segments, which means that loadedDictEnd != window->dictLimit. -+ * loadedDictEnd may be 0, if forceWindow is true, but in that case we never use -+ * dictMatchState, so setting it to NULL is not a problem. - */ - DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); - *loadedDictEndPtr = 0; -@@ -1199,7 +1232,9 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { - * forget about the extDict. Handles overlap of the prefix and extDict. - * Returns non-zero if the segment is contiguous. - */ --MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, -+MEM_STATIC -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+U32 ZSTD_window_update(ZSTD_window_t* window, - void const* src, size_t srcSize, - int forceNonContiguous) - { -@@ -1302,6 +1337,42 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) - - #endif - -+/* Short Cache */ -+ -+/* Normally, zstd matchfinders follow this flow: -+ * 1. Compute hash at ip -+ * 2. Load index from hashTable[hash] -+ * 3. Check if *ip == *(base + index) -+ * In dictionary compression, loading *(base + index) is often an L2 or even L3 miss. -+ * -+ * Short cache is an optimization which allows us to avoid step 3 most of the time -+ * when the data doesn't actually match. With short cache, the flow becomes: -+ * 1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip. -+ * 2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works. -+ * 3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue. -+ * -+ * Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to -+ * dictMatchState matchfinders. 
-+ */ -+#define ZSTD_SHORT_CACHE_TAG_BITS 8 -+#define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1) -+ -+/* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable. -+ * Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */ -+MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) { -+ size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS; -+ U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK); -+ assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0); -+ hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag; -+} -+ -+/* Helper function for short cache matchfinders. -+ * Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. */ -+MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) { -+ U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK; -+ U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK; -+ return tag1 == tag2; -+} - - - /* =============================================================== -@@ -1381,11 +1452,10 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity); - * This cannot be used when long range matching is enabled. - * Zstd will use these sequences, and pass the literals to a secondary block - * compressor. -- * @return : An error code on failure. - * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory - * access and data corruption. - */ --size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); -+void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); - - /* ZSTD_cycleLog() : - * condition for correct operation : hashLog > 1 */ -@@ -1396,4 +1466,55 @@ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); - */ - void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); - -+/* Returns 0 on success, and a ZSTD_error otherwise. 
This function scans through an array of -+ * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. -+ * Note that the block delimiter must include the last literals of the block. -+ */ -+size_t -+ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, -+ ZSTD_sequencePosition* seqPos, -+ const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, -+ const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch); -+ -+/* Returns the number of bytes to move the current read position back by. -+ * Only non-zero if we ended up splitting a sequence. -+ * Otherwise, it may return a ZSTD error if something went wrong. -+ * -+ * This function will attempt to scan through blockSize bytes -+ * represented by the sequences in @inSeqs, -+ * storing any (partial) sequences. -+ * -+ * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to -+ * avoid splitting a match, or to avoid splitting a match such that it would produce a match -+ * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. -+ */ -+size_t -+ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, -+ const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, -+ const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch); -+ -+/* Returns 1 if an external sequence producer is registered, otherwise returns 0. */ -+MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) { -+ return params->extSeqProdFunc != NULL; -+} -+ -+/* =============================================================== -+ * Deprecated definitions that are still used internally to avoid -+ * deprecation warnings. These functions are exactly equivalent to -+ * their public variants, but avoid the deprecation warnings. 
-+ * =============================================================== */ -+ -+size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); -+ -+size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx, -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize); -+ -+size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx, -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize); -+ -+size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -+ -+ - #endif /* ZSTD_COMPRESS_H */ -diff --git a/lib/zstd/compress/zstd_compress_literals.c b/lib/zstd/compress/zstd_compress_literals.c -index 52b0a8059aba..3e9ea46a670a 100644 ---- a/lib/zstd/compress/zstd_compress_literals.c -+++ b/lib/zstd/compress/zstd_compress_literals.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -13,11 +14,36 @@ - ***************************************/ - #include "zstd_compress_literals.h" - -+ -+/* ************************************************************** -+* Debug Traces -+****************************************************************/ -+#if DEBUGLEVEL >= 2 -+ -+static size_t showHexa(const void* src, size_t srcSize) -+{ -+ const BYTE* const ip = (const BYTE*)src; -+ size_t u; -+ for (u=0; u31) + (srcSize>4095); - -+ DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity); -+ - RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); - - switch(flSize) -@@ -36,16 +62,30 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, - } - - ZSTD_memcpy(ostart + flSize, src, srcSize); -- DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); -+ DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); - return srcSize + flSize; - } - -+static int allBytesIdentical(const void* src, size_t srcSize) -+{ -+ assert(srcSize >= 1); -+ assert(src != NULL); -+ { const BYTE b = ((const BYTE*)src)[0]; -+ size_t p; -+ for (p=1; p31) + (srcSize>4095); - -- (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ -+ assert(dstCapacity >= 4); (void)dstCapacity; -+ assert(allBytesIdentical(src, srcSize)); - - switch(flSize) - { -@@ -63,28 +103,51 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* - } - - ostart[flSize] = *(const BYTE*)src; -- DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1); -+ DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1); - return flSize+1; - } - --size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, -- ZSTD_hufCTables_t* nextHuf, -- ZSTD_strategy strategy, int 
disableLiteralCompression, -- void* dst, size_t dstCapacity, -- const void* src, size_t srcSize, -- void* entropyWorkspace, size_t entropyWorkspaceSize, -- const int bmi2, -- unsigned suspectUncompressible) -+/* ZSTD_minLiteralsToCompress() : -+ * returns minimal amount of literals -+ * for literal compression to even be attempted. -+ * Minimum is made tighter as compression strategy increases. -+ */ -+static size_t -+ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat) -+{ -+ assert((int)strategy >= 0); -+ assert((int)strategy <= 9); -+ /* btultra2 : min 8 bytes; -+ * then 2x larger for each successive compression strategy -+ * max threshold 64 bytes */ -+ { int const shift = MIN(9-(int)strategy, 3); -+ size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift; -+ DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc); -+ return mintc; -+ } -+} -+ -+size_t ZSTD_compressLiterals ( -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize, -+ void* entropyWorkspace, size_t entropyWorkspaceSize, -+ const ZSTD_hufCTables_t* prevHuf, -+ ZSTD_hufCTables_t* nextHuf, -+ ZSTD_strategy strategy, -+ int disableLiteralCompression, -+ int suspectUncompressible, -+ int bmi2) - { -- size_t const minGain = ZSTD_minGain(srcSize, strategy); - size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); - BYTE* const ostart = (BYTE*)dst; - U32 singleStream = srcSize < 256; - symbolEncodingType_e hType = set_compressed; - size_t cLitSize; - -- DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)", -- disableLiteralCompression, (U32)srcSize); -+ DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)", -+ disableLiteralCompression, (U32)srcSize, dstCapacity); -+ -+ DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize)); - - /* Prepare nextEntropy assuming reusing the existing table */ - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); -@@ -92,40 
+155,51 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, - if (disableLiteralCompression) - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - -- /* small ? don't even attempt compression (speed opt) */ --# define COMPRESS_LITERALS_SIZE_MIN 63 -- { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; -- if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); -- } -+ /* if too small, don't even attempt compression (speed opt) */ -+ if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode)) -+ return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - - RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); - { HUF_repeat repeat = prevHuf->repeatMode; -- int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; -+ int const flags = 0 -+ | (bmi2 ? HUF_flags_bmi2 : 0) -+ | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0) -+ | (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0) -+ | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0); -+ -+ typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int); -+ huf_compress_f huf_compress; - if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; -- cLitSize = singleStream ? -- HUF_compress1X_repeat( -- ostart+lhSize, dstCapacity-lhSize, src, srcSize, -- HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, -- (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) : -- HUF_compress4X_repeat( -- ostart+lhSize, dstCapacity-lhSize, src, srcSize, -- HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, -- (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible); -+ huf_compress = singleStream ? 
HUF_compress1X_repeat : HUF_compress4X_repeat; -+ cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize, -+ src, srcSize, -+ HUF_SYMBOLVALUE_MAX, LitHufLog, -+ entropyWorkspace, entropyWorkspaceSize, -+ (HUF_CElt*)nextHuf->CTable, -+ &repeat, flags); -+ DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize); - if (repeat != HUF_repeat_none) { - /* reused the existing table */ -- DEBUGLOG(5, "Reusing previous huffman table"); -+ DEBUGLOG(5, "reusing statistics from previous huffman block"); - hType = set_repeat; - } - } - -- if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { -- ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); -- return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); -- } -+ { size_t const minGain = ZSTD_minGain(srcSize, strategy); -+ if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { -+ ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); -+ return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); -+ } } - if (cLitSize==1) { -- ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); -- return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); -- } -+ /* A return value of 1 signals that the alphabet consists of a single symbol. -+ * However, in some rare circumstances, it could be the compressed size (a single byte). -+ * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`. -+ * (it's also necessary to not generate statistics). -+ * Therefore, in such a case, actively check that all bytes are identical. 
*/ -+ if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) { -+ ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); -+ return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); -+ } } - - if (hType == set_compressed) { - /* using a newly constructed table */ -@@ -136,16 +210,19 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, - switch(lhSize) - { - case 3: /* 2 - 2 - 10 - 10 */ -- { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); -+ if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); -+ { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); - MEM_writeLE24(ostart, lhc); - break; - } - case 4: /* 2 - 2 - 14 - 14 */ -+ assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); - { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); - MEM_writeLE32(ostart, lhc); - break; - } - case 5: /* 2 - 2 - 18 - 18 */ -+ assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); - { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); - MEM_writeLE32(ostart, lhc); - ostart[4] = (BYTE)(cLitSize >> 10); -diff --git a/lib/zstd/compress/zstd_compress_literals.h b/lib/zstd/compress/zstd_compress_literals.h -index 9775fb97cb70..a2a85d6b69e5 100644 ---- a/lib/zstd/compress/zstd_compress_literals.h -+++ b/lib/zstd/compress/zstd_compress_literals.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -16,16 +17,24 @@ - - size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); - -+/* ZSTD_compressRleLiteralsBlock() : -+ * Conditions : -+ * - All bytes in @src are identical -+ * - dstCapacity >= 4 */ - size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); - --/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ --size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, -- ZSTD_hufCTables_t* nextHuf, -- ZSTD_strategy strategy, int disableLiteralCompression, -- void* dst, size_t dstCapacity, -+/* ZSTD_compressLiterals(): -+ * @entropyWorkspace: must be aligned on 4-bytes boundaries -+ * @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE -+ * @suspectUncompressible: sampling checks, to potentially skip huffman coding -+ */ -+size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - void* entropyWorkspace, size_t entropyWorkspaceSize, -- const int bmi2, -- unsigned suspectUncompressible); -+ const ZSTD_hufCTables_t* prevHuf, -+ ZSTD_hufCTables_t* nextHuf, -+ ZSTD_strategy strategy, int disableLiteralCompression, -+ int suspectUncompressible, -+ int bmi2); - - #endif /* ZSTD_COMPRESS_LITERALS_H */ -diff --git a/lib/zstd/compress/zstd_compress_sequences.c b/lib/zstd/compress/zstd_compress_sequences.c -index 21ddc1b37acf..5c028c78d889 100644 ---- a/lib/zstd/compress/zstd_compress_sequences.c -+++ b/lib/zstd/compress/zstd_compress_sequences.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -58,7 +59,7 @@ static unsigned ZSTD_useLowProbCount(size_t const nbSeq) - { - /* Heuristic: This should cover most blocks <= 16K and - * start to fade out after 16K to about 32K depending on -- * comprssibility. -+ * compressibility. - */ - return nbSeq >= 2048; - } -@@ -166,7 +167,7 @@ ZSTD_selectEncodingType( - if (mostFrequent == nbSeq) { - *repeatMode = FSE_repeat_none; - if (isDefaultAllowed && nbSeq <= 2) { -- /* Prefer set_basic over set_rle when there are 2 or less symbols, -+ /* Prefer set_basic over set_rle when there are 2 or fewer symbols, - * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. - * If basic encoding isn't possible, always choose RLE. - */ -diff --git a/lib/zstd/compress/zstd_compress_sequences.h b/lib/zstd/compress/zstd_compress_sequences.h -index 7991364c2f71..7fe6f4ff5cf2 100644 ---- a/lib/zstd/compress/zstd_compress_sequences.h -+++ b/lib/zstd/compress/zstd_compress_sequences.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -diff --git a/lib/zstd/compress/zstd_compress_superblock.c b/lib/zstd/compress/zstd_compress_superblock.c -index 17d836cc84e8..41f6521b27cd 100644 ---- a/lib/zstd/compress/zstd_compress_superblock.c -+++ b/lib/zstd/compress/zstd_compress_superblock.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -36,13 +37,14 @@ - * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block - * and the following sub-blocks' literals sections will be Treeless_Literals_Block. - * @return : compressed size of literals section of a sub-block -- * Or 0 if it unable to compress. -+ * Or 0 if unable to compress. - * Or error code */ --static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, -- const ZSTD_hufCTablesMetadata_t* hufMetadata, -- const BYTE* literals, size_t litSize, -- void* dst, size_t dstSize, -- const int bmi2, int writeEntropy, int* entropyWritten) -+static size_t -+ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, -+ const ZSTD_hufCTablesMetadata_t* hufMetadata, -+ const BYTE* literals, size_t litSize, -+ void* dst, size_t dstSize, -+ const int bmi2, int writeEntropy, int* entropyWritten) - { - size_t const header = writeEntropy ? 200 : 0; - size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header)); -@@ -53,8 +55,6 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, - symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; - size_t cLitSize = 0; - -- (void)bmi2; /* TODO bmi2... */ -- - DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); - - *entropyWritten = 0; -@@ -76,9 +76,9 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, - DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); - } - -- /* TODO bmi2 */ -- { const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) -- : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); -+ { int const flags = bmi2 ? HUF_flags_bmi2 : 0; -+ const size_t cSize = singleStream ? 
HUF_compress1X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags) -+ : HUF_compress4X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags); - op += cSize; - cLitSize += cSize; - if (cSize == 0 || ERR_isError(cSize)) { -@@ -103,7 +103,7 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, - switch(lhSize) - { - case 3: /* 2 - 2 - 10 - 10 */ -- { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); -+ { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); - MEM_writeLE24(ostart, lhc); - break; - } -@@ -123,26 +123,30 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, - } - *entropyWritten = 1; - DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart)); -- return op-ostart; -+ return (size_t)(op-ostart); - } - --static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) { -- const seqDef* const sstart = sequences; -- const seqDef* const send = sequences + nbSeq; -- const seqDef* sp = sstart; -+static size_t -+ZSTD_seqDecompressedSize(seqStore_t const* seqStore, -+ const seqDef* sequences, size_t nbSeqs, -+ size_t litSize, int lastSubBlock) -+{ - size_t matchLengthSum = 0; - size_t litLengthSum = 0; -- (void)(litLengthSum); /* suppress unused variable warning on some environments */ -- while (send-sp > 0) { -- ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); -+ size_t n; -+ for (n=0; ncParams.windowLog > STREAM_ACCUMULATOR_MIN; - BYTE* const ostart = (BYTE*)dst; -@@ -176,14 +181,14 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables - /* Sequences Header */ - RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, - dstSize_tooSmall, ""); -- if (nbSeq < 0x7F) -+ if (nbSeq < 128) - *op++ = (BYTE)nbSeq; - else if (nbSeq < LONGNBSEQ) - op[0] = 
(BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; - else - op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; - if (nbSeq==0) { -- return op - ostart; -+ return (size_t)(op - ostart); - } - - /* seqHead : flags for FSE encoding type */ -@@ -205,7 +210,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables - } - - { size_t const bitstreamSize = ZSTD_encodeSequences( -- op, oend - op, -+ op, (size_t)(oend - op), - fseTables->matchlengthCTable, mlCode, - fseTables->offcodeCTable, ofCode, - fseTables->litlengthCTable, llCode, -@@ -249,7 +254,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables - #endif - - *entropyWritten = 1; -- return op - ostart; -+ return (size_t)(op - ostart); - } - - /* ZSTD_compressSubBlock() : -@@ -275,7 +280,8 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, - litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock); - { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable, - &entropyMetadata->hufMetadata, literals, litSize, -- op, oend-op, bmi2, writeLitEntropy, litEntropyWritten); -+ op, (size_t)(oend-op), -+ bmi2, writeLitEntropy, litEntropyWritten); - FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed"); - if (cLitSize == 0) return 0; - op += cLitSize; -@@ -285,18 +291,18 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, - sequences, nbSeq, - llCode, mlCode, ofCode, - cctxParams, -- op, oend-op, -+ op, (size_t)(oend-op), - bmi2, writeSeqEntropy, seqEntropyWritten); - FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed"); - if (cSeqSize == 0) return 0; - op += cSeqSize; - } - /* Write block header */ -- { size_t cSize = (op-ostart)-ZSTD_blockHeaderSize; -+ { size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize; - U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - MEM_writeLE24(ostart, 
cBlockHeader24); - } -- return op-ostart; -+ return (size_t)(op-ostart); - } - - static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, -@@ -385,7 +391,11 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, - return cSeqSizeEstimate + sequencesSectionHeaderSize; - } - --static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, -+typedef struct { -+ size_t estLitSize; -+ size_t estBlockSize; -+} EstimatedBlockSize; -+static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, - const BYTE* ofCodeTable, - const BYTE* llCodeTable, - const BYTE* mlCodeTable, -@@ -393,15 +403,17 @@ static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, - const ZSTD_entropyCTables_t* entropy, - const ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize, -- int writeLitEntropy, int writeSeqEntropy) { -- size_t cSizeEstimate = 0; -- cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, -- &entropy->huf, &entropyMetadata->hufMetadata, -- workspace, wkspSize, writeLitEntropy); -- cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, -+ int writeLitEntropy, int writeSeqEntropy) -+{ -+ EstimatedBlockSize ebs; -+ ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize, -+ &entropy->huf, &entropyMetadata->hufMetadata, -+ workspace, wkspSize, writeLitEntropy); -+ ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, - nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, - workspace, wkspSize, writeSeqEntropy); -- return cSizeEstimate + ZSTD_blockHeaderSize; -+ ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize; -+ return ebs; - } - - static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata) -@@ -415,13 +427,56 @@ static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMe - 
return 0; - } - -+static size_t countLiterals(seqStore_t const* seqStore, const seqDef* sp, size_t seqCount) -+{ -+ size_t n, total = 0; -+ assert(sp != NULL); -+ for (n=0; n %zu bytes", seqCount, (const void*)sp, total); -+ return total; -+} -+ -+#define BYTESCALE 256 -+ -+static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs, -+ size_t targetBudget, size_t avgLitCost, size_t avgSeqCost, -+ int firstSubBlock) -+{ -+ size_t n, budget = 0, inSize=0; -+ /* entropy headers */ -+ size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */ -+ assert(firstSubBlock==0 || firstSubBlock==1); -+ budget += headerSize; -+ -+ /* first sequence => at least one sequence*/ -+ budget += sp[0].litLength * avgLitCost + avgSeqCost; -+ if (budget > targetBudget) return 1; -+ inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH); -+ -+ /* loop over sequences */ -+ for (n=1; n targetBudget) -+ /* though continue to expand until the sub-block is deemed compressible */ -+ && (budget < inSize * BYTESCALE) ) -+ break; -+ } -+ -+ return n; -+} -+ - /* ZSTD_compressSubBlock_multi() : - * Breaks super-block into multiple sub-blocks and compresses them. -- * Entropy will be written to the first block. -- * The following blocks will use repeat mode to compress. -- * All sub-blocks are compressed blocks (no raw or rle blocks). -- * @return : compressed size of the super block (which is multiple ZSTD blocks) -- * Or 0 if it failed to compress. */ -+ * Entropy will be written into the first block. -+ * The following blocks use repeat_mode to compress. -+ * Sub-blocks are all compressed, except the last one when beneficial. -+ * @return : compressed size of the super block (which features multiple ZSTD blocks) -+ * or 0 if it failed to compress. 
*/ - static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, - const ZSTD_compressedBlockState_t* prevCBlock, - ZSTD_compressedBlockState_t* nextCBlock, -@@ -434,10 +489,12 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, - { - const seqDef* const sstart = seqStorePtr->sequencesStart; - const seqDef* const send = seqStorePtr->sequences; -- const seqDef* sp = sstart; -+ const seqDef* sp = sstart; /* tracks progresses within seqStorePtr->sequences */ -+ size_t const nbSeqs = (size_t)(send - sstart); - const BYTE* const lstart = seqStorePtr->litStart; - const BYTE* const lend = seqStorePtr->lit; - const BYTE* lp = lstart; -+ size_t const nbLiterals = (size_t)(lend - lstart); - BYTE const* ip = (BYTE const*)src; - BYTE const* const iend = ip + srcSize; - BYTE* const ostart = (BYTE*)dst; -@@ -446,112 +503,171 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, - const BYTE* llCodePtr = seqStorePtr->llCode; - const BYTE* mlCodePtr = seqStorePtr->mlCode; - const BYTE* ofCodePtr = seqStorePtr->ofCode; -- size_t targetCBlockSize = cctxParams->targetCBlockSize; -- size_t litSize, seqCount; -- int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed; -+ size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */ -+ size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize); -+ int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed); - int writeSeqEntropy = 1; -- int lastSequence = 0; -- -- DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)", -- (unsigned)(lend-lp), (unsigned)(send-sstart)); -- -- litSize = 0; -- seqCount = 0; -- do { -- size_t cBlockSizeEstimate = 0; -- if (sstart == send) { -- lastSequence = 1; -- } else { -- const seqDef* const sequence = sp + seqCount; -- lastSequence = sequence == send - 1; -- litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength; -- 
seqCount++; -- } -- if (lastSequence) { -- assert(lp <= lend); -- assert(litSize <= (size_t)(lend - lp)); -- litSize = (size_t)(lend - lp); -+ -+ DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)", -+ (unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart)); -+ -+ /* let's start by a general estimation for the full block */ -+ if (nbSeqs > 0) { -+ EstimatedBlockSize const ebs = -+ ZSTD_estimateSubBlockSize(lp, nbLiterals, -+ ofCodePtr, llCodePtr, mlCodePtr, nbSeqs, -+ &nextCBlock->entropy, entropyMetadata, -+ workspace, wkspSize, -+ writeLitEntropy, writeSeqEntropy); -+ /* quick estimation */ -+ size_t const avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE; -+ size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs; -+ const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1); -+ size_t n, avgBlockBudget, blockBudgetSupp=0; -+ avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks; -+ DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes", -+ (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE, -+ (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE); -+ /* simplification: if estimates states that the full superblock doesn't compress, just bail out immediately -+ * this will result in the production of a single uncompressed block covering @srcSize.*/ -+ if (ebs.estBlockSize > srcSize) return 0; -+ -+ /* compress and write sub-blocks */ -+ assert(nbSubBlocks>0); -+ for (n=0; n < nbSubBlocks-1; n++) { -+ /* determine nb of sequences for current sub-block + nbLiterals from next sequence */ -+ size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp), -+ avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0); -+ /* if reached last sequence : break to last 
sub-block (simplification) */ -+ assert(seqCount <= (size_t)(send-sp)); -+ if (sp + seqCount == send) break; -+ assert(seqCount > 0); -+ /* compress sub-block */ -+ { int litEntropyWritten = 0; -+ int seqEntropyWritten = 0; -+ size_t litSize = countLiterals(seqStorePtr, sp, seqCount); -+ const size_t decompressedSize = -+ ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0); -+ size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, -+ sp, seqCount, -+ lp, litSize, -+ llCodePtr, mlCodePtr, ofCodePtr, -+ cctxParams, -+ op, (size_t)(oend-op), -+ bmi2, writeLitEntropy, writeSeqEntropy, -+ &litEntropyWritten, &seqEntropyWritten, -+ 0); -+ FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); -+ -+ /* check compressibility, update state components */ -+ if (cSize > 0 && cSize < decompressedSize) { -+ DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes", -+ (unsigned)decompressedSize, (unsigned)cSize); -+ assert(ip + decompressedSize <= iend); -+ ip += decompressedSize; -+ lp += litSize; -+ op += cSize; -+ llCodePtr += seqCount; -+ mlCodePtr += seqCount; -+ ofCodePtr += seqCount; -+ /* Entropy only needs to be written once */ -+ if (litEntropyWritten) { -+ writeLitEntropy = 0; -+ } -+ if (seqEntropyWritten) { -+ writeSeqEntropy = 0; -+ } -+ sp += seqCount; -+ blockBudgetSupp = 0; -+ } } -+ /* otherwise : do not compress yet, coalesce current sub-block with following one */ - } -- /* I think there is an optimization opportunity here. -- * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful -- * since it recalculates estimate from scratch. -- * For example, it would recount literal distribution and symbol codes every time. 
-- */ -- cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, -- &nextCBlock->entropy, entropyMetadata, -- workspace, wkspSize, writeLitEntropy, writeSeqEntropy); -- if (cBlockSizeEstimate > targetCBlockSize || lastSequence) { -- int litEntropyWritten = 0; -- int seqEntropyWritten = 0; -- const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence); -- const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, -- sp, seqCount, -- lp, litSize, -- llCodePtr, mlCodePtr, ofCodePtr, -- cctxParams, -- op, oend-op, -- bmi2, writeLitEntropy, writeSeqEntropy, -- &litEntropyWritten, &seqEntropyWritten, -- lastBlock && lastSequence); -- FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); -- if (cSize > 0 && cSize < decompressedSize) { -- DEBUGLOG(5, "Committed the sub-block"); -- assert(ip + decompressedSize <= iend); -- ip += decompressedSize; -- sp += seqCount; -- lp += litSize; -- op += cSize; -- llCodePtr += seqCount; -- mlCodePtr += seqCount; -- ofCodePtr += seqCount; -- litSize = 0; -- seqCount = 0; -- /* Entropy only needs to be written once */ -- if (litEntropyWritten) { -- writeLitEntropy = 0; -- } -- if (seqEntropyWritten) { -- writeSeqEntropy = 0; -- } -+ } /* if (nbSeqs > 0) */ -+ -+ /* write last block */ -+ DEBUGLOG(5, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp)); -+ { int litEntropyWritten = 0; -+ int seqEntropyWritten = 0; -+ size_t litSize = (size_t)(lend - lp); -+ size_t seqCount = (size_t)(send - sp); -+ const size_t decompressedSize = -+ ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1); -+ size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, -+ sp, seqCount, -+ lp, litSize, -+ llCodePtr, mlCodePtr, ofCodePtr, -+ cctxParams, -+ op, (size_t)(oend-op), -+ bmi2, writeLitEntropy, writeSeqEntropy, -+ &litEntropyWritten, &seqEntropyWritten, -+ lastBlock); -+ 
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); -+ -+ /* update pointers, the nb of literals borrowed from next sequence must be preserved */ -+ if (cSize > 0 && cSize < decompressedSize) { -+ DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes", -+ (unsigned)decompressedSize, (unsigned)cSize); -+ assert(ip + decompressedSize <= iend); -+ ip += decompressedSize; -+ lp += litSize; -+ op += cSize; -+ llCodePtr += seqCount; -+ mlCodePtr += seqCount; -+ ofCodePtr += seqCount; -+ /* Entropy only needs to be written once */ -+ if (litEntropyWritten) { -+ writeLitEntropy = 0; - } -+ if (seqEntropyWritten) { -+ writeSeqEntropy = 0; -+ } -+ sp += seqCount; - } -- } while (!lastSequence); -+ } -+ -+ - if (writeLitEntropy) { -- DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten"); -+ DEBUGLOG(5, "Literal entropy tables were never written"); - ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); - } - if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { - /* If we haven't written our entropy tables, then we've violated our contract and - * must emit an uncompressed block. 
- */ -- DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten"); -+ DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block"); - return 0; - } -+ - if (ip < iend) { -- size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock); -- DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip)); -+ /* some data left : last part of the block sent uncompressed */ -+ size_t const rSize = (size_t)((iend - ip)); -+ size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock); -+ DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize)); - FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); - assert(cSize != 0); - op += cSize; - /* We have to regenerate the repcodes because we've skipped some sequences */ - if (sp < send) { -- seqDef const* seq; -+ const seqDef* seq; - repcodes_t rep; - ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep)); - for (seq = sstart; seq < sp; ++seq) { -- ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); -+ ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); - } - ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep)); - } - } -- DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed"); -- return op-ostart; -+ -+ DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u", -+ (unsigned)(op-ostart)); -+ return (size_t)(op-ostart); - } - - size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, -- void const* src, size_t srcSize, -- unsigned lastBlock) { -+ const void* src, size_t srcSize, -+ unsigned lastBlock) -+{ - ZSTD_entropyCTablesMetadata_t entropyMetadata; - - FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore, -diff --git a/lib/zstd/compress/zstd_compress_superblock.h b/lib/zstd/compress/zstd_compress_superblock.h 
-index 224ece79546e..826bbc9e029b 100644 ---- a/lib/zstd/compress/zstd_compress_superblock.h -+++ b/lib/zstd/compress/zstd_compress_superblock.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -diff --git a/lib/zstd/compress/zstd_cwksp.h b/lib/zstd/compress/zstd_cwksp.h -index 349fc923c355..86bc3c2c23c7 100644 ---- a/lib/zstd/compress/zstd_cwksp.h -+++ b/lib/zstd/compress/zstd_cwksp.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -14,7 +15,9 @@ - /*-************************************* - * Dependencies - ***************************************/ -+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */ - #include "../common/zstd_internal.h" -+#include "../common/portability_macros.h" - - - /*-************************************* -@@ -41,8 +44,9 @@ - ***************************************/ - typedef enum { - ZSTD_cwksp_alloc_objects, -- ZSTD_cwksp_alloc_buffers, -- ZSTD_cwksp_alloc_aligned -+ ZSTD_cwksp_alloc_aligned_init_once, -+ ZSTD_cwksp_alloc_aligned, -+ ZSTD_cwksp_alloc_buffers - } ZSTD_cwksp_alloc_phase_e; - - /* -@@ -95,8 +99,8 @@ typedef enum { - * - * Workspace Layout: - * -- * [ ... workspace ... ] -- * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] -+ * [ ... workspace ... 
] -+ * [objects][tables ->] free space [<- buffers][<- aligned][<- init once] - * - * The various objects that live in the workspace are divided into the - * following categories, and are allocated separately: -@@ -120,9 +124,18 @@ typedef enum { - * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). - * Their sizes depend on the cparams. These tables are 64-byte aligned. - * -- * - Aligned: these buffers are used for various purposes that require 4 byte -- * alignment, but don't require any initialization before they're used. These -- * buffers are each aligned to 64 bytes. -+ * - Init once: these buffers require to be initialized at least once before -+ * use. They should be used when we want to skip memory initialization -+ * while not triggering memory checkers (like Valgrind) when reading from -+ * from this memory without writing to it first. -+ * These buffers should be used carefully as they might contain data -+ * from previous compressions. -+ * Buffers are aligned to 64 bytes. -+ * -+ * - Aligned: these buffers don't require any initialization before they're -+ * used. The user of the buffer should make sure they write into a buffer -+ * location before reading from it. -+ * Buffers are aligned to 64 bytes. - * - * - Buffers: these buffers are used for various purposes that don't require - * any alignment or initialization before they're used. This means they can -@@ -134,8 +147,9 @@ typedef enum { - * correctly packed into the workspace buffer. That order is: - * - * 1. Objects -- * 2. Buffers -- * 3. Aligned/Tables -+ * 2. Init once / Tables -+ * 3. Aligned / Tables -+ * 4. Buffers / Tables - * - * Attempts to reserve objects of different types out of order will fail. 
- */ -@@ -147,6 +161,7 @@ typedef struct { - void* tableEnd; - void* tableValidEnd; - void* allocStart; -+ void* initOnceStart; - - BYTE allocFailed; - int workspaceOversizedDuration; -@@ -159,6 +174,7 @@ typedef struct { - ***************************************/ - - MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); -+MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws); - - MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { - (void)ws; -@@ -168,6 +184,8 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { - assert(ws->tableEnd <= ws->allocStart); - assert(ws->tableValidEnd <= ws->allocStart); - assert(ws->allocStart <= ws->workspaceEnd); -+ assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws)); -+ assert(ws->workspace <= ws->initOnceStart); - } - - /* -@@ -210,14 +228,10 @@ MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) { - * for internal purposes (currently only alignment). - */ - MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) { -- /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes -- * to align the beginning of tables section, as well as another n_2=[0, 63] bytes -- * to align the beginning of the aligned section. -- * -- * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and -- * aligneds being sized in multiples of 64 bytes. 
-+ /* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES -+ * bytes to align the beginning of tables section and end of buffers; - */ -- size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES; -+ size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2; - return slackSpace; - } - -@@ -230,10 +244,18 @@ MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignByt - size_t const alignBytesMask = alignBytes - 1; - size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask; - assert((alignBytes & alignBytesMask) == 0); -- assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES); -+ assert(bytes < alignBytes); - return bytes; - } - -+/* -+ * Returns the initial value for allocStart which is used to determine the position from -+ * which we can allocate from the end of the workspace. -+ */ -+MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) { -+ return (void*)((size_t)ws->workspaceEnd & ~(ZSTD_CWKSP_ALIGNMENT_BYTES-1)); -+} -+ - /* - * Internal function. Do not use directly. - * Reserves the given number of bytes within the aligned/buffer segment of the wksp, -@@ -274,27 +296,16 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase - { - assert(phase >= ws->phase); - if (phase > ws->phase) { -- /* Going from allocating objects to allocating buffers */ -- if (ws->phase < ZSTD_cwksp_alloc_buffers && -- phase >= ZSTD_cwksp_alloc_buffers) { -+ /* Going from allocating objects to allocating initOnce / tables */ -+ if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once && -+ phase >= ZSTD_cwksp_alloc_aligned_init_once) { - ws->tableValidEnd = ws->objectEnd; -- } -+ ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws); - -- /* Going from allocating buffers to allocating aligneds/tables */ -- if (ws->phase < ZSTD_cwksp_alloc_aligned && -- phase >= ZSTD_cwksp_alloc_aligned) { -- { /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. 
*/ -- size_t const bytesToAlign = -- ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES); -- DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign); -- ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */ -- RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign), -- memory_allocation, "aligned phase - alignment initial allocation failed!"); -- } - { /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */ -- void* const alloc = ws->objectEnd; -+ void *const alloc = ws->objectEnd; - size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES); -- void* const objectEnd = (BYTE*)alloc + bytesToAlign; -+ void *const objectEnd = (BYTE *) alloc + bytesToAlign; - DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign); - RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation, - "table phase - alignment initial allocation failed!"); -@@ -302,7 +313,9 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase - ws->tableEnd = objectEnd; /* table area starts being empty */ - if (ws->tableValidEnd < ws->tableEnd) { - ws->tableValidEnd = ws->tableEnd; -- } } } -+ } -+ } -+ } - ws->phase = phase; - ZSTD_cwksp_assert_internal_consistency(ws); - } -@@ -314,7 +327,7 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase - */ - MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) - { -- return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); -+ return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd); - } - - /* -@@ -343,6 +356,33 @@ MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) - return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); - } - -+/* -+ * Reserves and returns memory sized on and aligned on 
ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes). -+ * This memory has been initialized at least once in the past. -+ * This doesn't mean it has been initialized this time, and it might contain data from previous -+ * operations. -+ * The main usage is for algorithms that might need read access into uninitialized memory. -+ * The algorithm must maintain safety under these conditions and must make sure it doesn't -+ * leak any of the past data (directly or in side channels). -+ */ -+MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t bytes) -+{ -+ size_t const alignedBytes = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES); -+ void* ptr = ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned_init_once); -+ assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); -+ if(ptr && ptr < ws->initOnceStart) { -+ /* We assume the memory following the current allocation is either: -+ * 1. Not usable as initOnce memory (end of workspace) -+ * 2. Another initOnce buffer that has been allocated before (and so was previously memset) -+ * 3. An ASAN redzone, in which case we don't want to write on it -+ * For these reasons it should be fine to not explicitly zero every byte up to ws->initOnceStart. -+ * Note that we assume here that MSAN and ASAN cannot run in the same time. */ -+ ZSTD_memset(ptr, 0, MIN((size_t)((U8*)ws->initOnceStart - (U8*)ptr), alignedBytes)); -+ ws->initOnceStart = ptr; -+ } -+ return ptr; -+} -+ - /* - * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes). - */ -@@ -356,18 +396,22 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) - - /* - * Aligned on 64 bytes. These buffers have the special property that -- * their values remain constrained, allowing us to re-use them without -+ * their values remain constrained, allowing us to reuse them without - * memset()-ing them. 
- */ - MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) - { -- const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; -+ const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned_init_once; - void* alloc; - void* end; - void* top; - -- if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) { -- return NULL; -+ /* We can only start allocating tables after we are done reserving space for objects at the -+ * start of the workspace */ -+ if(ws->phase < phase) { -+ if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) { -+ return NULL; -+ } - } - alloc = ws->tableEnd; - end = (BYTE *)alloc + bytes; -@@ -451,7 +495,7 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { - assert(ws->tableValidEnd >= ws->objectEnd); - assert(ws->tableValidEnd <= ws->allocStart); - if (ws->tableValidEnd < ws->tableEnd) { -- ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); -+ ZSTD_memset(ws->tableValidEnd, 0, (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd)); - } - ZSTD_cwksp_mark_tables_clean(ws); - } -@@ -478,14 +522,23 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { - - - ws->tableEnd = ws->objectEnd; -- ws->allocStart = ws->workspaceEnd; -+ ws->allocStart = ZSTD_cwksp_initialAllocStart(ws); - ws->allocFailed = 0; -- if (ws->phase > ZSTD_cwksp_alloc_buffers) { -- ws->phase = ZSTD_cwksp_alloc_buffers; -+ if (ws->phase > ZSTD_cwksp_alloc_aligned_init_once) { -+ ws->phase = ZSTD_cwksp_alloc_aligned_init_once; - } - ZSTD_cwksp_assert_internal_consistency(ws); - } - -+MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { -+ return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); -+} -+ -+MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) { -+ return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace) -+ + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart); -+} -+ - /* - * The provided workspace takes ownership of the buffer [start, start+size). 
- * Any existing values in the workspace are ignored (the previously managed -@@ -498,6 +551,7 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_c - ws->workspaceEnd = (BYTE*)start + size; - ws->objectEnd = ws->workspace; - ws->tableValidEnd = ws->objectEnd; -+ ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws); - ws->phase = ZSTD_cwksp_alloc_objects; - ws->isStatic = isStatic; - ZSTD_cwksp_clear(ws); -@@ -529,15 +583,6 @@ MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { - ZSTD_memset(src, 0, sizeof(ZSTD_cwksp)); - } - --MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { -- return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); --} -- --MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) { -- return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace) -- + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart); --} -- - MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { - return ws->allocFailed; - } -@@ -550,17 +595,11 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { - * Returns if the estimated space needed for a wksp is within an acceptable limit of the - * actual amount of space used. - */ --MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws, -- size_t const estimatedSpace, int resizedWorkspace) { -- if (resizedWorkspace) { -- /* Resized/newly allocated wksp should have exact bounds */ -- return ZSTD_cwksp_used(ws) == estimatedSpace; -- } else { -- /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes -- * than estimatedSpace. See the comments in zstd_cwksp.h for details. 
-- */ -- return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63); -- } -+MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp *const ws, size_t const estimatedSpace) { -+ /* We have an alignment space between objects and tables between tables and buffers, so we can have up to twice -+ * the alignment bytes difference between estimation and actual usage */ -+ return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) && -+ ZSTD_cwksp_used(ws) <= estimatedSpace; - } - - -diff --git a/lib/zstd/compress/zstd_double_fast.c b/lib/zstd/compress/zstd_double_fast.c -index 76933dea2624..5ff54f17d92f 100644 ---- a/lib/zstd/compress/zstd_double_fast.c -+++ b/lib/zstd/compress/zstd_double_fast.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -11,8 +12,49 @@ - #include "zstd_compress_internal.h" - #include "zstd_double_fast.h" - -+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR - --void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms, -+ void const* end, ZSTD_dictTableLoadMethod_e dtlm) -+{ -+ const ZSTD_compressionParameters* const cParams = &ms->cParams; -+ U32* const hashLarge = ms->hashTable; -+ U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; -+ U32 const mls = cParams->minMatch; -+ U32* const hashSmall = ms->chainTable; -+ U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS; -+ const BYTE* const base = ms->window.base; -+ const BYTE* ip = base + ms->nextToUpdate; -+ const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; -+ const U32 fastHashFillStep = 3; -+ -+ /* Always insert every fastHashFillStep position 
into the hash tables. -+ * Insert the other positions into the large hash table if their entry -+ * is empty. -+ */ -+ for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { -+ U32 const curr = (U32)(ip - base); -+ U32 i; -+ for (i = 0; i < fastHashFillStep; ++i) { -+ size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls); -+ size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8); -+ if (i == 0) { -+ ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i); -+ } -+ if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) { -+ ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i); -+ } -+ /* Only load extra positions for ZSTD_dtlm_full */ -+ if (dtlm == ZSTD_dtlm_fast) -+ break; -+ } } -+} -+ -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms, - void const* end, ZSTD_dictTableLoadMethod_e dtlm) - { - const ZSTD_compressionParameters* const cParams = &ms->cParams; -@@ -43,11 +85,24 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, - /* Only load extra positions for ZSTD_dtlm_full */ - if (dtlm == ZSTD_dtlm_fast) - break; -- } } -+ } } -+} -+ -+void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, -+ const void* const end, -+ ZSTD_dictTableLoadMethod_e dtlm, -+ ZSTD_tableFillPurpose_e tfp) -+{ -+ if (tfp == ZSTD_tfp_forCDict) { -+ ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm); -+ } else { -+ ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm); -+ } - } - - - FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_compressBlock_doubleFast_noDict_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, U32 const mls /* template */) -@@ -67,7 +122,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic( - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=rep[0], offset_2=rep[1]; -- U32 offsetSaved = 0; -+ U32 offsetSaved1 = 0, offsetSaved2 = 
0; - - size_t mLength; - U32 offset; -@@ -100,8 +155,8 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic( - U32 const current = (U32)(ip - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); - U32 const maxRep = current - windowLow; -- if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; -- if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; -+ if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0; -+ if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0; - } - - /* Outer Loop: one iteration per match found and stored */ -@@ -131,7 +186,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic( - if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { - mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; - ip++; -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); -+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); - goto _match_stored; - } - -@@ -175,9 +230,13 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic( - } while (ip1 <= ilimit); - - _cleanup: -+ /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0), -+ * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */ -+ offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2; -+ - /* save reps for next block */ -- rep[0] = offset_1 ? offset_1 : offsetSaved; -- rep[1] = offset_2 ? offset_2 : offsetSaved; -+ rep[0] = offset_1 ? offset_1 : offsetSaved1; -+ rep[1] = offset_2 ? 
offset_2 : offsetSaved2; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -@@ -217,7 +276,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic( - hashLong[hl1] = (U32)(ip1 - base); - } - -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); -+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); - - _match_stored: - /* match found */ -@@ -243,7 +302,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic( - U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); -- ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength); -+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength); - ip += rLength; - anchor = ip; - continue; /* faster when present ... (?) */ -@@ -254,6 +313,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic( - - - FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, -@@ -275,7 +335,6 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=rep[0], offset_2=rep[1]; -- U32 offsetSaved = 0; - - const ZSTD_matchState_t* const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dictCParams = &dms->cParams; -@@ -286,8 +345,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( - const BYTE* const dictStart = dictBase + dictStartIndex; - const BYTE* const dictEnd = dms->window.nextSrc; - const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase); -- const U32 dictHBitsL = dictCParams->hashLog; -- const U32 dictHBitsS = 
dictCParams->chainLog; -+ const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; -+ const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS; - const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); - - DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic"); -@@ -295,6 +354,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( - /* if a dictionary is attached, it must be within window range */ - assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); - -+ if (ms->prefetchCDictTables) { -+ size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32); -+ size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32); -+ PREFETCH_AREA(dictHashLong, hashTableBytes); -+ PREFETCH_AREA(dictHashSmall, chainTableBytes); -+ } -+ - /* init */ - ip += (dictAndPrefixLength == 0); - -@@ -309,8 +375,12 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( - U32 offset; - size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); - size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); -- size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); -- size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); -+ size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8); -+ size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls); -+ U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS]; -+ U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS]; -+ int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL); -+ int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS); - U32 const curr = (U32)(ip-base); - U32 const matchIndexL = hashLong[h2]; - U32 matchIndexS = hashSmall[h]; -@@ -328,7 +398,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( - const BYTE* repMatchEnd = repIndex < 
prefixLowestIndex ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; - ip++; -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); -+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); - goto _match_stored; - } - -@@ -340,9 +410,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( - while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ - goto _match_found; - } -- } else { -+ } else if (dictTagsMatchL) { - /* check dictMatchState long match */ -- U32 const dictMatchIndexL = dictHashLong[dictHL]; -+ U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS; - const BYTE* dictMatchL = dictBase + dictMatchIndexL; - assert(dictMatchL < dictEnd); - -@@ -358,9 +428,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( - if (MEM_read32(match) == MEM_read32(ip)) { - goto _search_next_long; - } -- } else { -+ } else if (dictTagsMatchS) { - /* check dictMatchState short match */ -- U32 const dictMatchIndexS = dictHashSmall[dictHS]; -+ U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS; - match = dictBase + dictMatchIndexS; - matchIndexS = dictMatchIndexS + dictIndexDelta; - -@@ -375,10 +445,11 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( - continue; - - _search_next_long: -- - { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); -- size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); -+ size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8); - U32 const matchIndexL3 = hashLong[hl3]; -+ U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS]; -+ int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3); - const BYTE* matchL3 = base + matchIndexL3; - hashLong[hl3] = curr + 1; - -@@ -391,9 +462,9 
@@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( - while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ - goto _match_found; - } -- } else { -+ } else if (dictTagsMatchL3) { - /* check dict long +1 match */ -- U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; -+ U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS; - const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; - assert(dictMatchL3 < dictEnd); - if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { -@@ -419,7 +490,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( - offset_2 = offset_1; - offset_1 = offset; - -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); -+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); - - _match_stored: - /* match found */ -@@ -448,7 +519,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( - const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ -- ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); -+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; - ip += repLength2; -@@ -461,8 +532,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( - } /* while (ip < ilimit) */ - - /* save reps for next block */ -- rep[0] = offset_1 ? offset_1 : offsetSaved; -- rep[1] = offset_2 ? 
offset_2 : offsetSaved; -+ rep[0] = offset_1; -+ rep[1] = offset_2; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -@@ -527,7 +598,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState( - } - - --static size_t ZSTD_compressBlock_doubleFast_extDict_generic( -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t ZSTD_compressBlock_doubleFast_extDict_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, - U32 const mls /* template */) -@@ -585,7 +658,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( - const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; - ip++; -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); -+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); - } else { - if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { - const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? 
dictEnd : iend; -@@ -596,7 +669,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( - while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); -+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); - - } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { - size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); -@@ -621,7 +694,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( - } - offset_2 = offset_1; - offset_1 = offset; -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); -+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); - - } else { - ip += ((ip-anchor) >> kSearchStrength) + 1; -@@ -653,7 +726,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( - const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ -- ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); -+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; - ip += repLength2; -@@ -694,3 +767,5 @@ size_t ZSTD_compressBlock_doubleFast_extDict( - return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize); - } - } -+ -+#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */ -diff --git a/lib/zstd/compress/zstd_double_fast.h b/lib/zstd/compress/zstd_double_fast.h -index 6822bde65a1d..b7ddc714f13e 100644 ---- a/lib/zstd/compress/zstd_double_fast.h -+++ b/lib/zstd/compress/zstd_double_fast.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -15,8 +16,12 @@ - #include "../common/mem.h" /* U32 */ - #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ - -+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR -+ - void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, -- void const* end, ZSTD_dictTableLoadMethod_e dtlm); -+ void const* end, ZSTD_dictTableLoadMethod_e dtlm, -+ ZSTD_tableFillPurpose_e tfp); -+ - size_t ZSTD_compressBlock_doubleFast( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -@@ -27,6 +32,14 @@ size_t ZSTD_compressBlock_doubleFast_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST ZSTD_compressBlock_doubleFast -+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE ZSTD_compressBlock_doubleFast_dictMatchState -+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT ZSTD_compressBlock_doubleFast_extDict -+#else -+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST NULL -+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE NULL -+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT NULL -+#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */ - - - #endif /* ZSTD_DOUBLE_FAST_H */ -diff --git a/lib/zstd/compress/zstd_fast.c b/lib/zstd/compress/zstd_fast.c -index a752e6beab52..b7a63ba4ce56 100644 ---- a/lib/zstd/compress/zstd_fast.c -+++ b/lib/zstd/compress/zstd_fast.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -11,8 +12,46 @@ - #include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */ - #include "zstd_fast.h" - -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms, -+ const void* const end, -+ ZSTD_dictTableLoadMethod_e dtlm) -+{ -+ const ZSTD_compressionParameters* const cParams = &ms->cParams; -+ U32* const hashTable = ms->hashTable; -+ U32 const hBits = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; -+ U32 const mls = cParams->minMatch; -+ const BYTE* const base = ms->window.base; -+ const BYTE* ip = base + ms->nextToUpdate; -+ const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; -+ const U32 fastHashFillStep = 3; - --void ZSTD_fillHashTable(ZSTD_matchState_t* ms, -+ /* Currently, we always use ZSTD_dtlm_full for filling CDict tables. -+ * Feel free to remove this assert if there's a good reason! */ -+ assert(dtlm == ZSTD_dtlm_full); -+ -+ /* Always insert every fastHashFillStep position into the hash table. -+ * Insert the other positions if their hash entry is empty. 
-+ */ -+ for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { -+ U32 const curr = (U32)(ip - base); -+ { size_t const hashAndTag = ZSTD_hashPtr(ip, hBits, mls); -+ ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr); } -+ -+ if (dtlm == ZSTD_dtlm_fast) continue; -+ /* Only load extra positions for ZSTD_dtlm_full */ -+ { U32 p; -+ for (p = 1; p < fastHashFillStep; ++p) { -+ size_t const hashAndTag = ZSTD_hashPtr(ip + p, hBits, mls); -+ if (hashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) { /* not yet filled */ -+ ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr + p); -+ } } } } -+} -+ -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms, - const void* const end, - ZSTD_dictTableLoadMethod_e dtlm) - { -@@ -25,6 +64,10 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms, - const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; - const U32 fastHashFillStep = 3; - -+ /* Currently, we always use ZSTD_dtlm_fast for filling CCtx tables. -+ * Feel free to remove this assert if there's a good reason! */ -+ assert(dtlm == ZSTD_dtlm_fast); -+ - /* Always insert every fastHashFillStep position into the hash table. - * Insert the other positions if their hash entry is empty. - */ -@@ -42,6 +85,18 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms, - } } } } - } - -+void ZSTD_fillHashTable(ZSTD_matchState_t* ms, -+ const void* const end, -+ ZSTD_dictTableLoadMethod_e dtlm, -+ ZSTD_tableFillPurpose_e tfp) -+{ -+ if (tfp == ZSTD_tfp_forCDict) { -+ ZSTD_fillHashTableForCDict(ms, end, dtlm); -+ } else { -+ ZSTD_fillHashTableForCCtx(ms, end, dtlm); -+ } -+} -+ - - /* - * If you squint hard enough (and ignore repcodes), the search operation at any -@@ -89,8 +144,9 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms, - * - * This is also the work we do at the beginning to enter the loop initially. 
- */ --FORCE_INLINE_TEMPLATE size_t --ZSTD_compressBlock_fast_noDict_generic( -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t ZSTD_compressBlock_fast_noDict_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, - U32 const mls, U32 const hasStep) -@@ -117,7 +173,7 @@ ZSTD_compressBlock_fast_noDict_generic( - - U32 rep_offset1 = rep[0]; - U32 rep_offset2 = rep[1]; -- U32 offsetSaved = 0; -+ U32 offsetSaved1 = 0, offsetSaved2 = 0; - - size_t hash0; /* hash for ip0 */ - size_t hash1; /* hash for ip1 */ -@@ -141,8 +197,8 @@ ZSTD_compressBlock_fast_noDict_generic( - { U32 const curr = (U32)(ip0 - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); - U32 const maxRep = curr - windowLow; -- if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0; -- if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0; -+ if (rep_offset2 > maxRep) offsetSaved2 = rep_offset2, rep_offset2 = 0; -+ if (rep_offset1 > maxRep) offsetSaved1 = rep_offset1, rep_offset1 = 0; - } - - /* start each op */ -@@ -180,8 +236,14 @@ ZSTD_compressBlock_fast_noDict_generic( - mLength = ip0[-1] == match0[-1]; - ip0 -= mLength; - match0 -= mLength; -- offcode = STORE_REPCODE_1; -+ offcode = REPCODE1_TO_OFFBASE; - mLength += 4; -+ -+ /* First write next hash table entry; we've already calculated it. -+ * This write is known to be safe because the ip1 is before the -+ * repcode (ip2). */ -+ hashTable[hash1] = (U32)(ip1 - base); -+ - goto _match; - } - -@@ -195,6 +257,12 @@ ZSTD_compressBlock_fast_noDict_generic( - /* check match at ip[0] */ - if (MEM_read32(ip0) == mval) { - /* found a match! */ -+ -+ /* First write next hash table entry; we've already calculated it. 
-+ * This write is known to be safe because the ip1 == ip0 + 1, so -+ * we know we will resume searching after ip1 */ -+ hashTable[hash1] = (U32)(ip1 - base); -+ - goto _offset; - } - -@@ -224,6 +292,21 @@ ZSTD_compressBlock_fast_noDict_generic( - /* check match at ip[0] */ - if (MEM_read32(ip0) == mval) { - /* found a match! */ -+ -+ /* first write next hash table entry; we've already calculated it */ -+ if (step <= 4) { -+ /* We need to avoid writing an index into the hash table >= the -+ * position at which we will pick up our searching after we've -+ * taken this match. -+ * -+ * The minimum possible match has length 4, so the earliest ip0 -+ * can be after we take this match will be the current ip0 + 4. -+ * ip1 is ip0 + step - 1. If ip1 is >= ip0 + 4, we can't safely -+ * write this position. -+ */ -+ hashTable[hash1] = (U32)(ip1 - base); -+ } -+ - goto _offset; - } - -@@ -254,9 +337,24 @@ ZSTD_compressBlock_fast_noDict_generic( - * However, it seems to be a meaningful performance hit to try to search - * them. So let's not. */ - -+ /* When the repcodes are outside of the prefix, we set them to zero before the loop. -+ * When the offsets are still zero, we need to restore them after the block to have a correct -+ * repcode history. If only one offset was invalid, it is easy. The tricky case is when both -+ * offsets were invalid. We need to figure out which offset to refill with. -+ * - If both offsets are zero they are in the same order. -+ * - If both offsets are non-zero, we won't restore the offsets from `offsetSaved[12]`. -+ * - If only one is zero, we need to decide which offset to restore. -+ * - If rep_offset1 is non-zero, then rep_offset2 must be offsetSaved1. -+ * - It is impossible for rep_offset2 to be non-zero. -+ * -+ * So if rep_offset1 started invalid (offsetSaved1 != 0) and became valid (rep_offset1 != 0), then -+ * set rep[0] = rep_offset1 and rep[1] = offsetSaved1. -+ */ -+ offsetSaved2 = ((offsetSaved1 != 0) && (rep_offset1 != 0)) ? 
offsetSaved1 : offsetSaved2; -+ - /* save reps for next block */ -- rep[0] = rep_offset1 ? rep_offset1 : offsetSaved; -- rep[1] = rep_offset2 ? rep_offset2 : offsetSaved; -+ rep[0] = rep_offset1 ? rep_offset1 : offsetSaved1; -+ rep[1] = rep_offset2 ? rep_offset2 : offsetSaved2; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -@@ -267,7 +365,7 @@ ZSTD_compressBlock_fast_noDict_generic( - match0 = base + idx; - rep_offset2 = rep_offset1; - rep_offset1 = (U32)(ip0-match0); -- offcode = STORE_OFFSET(rep_offset1); -+ offcode = OFFSET_TO_OFFBASE(rep_offset1); - mLength = 4; - - /* Count the backwards match length. */ -@@ -287,11 +385,6 @@ ZSTD_compressBlock_fast_noDict_generic( - ip0 += mLength; - anchor = ip0; - -- /* write next hash table entry */ -- if (ip1 < ip0) { -- hashTable[hash1] = (U32)(ip1 - base); -- } -- - /* Fill table and check for immediate repcode. */ - if (ip0 <= ilimit) { - /* Fill Table */ -@@ -306,7 +399,7 @@ ZSTD_compressBlock_fast_noDict_generic( - { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */ - hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); - ip0 += rLength; -- ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength); -+ ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, REPCODE1_TO_OFFBASE, rLength); - anchor = ip0; - continue; /* faster when present (confirmed on gcc-8) ... (?) 
*/ - } } } -@@ -369,6 +462,7 @@ size_t ZSTD_compressBlock_fast( - } - - FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_compressBlock_fast_dictMatchState_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, U32 const mls, U32 const hasStep) -@@ -380,14 +474,14 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( - U32 const stepSize = cParams->targetLength + !(cParams->targetLength); - const BYTE* const base = ms->window.base; - const BYTE* const istart = (const BYTE*)src; -- const BYTE* ip = istart; -+ const BYTE* ip0 = istart; -+ const BYTE* ip1 = ip0 + stepSize; /* we assert below that stepSize >= 1 */ - const BYTE* anchor = istart; - const U32 prefixStartIndex = ms->window.dictLimit; - const BYTE* const prefixStart = base + prefixStartIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=rep[0], offset_2=rep[1]; -- U32 offsetSaved = 0; - - const ZSTD_matchState_t* const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; -@@ -397,13 +491,13 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( - const BYTE* const dictStart = dictBase + dictStartIndex; - const BYTE* const dictEnd = dms->window.nextSrc; - const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); -- const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); -- const U32 dictHLog = dictCParams->hashLog; -+ const U32 dictAndPrefixLength = (U32)(istart - prefixStart + dictEnd - dictStart); -+ const U32 dictHBits = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; - - /* if a dictionary is still attached, it necessarily means that - * it is within window size. So we just check it. 
*/ - const U32 maxDistance = 1U << cParams->windowLog; -- const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); -+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - assert(endIndex - prefixStartIndex <= maxDistance); - (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ - -@@ -413,106 +507,155 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( - * when translating a dict index into a local index */ - assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); - -+ if (ms->prefetchCDictTables) { -+ size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32); -+ PREFETCH_AREA(dictHashTable, hashTableBytes); -+ } -+ - /* init */ - DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); -- ip += (dictAndPrefixLength == 0); -+ ip0 += (dictAndPrefixLength == 0); - /* dictMatchState repCode checks don't currently handle repCode == 0 - * disabling. */ - assert(offset_1 <= dictAndPrefixLength); - assert(offset_2 <= dictAndPrefixLength); - -- /* Main Search Loop */ -- while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ -+ /* Outer search loop */ -+ assert(stepSize >= 1); -+ while (ip1 <= ilimit) { /* repcode check at (ip0 + 1) is safe because ip0 < ip1 */ - size_t mLength; -- size_t const h = ZSTD_hashPtr(ip, hlog, mls); -- U32 const curr = (U32)(ip-base); -- U32 const matchIndex = hashTable[h]; -- const BYTE* match = base + matchIndex; -- const U32 repIndex = curr + 1 - offset_1; -- const BYTE* repMatch = (repIndex < prefixStartIndex) ? -- dictBase + (repIndex - dictIndexDelta) : -- base + repIndex; -- hashTable[h] = curr; /* update hash table */ -- -- if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ -- && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { -- const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; -- mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; -- ip++; -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); -- } else if ( (matchIndex <= prefixStartIndex) ) { -- size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); -- U32 const dictMatchIndex = dictHashTable[dictHash]; -- const BYTE* dictMatch = dictBase + dictMatchIndex; -- if (dictMatchIndex <= dictStartIndex || -- MEM_read32(dictMatch) != MEM_read32(ip)) { -- assert(stepSize >= 1); -- ip += ((ip-anchor) >> kSearchStrength) + stepSize; -- continue; -- } else { -- /* found a dict match */ -- U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta); -- mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; -- while (((ip>anchor) & (dictMatch>dictStart)) -- && (ip[-1] == dictMatch[-1])) { -- ip--; dictMatch--; mLength++; -+ size_t hash0 = ZSTD_hashPtr(ip0, hlog, mls); -+ -+ size_t const dictHashAndTag0 = ZSTD_hashPtr(ip0, dictHBits, mls); -+ U32 dictMatchIndexAndTag = dictHashTable[dictHashAndTag0 >> ZSTD_SHORT_CACHE_TAG_BITS]; -+ int dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag0); -+ -+ U32 matchIndex = hashTable[hash0]; -+ U32 curr = (U32)(ip0 - base); -+ size_t step = stepSize; -+ const size_t kStepIncr = 1 << kSearchStrength; -+ const BYTE* nextStep = ip0 + kStepIncr; -+ -+ /* Inner search loop */ -+ while (1) { -+ const BYTE* match = base + matchIndex; -+ const U32 repIndex = curr + 1 - offset_1; -+ const BYTE* repMatch = (repIndex < prefixStartIndex) ? 
-+ dictBase + (repIndex - dictIndexDelta) : -+ base + repIndex; -+ const size_t hash1 = ZSTD_hashPtr(ip1, hlog, mls); -+ size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls); -+ hashTable[hash0] = curr; /* update hash table */ -+ -+ if (((U32) ((prefixStartIndex - 1) - repIndex) >= -+ 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ -+ && (MEM_read32(repMatch) == MEM_read32(ip0 + 1))) { -+ const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; -+ mLength = ZSTD_count_2segments(ip0 + 1 + 4, repMatch + 4, iend, repMatchEnd, prefixStart) + 4; -+ ip0++; -+ ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); -+ break; -+ } -+ -+ if (dictTagsMatch) { -+ /* Found a possible dict match */ -+ const U32 dictMatchIndex = dictMatchIndexAndTag >> ZSTD_SHORT_CACHE_TAG_BITS; -+ const BYTE* dictMatch = dictBase + dictMatchIndex; -+ if (dictMatchIndex > dictStartIndex && -+ MEM_read32(dictMatch) == MEM_read32(ip0)) { -+ /* To replicate extDict parse behavior, we only use dict matches when the normal matchIndex is invalid */ -+ if (matchIndex <= prefixStartIndex) { -+ U32 const offset = (U32) (curr - dictMatchIndex - dictIndexDelta); -+ mLength = ZSTD_count_2segments(ip0 + 4, dictMatch + 4, iend, dictEnd, prefixStart) + 4; -+ while (((ip0 > anchor) & (dictMatch > dictStart)) -+ && (ip0[-1] == dictMatch[-1])) { -+ ip0--; -+ dictMatch--; -+ mLength++; -+ } /* catch up */ -+ offset_2 = offset_1; -+ offset_1 = offset; -+ ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); -+ break; -+ } -+ } -+ } -+ -+ if (matchIndex > prefixStartIndex && MEM_read32(match) == MEM_read32(ip0)) { -+ /* found a regular match */ -+ U32 const offset = (U32) (ip0 - match); -+ mLength = ZSTD_count(ip0 + 4, match + 4, iend) + 4; -+ while (((ip0 > anchor) & (match > prefixStart)) -+ && (ip0[-1] == match[-1])) { -+ ip0--; -+ match--; -+ mLength++; - } /* 
catch up */ - offset_2 = offset_1; - offset_1 = offset; -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); -+ ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); -+ break; - } -- } else if (MEM_read32(match) != MEM_read32(ip)) { -- /* it's not a match, and we're not going to check the dictionary */ -- assert(stepSize >= 1); -- ip += ((ip-anchor) >> kSearchStrength) + stepSize; -- continue; -- } else { -- /* found a regular match */ -- U32 const offset = (U32)(ip-match); -- mLength = ZSTD_count(ip+4, match+4, iend) + 4; -- while (((ip>anchor) & (match>prefixStart)) -- && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ -- offset_2 = offset_1; -- offset_1 = offset; -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); -- } -+ -+ /* Prepare for next iteration */ -+ dictMatchIndexAndTag = dictHashTable[dictHashAndTag1 >> ZSTD_SHORT_CACHE_TAG_BITS]; -+ dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag1); -+ matchIndex = hashTable[hash1]; -+ -+ if (ip1 >= nextStep) { -+ step++; -+ nextStep += kStepIncr; -+ } -+ ip0 = ip1; -+ ip1 = ip1 + step; -+ if (ip1 > ilimit) goto _cleanup; -+ -+ curr = (U32)(ip0 - base); -+ hash0 = hash1; -+ } /* end inner search loop */ - - /* match found */ -- ip += mLength; -- anchor = ip; -+ assert(mLength); -+ ip0 += mLength; -+ anchor = ip0; - -- if (ip <= ilimit) { -+ if (ip0 <= ilimit) { - /* Fill Table */ - assert(base+curr+2 > istart); /* check base overflow */ - hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */ -- hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); -+ hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); - - /* check immediate repcode */ -- while (ip <= ilimit) { -- U32 const current2 = (U32)(ip-base); -+ while (ip0 <= ilimit) { -+ U32 const current2 = (U32)(ip0-base); - U32 const 
repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? - dictBase - dictIndexDelta + repIndex2 : - base + repIndex2; - if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) -- && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { -+ && (MEM_read32(repMatch2) == MEM_read32(ip0))) { - const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; -- size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; -+ size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ -- ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); -- hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; -- ip += repLength2; -- anchor = ip; -+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); -+ hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = current2; -+ ip0 += repLength2; -+ anchor = ip0; - continue; - } - break; - } - } -+ -+ /* Prepare for next iteration */ -+ assert(ip0 == anchor); -+ ip1 = ip0 + stepSize; - } - -+_cleanup: - /* save reps for next block */ -- rep[0] = offset_1 ? offset_1 : offsetSaved; -- rep[1] = offset_2 ? 
offset_2 : offsetSaved; -+ rep[0] = offset_1; -+ rep[1] = offset_2; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -@@ -545,7 +688,9 @@ size_t ZSTD_compressBlock_fast_dictMatchState( - } - - --static size_t ZSTD_compressBlock_fast_extDict_generic( -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t ZSTD_compressBlock_fast_extDict_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, U32 const mls, U32 const hasStep) - { -@@ -553,11 +698,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( - U32* const hashTable = ms->hashTable; - U32 const hlog = cParams->hashLog; - /* support stepSize of 0 */ -- U32 const stepSize = cParams->targetLength + !(cParams->targetLength); -+ size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; - const BYTE* const base = ms->window.base; - const BYTE* const dictBase = ms->window.dictBase; - const BYTE* const istart = (const BYTE*)src; -- const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); -@@ -570,6 +714,28 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - U32 offset_1=rep[0], offset_2=rep[1]; -+ U32 offsetSaved1 = 0, offsetSaved2 = 0; -+ -+ const BYTE* ip0 = istart; -+ const BYTE* ip1; -+ const BYTE* ip2; -+ const BYTE* ip3; -+ U32 current0; -+ -+ -+ size_t hash0; /* hash for ip0 */ -+ size_t hash1; /* hash for ip1 */ -+ U32 idx; /* match idx for ip0 */ -+ const BYTE* idxBase; /* base pointer for idx */ -+ -+ U32 offcode; -+ const BYTE* match0; -+ size_t mLength; -+ const BYTE* matchEnd = 0; /* initialize to avoid warning, assert != 0 later */ -+ -+ size_t step; -+ const BYTE* nextStep; -+ const size_t kStepIncr = (1 << (kSearchStrength - 1)); - - (void)hasStep; /* not 
currently specialized on whether it's accelerated */ - -@@ -579,75 +745,202 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( - if (prefixStartIndex == dictStartIndex) - return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize); - -- /* Search Loop */ -- while (ip < ilimit) { /* < instead of <=, because (ip+1) */ -- const size_t h = ZSTD_hashPtr(ip, hlog, mls); -- const U32 matchIndex = hashTable[h]; -- const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; -- const BYTE* match = matchBase + matchIndex; -- const U32 curr = (U32)(ip-base); -- const U32 repIndex = curr + 1 - offset_1; -- const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; -- const BYTE* const repMatch = repBase + repIndex; -- hashTable[h] = curr; /* update hash table */ -- DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr); -- -- if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ -- & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */ -- && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { -- const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; -- size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; -- ip++; -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength); -- ip += rLength; -- anchor = ip; -- } else { -- if ( (matchIndex < dictStartIndex) || -- (MEM_read32(match) != MEM_read32(ip)) ) { -- assert(stepSize >= 1); -- ip += ((ip-anchor) >> kSearchStrength) + stepSize; -- continue; -+ { U32 const curr = (U32)(ip0 - base); -+ U32 const maxRep = curr - dictStartIndex; -+ if (offset_2 >= maxRep) offsetSaved2 = offset_2, offset_2 = 0; -+ if (offset_1 >= maxRep) offsetSaved1 = offset_1, offset_1 = 0; -+ } -+ -+ /* start each op */ -+_start: /* Requires: ip0 */ -+ -+ step = stepSize; -+ nextStep = ip0 + kStepIncr; -+ -+ /* calculate positions, ip0 - anchor == 0, so we skip step calc */ -+ ip1 = ip0 + 1; -+ ip2 = ip0 + step; -+ ip3 = ip2 + 1; -+ -+ if (ip3 >= ilimit) { -+ goto _cleanup; -+ } -+ -+ hash0 = ZSTD_hashPtr(ip0, hlog, mls); -+ hash1 = ZSTD_hashPtr(ip1, hlog, mls); -+ -+ idx = hashTable[hash0]; -+ idxBase = idx < prefixStartIndex ? dictBase : base; -+ -+ do { -+ { /* load repcode match for ip[2] */ -+ U32 const current2 = (U32)(ip2 - base); -+ U32 const repIndex = current2 - offset_1; -+ const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; -+ U32 rval; -+ if ( ((U32)(prefixStartIndex - repIndex) >= 4) /* intentional underflow */ -+ & (offset_1 > 0) ) { -+ rval = MEM_read32(repBase + repIndex); -+ } else { -+ rval = MEM_read32(ip2) ^ 1; /* guaranteed to not match. */ - } -- { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; -- const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? 
dictStart : prefixStart; -- U32 const offset = curr - matchIndex; -- size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; -- while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ -- offset_2 = offset_1; offset_1 = offset; /* update offset history */ -- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); -- ip += mLength; -- anchor = ip; -+ -+ /* write back hash table entry */ -+ current0 = (U32)(ip0 - base); -+ hashTable[hash0] = current0; -+ -+ /* check repcode at ip[2] */ -+ if (MEM_read32(ip2) == rval) { -+ ip0 = ip2; -+ match0 = repBase + repIndex; -+ matchEnd = repIndex < prefixStartIndex ? dictEnd : iend; -+ assert((match0 != prefixStart) & (match0 != dictStart)); -+ mLength = ip0[-1] == match0[-1]; -+ ip0 -= mLength; -+ match0 -= mLength; -+ offcode = REPCODE1_TO_OFFBASE; -+ mLength += 4; -+ goto _match; - } } - -- if (ip <= ilimit) { -- /* Fill Table */ -- hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; -- hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); -- /* check immediate repcode */ -- while (ip <= ilimit) { -- U32 const current2 = (U32)(ip-base); -- U32 const repIndex2 = current2 - offset_2; -- const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; -- if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex)) /* intentional overflow */ -- && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { -- const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; -- size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; -- { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ -- ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2); -- hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; -- ip += repLength2; -- anchor = ip; -- continue; -- } -- break; -- } } } -+ { /* load match for ip[0] */ -+ U32 const mval = idx >= dictStartIndex ? -+ MEM_read32(idxBase + idx) : -+ MEM_read32(ip0) ^ 1; /* guaranteed not to match */ -+ -+ /* check match at ip[0] */ -+ if (MEM_read32(ip0) == mval) { -+ /* found a match! */ -+ goto _offset; -+ } } -+ -+ /* lookup ip[1] */ -+ idx = hashTable[hash1]; -+ idxBase = idx < prefixStartIndex ? dictBase : base; -+ -+ /* hash ip[2] */ -+ hash0 = hash1; -+ hash1 = ZSTD_hashPtr(ip2, hlog, mls); -+ -+ /* advance to next positions */ -+ ip0 = ip1; -+ ip1 = ip2; -+ ip2 = ip3; -+ -+ /* write back hash table entry */ -+ current0 = (U32)(ip0 - base); -+ hashTable[hash0] = current0; -+ -+ { /* load match for ip[0] */ -+ U32 const mval = idx >= dictStartIndex ? -+ MEM_read32(idxBase + idx) : -+ MEM_read32(ip0) ^ 1; /* guaranteed not to match */ -+ -+ /* check match at ip[0] */ -+ if (MEM_read32(ip0) == mval) { -+ /* found a match! */ -+ goto _offset; -+ } } -+ -+ /* lookup ip[1] */ -+ idx = hashTable[hash1]; -+ idxBase = idx < prefixStartIndex ? dictBase : base; -+ -+ /* hash ip[2] */ -+ hash0 = hash1; -+ hash1 = ZSTD_hashPtr(ip2, hlog, mls); -+ -+ /* advance to next positions */ -+ ip0 = ip1; -+ ip1 = ip2; -+ ip2 = ip0 + step; -+ ip3 = ip1 + step; -+ -+ /* calculate step */ -+ if (ip2 >= nextStep) { -+ step++; -+ PREFETCH_L1(ip1 + 64); -+ PREFETCH_L1(ip1 + 128); -+ nextStep += kStepIncr; -+ } -+ } while (ip3 < ilimit); -+ -+_cleanup: -+ /* Note that there are probably still a couple positions we could search. 
-+ * However, it seems to be a meaningful performance hit to try to search -+ * them. So let's not. */ -+ -+ /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0), -+ * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */ -+ offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2; - - /* save reps for next block */ -- rep[0] = offset_1; -- rep[1] = offset_2; -+ rep[0] = offset_1 ? offset_1 : offsetSaved1; -+ rep[1] = offset_2 ? offset_2 : offsetSaved2; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -+ -+_offset: /* Requires: ip0, idx, idxBase */ -+ -+ /* Compute the offset code. */ -+ { U32 const offset = current0 - idx; -+ const BYTE* const lowMatchPtr = idx < prefixStartIndex ? dictStart : prefixStart; -+ matchEnd = idx < prefixStartIndex ? dictEnd : iend; -+ match0 = idxBase + idx; -+ offset_2 = offset_1; -+ offset_1 = offset; -+ offcode = OFFSET_TO_OFFBASE(offset); -+ mLength = 4; -+ -+ /* Count the backwards match length. */ -+ while (((ip0>anchor) & (match0>lowMatchPtr)) && (ip0[-1] == match0[-1])) { -+ ip0--; -+ match0--; -+ mLength++; -+ } } -+ -+_match: /* Requires: ip0, match0, offcode, matchEnd */ -+ -+ /* Count the forward length. */ -+ assert(matchEnd != 0); -+ mLength += ZSTD_count_2segments(ip0 + mLength, match0 + mLength, iend, matchEnd, prefixStart); -+ -+ ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength); -+ -+ ip0 += mLength; -+ anchor = ip0; -+ -+ /* write next hash table entry */ -+ if (ip1 < ip0) { -+ hashTable[hash1] = (U32)(ip1 - base); -+ } -+ -+ /* Fill table and check for immediate repcode. 
*/ -+ if (ip0 <= ilimit) { -+ /* Fill Table */ -+ assert(base+current0+2 > istart); /* check base overflow */ -+ hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ -+ hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); -+ -+ while (ip0 <= ilimit) { -+ U32 const repIndex2 = (U32)(ip0-base) - offset_2; -+ const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; -+ if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 > 0)) /* intentional underflow */ -+ && (MEM_read32(repMatch2) == MEM_read32(ip0)) ) { -+ const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; -+ size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; -+ { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ -+ ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); -+ hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); -+ ip0 += repLength2; -+ anchor = ip0; -+ continue; -+ } -+ break; -+ } } -+ -+ goto _start; - } - - ZSTD_GEN_FAST_FN(extDict, 4, 0) -@@ -660,6 +953,7 @@ size_t ZSTD_compressBlock_fast_extDict( - void const* src, size_t srcSize) - { - U32 const mls = ms->cParams.minMatch; -+ assert(ms->dictMatchState == NULL); - switch(mls) - { - default: /* includes case 3 */ -diff --git a/lib/zstd/compress/zstd_fast.h b/lib/zstd/compress/zstd_fast.h -index fddc2f532d21..e64d9e1b2d39 100644 ---- a/lib/zstd/compress/zstd_fast.h -+++ b/lib/zstd/compress/zstd_fast.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -16,7 +17,8 @@ - #include "zstd_compress_internal.h" - - void ZSTD_fillHashTable(ZSTD_matchState_t* ms, -- void const* end, ZSTD_dictTableLoadMethod_e dtlm); -+ void const* end, ZSTD_dictTableLoadMethod_e dtlm, -+ ZSTD_tableFillPurpose_e tfp); - size_t ZSTD_compressBlock_fast( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -diff --git a/lib/zstd/compress/zstd_lazy.c b/lib/zstd/compress/zstd_lazy.c -index 0298a01a7504..3e88d8a1a136 100644 ---- a/lib/zstd/compress/zstd_lazy.c -+++ b/lib/zstd/compress/zstd_lazy.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -10,14 +11,23 @@ - - #include "zstd_compress_internal.h" - #include "zstd_lazy.h" -+#include "../common/bits.h" /* ZSTD_countTrailingZeros64 */ -+ -+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) -+ -+#define kLazySkippingStep 8 - - - /*-************************************* - * Binary Tree search - ***************************************/ - --static void --ZSTD_updateDUBT(ZSTD_matchState_t* ms, -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_updateDUBT(ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* iend, - U32 mls) - { -@@ -60,8 +70,9 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms, - * sort one already inserted but unsorted position - * assumption : curr >= btlow == (curr - btmask) - * doesn't fail */ --static void --ZSTD_insertDUBT1(const ZSTD_matchState_t* ms, -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_insertDUBT1(const ZSTD_matchState_t* 
ms, - U32 curr, const BYTE* inputEnd, - U32 nbCompares, U32 btLow, - const ZSTD_dictMode_e dictMode) -@@ -149,8 +160,9 @@ ZSTD_insertDUBT1(const ZSTD_matchState_t* ms, - } - - --static size_t --ZSTD_DUBT_findBetterDictMatch ( -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t ZSTD_DUBT_findBetterDictMatch ( - const ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iend, - size_t* offsetPtr, -@@ -197,8 +209,8 @@ ZSTD_DUBT_findBetterDictMatch ( - U32 matchIndex = dictMatchIndex + dictIndexDelta; - if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { - DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", -- curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, STORE_OFFSET(curr - matchIndex), dictMatchIndex, matchIndex); -- bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex); -+ curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, OFFSET_TO_OFFBASE(curr - matchIndex), dictMatchIndex, matchIndex); -+ bestLength = matchLength, *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex); - } - if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ - break; /* drop, to guarantee consistency (miss a little bit of compression) */ -@@ -218,7 +230,7 @@ ZSTD_DUBT_findBetterDictMatch ( - } - - if (bestLength >= MINMATCH) { -- U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex; -+ U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offsetPtr); (void)mIndex; - DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", - curr, (U32)bestLength, (U32)*offsetPtr, mIndex); - } -@@ -227,10 +239,11 @@ ZSTD_DUBT_findBetterDictMatch ( - } - - --static size_t --ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 
-+size_t ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iend, -- size_t* offsetPtr, -+ size_t* offBasePtr, - U32 const mls, - const ZSTD_dictMode_e dictMode) - { -@@ -327,8 +340,8 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, - if (matchLength > bestLength) { - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; -- if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) -- bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex); -+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)*offBasePtr)) ) -+ bestLength = matchLength, *offBasePtr = OFFSET_TO_OFFBASE(curr - matchIndex); - if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ - if (dictMode == ZSTD_dictMatchState) { - nbCompares = 0; /* in addition to avoiding checking any -@@ -361,16 +374,16 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, - if (dictMode == ZSTD_dictMatchState && nbCompares) { - bestLength = ZSTD_DUBT_findBetterDictMatch( - ms, ip, iend, -- offsetPtr, bestLength, nbCompares, -+ offBasePtr, bestLength, nbCompares, - mls, dictMode); - } - - assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */ - ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ - if (bestLength >= MINMATCH) { -- U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex; -+ U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offBasePtr); (void)mIndex; - DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", -- curr, (U32)bestLength, (U32)*offsetPtr, mIndex); -+ curr, (U32)bestLength, (U32)*offBasePtr, mIndex); - } - return bestLength; - } -@@ -378,17 +391,18 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, - - - /* ZSTD_BtFindBestMatch() : Tree updater, providing best match */ --FORCE_INLINE_TEMPLATE 
size_t --ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iLimit, -- size_t* offsetPtr, -+ size_t* offBasePtr, - const U32 mls /* template */, - const ZSTD_dictMode_e dictMode) - { - DEBUGLOG(7, "ZSTD_BtFindBestMatch"); - if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateDUBT(ms, ip, iLimit, mls); -- return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode); -+ return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offBasePtr, mls, dictMode); - } - - /* ********************************* -@@ -561,7 +575,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; -- *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta)); -+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta)); - if (ip+currentMl == iLimit) { - /* best possible, avoids read overflow on next attempt */ - return ml; -@@ -598,7 +612,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; -- *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta)); -+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta)); - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } - } -@@ -614,10 +628,12 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb - - /* Update chains up to ip (excluded) - Assumption : always within prefix (i.e. 
not within extDict) */ --FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+U32 ZSTD_insertAndFindFirstIndex_internal( - ZSTD_matchState_t* ms, - const ZSTD_compressionParameters* const cParams, -- const BYTE* ip, U32 const mls) -+ const BYTE* ip, U32 const mls, U32 const lazySkipping) - { - U32* const hashTable = ms->hashTable; - const U32 hashLog = cParams->hashLog; -@@ -632,6 +648,9 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( - NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; - hashTable[h] = idx; - idx++; -+ /* Stop inserting every position when in the lazy skipping mode. */ -+ if (lazySkipping) -+ break; - } - - ms->nextToUpdate = target; -@@ -640,11 +659,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( - - U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; -- return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); -+ return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0); - } - - /* inlining is important to hardwire a hot branch (template emulation) */ - FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_HcFindBestMatch( - ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iLimit, -@@ -684,14 +704,15 @@ size_t ZSTD_HcFindBestMatch( - } - - /* HC4 match finder */ -- matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); -+ matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping); - - for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) { - size_t currentMl=0; - if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { - const BYTE* const match = base + matchIndex; - assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ -- if (match[ml] == ip[ml]) 
/* potentially better */ -+ /* read 4B starting from (match + ml + 1 - sizeof(U32)) */ -+ if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */ - currentMl = ZSTD_count(ip, match, iLimit); - } else { - const BYTE* const match = dictBase + matchIndex; -@@ -703,7 +724,7 @@ size_t ZSTD_HcFindBestMatch( - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; -- *offsetPtr = STORE_OFFSET(curr - matchIndex); -+ *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex); - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } - -@@ -739,7 +760,7 @@ size_t ZSTD_HcFindBestMatch( - if (currentMl > ml) { - ml = currentMl; - assert(curr > matchIndex + dmsIndexDelta); -- *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta)); -+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta)); - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } - -@@ -756,8 +777,6 @@ size_t ZSTD_HcFindBestMatch( - * (SIMD) Row-based matchfinder - ***********************************/ - /* Constants for row-based hash */ --#define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */ --#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */ - #define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1) - #define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */ - -@@ -769,64 +788,19 @@ typedef U64 ZSTD_VecMask; /* Clarifies when we are interacting with a U64 repr - * Starting from the LSB, returns the idx of the next non-zero bit. - * Basically counting the nb of trailing zeroes. 
- */ --static U32 ZSTD_VecMask_next(ZSTD_VecMask val) { -- assert(val != 0); --# if (defined(__GNUC__) && ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)))) -- if (sizeof(size_t) == 4) { -- U32 mostSignificantWord = (U32)(val >> 32); -- U32 leastSignificantWord = (U32)val; -- if (leastSignificantWord == 0) { -- return 32 + (U32)__builtin_ctz(mostSignificantWord); -- } else { -- return (U32)__builtin_ctz(leastSignificantWord); -- } -- } else { -- return (U32)__builtin_ctzll(val); -- } --# else -- /* Software ctz version: http://aggregate.org/MAGIC/#Trailing%20Zero%20Count -- * and: https://stackoverflow.com/questions/2709430/count-number-of-bits-in-a-64-bit-long-big-integer -- */ -- val = ~val & (val - 1ULL); /* Lowest set bit mask */ -- val = val - ((val >> 1) & 0x5555555555555555); -- val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL); -- return (U32)((((val + (val >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56); --# endif --} -- --/* ZSTD_rotateRight_*(): -- * Rotates a bitfield to the right by "count" bits. 
-- * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts -- */ --FORCE_INLINE_TEMPLATE --U64 ZSTD_rotateRight_U64(U64 const value, U32 count) { -- assert(count < 64); -- count &= 0x3F; /* for fickle pattern recognition */ -- return (value >> count) | (U64)(value << ((0U - count) & 0x3F)); --} -- --FORCE_INLINE_TEMPLATE --U32 ZSTD_rotateRight_U32(U32 const value, U32 count) { -- assert(count < 32); -- count &= 0x1F; /* for fickle pattern recognition */ -- return (value >> count) | (U32)(value << ((0U - count) & 0x1F)); --} -- --FORCE_INLINE_TEMPLATE --U16 ZSTD_rotateRight_U16(U16 const value, U32 count) { -- assert(count < 16); -- count &= 0x0F; /* for fickle pattern recognition */ -- return (value >> count) | (U16)(value << ((0U - count) & 0x0F)); -+MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) { -+ return ZSTD_countTrailingZeros64(val); - } - - /* ZSTD_row_nextIndex(): - * Returns the next index to insert at within a tagTable row, and updates the "head" -- * value to reflect the update. Essentially cycles backwards from [0, {entries per row}) -+ * value to reflect the update. Essentially cycles backwards from [1, {entries per row}) - */ - FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) { -- U32 const next = (*tagRow - 1) & rowMask; -- *tagRow = (BYTE)next; -- return next; -+ U32 next = (*tagRow-1) & rowMask; -+ next += (next == 0) ? rowMask : 0; /* skip first position */ -+ *tagRow = (BYTE)next; -+ return next; - } - - /* ZSTD_isAligned(): -@@ -840,7 +814,7 @@ MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) { - /* ZSTD_row_prefetch(): - * Performs prefetching for the hashTable and tagTable at a given row. 
- */ --FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) { -+FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) { - PREFETCH_L1(hashTable + relRow); - if (rowLog >= 5) { - PREFETCH_L1(hashTable + relRow + 16); -@@ -859,18 +833,20 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* ta - * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries, - * but not beyond iLimit. - */ --FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base, -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base, - U32 const rowLog, U32 const mls, - U32 idx, const BYTE* const iLimit) - { - U32 const* const hashTable = ms->hashTable; -- U16 const* const tagTable = ms->tagTable; -+ BYTE const* const tagTable = ms->tagTable; - U32 const hashLog = ms->rowHashLog; - U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1); - U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch); - - for (; idx < lim; ++idx) { -- U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); -+ U32 const hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt); - U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; - ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); - ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash; -@@ -885,12 +861,15 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B - * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at - * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable. 
- */ --FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable, -- U16 const* tagTable, BYTE const* base, -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable, -+ BYTE const* tagTable, BYTE const* base, - U32 idx, U32 const hashLog, -- U32 const rowLog, U32 const mls) -+ U32 const rowLog, U32 const mls, -+ U64 const hashSalt) - { -- U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); -+ U32 const newHash = (U32)ZSTD_hashPtrSalted(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt); - U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; - ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); - { U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK]; -@@ -902,28 +881,29 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTab - /* ZSTD_row_update_internalImpl(): - * Updates the hash table with positions starting from updateStartIdx until updateEndIdx. - */ --FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms, -- U32 updateStartIdx, U32 const updateEndIdx, -- U32 const mls, U32 const rowLog, -- U32 const rowMask, U32 const useCache) -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms, -+ U32 updateStartIdx, U32 const updateEndIdx, -+ U32 const mls, U32 const rowLog, -+ U32 const rowMask, U32 const useCache) - { - U32* const hashTable = ms->hashTable; -- U16* const tagTable = ms->tagTable; -+ BYTE* const tagTable = ms->tagTable; - U32 const hashLog = ms->rowHashLog; - const BYTE* const base = ms->window.base; - - DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx); - for (; updateStartIdx < updateEndIdx; ++updateStartIdx) { -- U32 const hash = useCache ? 
ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls) -- : (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); -+ U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls, ms->hashSalt) -+ : (U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt); - U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; - U32* const row = hashTable + relRow; -- BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte. -- Explicit cast allows us to get exact desired position within each row */ -+ BYTE* tagRow = tagTable + relRow; - U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); - -- assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls)); -- ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK; -+ assert(hash == ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt)); -+ tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK; - row[pos] = updateStartIdx; - } - } -@@ -932,9 +912,11 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms, - * Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate. - * Skips sections of long matches as is necessary. 
- */ --FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip, -- U32 const mls, U32 const rowLog, -- U32 const rowMask, U32 const useCache) -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip, -+ U32 const mls, U32 const rowLog, -+ U32 const rowMask, U32 const useCache) - { - U32 idx = ms->nextToUpdate; - const BYTE* const base = ms->window.base; -@@ -971,7 +953,35 @@ void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) { - const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */); - - DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog); -- ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */); -+ ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* don't use cache */); -+} -+ -+/* Returns the mask width of bits group of which will be set to 1. Given not all -+ * architectures have easy movemask instruction, this helps to iterate over -+ * groups of bits easier and faster. -+ */ -+FORCE_INLINE_TEMPLATE U32 -+ZSTD_row_matchMaskGroupWidth(const U32 rowEntries) -+{ -+ assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64); -+ assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES); -+ (void)rowEntries; -+#if defined(ZSTD_ARCH_ARM_NEON) -+ /* NEON path only works for little endian */ -+ if (!MEM_isLittleEndian()) { -+ return 1; -+ } -+ if (rowEntries == 16) { -+ return 4; -+ } -+ if (rowEntries == 32) { -+ return 2; -+ } -+ if (rowEntries == 64) { -+ return 1; -+ } -+#endif -+ return 1; - } - - #if defined(ZSTD_ARCH_X86_SSE2) -@@ -994,71 +1004,82 @@ ZSTD_row_getSSEMask(int nbChunks, const BYTE* const src, const BYTE tag, const U - } - #endif - --/* Returns a ZSTD_VecMask (U32) that has the nth bit set to 1 if the newly-computed "tag" matches -- * the hash at the nth position in a row of the tagTable. -- * Each row is a circular buffer beginning at the value of "head". 
So we must rotate the "matches" bitfield -- * to match up with the actual layout of the entries within the hashTable */ -+#if defined(ZSTD_ARCH_ARM_NEON) -+FORCE_INLINE_TEMPLATE ZSTD_VecMask -+ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag, const U32 headGrouped) -+{ -+ assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64); -+ if (rowEntries == 16) { -+ /* vshrn_n_u16 shifts by 4 every u16 and narrows to 8 lower bits. -+ * After that groups of 4 bits represent the equalMask. We lower -+ * all bits except the highest in these groups by doing AND with -+ * 0x88 = 0b10001000. -+ */ -+ const uint8x16_t chunk = vld1q_u8(src); -+ const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag))); -+ const uint8x8_t res = vshrn_n_u16(equalMask, 4); -+ const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0); -+ return ZSTD_rotateRight_U64(matches, headGrouped) & 0x8888888888888888ull; -+ } else if (rowEntries == 32) { -+ /* Same idea as with rowEntries == 16 but doing AND with -+ * 0x55 = 0b01010101. 
-+ */ -+ const uint16x8x2_t chunk = vld2q_u16((const uint16_t*)(const void*)src); -+ const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]); -+ const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]); -+ const uint8x16_t dup = vdupq_n_u8(tag); -+ const uint8x8_t t0 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk0, dup)), 6); -+ const uint8x8_t t1 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk1, dup)), 6); -+ const uint8x8_t res = vsli_n_u8(t0, t1, 4); -+ const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0) ; -+ return ZSTD_rotateRight_U64(matches, headGrouped) & 0x5555555555555555ull; -+ } else { /* rowEntries == 64 */ -+ const uint8x16x4_t chunk = vld4q_u8(src); -+ const uint8x16_t dup = vdupq_n_u8(tag); -+ const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup); -+ const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup); -+ const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup); -+ const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup); -+ -+ const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1); -+ const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1); -+ const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2); -+ const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4); -+ const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4); -+ const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0); -+ return ZSTD_rotateRight_U64(matches, headGrouped); -+ } -+} -+#endif -+ -+/* Returns a ZSTD_VecMask (U64) that has the nth group (determined by -+ * ZSTD_row_matchMaskGroupWidth) of bits set to 1 if the newly-computed "tag" -+ * matches the hash at the nth position in a row of the tagTable. -+ * Each row is a circular buffer beginning at the value of "headGrouped". 
So we -+ * must rotate the "matches" bitfield to match up with the actual layout of the -+ * entries within the hashTable */ - FORCE_INLINE_TEMPLATE ZSTD_VecMask --ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, const U32 rowEntries) -+ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries) - { -- const BYTE* const src = tagRow + ZSTD_ROW_HASH_TAG_OFFSET; -+ const BYTE* const src = tagRow; - assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64); - assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES); -+ assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8); - - #if defined(ZSTD_ARCH_X86_SSE2) - -- return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, head); -+ return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, headGrouped); - - #else /* SW or NEON-LE */ - - # if defined(ZSTD_ARCH_ARM_NEON) - /* This NEON path only works for little endian - otherwise use SWAR below */ - if (MEM_isLittleEndian()) { -- if (rowEntries == 16) { -- const uint8x16_t chunk = vld1q_u8(src); -- const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag))); -- const uint16x8_t t0 = vshlq_n_u16(equalMask, 7); -- const uint32x4_t t1 = vreinterpretq_u32_u16(vsriq_n_u16(t0, t0, 14)); -- const uint64x2_t t2 = vreinterpretq_u64_u32(vshrq_n_u32(t1, 14)); -- const uint8x16_t t3 = vreinterpretq_u8_u64(vsraq_n_u64(t2, t2, 28)); -- const U16 hi = (U16)vgetq_lane_u8(t3, 8); -- const U16 lo = (U16)vgetq_lane_u8(t3, 0); -- return ZSTD_rotateRight_U16((hi << 8) | lo, head); -- } else if (rowEntries == 32) { -- const uint16x8x2_t chunk = vld2q_u16((const U16*)(const void*)src); -- const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]); -- const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]); -- const uint8x16_t equalMask0 = vceqq_u8(chunk0, vdupq_n_u8(tag)); -- const uint8x16_t equalMask1 = vceqq_u8(chunk1, vdupq_n_u8(tag)); -- const 
int8x8_t pack0 = vqmovn_s16(vreinterpretq_s16_u8(equalMask0)); -- const int8x8_t pack1 = vqmovn_s16(vreinterpretq_s16_u8(equalMask1)); -- const uint8x8_t t0 = vreinterpret_u8_s8(pack0); -- const uint8x8_t t1 = vreinterpret_u8_s8(pack1); -- const uint8x8_t t2 = vsri_n_u8(t1, t0, 2); -- const uint8x8x2_t t3 = vuzp_u8(t2, t0); -- const uint8x8_t t4 = vsri_n_u8(t3.val[1], t3.val[0], 4); -- const U32 matches = vget_lane_u32(vreinterpret_u32_u8(t4), 0); -- return ZSTD_rotateRight_U32(matches, head); -- } else { /* rowEntries == 64 */ -- const uint8x16x4_t chunk = vld4q_u8(src); -- const uint8x16_t dup = vdupq_n_u8(tag); -- const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup); -- const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup); -- const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup); -- const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup); -- -- const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1); -- const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1); -- const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2); -- const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4); -- const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4); -- const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0); -- return ZSTD_rotateRight_U64(matches, head); -- } -+ return ZSTD_row_getNEONMask(rowEntries, src, tag, headGrouped); - } - # endif /* ZSTD_ARCH_ARM_NEON */ - /* SWAR */ -- { const size_t chunkSize = sizeof(size_t); -+ { const int chunkSize = sizeof(size_t); - const size_t shiftAmount = ((chunkSize * 8) - chunkSize); - const size_t xFF = ~((size_t)0); - const size_t x01 = xFF / 0xFF; -@@ -1091,11 +1112,11 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, - } - matches = ~matches; - if (rowEntries == 16) { -- return ZSTD_rotateRight_U16((U16)matches, head); -+ return ZSTD_rotateRight_U16((U16)matches, headGrouped); - } else if (rowEntries == 32) { -- return ZSTD_rotateRight_U32((U32)matches, head); -+ return ZSTD_rotateRight_U32((U32)matches, headGrouped); - } else { -- 
return ZSTD_rotateRight_U64((U64)matches, head); -+ return ZSTD_rotateRight_U64((U64)matches, headGrouped); - } - } - #endif -@@ -1103,20 +1124,21 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, - - /* The high-level approach of the SIMD row based match finder is as follows: - * - Figure out where to insert the new entry: -- * - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag" -- * - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines -+ * - Generate a hash for current input posistion and split it into a one byte of tag and `rowHashLog` bits of index. -+ * - The hash is salted by a value that changes on every contex reset, so when the same table is used -+ * we will avoid collisions that would otherwise slow us down by intorducing phantom matches. -+ * - The hashTable is effectively split into groups or "rows" of 15 or 31 entries of U32, and the index determines - * which row to insert into. -- * - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can -- * be considered as a circular buffer with a "head" index that resides in the tagTable. -- * - Also insert the "tag" into the equivalent row and position in the tagTable. -- * - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry. -- * The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively, -- * for alignment/performance reasons, leaving some bytes unused. -- * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and -+ * - Determine the correct position within the row to insert the entry into. Each row of 15 or 31 can -+ * be considered as a circular buffer with a "head" index that resides in the tagTable (overall 16 or 32 bytes -+ * per row). 
-+ * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte tag calculated for the position and - * generate a bitfield that we can cycle through to check the collisions in the hash table. - * - Pick the longest match. -+ * - Insert the tag into the equivalent row and position in the tagTable. - */ - FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_RowFindBestMatch( - ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iLimit, -@@ -1125,7 +1147,7 @@ size_t ZSTD_RowFindBestMatch( - const U32 rowLog) - { - U32* const hashTable = ms->hashTable; -- U16* const tagTable = ms->tagTable; -+ BYTE* const tagTable = ms->tagTable; - U32* const hashCache = ms->hashCache; - const U32 hashLog = ms->rowHashLog; - const ZSTD_compressionParameters* const cParams = &ms->cParams; -@@ -1143,8 +1165,11 @@ size_t ZSTD_RowFindBestMatch( - const U32 rowEntries = (1U << rowLog); - const U32 rowMask = rowEntries - 1; - const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */ -+ const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries); -+ const U64 hashSalt = ms->hashSalt; - U32 nbAttempts = 1U << cappedSearchLog; - size_t ml=4-1; -+ U32 hash; - - /* DMS/DDS variables that may be referenced laster */ - const ZSTD_matchState_t* const dms = ms->dictMatchState; -@@ -1168,7 +1193,7 @@ size_t ZSTD_RowFindBestMatch( - if (dictMode == ZSTD_dictMatchState) { - /* Prefetch DMS rows */ - U32* const dmsHashTable = dms->hashTable; -- U16* const dmsTagTable = dms->tagTable; -+ BYTE* const dmsTagTable = dms->tagTable; - U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls); - U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; - dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK; -@@ -1178,23 +1203,34 @@ size_t ZSTD_RowFindBestMatch( - } - - /* Update the hashTable and tagTable up to (but not including) ip */ -- ZSTD_row_update_internal(ms, ip, mls, 
rowLog, rowMask, 1 /* useCache */); -+ if (!ms->lazySkipping) { -+ ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */); -+ hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt); -+ } else { -+ /* Stop inserting every position when in the lazy skipping mode. -+ * The hash cache is also not kept up to date in this mode. -+ */ -+ hash = (U32)ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt); -+ ms->nextToUpdate = curr; -+ } -+ ms->hashSaltEntropy += hash; /* collect salt entropy */ -+ - { /* Get the hash for ip, compute the appropriate row */ -- U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls); - U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; - U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK; - U32* const row = hashTable + relRow; - BYTE* tagRow = (BYTE*)(tagTable + relRow); -- U32 const head = *tagRow & rowMask; -+ U32 const headGrouped = (*tagRow & rowMask) * groupWidth; - U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES]; - size_t numMatches = 0; - size_t currMatch = 0; -- ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries); -+ ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries); - - /* Cycle through the matches and prefetch */ -- for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { -- U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; -+ for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) { -+ U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask; - U32 const matchIndex = row[matchPos]; -+ if(matchPos == 0) continue; - assert(numMatches < rowEntries); - if (matchIndex < lowLimit) - break; -@@ -1204,13 +1240,14 @@ size_t ZSTD_RowFindBestMatch( - PREFETCH_L1(dictBase + matchIndex); - } - matchBuffer[numMatches++] = matchIndex; -+ --nbAttempts; - 
} - - /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop - in ZSTD_row_update_internal() at the next search. */ - { - U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); -- tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag; -+ tagRow[pos] = (BYTE)tag; - row[pos] = ms->nextToUpdate++; - } - -@@ -1224,7 +1261,8 @@ size_t ZSTD_RowFindBestMatch( - if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { - const BYTE* const match = base + matchIndex; - assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ -- if (match[ml] == ip[ml]) /* potentially better */ -+ /* read 4B starting from (match + ml + 1 - sizeof(U32)) */ -+ if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */ - currentMl = ZSTD_count(ip, match, iLimit); - } else { - const BYTE* const match = dictBase + matchIndex; -@@ -1236,7 +1274,7 @@ size_t ZSTD_RowFindBestMatch( - /* Save best solution */ - if (currentMl > ml) { - ml = currentMl; -- *offsetPtr = STORE_OFFSET(curr - matchIndex); -+ *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex); - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } - } -@@ -1254,19 +1292,21 @@ size_t ZSTD_RowFindBestMatch( - const U32 dmsSize = (U32)(dmsEnd - dmsBase); - const U32 dmsIndexDelta = dictLimit - dmsSize; - -- { U32 const head = *dmsTagRow & rowMask; -+ { U32 const headGrouped = (*dmsTagRow & rowMask) * groupWidth; - U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES]; - size_t numMatches = 0; - size_t currMatch = 0; -- ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries); -+ ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, headGrouped, rowEntries); - -- for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { -- U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; -+ for (; (matches > 0) && (nbAttempts > 0); matches 
&= (matches - 1)) { -+ U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask; - U32 const matchIndex = dmsRow[matchPos]; -+ if(matchPos == 0) continue; - if (matchIndex < dmsLowestIndex) - break; - PREFETCH_L1(dmsBase + matchIndex); - matchBuffer[numMatches++] = matchIndex; -+ --nbAttempts; - } - - /* Return the longest match */ -@@ -1285,7 +1325,7 @@ size_t ZSTD_RowFindBestMatch( - if (currentMl > ml) { - ml = currentMl; - assert(curr > matchIndex + dmsIndexDelta); -- *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta)); -+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta)); - if (ip+currentMl == iLimit) break; - } - } -@@ -1472,8 +1512,9 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax( - * Common parser - lazy strategy - *********************************/ - --FORCE_INLINE_TEMPLATE size_t --ZSTD_compressBlock_lazy_generic( -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t ZSTD_compressBlock_lazy_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, -@@ -1491,7 +1532,8 @@ ZSTD_compressBlock_lazy_generic( - const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6); - const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); - -- U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; -+ U32 offset_1 = rep[0], offset_2 = rep[1]; -+ U32 offsetSaved1 = 0, offsetSaved2 = 0; - - const int isDMS = dictMode == ZSTD_dictMatchState; - const int isDDS = dictMode == ZSTD_dedicatedDictSearch; -@@ -1512,8 +1554,8 @@ ZSTD_compressBlock_lazy_generic( - U32 const curr = (U32)(ip - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog); - U32 const maxRep = curr - windowLow; -- if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; -- if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; -+ if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0; -+ if (offset_1 > maxRep) offsetSaved1 = offset_1, 
offset_1 = 0; - } - if (isDxS) { - /* dictMatchState repCode checks don't currently handle repCode == 0 -@@ -1522,10 +1564,11 @@ ZSTD_compressBlock_lazy_generic( - assert(offset_2 <= dictAndPrefixLength); - } - -+ /* Reset the lazy skipping state */ -+ ms->lazySkipping = 0; -+ - if (searchMethod == search_rowHash) { -- ZSTD_row_fillHashCache(ms, base, rowLog, -- MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), -- ms->nextToUpdate, ilimit); -+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit); - } - - /* Match Loop */ -@@ -1537,7 +1580,7 @@ ZSTD_compressBlock_lazy_generic( - #endif - while (ip < ilimit) { - size_t matchLength=0; -- size_t offcode=STORE_REPCODE_1; -+ size_t offBase = REPCODE1_TO_OFFBASE; - const BYTE* start=ip+1; - DEBUGLOG(7, "search baseline (depth 0)"); - -@@ -1562,14 +1605,23 @@ ZSTD_compressBlock_lazy_generic( - } - - /* first search (depth 0) */ -- { size_t offsetFound = 999999999; -- size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offsetFound, mls, rowLog, searchMethod, dictMode); -+ { size_t offbaseFound = 999999999; -+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offbaseFound, mls, rowLog, searchMethod, dictMode); - if (ml2 > matchLength) -- matchLength = ml2, start = ip, offcode=offsetFound; -+ matchLength = ml2, start = ip, offBase = offbaseFound; - } - - if (matchLength < 4) { -- ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ -+ size_t const step = ((size_t)(ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */; -+ ip += step; -+ /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time. -+ * In this mode we stop inserting every position into our tables, and only insert -+ * positions that we search, which is one in step positions. -+ * The exact cutoff is flexible, I've just chosen a number that is reasonably high, -+ * so we minimize the compression ratio loss in "normal" scenarios. 
This mode gets -+ * triggered once we've gone 2KB without finding any matches. -+ */ -+ ms->lazySkipping = step > kLazySkippingStep; - continue; - } - -@@ -1579,12 +1631,12 @@ ZSTD_compressBlock_lazy_generic( - DEBUGLOG(7, "search depth 1"); - ip ++; - if ( (dictMode == ZSTD_noDict) -- && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { -+ && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { - size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; - int const gain2 = (int)(mlRep * 3); -- int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); -+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) -- matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; -+ matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip; - } - if (isDxS) { - const U32 repIndex = (U32)(ip - base) - offset_1; -@@ -1596,17 +1648,17 @@ ZSTD_compressBlock_lazy_generic( - const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; - size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; - int const gain2 = (int)(mlRep * 3); -- int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); -+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) -- matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; -+ matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip; - } - } -- { size_t offset2=999999999; -- size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, dictMode); -- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ -- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4); -+ { size_t ofbCandidate=999999999; -+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode); -+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ -+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4); - if ((ml2 >= 4) && (gain2 > gain1)) { -- matchLength = ml2, offcode = offset2, start = ip; -+ matchLength = ml2, offBase = ofbCandidate, start = ip; - continue; /* search a better one */ - } } - -@@ -1615,12 +1667,12 @@ ZSTD_compressBlock_lazy_generic( - DEBUGLOG(7, "search depth 2"); - ip ++; - if ( (dictMode == ZSTD_noDict) -- && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { -+ && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { - size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; - int const gain2 = (int)(mlRep * 4); -- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); -+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) -- matchLength = mlRep, offcode = 
STORE_REPCODE_1, start = ip; -+ matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip; - } - if (isDxS) { - const U32 repIndex = (U32)(ip - base) - offset_1; -@@ -1632,17 +1684,17 @@ ZSTD_compressBlock_lazy_generic( - const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; - size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; - int const gain2 = (int)(mlRep * 4); -- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); -+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) -- matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; -+ matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip; - } - } -- { size_t offset2=999999999; -- size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, dictMode); -- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ -- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7); -+ { size_t ofbCandidate=999999999; -+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode); -+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ -+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7); - if ((ml2 >= 4) && (gain2 > gain1)) { -- matchLength = ml2, offcode = offset2, start = ip; -+ matchLength = ml2, offBase = ofbCandidate, start = ip; - continue; - } } } - break; /* nothing found : store previous solution */ -@@ -1653,26 +1705,33 @@ ZSTD_compressBlock_lazy_generic( - * notably if `value` is unsigned, resulting in a large positive `-value`. 
- */ - /* catch up */ -- if (STORED_IS_OFFSET(offcode)) { -+ if (OFFBASE_IS_OFFSET(offBase)) { - if (dictMode == ZSTD_noDict) { -- while ( ((start > anchor) & (start - STORED_OFFSET(offcode) > prefixLowest)) -- && (start[-1] == (start-STORED_OFFSET(offcode))[-1]) ) /* only search for offset within prefix */ -+ while ( ((start > anchor) & (start - OFFBASE_TO_OFFSET(offBase) > prefixLowest)) -+ && (start[-1] == (start-OFFBASE_TO_OFFSET(offBase))[-1]) ) /* only search for offset within prefix */ - { start--; matchLength++; } - } - if (isDxS) { -- U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode)); -+ U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase)); - const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; - const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest; - while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ - } -- offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode); -+ offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase); - } - /* store sequence */ - _storeSequence: - { size_t const litLength = (size_t)(start - anchor); -- ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength); -+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength); - anchor = ip = start + matchLength; - } -+ if (ms->lazySkipping) { -+ /* We've found a match, disable lazy skipping mode, and refill the hash cache. */ -+ if (searchMethod == search_rowHash) { -+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit); -+ } -+ ms->lazySkipping = 0; -+ } - - /* check immediate repcode */ - if (isDxS) { -@@ -1686,8 +1745,8 @@ ZSTD_compressBlock_lazy_generic( - && (MEM_read32(repMatch) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? 
dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; -- offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap offset_2 <=> offset_1 */ -- ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength); -+ offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap offset_2 <=> offset_1 */ -+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength); - ip += matchLength; - anchor = ip; - continue; -@@ -1701,166 +1760,181 @@ ZSTD_compressBlock_lazy_generic( - && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { - /* store sequence */ - matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; -- offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap repcodes */ -- ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength); -+ offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap repcodes */ -+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } } - -- /* Save reps for next block */ -- rep[0] = offset_1 ? offset_1 : savedOffset; -- rep[1] = offset_2 ? offset_2 : savedOffset; -+ /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0), -+ * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */ -+ offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2; -+ -+ /* save reps for next block */ -+ rep[0] = offset_1 ? offset_1 : offsetSaved1; -+ rep[1] = offset_2 ? 
offset_2 : offsetSaved2; - - /* Return the last literals size */ - return (size_t)(iend - anchor); - } -+#endif /* build exclusions */ - - --size_t ZSTD_compressBlock_btlazy2( -+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_greedy( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); - } - --size_t ZSTD_compressBlock_lazy2( -+size_t ZSTD_compressBlock_greedy_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); - } - --size_t ZSTD_compressBlock_lazy( -+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); - } - --size_t ZSTD_compressBlock_greedy( -+size_t ZSTD_compressBlock_greedy_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict); - } - --size_t ZSTD_compressBlock_btlazy2_dictMatchState( -+size_t ZSTD_compressBlock_greedy_dictMatchState_row( - ZSTD_matchState_t* 
ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState); - } - --size_t ZSTD_compressBlock_lazy2_dictMatchState( -+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch); - } -+#endif - --size_t ZSTD_compressBlock_lazy_dictMatchState( -+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_lazy( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); - } - --size_t ZSTD_compressBlock_greedy_dictMatchState( -+size_t ZSTD_compressBlock_lazy_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); - } - -- --size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( -+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, 
seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); - } - --size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( -+size_t ZSTD_compressBlock_lazy_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict); - } - --size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( -+size_t ZSTD_compressBlock_lazy_dictMatchState_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState); - } - --/* Row-based matchfinder */ --size_t ZSTD_compressBlock_lazy2_row( -+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch); - } -+#endif - --size_t ZSTD_compressBlock_lazy_row( -+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_lazy2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 
search_hashChain, 2, ZSTD_noDict); - } - --size_t ZSTD_compressBlock_greedy_row( -+size_t ZSTD_compressBlock_lazy2_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); - } - --size_t ZSTD_compressBlock_lazy2_dictMatchState_row( -+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); - } - --size_t ZSTD_compressBlock_lazy_dictMatchState_row( -+size_t ZSTD_compressBlock_lazy2_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict); - } - --size_t ZSTD_compressBlock_greedy_dictMatchState_row( -+size_t ZSTD_compressBlock_lazy2_dictMatchState_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState); - } - -- - size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { - 
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch); - } -+#endif - --size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( -+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_btlazy2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); - } - --size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( -+size_t ZSTD_compressBlock_btlazy2_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - { -- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch); -+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); - } -+#endif - -+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) - FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_compressBlock_lazy_extDict_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], -@@ -1886,12 +1960,13 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( - - DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod); - -+ /* Reset the lazy skipping state */ -+ ms->lazySkipping = 0; -+ - /* init */ - ip += (ip == prefixStart); - if (searchMethod == search_rowHash) { -- ZSTD_row_fillHashCache(ms, base, rowLog, -- MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), -- ms->nextToUpdate, ilimit); -+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, 
ms->nextToUpdate, ilimit); - } - - /* Match Loop */ -@@ -1903,7 +1978,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( - #endif - while (ip < ilimit) { - size_t matchLength=0; -- size_t offcode=STORE_REPCODE_1; -+ size_t offBase = REPCODE1_TO_OFFBASE; - const BYTE* start=ip+1; - U32 curr = (U32)(ip-base); - -@@ -1922,14 +1997,23 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( - } } - - /* first search (depth 0) */ -- { size_t offsetFound = 999999999; -- size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offsetFound, mls, rowLog, searchMethod, ZSTD_extDict); -+ { size_t ofbCandidate = 999999999; -+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict); - if (ml2 > matchLength) -- matchLength = ml2, start = ip, offcode=offsetFound; -+ matchLength = ml2, start = ip, offBase = ofbCandidate; - } - - if (matchLength < 4) { -- ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ -+ size_t const step = ((size_t)(ip-anchor) >> kSearchStrength); -+ ip += step + 1; /* jump faster over incompressible sections */ -+ /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time. -+ * In this mode we stop inserting every position into our tables, and only insert -+ * positions that we search, which is one in step positions. -+ * The exact cutoff is flexible, I've just chosen a number that is reasonably high, -+ * so we minimize the compression ratio loss in "normal" scenarios. This mode gets -+ * triggered once we've gone 2KB without finding any matches. -+ */ -+ ms->lazySkipping = step > kLazySkippingStep; - continue; - } - -@@ -1939,7 +2023,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( - ip ++; - curr++; - /* check repCode */ -- if (offcode) { -+ if (offBase) { - const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog); - const U32 repIndex = (U32)(curr - offset_1); - const BYTE* const repBase = repIndex < dictLimit ? 
dictBase : base; -@@ -1951,18 +2035,18 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; - int const gain2 = (int)(repLength * 3); -- int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); -+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1); - if ((repLength >= 4) && (gain2 > gain1)) -- matchLength = repLength, offcode = STORE_REPCODE_1, start = ip; -+ matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip; - } } - - /* search match, depth 1 */ -- { size_t offset2=999999999; -- size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, ZSTD_extDict); -- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ -- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4); -+ { size_t ofbCandidate = 999999999; -+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict); -+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ -+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4); - if ((ml2 >= 4) && (gain2 > gain1)) { -- matchLength = ml2, offcode = offset2, start = ip; -+ matchLength = ml2, offBase = ofbCandidate, start = ip; - continue; /* search a better one */ - } } - -@@ -1971,7 +2055,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( - ip ++; - curr++; - /* check repCode */ -- if (offcode) { -+ if (offBase) { - const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog); - const U32 repIndex = (U32)(curr - offset_1); - const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; -@@ -1983,38 +2067,45 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( - const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; - int const gain2 = (int)(repLength * 4); -- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); -+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1); - if ((repLength >= 4) && (gain2 > gain1)) -- matchLength = repLength, offcode = STORE_REPCODE_1, start = ip; -+ matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip; - } } - - /* search match, depth 2 */ -- { size_t offset2=999999999; -- size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, ZSTD_extDict); -- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ -- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7); -+ { size_t ofbCandidate = 999999999; -+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict); -+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ -+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7); - if ((ml2 >= 4) && (gain2 > gain1)) { -- matchLength = ml2, offcode = offset2, start = ip; -+ matchLength = ml2, offBase = ofbCandidate, start = ip; - continue; - } } } - break; /* nothing found : store previous solution */ - } - - /* catch up */ -- if (STORED_IS_OFFSET(offcode)) { -- U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode)); -+ if (OFFBASE_IS_OFFSET(offBase)) { -+ U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase)); - const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; - const BYTE* const mStart = (matchIndex < dictLimit) ? 
dictStart : prefixStart; - while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ -- offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode); -+ offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase); - } - - /* store sequence */ - _storeSequence: - { size_t const litLength = (size_t)(start - anchor); -- ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength); -+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength); - anchor = ip = start + matchLength; - } -+ if (ms->lazySkipping) { -+ /* We've found a match, disable lazy skipping mode, and refill the hash cache. */ -+ if (searchMethod == search_rowHash) { -+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit); -+ } -+ ms->lazySkipping = 0; -+ } - - /* check immediate repcode */ - while (ip <= ilimit) { -@@ -2029,8 +2120,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; -- offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap offset history */ -- ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength); -+ offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap offset history */ -+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) 
*/ -@@ -2045,8 +2136,9 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( - /* Return the last literals size */ - return (size_t)(iend - anchor); - } -+#endif /* build exclusions */ - -- -+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR - size_t ZSTD_compressBlock_greedy_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -@@ -2054,49 +2146,55 @@ size_t ZSTD_compressBlock_greedy_extDict( - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); - } - --size_t ZSTD_compressBlock_lazy_extDict( -+size_t ZSTD_compressBlock_greedy_extDict_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -- - { -- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); -+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0); - } -+#endif - --size_t ZSTD_compressBlock_lazy2_extDict( -+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_lazy_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - - { -- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); -+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); - } - --size_t ZSTD_compressBlock_btlazy2_extDict( -+size_t ZSTD_compressBlock_lazy_extDict_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - - { -- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); -+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1); - } -+#endif - --size_t ZSTD_compressBlock_greedy_extDict_row( -+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -+size_t 
ZSTD_compressBlock_lazy2_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -+ - { -- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0); -+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); - } - --size_t ZSTD_compressBlock_lazy_extDict_row( -+size_t ZSTD_compressBlock_lazy2_extDict_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -- - { -- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1); -+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2); - } -+#endif - --size_t ZSTD_compressBlock_lazy2_extDict_row( -+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_btlazy2_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - - { -- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2); -+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); - } -+#endif -diff --git a/lib/zstd/compress/zstd_lazy.h b/lib/zstd/compress/zstd_lazy.h -index e5bdf4df8dde..22c9201f4e63 100644 ---- a/lib/zstd/compress/zstd_lazy.h -+++ b/lib/zstd/compress/zstd_lazy.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -22,98 +23,175 @@ - */ - #define ZSTD_LAZY_DDSS_BUCKET_LOG 2 - -+#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */ -+ -+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) - U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); - void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip); - - void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); - - void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ -+#endif - --size_t ZSTD_compressBlock_btlazy2( -+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_greedy( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy2( -+size_t ZSTD_compressBlock_greedy_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy( -+size_t ZSTD_compressBlock_greedy_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_greedy( -+size_t ZSTD_compressBlock_greedy_dictMatchState_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy2_row( -+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy_row( -+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( - ZSTD_matchState_t* ms, 
seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_greedy_row( -+size_t ZSTD_compressBlock_greedy_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -- --size_t ZSTD_compressBlock_btlazy2_dictMatchState( -+size_t ZSTD_compressBlock_greedy_extDict_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy2_dictMatchState( -+ -+#define ZSTD_COMPRESSBLOCK_GREEDY ZSTD_compressBlock_greedy -+#define ZSTD_COMPRESSBLOCK_GREEDY_ROW ZSTD_compressBlock_greedy_row -+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE ZSTD_compressBlock_greedy_dictMatchState -+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW ZSTD_compressBlock_greedy_dictMatchState_row -+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH ZSTD_compressBlock_greedy_dedicatedDictSearch -+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_greedy_dedicatedDictSearch_row -+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT ZSTD_compressBlock_greedy_extDict -+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW ZSTD_compressBlock_greedy_extDict_row -+#else -+#define ZSTD_COMPRESSBLOCK_GREEDY NULL -+#define ZSTD_COMPRESSBLOCK_GREEDY_ROW NULL -+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE NULL -+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW NULL -+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH NULL -+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW NULL -+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT NULL -+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW NULL -+#endif -+ -+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_lazy( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy_dictMatchState( -+size_t ZSTD_compressBlock_lazy_row( - ZSTD_matchState_t* ms, seqStore_t* 
seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_greedy_dictMatchState( -+size_t ZSTD_compressBlock_lazy_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy2_dictMatchState_row( -+size_t ZSTD_compressBlock_lazy_dictMatchState_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy_dictMatchState_row( -+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_greedy_dictMatchState_row( -+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -- --size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( -+size_t ZSTD_compressBlock_lazy_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( -+size_t ZSTD_compressBlock_lazy_extDict_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( -+ -+#define ZSTD_COMPRESSBLOCK_LAZY ZSTD_compressBlock_lazy -+#define ZSTD_COMPRESSBLOCK_LAZY_ROW ZSTD_compressBlock_lazy_row -+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE ZSTD_compressBlock_lazy_dictMatchState -+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy_dictMatchState_row -+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy_dedicatedDictSearch -+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy_dedicatedDictSearch_row -+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT ZSTD_compressBlock_lazy_extDict -+#define 
ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW ZSTD_compressBlock_lazy_extDict_row -+#else -+#define ZSTD_COMPRESSBLOCK_LAZY NULL -+#define ZSTD_COMPRESSBLOCK_LAZY_ROW NULL -+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE NULL -+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW NULL -+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH NULL -+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW NULL -+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT NULL -+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW NULL -+#endif -+ -+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_lazy2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( -+size_t ZSTD_compressBlock_lazy2_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( -+size_t ZSTD_compressBlock_lazy2_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( -+size_t ZSTD_compressBlock_lazy2_dictMatchState_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -- --size_t ZSTD_compressBlock_greedy_extDict( -+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy_extDict( -+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - size_t ZSTD_compressBlock_lazy2_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_greedy_extDict_row( -+size_t ZSTD_compressBlock_lazy2_extDict_row( - 
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy_extDict_row( -+ -+#define ZSTD_COMPRESSBLOCK_LAZY2 ZSTD_compressBlock_lazy2 -+#define ZSTD_COMPRESSBLOCK_LAZY2_ROW ZSTD_compressBlock_lazy2_row -+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE ZSTD_compressBlock_lazy2_dictMatchState -+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy2_dictMatchState_row -+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy2_dedicatedDictSearch -+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy2_dedicatedDictSearch_row -+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT ZSTD_compressBlock_lazy2_extDict -+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW ZSTD_compressBlock_lazy2_extDict_row -+#else -+#define ZSTD_COMPRESSBLOCK_LAZY2 NULL -+#define ZSTD_COMPRESSBLOCK_LAZY2_ROW NULL -+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE NULL -+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW NULL -+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH NULL -+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW NULL -+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT NULL -+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW NULL -+#endif -+ -+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_btlazy2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_lazy2_extDict_row( -+size_t ZSTD_compressBlock_btlazy2_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - size_t ZSTD_compressBlock_btlazy2_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -- -+ -+#define ZSTD_COMPRESSBLOCK_BTLAZY2 ZSTD_compressBlock_btlazy2 -+#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE ZSTD_compressBlock_btlazy2_dictMatchState -+#define 
ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT ZSTD_compressBlock_btlazy2_extDict -+#else -+#define ZSTD_COMPRESSBLOCK_BTLAZY2 NULL -+#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE NULL -+#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT NULL -+#endif -+ - - - #endif /* ZSTD_LAZY_H */ -diff --git a/lib/zstd/compress/zstd_ldm.c b/lib/zstd/compress/zstd_ldm.c -index dd86fc83e7dd..07f3bc6437ce 100644 ---- a/lib/zstd/compress/zstd_ldm.c -+++ b/lib/zstd/compress/zstd_ldm.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -242,11 +243,15 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, - switch(ms->cParams.strategy) - { - case ZSTD_fast: -- ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast); -+ ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx); - break; - - case ZSTD_dfast: -- ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast); -+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR -+ ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx); -+#else -+ assert(0); /* shouldn't be called: cparams should've been adjusted. */ -+#endif - break; - - case ZSTD_greedy: -@@ -318,7 +323,9 @@ static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) - } - } - --static size_t ZSTD_ldm_generateSequences_internal( -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t ZSTD_ldm_generateSequences_internal( - ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, - ldmParams_t const* params, void const* src, size_t srcSize) - { -@@ -549,7 +556,7 @@ size_t ZSTD_ldm_generateSequences( - * the window through early invalidation. - * TODO: * Test the chunk size. - * * Try invalidation after the sequence generation and test the -- * the offset against maxDist directly. -+ * offset against maxDist directly. 
- * - * NOTE: Because of dictionaries + sequence splitting we MUST make sure - * that any offset used is valid at the END of the sequence, since it may -@@ -689,7 +696,6 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, - /* maybeSplitSequence updates rawSeqStore->pos */ - rawSeq const sequence = maybeSplitSequence(rawSeqStore, - (U32)(iend - ip), minMatch); -- int i; - /* End signal */ - if (sequence.offset == 0) - break; -@@ -702,6 +708,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, - /* Run the block compressor */ - DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); - { -+ int i; - size_t const newLitLength = - blockCompressor(ms, seqStore, rep, ip, sequence.litLength); - ip += sequence.litLength; -@@ -711,7 +718,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, - rep[0] = sequence.offset; - /* Store the sequence */ - ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, -- STORE_OFFSET(sequence.offset), -+ OFFSET_TO_OFFBASE(sequence.offset), - sequence.matchLength); - ip += sequence.matchLength; - } -diff --git a/lib/zstd/compress/zstd_ldm.h b/lib/zstd/compress/zstd_ldm.h -index fbc6a5e88fd7..c540731abde7 100644 ---- a/lib/zstd/compress/zstd_ldm.h -+++ b/lib/zstd/compress/zstd_ldm.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -diff --git a/lib/zstd/compress/zstd_ldm_geartab.h b/lib/zstd/compress/zstd_ldm_geartab.h -index 647f865be290..cfccfc46f6f7 100644 ---- a/lib/zstd/compress/zstd_ldm_geartab.h -+++ b/lib/zstd/compress/zstd_ldm_geartab.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. 
and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -diff --git a/lib/zstd/compress/zstd_opt.c b/lib/zstd/compress/zstd_opt.c -index fd82acfda62f..a87b66ac8d24 100644 ---- a/lib/zstd/compress/zstd_opt.c -+++ b/lib/zstd/compress/zstd_opt.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -12,11 +13,14 @@ - #include "hist.h" - #include "zstd_opt.h" - -+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) - - #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ - #define ZSTD_MAX_PRICE (1<<30) - --#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ -+#define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ - - - /*-************************************* -@@ -26,27 +30,35 @@ - #if 0 /* approximation at bit level (for tests) */ - # define BITCOST_ACCURACY 0 - # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) --# define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat)) -+# define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat)) - #elif 0 /* fractional bit accuracy (for tests) */ - # define BITCOST_ACCURACY 8 - # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) --# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat)) -+# define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat)) - #else /* opt==approx, ultra==accurate */ - # define BITCOST_ACCURACY 8 - # define 
BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) --# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) -+# define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) - #endif - -+/* ZSTD_bitWeight() : -+ * provide estimated "cost" of a stat in full bits only */ - MEM_STATIC U32 ZSTD_bitWeight(U32 stat) - { - return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER); - } - -+/* ZSTD_fracWeight() : -+ * provide fractional-bit "cost" of a stat, -+ * using linear interpolation approximation */ - MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) - { - U32 const stat = rawStat + 1; - U32 const hb = ZSTD_highbit32(stat); - U32 const BWeight = hb * BITCOST_MULTIPLIER; -+ /* Fweight was meant for "Fractional weight" -+ * but it's effectively a value between 1 and 2 -+ * using fixed point arithmetic */ - U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb; - U32 const weight = BWeight + FWeight; - assert(hb + BITCOST_ACCURACY < 31); -@@ -57,7 +69,7 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) - /* debugging function, - * @return price in bytes as fractional value - * for debug messages only */ --MEM_STATIC double ZSTD_fCost(U32 price) -+MEM_STATIC double ZSTD_fCost(int price) - { - return (double)price / (BITCOST_MULTIPLIER*8); - } -@@ -88,20 +100,26 @@ static U32 sum_u32(const unsigned table[], size_t nbElts) - return total; - } - --static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift) -+typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e; -+ -+static U32 -+ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1) - { - U32 s, sum=0; -- DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift); -+ DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", -+ (unsigned)lastEltIndex+1, (unsigned)shift ); - assert(shift < 30); - for (s=0; s> shift); -- sum += table[s]; -+ unsigned const base = base1 ? 
1 : (table[s]>0); -+ unsigned const newStat = base + (table[s] >> shift); -+ sum += newStat; -+ table[s] = newStat; - } - return sum; - } - - /* ZSTD_scaleStats() : -- * reduce all elements in table is sum too large -+ * reduce all elt frequencies in table if sum too large - * return the resulting sum of elements */ - static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget) - { -@@ -110,7 +128,7 @@ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget) - DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget); - assert(logTarget < 30); - if (factor <= 1) return prevsum; -- return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor)); -+ return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed); - } - - /* ZSTD_rescaleFreqs() : -@@ -129,18 +147,22 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, - DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); - optPtr->priceType = zop_dynamic; - -- if (optPtr->litLengthSum == 0) { /* first block : init */ -- if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */ -- DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); -+ if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */ -+ -+ /* heuristic: use pre-defined stats for too small inputs */ -+ if (srcSize <= ZSTD_PREDEF_THRESHOLD) { -+ DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD); - optPtr->priceType = zop_predef; - } - - assert(optPtr->symbolCosts != NULL); - if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { -- /* huffman table presumed generated by dictionary */ -+ -+ /* huffman stats covering the full value set : table presumed generated by dictionary */ - optPtr->priceType = zop_dynamic; - - if (compressedLiterals) { -+ /* generate literals statistics from huffman table */ - unsigned lit; - assert(optPtr->litFreq != NULL); - 
optPtr->litSum = 0; -@@ -188,13 +210,14 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, - optPtr->offCodeSum += optPtr->offCodeFreq[of]; - } } - -- } else { /* not a dictionary */ -+ } else { /* first block, no dictionary */ - - assert(optPtr->litFreq != NULL); - if (compressedLiterals) { -+ /* base initial cost of literals on direct frequency within src */ - unsigned lit = MaxLit; - HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ -- optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8); -+ optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible); - } - - { unsigned const baseLLfreqs[MaxLL+1] = { -@@ -224,10 +247,9 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, - optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1); - } - -- - } - -- } else { /* new block : re-use previous statistics, scaled down */ -+ } else { /* new block : scale down accumulated statistics */ - - if (compressedLiterals) - optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12); -@@ -246,6 +268,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, - const optState_t* const optPtr, - int optLevel) - { -+ DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength); - if (litLength == 0) return 0; - - if (!ZSTD_compressedLiterals(optPtr)) -@@ -255,11 +278,14 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, - return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ - - /* dynamic statistics */ -- { U32 price = litLength * optPtr->litSumBasePrice; -+ { U32 price = optPtr->litSumBasePrice * litLength; -+ U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER; - U32 u; -+ assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER); - for (u=0; u < litLength; u++) { -- assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */ -- price -= 
WEIGHT(optPtr->litFreq[literals[u]], optLevel); -+ U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel); -+ if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax; -+ price -= litPrice; - } - return price; - } -@@ -272,10 +298,11 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP - assert(litLength <= ZSTD_BLOCKSIZE_MAX); - if (optPtr->priceType == zop_predef) - return WEIGHT(litLength, optLevel); -- /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX -- * because it isn't representable in the zstd format. So instead just -- * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block -- * would be all literals. -+ -+ /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX -+ * because it isn't representable in the zstd format. -+ * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. -+ * In such a case, the block would be all literals. - */ - if (litLength == ZSTD_BLOCKSIZE_MAX) - return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel); -@@ -289,24 +316,25 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP - } - - /* ZSTD_getMatchPrice() : -- * Provides the cost of the match part (offset + matchLength) of a sequence -+ * Provides the cost of the match part (offset + matchLength) of a sequence. - * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. 
-- * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2 -+ * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq() - * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) - */ - FORCE_INLINE_TEMPLATE U32 --ZSTD_getMatchPrice(U32 const offcode, -+ZSTD_getMatchPrice(U32 const offBase, - U32 const matchLength, - const optState_t* const optPtr, - int const optLevel) - { - U32 price; -- U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode)); -+ U32 const offCode = ZSTD_highbit32(offBase); - U32 const mlBase = matchLength - MINMATCH; - assert(matchLength >= MINMATCH); - -- if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ -- return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER); -+ if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */ -+ return WEIGHT(mlBase, optLevel) -+ + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */ - - /* dynamic statistics */ - price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); -@@ -325,10 +353,10 @@ ZSTD_getMatchPrice(U32 const offcode, - } - - /* ZSTD_updateStats() : -- * assumption : literals + litLengtn <= iend */ -+ * assumption : literals + litLength <= iend */ - static void ZSTD_updateStats(optState_t* const optPtr, - U32 litLength, const BYTE* literals, -- U32 offsetCode, U32 matchLength) -+ U32 offBase, U32 matchLength) - { - /* literals */ - if (ZSTD_compressedLiterals(optPtr)) { -@@ -344,8 +372,8 @@ static void ZSTD_updateStats(optState_t* const optPtr, - optPtr->litLengthSum++; - } - -- /* offset code : expected to follow storeSeq() numeric representation */ -- { U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode)); -+ /* offset code : follows storeSeq() numeric representation */ -+ { U32 const offCode = ZSTD_highbit32(offBase); - 
assert(offCode <= MaxOff); - optPtr->offCodeFreq[offCode]++; - optPtr->offCodeSum++; -@@ -379,9 +407,11 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) - - /* Update hashTable3 up to ip (excluded) - Assumption : always within prefix (i.e. not within extDict) */ --static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms, -- U32* nextToUpdate3, -- const BYTE* const ip) -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms, -+ U32* nextToUpdate3, -+ const BYTE* const ip) - { - U32* const hashTable3 = ms->hashTable3; - U32 const hashLog3 = ms->hashLog3; -@@ -408,7 +438,9 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms, - * @param ip assumed <= iend-8 . - * @param target The target of ZSTD_updateTree_internal() - we are filling to this position - * @return : nb of positions added */ --static U32 ZSTD_insertBt1( -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+U32 ZSTD_insertBt1( - const ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iend, - U32 const target, -@@ -527,6 +559,7 @@ static U32 ZSTD_insertBt1( - } - - FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - void ZSTD_updateTree_internal( - ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iend, -@@ -535,7 +568,7 @@ void ZSTD_updateTree_internal( - const BYTE* const base = ms->window.base; - U32 const target = (U32)(ip - base); - U32 idx = ms->nextToUpdate; -- DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", -+ DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", - idx, target, dictMode); - - while(idx < target) { -@@ -553,15 +586,18 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { - } - - FORCE_INLINE_TEMPLATE --U32 ZSTD_insertBtAndGetAllMatches ( -- ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ -- ZSTD_matchState_t* ms, -- U32* 
nextToUpdate3, -- const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, -- const U32 rep[ZSTD_REP_NUM], -- U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ -- const U32 lengthToBeat, -- U32 const mls /* template */) -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+U32 -+ZSTD_insertBtAndGetAllMatches ( -+ ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ -+ ZSTD_matchState_t* ms, -+ U32* nextToUpdate3, -+ const BYTE* const ip, const BYTE* const iLimit, -+ const ZSTD_dictMode_e dictMode, -+ const U32 rep[ZSTD_REP_NUM], -+ const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ -+ const U32 lengthToBeat, -+ const U32 mls /* template */) - { - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); -@@ -644,7 +680,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( - DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", - repCode, ll0, repOffset, repLen); - bestLength = repLen; -- matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1); /* expect value between 1 and 3 */ -+ matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */ - matches[mnum].len = (U32)repLen; - mnum++; - if ( (repLen > sufficient_len) -@@ -673,7 +709,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( - bestLength = mlen; - assert(curr > matchIndex3); - assert(mnum==0); /* no prior solution */ -- matches[0].off = STORE_OFFSET(curr - matchIndex3); -+ matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3); - matches[0].len = (U32)mlen; - mnum = 1; - if ( (mlen > sufficient_len) | -@@ -706,13 +742,13 @@ U32 ZSTD_insertBtAndGetAllMatches ( - } - - if (matchLength > bestLength) { -- DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", -- (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex)); -+ DEBUGLOG(8, "found match of 
length %u at distance %u (offBase=%u)", -+ (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex)); - assert(matchEndIdx > matchIndex); - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - bestLength = matchLength; -- matches[mnum].off = STORE_OFFSET(curr - matchIndex); -+ matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex); - matches[mnum].len = (U32)matchLength; - mnum++; - if ( (matchLength > ZSTD_OPT_NUM) -@@ -754,12 +790,12 @@ U32 ZSTD_insertBtAndGetAllMatches ( - - if (matchLength > bestLength) { - matchIndex = dictMatchIndex + dmsIndexDelta; -- DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)", -- (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex)); -+ DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)", -+ (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex)); - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - bestLength = matchLength; -- matches[mnum].off = STORE_OFFSET(curr - matchIndex); -+ matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex); - matches[mnum].len = (U32)matchLength; - mnum++; - if ( (matchLength > ZSTD_OPT_NUM) -@@ -792,7 +828,9 @@ typedef U32 (*ZSTD_getAllMatchesFn)( - U32 const ll0, - U32 const lengthToBeat); - --FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal( -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+U32 ZSTD_btGetAllMatches_internal( - ZSTD_match_t* matches, - ZSTD_matchState_t* ms, - U32* nextToUpdate3, -@@ -960,7 +998,7 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, - const ZSTD_optLdm_t* optLdm, U32 currPosInBlock) - { - U32 const posDiff = currPosInBlock - optLdm->startPosInBlock; -- /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */ -+ /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */ - 
U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff; - - /* Ensure that current block position is not outside of the match */ -@@ -971,11 +1009,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, - } - - if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) { -- U32 const candidateOffCode = STORE_OFFSET(optLdm->offset); -- DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u", -- candidateOffCode, candidateMatchLength, currPosInBlock); -+ U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset); -+ DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u", -+ candidateOffBase, candidateMatchLength, currPosInBlock); - matches[*nbMatches].len = candidateMatchLength; -- matches[*nbMatches].off = candidateOffCode; -+ matches[*nbMatches].off = candidateOffBase; - (*nbMatches)++; - } - } -@@ -1011,11 +1049,6 @@ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, - * Optimal parser - *********************************/ - --static U32 ZSTD_totalLen(ZSTD_optimal_t sol) --{ -- return sol.litlen + sol.mlen; --} -- - #if 0 /* debug */ - - static void -@@ -1033,7 +1066,13 @@ listStats(const U32* table, int lastEltID) - - #endif - --FORCE_INLINE_TEMPLATE size_t -+#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel) -+#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel) -+#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1)) -+ -+FORCE_INLINE_TEMPLATE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t - ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, - seqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], -@@ -1059,9 +1098,11 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, - - ZSTD_optimal_t* const opt = optStatePtr->priceTable; - ZSTD_match_t* const matches = 
optStatePtr->matchTable; -- ZSTD_optimal_t lastSequence; -+ ZSTD_optimal_t lastStretch; - ZSTD_optLdm_t optLdm; - -+ ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t)); -+ - optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore; - optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0; - ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip)); -@@ -1082,103 +1123,139 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, - U32 const ll0 = !litlen; - U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch); - ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, -- (U32)(ip-istart), (U32)(iend - ip)); -- if (!nbMatches) { ip++; continue; } -+ (U32)(ip-istart), (U32)(iend-ip)); -+ if (!nbMatches) { -+ DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart)); -+ ip++; -+ continue; -+ } -+ -+ /* Match found: let's store this solution, and eventually find more candidates. -+ * During this forward pass, @opt is used to store stretches, -+ * defined as "a match followed by N literals". -+ * Note how this is different from a Sequence, which is "N literals followed by a match". -+ * Storing stretches allows us to store different match predecessors -+ * for each literal position part of a literals run. 
*/ - - /* initialize opt[0] */ -- { U32 i ; for (i=0; i immediate encoding */ - { U32 const maxML = matches[nbMatches-1].len; -- U32 const maxOffcode = matches[nbMatches-1].off; -- DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series", -- nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart)); -+ U32 const maxOffBase = matches[nbMatches-1].off; -+ DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series", -+ nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart)); - - if (maxML > sufficient_len) { -- lastSequence.litlen = litlen; -- lastSequence.mlen = maxML; -- lastSequence.off = maxOffcode; -- DEBUGLOG(6, "large match (%u>%u), immediate encoding", -+ lastStretch.litlen = 0; -+ lastStretch.mlen = maxML; -+ lastStretch.off = maxOffBase; -+ DEBUGLOG(6, "large match (%u>%u) => immediate encoding", - maxML, sufficient_len); - cur = 0; -- last_pos = ZSTD_totalLen(lastSequence); -+ last_pos = maxML; - goto _shortestPath; - } } - - /* set prices for first matches starting position == 0 */ - assert(opt[0].price >= 0); -- { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); -- U32 pos; -+ { U32 pos; - U32 matchNb; - for (pos = 1; pos < minMatch; pos++) { -- opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ -+ opt[pos].price = ZSTD_MAX_PRICE; -+ opt[pos].mlen = 0; -+ opt[pos].litlen = litlen + pos; - } - for (matchNb = 0; matchNb < nbMatches; matchNb++) { -- U32 const offcode = matches[matchNb].off; -+ U32 const offBase = matches[matchNb].off; - U32 const end = matches[matchNb].len; - for ( ; pos <= end ; pos++ ) { -- U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel); -- U32 const sequencePrice = literalsPrice + matchPrice; -+ int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel); -+ int const sequencePrice = opt[0].price + matchPrice; - DEBUGLOG(7, 
"rPos:%u => set initial price : %.2f", - pos, ZSTD_fCost(sequencePrice)); - opt[pos].mlen = pos; -- opt[pos].off = offcode; -- opt[pos].litlen = litlen; -- opt[pos].price = (int)sequencePrice; -- } } -+ opt[pos].off = offBase; -+ opt[pos].litlen = 0; /* end of match */ -+ opt[pos].price = sequencePrice + LL_PRICE(0); -+ } -+ } - last_pos = pos-1; -+ opt[pos].price = ZSTD_MAX_PRICE; - } - } - - /* check further positions */ - for (cur = 1; cur <= last_pos; cur++) { - const BYTE* const inr = ip + cur; -- assert(cur < ZSTD_OPT_NUM); -- DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur) -+ assert(cur <= ZSTD_OPT_NUM); -+ DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur); - - /* Fix current position with one literal if cheaper */ -- { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1; -+ { U32 const litlen = opt[cur-1].litlen + 1; - int const price = opt[cur-1].price -- + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) -- + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) -- - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); -+ + LIT_PRICE(ip+cur-1) -+ + LL_INCPRICE(litlen); - assert(price < 1000000000); /* overflow check */ - if (price <= opt[cur].price) { -+ ZSTD_optimal_t const prevMatch = opt[cur]; - DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", - inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, - opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); -- opt[cur].mlen = 0; -- opt[cur].off = 0; -+ opt[cur] = opt[cur-1]; - opt[cur].litlen = litlen; - opt[cur].price = price; -+ if ( (optLevel >= 1) /* additional check only for higher modes */ -+ && (prevMatch.litlen == 0) /* replace a match */ -+ && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */ -+ && LIKELY(ip + cur < iend) -+ ) { -+ /* check next position, in case it would be cheaper */ -+ int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1); -+ int withMoreLiterals 
= price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1); -+ DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f", -+ cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals)); -+ if ( (with1literal < withMoreLiterals) -+ && (with1literal < opt[cur+1].price) ) { -+ /* update offset history - before it disappears */ -+ U32 const prev = cur - prevMatch.mlen; -+ repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0); -+ assert(cur >= prevMatch.mlen); -+ DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !", -+ ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals), -+ newReps.rep[0], newReps.rep[1], newReps.rep[2] ); -+ opt[cur+1] = prevMatch; /* mlen & offbase */ -+ ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t)); -+ opt[cur+1].litlen = 1; -+ opt[cur+1].price = with1literal; -+ if (last_pos < cur+1) last_pos = cur+1; -+ } -+ } - } else { -- DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", -- inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), -- opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]); -+ DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)", -+ inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price)); - } - } - -- /* Set the repcodes of the current position. We must do it here -- * because we rely on the repcodes of the 2nd to last sequence being -- * correct to set the next chunks repcodes during the backward -- * traversal. -+ /* Offset history is not updated during match comparison. -+ * Do it here, now that the match is selected and confirmed. 
- */ - ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); - assert(cur >= opt[cur].mlen); -- if (opt[cur].mlen != 0) { -+ if (opt[cur].litlen == 0) { -+ /* just finished a match => alter offset history */ - U32 const prev = cur - opt[cur].mlen; -- repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); -+ repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0); - ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); -- } else { -- ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); - } - - /* last match must start at a minimum distance of 8 from oend */ -@@ -1188,15 +1265,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, - - if ( (optLevel==0) /*static_test*/ - && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { -- DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1); -+ DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1); - continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ - } - - assert(opt[cur].price >= 0); -- { U32 const ll0 = (opt[cur].mlen != 0); -- U32 const litlen = (opt[cur].mlen == 0) ? 
opt[cur].litlen : 0; -- U32 const previousPrice = (U32)opt[cur].price; -- U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); -+ { U32 const ll0 = (opt[cur].litlen == 0); -+ int const previousPrice = opt[cur].price; -+ int const basePrice = previousPrice + LL_PRICE(0); - U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch); - U32 matchNb; - -@@ -1208,18 +1284,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, - continue; - } - -- { U32 const maxML = matches[nbMatches-1].len; -- DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u", -- inr-istart, cur, nbMatches, maxML); -- -- if ( (maxML > sufficient_len) -- || (cur + maxML >= ZSTD_OPT_NUM) ) { -- lastSequence.mlen = maxML; -- lastSequence.off = matches[nbMatches-1].off; -- lastSequence.litlen = litlen; -- cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */ -- last_pos = cur + ZSTD_totalLen(lastSequence); -- if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */ -+ { U32 const longestML = matches[nbMatches-1].len; -+ DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u", -+ inr-istart, cur, nbMatches, longestML); -+ -+ if ( (longestML > sufficient_len) -+ || (cur + longestML >= ZSTD_OPT_NUM) -+ || (ip + cur + longestML >= iend) ) { -+ lastStretch.mlen = longestML; -+ lastStretch.off = matches[nbMatches-1].off; -+ lastStretch.litlen = 0; -+ last_pos = cur + longestML; - goto _shortestPath; - } } - -@@ -1230,20 +1305,25 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, - U32 const startML = (matchNb>0) ? 
matches[matchNb-1].len+1 : minMatch; - U32 mlen; - -- DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u", -- matchNb, matches[matchNb].off, lastML, litlen); -+ DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u", -+ matchNb, matches[matchNb].off, lastML, opt[cur].litlen); - - for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ - U32 const pos = cur + mlen; -- int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); -+ int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); - - if ((pos > last_pos) || (price < opt[pos].price)) { - DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", - pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); -- while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ -+ while (last_pos < pos) { -+ /* fill empty positions, for future comparisons */ -+ last_pos++; -+ opt[last_pos].price = ZSTD_MAX_PRICE; -+ opt[last_pos].litlen = !0; /* just needs to be != 0, to mean "not an end of match" */ -+ } - opt[pos].mlen = mlen; - opt[pos].off = offset; -- opt[pos].litlen = litlen; -+ opt[pos].litlen = 0; - opt[pos].price = price; - } else { - DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", -@@ -1251,52 +1331,86 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, - if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ - } - } } } -+ opt[last_pos+1].price = ZSTD_MAX_PRICE; - } /* for (cur = 1; cur <= last_pos; cur++) */ - -- lastSequence = opt[last_pos]; -- cur = last_pos > ZSTD_totalLen(lastSequence) ? 
last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */ -- assert(cur < ZSTD_OPT_NUM); /* control overflow*/ -+ lastStretch = opt[last_pos]; -+ assert(cur >= lastStretch.mlen); -+ cur = last_pos - lastStretch.mlen; - - _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ - assert(opt[0].mlen == 0); -+ assert(last_pos >= lastStretch.mlen); -+ assert(cur == last_pos - lastStretch.mlen); - -- /* Set the next chunk's repcodes based on the repcodes of the beginning -- * of the last match, and the last sequence. This avoids us having to -- * update them while traversing the sequences. -- */ -- if (lastSequence.mlen != 0) { -- repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); -- ZSTD_memcpy(rep, &reps, sizeof(reps)); -+ if (lastStretch.mlen==0) { -+ /* no solution : all matches have been converted into literals */ -+ assert(lastStretch.litlen == (ip - anchor) + last_pos); -+ ip += last_pos; -+ continue; -+ } -+ assert(lastStretch.off > 0); -+ -+ /* Update offset history */ -+ if (lastStretch.litlen == 0) { -+ /* finishing on a match : update offset history */ -+ repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0); -+ ZSTD_memcpy(rep, &reps, sizeof(repcodes_t)); - } else { -- ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); -+ ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t)); -+ assert(cur >= lastStretch.litlen); -+ cur -= lastStretch.litlen; - } - -- { U32 const storeEnd = cur + 1; -+ /* Let's write the shortest path solution. -+ * It is stored in @opt in reverse order, -+ * starting from @storeEnd (==cur+2), -+ * effectively partially @opt overwriting. 
-+ * Content is changed too: -+ * - So far, @opt stored stretches, aka a match followed by literals -+ * - Now, it will store sequences, aka literals followed by a match -+ */ -+ { U32 const storeEnd = cur + 2; - U32 storeStart = storeEnd; -- U32 seqPos = cur; -+ U32 stretchPos = cur; - - DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", - last_pos, cur); (void)last_pos; -- assert(storeEnd < ZSTD_OPT_NUM); -- DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", -- storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off); -- opt[storeEnd] = lastSequence; -- while (seqPos > 0) { -- U32 const backDist = ZSTD_totalLen(opt[seqPos]); -+ assert(storeEnd < ZSTD_OPT_SIZE); -+ DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", -+ storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off); -+ if (lastStretch.litlen > 0) { -+ /* last "sequence" is unfinished: just a bunch of literals */ -+ opt[storeEnd].litlen = lastStretch.litlen; -+ opt[storeEnd].mlen = 0; -+ storeStart = storeEnd-1; -+ opt[storeStart] = lastStretch; -+ } { -+ opt[storeEnd] = lastStretch; /* note: litlen will be fixed */ -+ storeStart = storeEnd; -+ } -+ while (1) { -+ ZSTD_optimal_t nextStretch = opt[stretchPos]; -+ opt[storeStart].litlen = nextStretch.litlen; -+ DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)", -+ opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off); -+ if (nextStretch.mlen == 0) { -+ /* reaching beginning of segment */ -+ break; -+ } - storeStart--; -- DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", -- seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off); -- opt[storeStart] = opt[seqPos]; -- seqPos = (seqPos > backDist) ? 
seqPos - backDist : 0; -+ opt[storeStart] = nextStretch; /* note: litlen will be fixed */ -+ assert(nextStretch.litlen + nextStretch.mlen <= stretchPos); -+ stretchPos -= nextStretch.litlen + nextStretch.mlen; - } - - /* save sequences */ -- DEBUGLOG(6, "sending selected sequences into seqStore") -+ DEBUGLOG(6, "sending selected sequences into seqStore"); - { U32 storePos; - for (storePos=storeStart; storePos <= storeEnd; storePos++) { - U32 const llen = opt[storePos].litlen; - U32 const mlen = opt[storePos].mlen; -- U32 const offCode = opt[storePos].off; -+ U32 const offBase = opt[storePos].off; - U32 const advance = llen + mlen; - DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", - anchor - istart, (unsigned)llen, (unsigned)mlen); -@@ -1308,11 +1422,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, - } - - assert(anchor + llen <= iend); -- ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); -- ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen); -+ ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen); -+ ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen); - anchor += advance; - ip = anchor; - } } -+ DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]); -+ -+ /* update all costs */ - ZSTD_setBasePrices(optStatePtr, optLevel); - } - } /* while (ip < ilimit) */ -@@ -1320,21 +1437,27 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, - /* Return the last literals size */ - return (size_t)(iend - anchor); - } -+#endif /* build exclusions */ - -+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR - static size_t ZSTD_compressBlock_opt0( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) - { - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode); - } -+#endif - -+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR - static size_t ZSTD_compressBlock_opt2( - ZSTD_matchState_t* 
ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) - { - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode); - } -+#endif - -+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR - size_t ZSTD_compressBlock_btopt( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -@@ -1342,20 +1465,23 @@ size_t ZSTD_compressBlock_btopt( - DEBUGLOG(5, "ZSTD_compressBlock_btopt"); - return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict); - } -+#endif - - - - -+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR - /* ZSTD_initStats_ultra(): - * make a first compression pass, just to seed stats with more accurate starting values. - * only works on first block, with no dictionary and no ldm. -- * this function cannot error, hence its contract must be respected. -+ * this function cannot error out, its narrow contract must be respected. - */ --static void --ZSTD_initStats_ultra(ZSTD_matchState_t* ms, -- seqStore_t* seqStore, -- U32 rep[ZSTD_REP_NUM], -- const void* src, size_t srcSize) -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+void ZSTD_initStats_ultra(ZSTD_matchState_t* ms, -+ seqStore_t* seqStore, -+ U32 rep[ZSTD_REP_NUM], -+ const void* src, size_t srcSize) - { - U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ - ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep)); -@@ -1368,7 +1494,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms, - - ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/ - -- /* invalidate first scan from history */ -+ /* invalidate first scan from history, only keep entropy stats */ - ZSTD_resetSeqStore(seqStore); - ms->window.base -= srcSize; - ms->window.dictLimit += (U32)srcSize; -@@ -1392,10 +1518,10 @@ size_t ZSTD_compressBlock_btultra2( - U32 const curr = (U32)((const BYTE*)src - ms->window.base); - DEBUGLOG(5, 
"ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); - -- /* 2-pass strategy: -+ /* 2-passes strategy: - * this strategy makes a first pass over first block to collect statistics -- * and seed next round's statistics with it. -- * After 1st pass, function forgets everything, and starts a new block. -+ * in order to seed next round's statistics with it. -+ * After 1st pass, function forgets history, and starts a new block. - * Consequently, this can only work if no data has been previously loaded in tables, - * aka, no dictionary, no prefix, no ldm preprocessing. - * The compression ratio gain is generally small (~0.5% on first block), -@@ -1404,15 +1530,17 @@ size_t ZSTD_compressBlock_btultra2( - if ( (ms->opt.litLengthSum==0) /* first block */ - && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ - && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ -- && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ -- && (srcSize > ZSTD_PREDEF_THRESHOLD) -+ && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ -+ && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */ - ) { - ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); - } - - return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); - } -+#endif - -+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR - size_t ZSTD_compressBlock_btopt_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -@@ -1420,18 +1548,20 @@ size_t ZSTD_compressBlock_btopt_dictMatchState( - return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); - } - --size_t ZSTD_compressBlock_btultra_dictMatchState( -+size_t ZSTD_compressBlock_btopt_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) - { -- return ZSTD_compressBlock_opt2(ms, 
seqStore, rep, src, srcSize, ZSTD_dictMatchState); -+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict); - } -+#endif - --size_t ZSTD_compressBlock_btopt_extDict( -+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_btultra_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) - { -- return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict); -+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); - } - - size_t ZSTD_compressBlock_btultra_extDict( -@@ -1440,6 +1570,7 @@ size_t ZSTD_compressBlock_btultra_extDict( - { - return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict); - } -+#endif - - /* note : no btultra2 variant for extDict nor dictMatchState, - * because btultra2 is not meant to work with dictionaries -diff --git a/lib/zstd/compress/zstd_opt.h b/lib/zstd/compress/zstd_opt.h -index 22b862858ba7..ac1b743d27cd 100644 ---- a/lib/zstd/compress/zstd_opt.h -+++ b/lib/zstd/compress/zstd_opt.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -14,30 +15,40 @@ - - #include "zstd_compress_internal.h" - -+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ -+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) - /* used in ZSTD_loadDictionaryContent() */ - void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); -+#endif - -+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR - size_t ZSTD_compressBlock_btopt( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_btultra( -+size_t ZSTD_compressBlock_btopt_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); --size_t ZSTD_compressBlock_btultra2( -+size_t ZSTD_compressBlock_btopt_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -+#define ZSTD_COMPRESSBLOCK_BTOPT ZSTD_compressBlock_btopt -+#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE ZSTD_compressBlock_btopt_dictMatchState -+#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT ZSTD_compressBlock_btopt_extDict -+#else -+#define ZSTD_COMPRESSBLOCK_BTOPT NULL -+#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE NULL -+#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT NULL -+#endif - --size_t ZSTD_compressBlock_btopt_dictMatchState( -+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR -+size_t ZSTD_compressBlock_btultra( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - size_t ZSTD_compressBlock_btultra_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -- --size_t ZSTD_compressBlock_btopt_extDict( -- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -- void const* src, size_t srcSize); - size_t 
ZSTD_compressBlock_btultra_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -@@ -45,6 +56,20 @@ size_t ZSTD_compressBlock_btultra_extDict( - /* note : no btultra2 variant for extDict nor dictMatchState, - * because btultra2 is not meant to work with dictionaries - * and is only specific for the first block (no prefix) */ -+size_t ZSTD_compressBlock_btultra2( -+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], -+ void const* src, size_t srcSize); -+ -+#define ZSTD_COMPRESSBLOCK_BTULTRA ZSTD_compressBlock_btultra -+#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE ZSTD_compressBlock_btultra_dictMatchState -+#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT ZSTD_compressBlock_btultra_extDict -+#define ZSTD_COMPRESSBLOCK_BTULTRA2 ZSTD_compressBlock_btultra2 -+#else -+#define ZSTD_COMPRESSBLOCK_BTULTRA NULL -+#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE NULL -+#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT NULL -+#define ZSTD_COMPRESSBLOCK_BTULTRA2 NULL -+#endif - - - #endif /* ZSTD_OPT_H */ -diff --git a/lib/zstd/decompress/huf_decompress.c b/lib/zstd/decompress/huf_decompress.c -index 60958afebc41..ac8b87f48f84 100644 ---- a/lib/zstd/decompress/huf_decompress.c -+++ b/lib/zstd/decompress/huf_decompress.c -@@ -1,7 +1,8 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* ****************************************************************** - * huff0 huffman decoder, - * part of Finite State Entropy library -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. 
- * - * You can contact the author at : - * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy -@@ -19,10 +20,10 @@ - #include "../common/compiler.h" - #include "../common/bitstream.h" /* BIT_* */ - #include "../common/fse.h" /* to compress headers */ --#define HUF_STATIC_LINKING_ONLY - #include "../common/huf.h" - #include "../common/error_private.h" - #include "../common/zstd_internal.h" -+#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros64 */ - - /* ************************************************************** - * Constants -@@ -34,6 +35,12 @@ - * Macros - ****************************************************************/ - -+#ifdef HUF_DISABLE_FAST_DECODE -+# define HUF_ENABLE_FAST_DECODE 0 -+#else -+# define HUF_ENABLE_FAST_DECODE 1 -+#endif -+ - /* These two optional macros force the use one way or another of the two - * Huffman decompression implementations. You can't force in both directions - * at the same time. -@@ -43,27 +50,25 @@ - #error "Cannot force the use of the X1 and X2 decoders at the same time!" - #endif - --#if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2 --# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE -+/* When DYNAMIC_BMI2 is enabled, fast decoders are only called when bmi2 is -+ * supported at runtime, so we can add the BMI2 target attribute. -+ * When it is disabled, we will still get BMI2 if it is enabled statically. 
-+ */ -+#if DYNAMIC_BMI2 -+# define HUF_FAST_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE - #else --# define HUF_ASM_X86_64_BMI2_ATTRS -+# define HUF_FAST_BMI2_ATTRS - #endif - - #define HUF_EXTERN_C - #define HUF_ASM_DECL HUF_EXTERN_C - --#if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)) -+#if DYNAMIC_BMI2 - # define HUF_NEED_BMI2_FUNCTION 1 - #else - # define HUF_NEED_BMI2_FUNCTION 0 - #endif - --#if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)) --# define HUF_NEED_DEFAULT_FUNCTION 1 --#else --# define HUF_NEED_DEFAULT_FUNCTION 0 --#endif -- - /* ************************************************************** - * Error Management - ****************************************************************/ -@@ -80,6 +85,11 @@ - /* ************************************************************** - * BMI2 Variant Wrappers - ****************************************************************/ -+typedef size_t (*HUF_DecompressUsingDTableFn)(void *dst, size_t dstSize, -+ const void *cSrc, -+ size_t cSrcSize, -+ const HUF_DTable *DTable); -+ - #if DYNAMIC_BMI2 - - #define HUF_DGEN(fn) \ -@@ -101,9 +111,9 @@ - } \ - \ - static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ -- size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ -+ size_t cSrcSize, HUF_DTable const* DTable, int flags) \ - { \ -- if (bmi2) { \ -+ if (flags & HUF_flags_bmi2) { \ - return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ -@@ -113,9 +123,9 @@ - - #define HUF_DGEN(fn) \ - static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ -- size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ -+ size_t cSrcSize, HUF_DTable const* DTable, int flags) \ - { \ -- (void)bmi2; \ -+ (void)flags; \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } - -@@ -134,43 +144,66 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) - return dtd; - } - --#if ZSTD_ENABLE_ASM_X86_64_BMI2 -- --static size_t 
HUF_initDStream(BYTE const* ip) { -+static size_t HUF_initFastDStream(BYTE const* ip) { - BYTE const lastByte = ip[7]; -- size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; -+ size_t const bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; - size_t const value = MEM_readLEST(ip) | 1; - assert(bitsConsumed <= 8); -+ assert(sizeof(size_t) == 8); - return value << bitsConsumed; - } -+ -+ -+/* -+ * The input/output arguments to the Huffman fast decoding loop: -+ * -+ * ip [in/out] - The input pointers, must be updated to reflect what is consumed. -+ * op [in/out] - The output pointers, must be updated to reflect what is written. -+ * bits [in/out] - The bitstream containers, must be updated to reflect the current state. -+ * dt [in] - The decoding table. -+ * ilowest [in] - The beginning of the valid range of the input. Decoders may read -+ * down to this pointer. It may be below iend[0]. -+ * oend [in] - The end of the output stream. op[3] must not cross oend. -+ * iend [in] - The end of each input stream. ip[i] may cross iend[i], -+ * as long as it is above ilowest, but that indicates corruption. -+ */ - typedef struct { - BYTE const* ip[4]; - BYTE* op[4]; - U64 bits[4]; - void const* dt; -- BYTE const* ilimit; -+ BYTE const* ilowest; - BYTE* oend; - BYTE const* iend[4]; --} HUF_DecompressAsmArgs; -+} HUF_DecompressFastArgs; -+ -+typedef void (*HUF_DecompressFastLoopFn)(HUF_DecompressFastArgs*); - - /* -- * Initializes args for the asm decoding loop. -- * @returns 0 on success -- * 1 if the fallback implementation should be used. -+ * Initializes args for the fast decoding loop. -+ * @returns 1 on success -+ * 0 if the fallback implementation should be used. - * Or an error code on failure. 
- */ --static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable) -+static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable) - { - void const* dt = DTable + 1; - U32 const dtLog = HUF_getDTableDesc(DTable).tableLog; - -- const BYTE* const ilimit = (const BYTE*)src + 6 + 8; -+ const BYTE* const istart = (const BYTE*)src; - -- BYTE* const oend = (BYTE*)dst + dstSize; -+ BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize); - -- /* The following condition is false on x32 platform, -- * but HUF_asm is not compatible with this ABI */ -- if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1; -+ /* The fast decoding loop assumes 64-bit little-endian. -+ * This condition is false on x32. -+ */ -+ if (!MEM_isLittleEndian() || MEM_32bits()) -+ return 0; -+ -+ /* Avoid nullptr addition */ -+ if (dstSize == 0) -+ return 0; -+ assert(dst != NULL); - - /* strict minimum : jump table + 1 byte per stream */ - if (srcSize < 10) -@@ -181,11 +214,10 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, - * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder. - */ - if (dtLog != HUF_DECODER_FAST_TABLELOG) -- return 1; -+ return 0; - - /* Read the jump table. 
*/ - { -- const BYTE* const istart = (const BYTE*)src; - size_t const length1 = MEM_readLE16(istart); - size_t const length2 = MEM_readLE16(istart+2); - size_t const length3 = MEM_readLE16(istart+4); -@@ -195,13 +227,11 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, - args->iend[2] = args->iend[1] + length2; - args->iend[3] = args->iend[2] + length3; - -- /* HUF_initDStream() requires this, and this small of an input -+ /* HUF_initFastDStream() requires this, and this small of an input - * won't benefit from the ASM loop anyways. -- * length1 must be >= 16 so that ip[0] >= ilimit before the loop -- * starts. - */ -- if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8) -- return 1; -+ if (length1 < 8 || length2 < 8 || length3 < 8 || length4 < 8) -+ return 0; - if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */ - } - /* ip[] contains the position that is currently loaded into bits[]. */ -@@ -218,7 +248,7 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, - - /* No point to call the ASM loop for tiny outputs. */ - if (args->op[3] >= oend) -- return 1; -+ return 0; - - /* bits[] is the bit container. - * It is read from the MSB down to the LSB. -@@ -227,24 +257,25 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, - * set, so that CountTrailingZeros(bits[]) can be used - * to count how many bits we've consumed. - */ -- args->bits[0] = HUF_initDStream(args->ip[0]); -- args->bits[1] = HUF_initDStream(args->ip[1]); -- args->bits[2] = HUF_initDStream(args->ip[2]); -- args->bits[3] = HUF_initDStream(args->ip[3]); -- -- /* If ip[] >= ilimit, it is guaranteed to be safe to -- * reload bits[]. It may be beyond its section, but is -- * guaranteed to be valid (>= istart). 
-- */ -- args->ilimit = ilimit; -+ args->bits[0] = HUF_initFastDStream(args->ip[0]); -+ args->bits[1] = HUF_initFastDStream(args->ip[1]); -+ args->bits[2] = HUF_initFastDStream(args->ip[2]); -+ args->bits[3] = HUF_initFastDStream(args->ip[3]); -+ -+ /* The decoders must be sure to never read beyond ilowest. -+ * This is lower than iend[0], but allowing decoders to read -+ * down to ilowest can allow an extra iteration or two in the -+ * fast loop. -+ */ -+ args->ilowest = istart; - - args->oend = oend; - args->dt = dt; - -- return 0; -+ return 1; - } - --static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd) -+static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArgs const* args, int stream, BYTE* segmentEnd) - { - /* Validate that we haven't overwritten. */ - if (args->op[stream] > segmentEnd) -@@ -258,15 +289,33 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs - return ERROR(corruption_detected); - - /* Construct the BIT_DStream_t. */ -- bit->bitContainer = MEM_readLE64(args->ip[stream]); -- bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]); -- bit->start = (const char*)args->iend[0]; -+ assert(sizeof(size_t) == 8); -+ bit->bitContainer = MEM_readLEST(args->ip[stream]); -+ bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]); -+ bit->start = (const char*)args->ilowest; - bit->limitPtr = bit->start + sizeof(size_t); - bit->ptr = (const char*)args->ip[stream]; - - return 0; - } --#endif -+ -+/* Calls X(N) for each stream 0, 1, 2, 3. */ -+#define HUF_4X_FOR_EACH_STREAM(X) \ -+ do { \ -+ X(0); \ -+ X(1); \ -+ X(2); \ -+ X(3); \ -+ } while (0) -+ -+/* Calls X(N, var) for each stream 0, 1, 2, 3. 
*/ -+#define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var) \ -+ do { \ -+ X(0, (var)); \ -+ X(1, (var)); \ -+ X(2, (var)); \ -+ X(3, (var)); \ -+ } while (0) - - - #ifndef HUF_FORCE_DECOMPRESS_X2 -@@ -283,10 +332,11 @@ typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decodi - static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { - U64 D4; - if (MEM_isLittleEndian()) { -- D4 = (symbol << 8) + nbBits; -+ D4 = (U64)((symbol << 8) + nbBits); - } else { -- D4 = symbol + (nbBits << 8); -+ D4 = (U64)(symbol + (nbBits << 8)); - } -+ assert(D4 < (1U << 16)); - D4 *= 0x0001000100010001ULL; - return D4; - } -@@ -329,13 +379,7 @@ typedef struct { - BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; - } HUF_ReadDTableX1_Workspace; - -- --size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) --{ -- return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); --} -- --size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) -+size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags) - { - U32 tableLog = 0; - U32 nbSymbols = 0; -@@ -350,7 +394,7 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr - DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); - /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... 
*/ - -- iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); -+ iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), flags); - if (HUF_isError(iSize)) return iSize; - - -@@ -377,9 +421,8 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr - * rankStart[0] is not filled because there are no entries in the table for - * weight 0. - */ -- { -- int n; -- int nextRankStart = 0; -+ { int n; -+ U32 nextRankStart = 0; - int const unroll = 4; - int const nLimit = (int)nbSymbols - unroll + 1; - for (n=0; n<(int)tableLog+1; n++) { -@@ -406,10 +449,9 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr - * We can switch based on the length to a different inner loop which is - * optimized for that particular case. 
- */ -- { -- U32 w; -- int symbol=wksp->rankVal[0]; -- int rankStart=0; -+ { U32 w; -+ int symbol = wksp->rankVal[0]; -+ int rankStart = 0; - for (w=1; wrankVal[w]; - int const length = (1 << w) >> 1; -@@ -483,15 +525,19 @@ HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog - } - - #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ -- *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) -+ do { *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog); } while (0) - --#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ -- if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ -- HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) -+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ -+ do { \ -+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ -+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \ -+ } while (0) - --#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ -- if (MEM_64bits()) \ -- HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) -+#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ -+ do { \ -+ if (MEM_64bits()) \ -+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \ -+ } while (0) - - HINT_INLINE size_t - HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) -@@ -519,7 +565,7 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons - while (p < pEnd) - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); - -- return pEnd-pStart; -+ return (size_t)(pEnd-pStart); - } - - FORCE_INLINE_TEMPLATE size_t -@@ -529,7 +575,7 @@ HUF_decompress1X1_usingDTable_internal_body( - const HUF_DTable* DTable) - { - BYTE* op = (BYTE*)dst; -- BYTE* const oend = op + dstSize; -+ BYTE* const oend = ZSTD_maybeNullPtrAdd(op, dstSize); - const void* dtPtr = DTable + 1; - const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; - BIT_DStream_t bitD; -@@ -545,6 +591,10 @@ HUF_decompress1X1_usingDTable_internal_body( - return dstSize; - } - -+/* HUF_decompress4X1_usingDTable_internal_body(): -+ * Conditions : -+ * @dstSize >= 6 -+ */ - 
FORCE_INLINE_TEMPLATE size_t - HUF_decompress4X1_usingDTable_internal_body( - void* dst, size_t dstSize, -@@ -553,6 +603,7 @@ HUF_decompress4X1_usingDTable_internal_body( - { - /* Check */ - if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ -+ if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */ - - { const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; -@@ -588,6 +639,7 @@ HUF_decompress4X1_usingDTable_internal_body( - - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ -+ assert(dstSize >= 6); /* validated above */ - CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); - CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); - CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); -@@ -650,52 +702,173 @@ size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, vo - } - #endif - --#if HUF_NEED_DEFAULT_FUNCTION - static - size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc, - size_t cSrcSize, HUF_DTable const* DTable) { - return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); - } --#endif - - #if ZSTD_ENABLE_ASM_X86_64_BMI2 - --HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN; -+HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN; -+ -+#endif -+ -+static HUF_FAST_BMI2_ATTRS -+void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args) -+{ -+ U64 bits[4]; -+ BYTE const* ip[4]; -+ BYTE* op[4]; -+ U16 const* const dtable = (U16 const*)args->dt; -+ BYTE* const oend = args->oend; -+ BYTE const* const ilowest = args->ilowest; -+ -+ /* Copy the arguments to local variables */ -+ ZSTD_memcpy(&bits, 
&args->bits, sizeof(bits)); -+ ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip)); -+ ZSTD_memcpy(&op, &args->op, sizeof(op)); -+ -+ assert(MEM_isLittleEndian()); -+ assert(!MEM_32bits()); -+ -+ for (;;) { -+ BYTE* olimit; -+ int stream; -+ -+ /* Assert loop preconditions */ -+#ifndef NDEBUG -+ for (stream = 0; stream < 4; ++stream) { -+ assert(op[stream] <= (stream == 3 ? oend : op[stream + 1])); -+ assert(ip[stream] >= ilowest); -+ } -+#endif -+ /* Compute olimit */ -+ { -+ /* Each iteration produces 5 output symbols per stream */ -+ size_t const oiters = (size_t)(oend - op[3]) / 5; -+ /* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes -+ * per stream. -+ */ -+ size_t const iiters = (size_t)(ip[0] - ilowest) / 7; -+ /* We can safely run iters iterations before running bounds checks */ -+ size_t const iters = MIN(oiters, iiters); -+ size_t const symbols = iters * 5; -+ -+ /* We can simply check that op[3] < olimit, instead of checking all -+ * of our bounds, since we can't hit the other bounds until we've run -+ * iters iterations, which only happens when op[3] == olimit. -+ */ -+ olimit = op[3] + symbols; -+ -+ /* Exit fast decoding loop once we reach the end. */ -+ if (op[3] == olimit) -+ break; -+ -+ /* Exit the decoding loop if any input pointer has crossed the -+ * previous one. This indicates corruption, and a precondition -+ * to our loop is that ip[i] >= ip[0]. 
-+ */ -+ for (stream = 1; stream < 4; ++stream) { -+ if (ip[stream] < ip[stream - 1]) -+ goto _out; -+ } -+ } -+ -+#ifndef NDEBUG -+ for (stream = 1; stream < 4; ++stream) { -+ assert(ip[stream] >= ip[stream - 1]); -+ } -+#endif -+ -+#define HUF_4X1_DECODE_SYMBOL(_stream, _symbol) \ -+ do { \ -+ int const index = (int)(bits[(_stream)] >> 53); \ -+ int const entry = (int)dtable[index]; \ -+ bits[(_stream)] <<= (entry & 0x3F); \ -+ op[(_stream)][(_symbol)] = (BYTE)((entry >> 8) & 0xFF); \ -+ } while (0) -+ -+#define HUF_4X1_RELOAD_STREAM(_stream) \ -+ do { \ -+ int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \ -+ int const nbBits = ctz & 7; \ -+ int const nbBytes = ctz >> 3; \ -+ op[(_stream)] += 5; \ -+ ip[(_stream)] -= nbBytes; \ -+ bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \ -+ bits[(_stream)] <<= nbBits; \ -+ } while (0) -+ -+ /* Manually unroll the loop because compilers don't consistently -+ * unroll the inner loops, which destroys performance. -+ */ -+ do { -+ /* Decode 5 symbols in each of the 4 streams */ -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0); -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1); -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2); -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3); -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4); -+ -+ /* Reload each of the 4 the bitstreams */ -+ HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM); -+ } while (op[3] < olimit); -+ -+#undef HUF_4X1_DECODE_SYMBOL -+#undef HUF_4X1_RELOAD_STREAM -+ } - --static HUF_ASM_X86_64_BMI2_ATTRS -+_out: -+ -+ /* Save the final values of each of the state variables back to args. 
*/ -+ ZSTD_memcpy(&args->bits, &bits, sizeof(bits)); -+ ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip)); -+ ZSTD_memcpy(&args->op, &op, sizeof(op)); -+} -+ -+/* -+ * @returns @p dstSize on success (>= 6) -+ * 0 if the fallback implementation should be used -+ * An error if an error occurred -+ */ -+static HUF_FAST_BMI2_ATTRS - size_t --HUF_decompress4X1_usingDTable_internal_bmi2_asm( -+HUF_decompress4X1_usingDTable_internal_fast( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, -- const HUF_DTable* DTable) -+ const HUF_DTable* DTable, -+ HUF_DecompressFastLoopFn loopFn) - { - void const* dt = DTable + 1; -- const BYTE* const iend = (const BYTE*)cSrc + 6; -- BYTE* const oend = (BYTE*)dst + dstSize; -- HUF_DecompressAsmArgs args; -- { -- size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); -- FORWARD_IF_ERROR(ret, "Failed to init asm args"); -- if (ret != 0) -- return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); -+ BYTE const* const ilowest = (BYTE const*)cSrc; -+ BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize); -+ HUF_DecompressFastArgs args; -+ { size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); -+ FORWARD_IF_ERROR(ret, "Failed to init fast loop args"); -+ if (ret == 0) -+ return 0; - } - -- assert(args.ip[0] >= args.ilimit); -- HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args); -+ assert(args.ip[0] >= args.ilowest); -+ loopFn(&args); - -- /* Our loop guarantees that ip[] >= ilimit and that we haven't -+ /* Our loop guarantees that ip[] >= ilowest and that we haven't - * overwritten any op[]. 
- */ -- assert(args.ip[0] >= iend); -- assert(args.ip[1] >= iend); -- assert(args.ip[2] >= iend); -- assert(args.ip[3] >= iend); -+ assert(args.ip[0] >= ilowest); -+ assert(args.ip[0] >= ilowest); -+ assert(args.ip[1] >= ilowest); -+ assert(args.ip[2] >= ilowest); -+ assert(args.ip[3] >= ilowest); - assert(args.op[3] <= oend); -- (void)iend; -+ -+ assert(ilowest == args.ilowest); -+ assert(ilowest + 6 == args.iend[0]); -+ (void)ilowest; - - /* finish bit streams one by one. */ -- { -- size_t const segmentSize = (dstSize+3) / 4; -+ { size_t const segmentSize = (dstSize+3) / 4; - BYTE* segmentEnd = (BYTE*)dst; - int i; - for (i = 0; i < 4; ++i) { -@@ -712,97 +885,59 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm( - } - - /* decoded size */ -+ assert(dstSize != 0); - return dstSize; - } --#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */ -- --typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, -- const void *cSrc, -- size_t cSrcSize, -- const HUF_DTable *DTable); - - HUF_DGEN(HUF_decompress1X1_usingDTable_internal) - - static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc, -- size_t cSrcSize, HUF_DTable const* DTable, int bmi2) -+ size_t cSrcSize, HUF_DTable const* DTable, int flags) - { -+ HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X1_usingDTable_internal_default; -+ HUF_DecompressFastLoopFn loopFn = HUF_decompress4X1_usingDTable_internal_fast_c_loop; -+ - #if DYNAMIC_BMI2 -- if (bmi2) { -+ if (flags & HUF_flags_bmi2) { -+ fallbackFn = HUF_decompress4X1_usingDTable_internal_bmi2; - # if ZSTD_ENABLE_ASM_X86_64_BMI2 -- return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); --# else -- return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); -+ if (!(flags & HUF_flags_disableAsm)) { -+ loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop; -+ } - # endif -+ } else { -+ return fallbackFn(dst, dstSize, cSrc, cSrcSize, 
DTable); - } --#else -- (void)bmi2; - #endif - - #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__) -- return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); --#else -- return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable); -+ if (!(flags & HUF_flags_disableAsm)) { -+ loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop; -+ } - #endif --} -- -- --size_t HUF_decompress1X1_usingDTable( -- void* dst, size_t dstSize, -- const void* cSrc, size_t cSrcSize, -- const HUF_DTable* DTable) --{ -- DTableDesc dtd = HUF_getDTableDesc(DTable); -- if (dtd.tableType != 0) return ERROR(GENERIC); -- return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); --} - --size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, -- const void* cSrc, size_t cSrcSize, -- void* workSpace, size_t wkspSize) --{ -- const BYTE* ip = (const BYTE*) cSrc; -- -- size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); -- if (HUF_isError(hSize)) return hSize; -- if (hSize >= cSrcSize) return ERROR(srcSize_wrong); -- ip += hSize; cSrcSize -= hSize; -- -- return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); --} -- -- --size_t HUF_decompress4X1_usingDTable( -- void* dst, size_t dstSize, -- const void* cSrc, size_t cSrcSize, -- const HUF_DTable* DTable) --{ -- DTableDesc dtd = HUF_getDTableDesc(DTable); -- if (dtd.tableType != 0) return ERROR(GENERIC); -- return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -+ if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) { -+ size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn); -+ if (ret != 0) -+ return ret; -+ } -+ return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable); - } - --static size_t 
HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, -+static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, -- void* workSpace, size_t wkspSize, int bmi2) -+ void* workSpace, size_t wkspSize, int flags) - { - const BYTE* ip = (const BYTE*) cSrc; - -- size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -+ size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - -- return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); --} -- --size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, -- const void* cSrc, size_t cSrcSize, -- void* workSpace, size_t wkspSize) --{ -- return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); -+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); - } - -- - #endif /* HUF_FORCE_DECOMPRESS_X2 */ - - -@@ -985,7 +1120,7 @@ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 - - static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, - const sortedSymbol_t* sortedList, -- const U32* rankStart, rankValCol_t *rankValOrigin, const U32 maxWeight, -+ const U32* rankStart, rankValCol_t* rankValOrigin, const U32 maxWeight, - const U32 nbBitsBaseline) - { - U32* const rankVal = rankValOrigin[0]; -@@ -1040,14 +1175,7 @@ typedef struct { - - size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, - const void* src, size_t srcSize, -- void* workSpace, size_t wkspSize) --{ -- return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); --} -- --size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, -- const void* src, size_t srcSize, -- void* workSpace, 
size_t wkspSize, int bmi2) -+ void* workSpace, size_t wkspSize, int flags) - { - U32 tableLog, maxW, nbSymbols; - DTableDesc dtd = HUF_getDTableDesc(DTable); -@@ -1069,7 +1197,7 @@ size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, - if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); - /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ - -- iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2); -+ iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), flags); - if (HUF_isError(iSize)) return iSize; - - /* check result */ -@@ -1159,15 +1287,19 @@ HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, c - } - - #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ -- ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) -+ do { ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); } while (0) - --#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ -- if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ -- ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) -+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ -+ do { \ -+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ -+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \ -+ } while (0) - --#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ -- if (MEM_64bits()) \ -- ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) -+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ -+ do { \ -+ if (MEM_64bits()) \ -+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \ -+ } while (0) - - HINT_INLINE size_t - HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, -@@ -1227,7 +1359,7 @@ HUF_decompress1X2_usingDTable_internal_body( - - /* decode */ - { BYTE* const ostart = (BYTE*) dst; -- 
BYTE* const oend = ostart + dstSize; -+ BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, dstSize); - const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ - const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; - DTableDesc const dtd = HUF_getDTableDesc(DTable); -@@ -1240,6 +1372,11 @@ HUF_decompress1X2_usingDTable_internal_body( - /* decoded size */ - return dstSize; - } -+ -+/* HUF_decompress4X2_usingDTable_internal_body(): -+ * Conditions: -+ * @dstSize >= 6 -+ */ - FORCE_INLINE_TEMPLATE size_t - HUF_decompress4X2_usingDTable_internal_body( - void* dst, size_t dstSize, -@@ -1247,6 +1384,7 @@ HUF_decompress4X2_usingDTable_internal_body( - const HUF_DTable* DTable) - { - if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ -+ if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */ - - { const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; -@@ -1280,8 +1418,9 @@ HUF_decompress4X2_usingDTable_internal_body( - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - -- if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ -- if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ -+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ -+ if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ -+ assert(dstSize >= 6 /* validated above */); - CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); - CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); - CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); -@@ -1366,44 +1505,191 @@ size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, vo - } - #endif - --#if HUF_NEED_DEFAULT_FUNCTION - static - size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc, - size_t cSrcSize, HUF_DTable const* DTable) { - return 
HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); - } --#endif - - #if ZSTD_ENABLE_ASM_X86_64_BMI2 - --HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN; -+HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN; -+ -+#endif -+ -+static HUF_FAST_BMI2_ATTRS -+void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args) -+{ -+ U64 bits[4]; -+ BYTE const* ip[4]; -+ BYTE* op[4]; -+ BYTE* oend[4]; -+ HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt; -+ BYTE const* const ilowest = args->ilowest; -+ -+ /* Copy the arguments to local registers. */ -+ ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); -+ ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip)); -+ ZSTD_memcpy(&op, &args->op, sizeof(op)); -+ -+ oend[0] = op[1]; -+ oend[1] = op[2]; -+ oend[2] = op[3]; -+ oend[3] = args->oend; -+ -+ assert(MEM_isLittleEndian()); -+ assert(!MEM_32bits()); -+ -+ for (;;) { -+ BYTE* olimit; -+ int stream; -+ -+ /* Assert loop preconditions */ -+#ifndef NDEBUG -+ for (stream = 0; stream < 4; ++stream) { -+ assert(op[stream] <= oend[stream]); -+ assert(ip[stream] >= ilowest); -+ } -+#endif -+ /* Compute olimit */ -+ { -+ /* Each loop does 5 table lookups for each of the 4 streams. -+ * Each table lookup consumes up to 11 bits of input, and produces -+ * up to 2 bytes of output. -+ */ -+ /* We can consume up to 7 bytes of input per iteration per stream. -+ * We also know that each input pointer is >= ip[0]. So we can run -+ * iters loops before running out of input. -+ */ -+ size_t iters = (size_t)(ip[0] - ilowest) / 7; -+ /* Each iteration can produce up to 10 bytes of output per stream. -+ * Each output stream my advance at different rates. So take the -+ * minimum number of safe iterations among all the output streams. 
-+ */ -+ for (stream = 0; stream < 4; ++stream) { -+ size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10; -+ iters = MIN(iters, oiters); -+ } -+ -+ /* Each iteration produces at least 5 output symbols. So until -+ * op[3] crosses olimit, we know we haven't executed iters -+ * iterations yet. This saves us maintaining an iters counter, -+ * at the expense of computing the remaining # of iterations -+ * more frequently. -+ */ -+ olimit = op[3] + (iters * 5); -+ -+ /* Exit the fast decoding loop once we reach the end. */ -+ if (op[3] == olimit) -+ break; -+ -+ /* Exit the decoding loop if any input pointer has crossed the -+ * previous one. This indicates corruption, and a precondition -+ * to our loop is that ip[i] >= ip[0]. -+ */ -+ for (stream = 1; stream < 4; ++stream) { -+ if (ip[stream] < ip[stream - 1]) -+ goto _out; -+ } -+ } -+ -+#ifndef NDEBUG -+ for (stream = 1; stream < 4; ++stream) { -+ assert(ip[stream] >= ip[stream - 1]); -+ } -+#endif - --static HUF_ASM_X86_64_BMI2_ATTRS size_t --HUF_decompress4X2_usingDTable_internal_bmi2_asm( -+#define HUF_4X2_DECODE_SYMBOL(_stream, _decode3) \ -+ do { \ -+ if ((_decode3) || (_stream) != 3) { \ -+ int const index = (int)(bits[(_stream)] >> 53); \ -+ HUF_DEltX2 const entry = dtable[index]; \ -+ MEM_write16(op[(_stream)], entry.sequence); \ -+ bits[(_stream)] <<= (entry.nbBits) & 0x3F; \ -+ op[(_stream)] += (entry.length); \ -+ } \ -+ } while (0) -+ -+#define HUF_4X2_RELOAD_STREAM(_stream) \ -+ do { \ -+ HUF_4X2_DECODE_SYMBOL(3, 1); \ -+ { \ -+ int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \ -+ int const nbBits = ctz & 7; \ -+ int const nbBytes = ctz >> 3; \ -+ ip[(_stream)] -= nbBytes; \ -+ bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \ -+ bits[(_stream)] <<= nbBits; \ -+ } \ -+ } while (0) -+ -+ /* Manually unroll the loop because compilers don't consistently -+ * unroll the inner loops, which destroys performance. -+ */ -+ do { -+ /* Decode 5 symbols from each of the first 3 streams. 
-+ * The final stream will be decoded during the reload phase -+ * to reduce register pressure. -+ */ -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); -+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); -+ -+ /* Decode one symbol from the final stream */ -+ HUF_4X2_DECODE_SYMBOL(3, 1); -+ -+ /* Decode 4 symbols from the final stream & reload bitstreams. -+ * The final stream is reloaded last, meaning that all 5 symbols -+ * are decoded from the final stream before it is reloaded. -+ */ -+ HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM); -+ } while (op[3] < olimit); -+ } -+ -+#undef HUF_4X2_DECODE_SYMBOL -+#undef HUF_4X2_RELOAD_STREAM -+ -+_out: -+ -+ /* Save the final values of each of the state variables back to args. */ -+ ZSTD_memcpy(&args->bits, &bits, sizeof(bits)); -+ ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip)); -+ ZSTD_memcpy(&args->op, &op, sizeof(op)); -+} -+ -+ -+static HUF_FAST_BMI2_ATTRS size_t -+HUF_decompress4X2_usingDTable_internal_fast( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, -- const HUF_DTable* DTable) { -+ const HUF_DTable* DTable, -+ HUF_DecompressFastLoopFn loopFn) { - void const* dt = DTable + 1; -- const BYTE* const iend = (const BYTE*)cSrc + 6; -- BYTE* const oend = (BYTE*)dst + dstSize; -- HUF_DecompressAsmArgs args; -+ const BYTE* const ilowest = (const BYTE*)cSrc; -+ BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize); -+ HUF_DecompressFastArgs args; - { -- size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); -+ size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); - FORWARD_IF_ERROR(ret, "Failed to init asm args"); -- if (ret != 0) -- return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, 
DTable); -+ if (ret == 0) -+ return 0; - } - -- assert(args.ip[0] >= args.ilimit); -- HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args); -+ assert(args.ip[0] >= args.ilowest); -+ loopFn(&args); - - /* note : op4 already verified within main loop */ -- assert(args.ip[0] >= iend); -- assert(args.ip[1] >= iend); -- assert(args.ip[2] >= iend); -- assert(args.ip[3] >= iend); -+ assert(args.ip[0] >= ilowest); -+ assert(args.ip[1] >= ilowest); -+ assert(args.ip[2] >= ilowest); -+ assert(args.ip[3] >= ilowest); - assert(args.op[3] <= oend); -- (void)iend; -+ -+ assert(ilowest == args.ilowest); -+ assert(ilowest + 6 == args.iend[0]); -+ (void)ilowest; - - /* finish bitStreams one by one */ - { -@@ -1426,91 +1712,72 @@ HUF_decompress4X2_usingDTable_internal_bmi2_asm( - /* decoded size */ - return dstSize; - } --#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */ - - static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc, -- size_t cSrcSize, HUF_DTable const* DTable, int bmi2) -+ size_t cSrcSize, HUF_DTable const* DTable, int flags) - { -+ HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X2_usingDTable_internal_default; -+ HUF_DecompressFastLoopFn loopFn = HUF_decompress4X2_usingDTable_internal_fast_c_loop; -+ - #if DYNAMIC_BMI2 -- if (bmi2) { -+ if (flags & HUF_flags_bmi2) { -+ fallbackFn = HUF_decompress4X2_usingDTable_internal_bmi2; - # if ZSTD_ENABLE_ASM_X86_64_BMI2 -- return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); --# else -- return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); -+ if (!(flags & HUF_flags_disableAsm)) { -+ loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop; -+ } - # endif -+ } else { -+ return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable); - } --#else -- (void)bmi2; - #endif - - #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__) -- return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, 
cSrcSize, DTable); --#else -- return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable); -+ if (!(flags & HUF_flags_disableAsm)) { -+ loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop; -+ } - #endif -+ -+ if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) { -+ size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn); -+ if (ret != 0) -+ return ret; -+ } -+ return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable); - } - - HUF_DGEN(HUF_decompress1X2_usingDTable_internal) - --size_t HUF_decompress1X2_usingDTable( -- void* dst, size_t dstSize, -- const void* cSrc, size_t cSrcSize, -- const HUF_DTable* DTable) --{ -- DTableDesc dtd = HUF_getDTableDesc(DTable); -- if (dtd.tableType != 1) return ERROR(GENERIC); -- return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); --} -- - size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, -- void* workSpace, size_t wkspSize) -+ void* workSpace, size_t wkspSize, int flags) - { - const BYTE* ip = (const BYTE*) cSrc; - - size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, -- workSpace, wkspSize); -+ workSpace, wkspSize, flags); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - -- return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); -+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, flags); - } - -- --size_t HUF_decompress4X2_usingDTable( -- void* dst, size_t dstSize, -- const void* cSrc, size_t cSrcSize, -- const HUF_DTable* DTable) --{ -- DTableDesc dtd = HUF_getDTableDesc(DTable); -- if (dtd.tableType != 1) return ERROR(GENERIC); -- return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); --} -- --static size_t 
HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, -+static size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, -- void* workSpace, size_t wkspSize, int bmi2) -+ void* workSpace, size_t wkspSize, int flags) - { - const BYTE* ip = (const BYTE*) cSrc; - - size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, -- workSpace, wkspSize); -+ workSpace, wkspSize, flags); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - -- return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); -+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); - } - --size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, -- const void* cSrc, size_t cSrcSize, -- void* workSpace, size_t wkspSize) --{ -- return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); --} -- -- - #endif /* HUF_FORCE_DECOMPRESS_X1 */ - - -@@ -1518,44 +1785,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - /* Universal decompression selectors */ - /* ***********************************/ - --size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, -- const void* cSrc, size_t cSrcSize, -- const HUF_DTable* DTable) --{ -- DTableDesc const dtd = HUF_getDTableDesc(DTable); --#if defined(HUF_FORCE_DECOMPRESS_X1) -- (void)dtd; -- assert(dtd.tableType == 0); -- return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); --#elif defined(HUF_FORCE_DECOMPRESS_X2) -- (void)dtd; -- assert(dtd.tableType == 1); -- return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); --#else -- return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : -- HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); --#endif --} -- --size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, -- const void* cSrc, size_t cSrcSize, -- const HUF_DTable* DTable) --{ -- DTableDesc const dtd = HUF_getDTableDesc(DTable); --#if defined(HUF_FORCE_DECOMPRESS_X1) -- (void)dtd; -- assert(dtd.tableType == 0); -- return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); --#elif defined(HUF_FORCE_DECOMPRESS_X2) -- (void)dtd; -- assert(dtd.tableType == 1); -- return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); --#else -- return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : -- HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); --#endif --} -- - - #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) - typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; -@@ -1610,36 +1839,9 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) - #endif - } - -- --size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, -- size_t dstSize, const void* cSrc, -- size_t cSrcSize, void* workSpace, -- size_t wkspSize) --{ -- /* validation checks */ -- if (dstSize == 0) return ERROR(dstSize_tooSmall); -- if (cSrcSize == 0) return ERROR(corruption_detected); -- -- { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); --#if defined(HUF_FORCE_DECOMPRESS_X1) -- (void)algoNb; -- assert(algoNb == 0); -- return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); --#elif defined(HUF_FORCE_DECOMPRESS_X2) -- (void)algoNb; -- assert(algoNb == 1); -- return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); 
--#else -- return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, -- cSrcSize, workSpace, wkspSize): -- HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); --#endif -- } --} -- - size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, -- void* workSpace, size_t wkspSize) -+ void* workSpace, size_t wkspSize, int flags) - { - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); -@@ -1652,71 +1854,71 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, -- cSrcSize, workSpace, wkspSize); -+ cSrcSize, workSpace, wkspSize, flags); - #elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, -- cSrcSize, workSpace, wkspSize); -+ cSrcSize, workSpace, wkspSize, flags); - #else - return algoNb ? 
HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, -- cSrcSize, workSpace, wkspSize): -+ cSrcSize, workSpace, wkspSize, flags): - HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, -- cSrcSize, workSpace, wkspSize); -+ cSrcSize, workSpace, wkspSize, flags); - #endif - } - } - - --size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) -+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags) - { - DTableDesc const dtd = HUF_getDTableDesc(DTable); - #if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); -- return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); - #elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); -- return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); - #else -- return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : -- HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -+ return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) : -+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); - #endif - } - - #ifndef HUF_FORCE_DECOMPRESS_X2 --size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) -+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags) - { - const BYTE* ip = (const BYTE*) cSrc; - -- size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -+ size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - -- return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); -+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); - } - #endif - --size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) -+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags) - { - DTableDesc const dtd = HUF_getDTableDesc(DTable); - #if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); -- return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -+ return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); - #elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); -- return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -+ return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, 
cSrcSize, DTable, flags); - #else -- return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : -- HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) : -+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); - #endif - } - --size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) -+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags) - { - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); -@@ -1726,15 +1928,14 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds - #if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); -- return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags); - #elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); -- return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags); - #else -- return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : -- HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -+ return algoNb ? 
HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags) : -+ HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags); - #endif - } - } -- -diff --git a/lib/zstd/decompress/zstd_ddict.c b/lib/zstd/decompress/zstd_ddict.c -index dbbc7919de53..30ef65e1ab5c 100644 ---- a/lib/zstd/decompress/zstd_ddict.c -+++ b/lib/zstd/decompress/zstd_ddict.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -14,12 +15,12 @@ - /*-******************************************************* - * Dependencies - *********************************************************/ -+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */ - #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ - #include "../common/cpu.h" /* bmi2 */ - #include "../common/mem.h" /* low level memory routines */ - #define FSE_STATIC_LINKING_ONLY - #include "../common/fse.h" --#define HUF_STATIC_LINKING_ONLY - #include "../common/huf.h" - #include "zstd_decompress_internal.h" - #include "zstd_ddict.h" -@@ -131,7 +132,7 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, - ZSTD_memcpy(internalBuffer, dict, dictSize); - } - ddict->dictSize = dictSize; -- ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ -+ ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */ - - /* parse dictionary content */ - FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); -@@ -237,5 +238,5 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) - unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) - { - if (ddict==NULL) return 0; -- return 
ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); -+ return ddict->dictID; - } -diff --git a/lib/zstd/decompress/zstd_ddict.h b/lib/zstd/decompress/zstd_ddict.h -index 8c1a79d666f8..de459a0dacd1 100644 ---- a/lib/zstd/decompress/zstd_ddict.h -+++ b/lib/zstd/decompress/zstd_ddict.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c -index 6b3177c94711..c9cbc45f6ed9 100644 ---- a/lib/zstd/decompress/zstd_decompress.c -+++ b/lib/zstd/decompress/zstd_decompress.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -53,13 +54,15 @@ - * Dependencies - *********************************************************/ - #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ -+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */ -+#include "../common/error_private.h" -+#include "../common/zstd_internal.h" /* blockProperties_t */ - #include "../common/mem.h" /* low level memory routines */ -+#include "../common/bits.h" /* ZSTD_highbit32 */ - #define FSE_STATIC_LINKING_ONLY - #include "../common/fse.h" --#define HUF_STATIC_LINKING_ONLY - #include "../common/huf.h" - #include /* xxh64_reset, xxh64_update, xxh64_digest, XXH64 */ --#include "../common/zstd_internal.h" /* blockProperties_t */ - #include "zstd_decompress_internal.h" /* ZSTD_DCtx */ - #include "zstd_ddict.h" /* ZSTD_DDictDictContent */ - #include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */ -@@ -72,11 +75,11 @@ - *************************************/ - - #define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4 --#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. -- * Currently, that means a 0.75 load factor. -- * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded -- * the load factor of the ddict hash set. -- */ -+#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. -+ * Currently, that means a 0.75 load factor. -+ * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded -+ * the load factor of the ddict hash set. 
-+ */ - - #define DDICT_HASHSET_TABLE_BASE_SIZE 64 - #define DDICT_HASHSET_RESIZE_FACTOR 2 -@@ -237,6 +240,8 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx) - dctx->outBufferMode = ZSTD_bm_buffered; - dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; - dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; -+ dctx->disableHufAsm = 0; -+ dctx->maxBlockSizeParam = 0; - } - - static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) -@@ -253,6 +258,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) - dctx->streamStage = zdss_init; - dctx->noForwardProgress = 0; - dctx->oversizedDuration = 0; -+ dctx->isFrameDecompression = 1; - #if DYNAMIC_BMI2 - dctx->bmi2 = ZSTD_cpuSupportsBmi2(); - #endif -@@ -421,16 +427,40 @@ size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) - * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless - * @return : 0, `zfhPtr` is correctly filled, - * >0, `srcSize` is too small, value is wanted `srcSize` amount, -- * or an error code, which can be tested using ZSTD_isError() */ -+** or an error code, which can be tested using ZSTD_isError() */ - size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) - { - const BYTE* ip = (const BYTE*)src; - size_t const minInputSize = ZSTD_startingInputLength(format); - -- ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ -- if (srcSize < minInputSize) return minInputSize; -- RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); -+ DEBUGLOG(5, "ZSTD_getFrameHeader_advanced: minInputSize = %zu, srcSize = %zu", minInputSize, srcSize); -+ -+ if (srcSize > 0) { -+ /* note : technically could be considered an assert(), since it's an invalid entry */ -+ RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter : src==NULL, but srcSize>0"); -+ } -+ if (srcSize < 
minInputSize) { -+ if (srcSize > 0 && format != ZSTD_f_zstd1_magicless) { -+ /* when receiving less than @minInputSize bytes, -+ * control these bytes at least correspond to a supported magic number -+ * in order to error out early if they don't. -+ **/ -+ size_t const toCopy = MIN(4, srcSize); -+ unsigned char hbuf[4]; MEM_writeLE32(hbuf, ZSTD_MAGICNUMBER); -+ assert(src != NULL); -+ ZSTD_memcpy(hbuf, src, toCopy); -+ if ( MEM_readLE32(hbuf) != ZSTD_MAGICNUMBER ) { -+ /* not a zstd frame : let's check if it's a skippable frame */ -+ MEM_writeLE32(hbuf, ZSTD_MAGIC_SKIPPABLE_START); -+ ZSTD_memcpy(hbuf, src, toCopy); -+ if ((MEM_readLE32(hbuf) & ZSTD_MAGIC_SKIPPABLE_MASK) != ZSTD_MAGIC_SKIPPABLE_START) { -+ RETURN_ERROR(prefix_unknown, -+ "first bytes don't correspond to any supported magic number"); -+ } } } -+ return minInputSize; -+ } - -+ ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzers may not understand that zfhPtr will be read only if return value is zero, since they are 2 different signals */ - if ( (format != ZSTD_f_zstd1_magicless) - && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { - if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { -@@ -540,61 +570,62 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize) - sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); - RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, - frameParameter_unsupported, ""); -- { -- size_t const skippableSize = skippableHeaderSize + sizeU32; -+ { size_t const skippableSize = skippableHeaderSize + sizeU32; - RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); - return skippableSize; - } - } - - /*! ZSTD_readSkippableFrame() : -- * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer. -+ * Retrieves content of a skippable frame, and writes it to dst buffer. 
- * - * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written, - * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested - * in the magicVariant. - * -- * Returns an error if destination buffer is not large enough, or if the frame is not skippable. -+ * Returns an error if destination buffer is not large enough, or if this is not a valid skippable frame. - * - * @return : number of bytes written or a ZSTD error. - */ --ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant, -- const void* src, size_t srcSize) -+size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, -+ unsigned* magicVariant, /* optional, can be NULL */ -+ const void* src, size_t srcSize) - { -- U32 const magicNumber = MEM_readLE32(src); -- size_t skippableFrameSize = readSkippableFrameSize(src, srcSize); -- size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE; -- -- /* check input validity */ -- RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, ""); -- RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, ""); -- RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, ""); -+ RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); - -- /* deliver payload */ -- if (skippableContentSize > 0 && dst != NULL) -- ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize); -- if (magicVariant != NULL) -- *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START; -- return skippableContentSize; -+ { U32 const magicNumber = MEM_readLE32(src); -+ size_t skippableFrameSize = readSkippableFrameSize(src, srcSize); -+ size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE; -+ -+ /* check input validity */ -+ RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, 
""); -+ RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, ""); -+ RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, ""); -+ -+ /* deliver payload */ -+ if (skippableContentSize > 0 && dst != NULL) -+ ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize); -+ if (magicVariant != NULL) -+ *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START; -+ return skippableContentSize; -+ } - } - - /* ZSTD_findDecompressedSize() : -- * compatible with legacy mode - * `srcSize` must be the exact length of some number of ZSTD compressed and/or - * skippable frames -- * @return : decompressed size of the frames contained */ -+ * note: compatible with legacy mode -+ * @return : decompressed size of the frames contained */ - unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) - { -- unsigned long long totalDstSize = 0; -+ U64 totalDstSize = 0; - - while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) { - U32 const magicNumber = MEM_readLE32(src); - - if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { - size_t const skippableSize = readSkippableFrameSize(src, srcSize); -- if (ZSTD_isError(skippableSize)) { -- return ZSTD_CONTENTSIZE_ERROR; -- } -+ if (ZSTD_isError(skippableSize)) return ZSTD_CONTENTSIZE_ERROR; - assert(skippableSize <= srcSize); - - src = (const BYTE *)src + skippableSize; -@@ -602,17 +633,17 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) - continue; - } - -- { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); -- if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; -+ { unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize); -+ if (fcs >= ZSTD_CONTENTSIZE_ERROR) return fcs; - -- /* check for overflow */ -- if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; -- totalDstSize += ret; -+ if (U64_MAX - totalDstSize < fcs) -+ 
return ZSTD_CONTENTSIZE_ERROR; /* check for overflow */ -+ totalDstSize += fcs; - } -+ /* skip to next frame */ - { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); -- if (ZSTD_isError(frameSrcSize)) { -- return ZSTD_CONTENTSIZE_ERROR; -- } -+ if (ZSTD_isError(frameSrcSize)) return ZSTD_CONTENTSIZE_ERROR; -+ assert(frameSrcSize <= srcSize); - - src = (const BYTE *)src + frameSrcSize; - srcSize -= frameSrcSize; -@@ -676,13 +707,13 @@ static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) - return frameSizeInfo; - } - --static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) -+static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize, ZSTD_format_e format) - { - ZSTD_frameSizeInfo frameSizeInfo; - ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); - - -- if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) -+ if (format == ZSTD_f_zstd1 && (srcSize >= ZSTD_SKIPPABLEHEADERSIZE) - && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { - frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); - assert(ZSTD_isError(frameSizeInfo.compressedSize) || -@@ -696,7 +727,7 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize - ZSTD_frameHeader zfh; - - /* Extract Frame Header */ -- { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); -+ { size_t const ret = ZSTD_getFrameHeader_advanced(&zfh, src, srcSize, format); - if (ZSTD_isError(ret)) - return ZSTD_errorFrameSizeInfo(ret); - if (ret > 0) -@@ -730,23 +761,26 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize - ip += 4; - } - -+ frameSizeInfo.nbBlocks = nbBlocks; - frameSizeInfo.compressedSize = (size_t)(ip - ipstart); - frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) - ? 
zfh.frameContentSize -- : nbBlocks * zfh.blockSizeMax; -+ : (unsigned long long)nbBlocks * zfh.blockSizeMax; - return frameSizeInfo; - } - } - -+static size_t ZSTD_findFrameCompressedSize_advanced(const void *src, size_t srcSize, ZSTD_format_e format) { -+ ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, format); -+ return frameSizeInfo.compressedSize; -+} -+ - /* ZSTD_findFrameCompressedSize() : -- * compatible with legacy mode -- * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame -- * `srcSize` must be at least as large as the frame contained -- * @return : the compressed size of the frame starting at `src` */ -+ * See docs in zstd.h -+ * Note: compatible with legacy mode */ - size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) - { -- ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); -- return frameSizeInfo.compressedSize; -+ return ZSTD_findFrameCompressedSize_advanced(src, srcSize, ZSTD_f_zstd1); - } - - /* ZSTD_decompressBound() : -@@ -760,7 +794,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) - unsigned long long bound = 0; - /* Iterate over each frame */ - while (srcSize > 0) { -- ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); -+ ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1); - size_t const compressedSize = frameSizeInfo.compressedSize; - unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; - if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) -@@ -773,6 +807,48 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) - return bound; - } - -+size_t ZSTD_decompressionMargin(void const* src, size_t srcSize) -+{ -+ size_t margin = 0; -+ unsigned maxBlockSize = 0; -+ -+ /* Iterate over each frame */ -+ while (srcSize > 0) { -+ ZSTD_frameSizeInfo const frameSizeInfo = 
ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1); -+ size_t const compressedSize = frameSizeInfo.compressedSize; -+ unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; -+ ZSTD_frameHeader zfh; -+ -+ FORWARD_IF_ERROR(ZSTD_getFrameHeader(&zfh, src, srcSize), ""); -+ if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) -+ return ERROR(corruption_detected); -+ -+ if (zfh.frameType == ZSTD_frame) { -+ /* Add the frame header to our margin */ -+ margin += zfh.headerSize; -+ /* Add the checksum to our margin */ -+ margin += zfh.checksumFlag ? 4 : 0; -+ /* Add 3 bytes per block */ -+ margin += 3 * frameSizeInfo.nbBlocks; -+ -+ /* Compute the max block size */ -+ maxBlockSize = MAX(maxBlockSize, zfh.blockSizeMax); -+ } else { -+ assert(zfh.frameType == ZSTD_skippableFrame); -+ /* Add the entire skippable frame size to our margin. */ -+ margin += compressedSize; -+ } -+ -+ assert(srcSize >= compressedSize); -+ src = (const BYTE*)src + compressedSize; -+ srcSize -= compressedSize; -+ } -+ -+ /* Add the max block size back to the margin. 
*/ -+ margin += maxBlockSize; -+ -+ return margin; -+} - - /*-************************************************************* - * Frame decoding -@@ -856,6 +932,10 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, - ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; - } - -+ /* Shrink the blockSizeMax if enabled */ -+ if (dctx->maxBlockSizeParam != 0) -+ dctx->fParams.blockSizeMax = MIN(dctx->fParams.blockSizeMax, (unsigned)dctx->maxBlockSizeParam); -+ - /* Loop on each block */ - while (1) { - BYTE* oBlockEnd = oend; -@@ -888,7 +968,8 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, - switch(blockProperties.blockType) - { - case bt_compressed: -- decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, /* frame */ 1, not_streaming); -+ assert(dctx->isFrameDecompression == 1); -+ decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, not_streaming); - break; - case bt_raw : - /* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. 
*/ -@@ -901,12 +982,14 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, - default: - RETURN_ERROR(corruption_detected, "invalid block type"); - } -- -- if (ZSTD_isError(decodedSize)) return decodedSize; -- if (dctx->validateChecksum) -+ FORWARD_IF_ERROR(decodedSize, "Block decompression failure"); -+ DEBUGLOG(5, "Decompressed block of dSize = %u", (unsigned)decodedSize); -+ if (dctx->validateChecksum) { - xxh64_update(&dctx->xxhState, op, decodedSize); -- if (decodedSize != 0) -+ } -+ if (decodedSize) /* support dst = NULL,0 */ { - op += decodedSize; -+ } - assert(ip != NULL); - ip += cBlockSize; - remainingSrcSize -= cBlockSize; -@@ -930,12 +1013,15 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, - } - ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0); - /* Allow caller to get size read */ -+ DEBUGLOG(4, "ZSTD_decompressFrame: decompressed frame of size %zi, consuming %zi bytes of input", op-ostart, ip - (const BYTE*)*srcPtr); - *srcPtr = ip; - *srcSizePtr = remainingSrcSize; - return (size_t)(op-ostart); - } - --static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, -+static -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR -+size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict, size_t dictSize, -@@ -955,17 +1041,18 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, - while (srcSize >= ZSTD_startingInputLength(dctx->format)) { - - -- { U32 const magicNumber = MEM_readLE32(src); -- DEBUGLOG(4, "reading magic number %08X (expecting %08X)", -- (unsigned)magicNumber, ZSTD_MAGICNUMBER); -+ if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) { -+ U32 const magicNumber = MEM_readLE32(src); -+ DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber); - if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { -+ /* skippable frame detected : skip it */ - size_t const skippableSize = readSkippableFrameSize(src, srcSize); -- 
FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); -+ FORWARD_IF_ERROR(skippableSize, "invalid skippable frame"); - assert(skippableSize <= srcSize); - - src = (const BYTE *)src + skippableSize; - srcSize -= skippableSize; -- continue; -+ continue; /* check next frame */ - } } - - if (ddict) { -@@ -1061,8 +1148,8 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr - size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } - - /* -- * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, -- * we allow taking a partial block as the input. Currently only raw uncompressed blocks can -+ * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, we -+ * allow taking a partial block as the input. Currently only raw uncompressed blocks can - * be streamed. - * - * For blocks that can be streamed, this allows us to reduce the latency until we produce -@@ -1181,7 +1268,8 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c - { - case bt_compressed: - DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); -- rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming); -+ assert(dctx->isFrameDecompression == 1); -+ rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, is_streaming); - dctx->expected = 0; /* Streaming not supported */ - break; - case bt_raw : -@@ -1250,6 +1338,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c - case ZSTDds_decodeSkippableHeader: - assert(src != NULL); - assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); -+ assert(dctx->format != ZSTD_f_zstd1_magicless); - ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ - dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, 
beyond local buffer size */ - dctx->stage = ZSTDds_skipFrame; -@@ -1262,7 +1351,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c - - default: - assert(0); /* impossible */ -- RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ -+ RETURN_ERROR(GENERIC, "impossible to reach"); /* some compilers require default to do something */ - } - } - -@@ -1303,11 +1392,11 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, - /* in minimal huffman, we always use X1 variants */ - size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, - dictPtr, dictEnd - dictPtr, -- workspace, workspaceSize); -+ workspace, workspaceSize, /* flags */ 0); - #else - size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, - dictPtr, (size_t)(dictEnd - dictPtr), -- workspace, workspaceSize); -+ workspace, workspaceSize, /* flags */ 0); - #endif - RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); - dictPtr += hSize; -@@ -1403,10 +1492,11 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) - dctx->prefixStart = NULL; - dctx->virtualStart = NULL; - dctx->dictEnd = NULL; -- dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ -+ dctx->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */ - dctx->litEntropy = dctx->fseEntropy = 0; - dctx->dictID = 0; - dctx->bType = bt_reserved; -+ dctx->isFrameDecompression = 1; - ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); - ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ - dctx->LLTptr = dctx->entropy.LLTable; -@@ -1465,7 +1555,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) - * This could for one of the following reasons : - * - The frame does not require a dictionary (most common case). - * - The frame was built with dictID intentionally removed. 
-- * Needed dictionary is a hidden information. -+ * Needed dictionary is a hidden piece of information. - * Note : this use case also happens when using a non-conformant dictionary. - * - `srcSize` is too small, and as a result, frame header could not be decoded. - * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. -@@ -1474,7 +1564,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) - * ZSTD_getFrameHeader(), which will provide a more precise error code. */ - unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) - { -- ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; -+ ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0, 0, 0 }; - size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); - if (ZSTD_isError(hError)) return 0; - return zfp.dictID; -@@ -1581,7 +1671,9 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di - size_t ZSTD_initDStream(ZSTD_DStream* zds) - { - DEBUGLOG(4, "ZSTD_initDStream"); -- return ZSTD_initDStream_usingDDict(zds, NULL); -+ FORWARD_IF_ERROR(ZSTD_DCtx_reset(zds, ZSTD_reset_session_only), ""); -+ FORWARD_IF_ERROR(ZSTD_DCtx_refDDict(zds, NULL), ""); -+ return ZSTD_startingInputLength(zds->format); - } - - /* ZSTD_initDStream_usingDDict() : -@@ -1589,6 +1681,7 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds) - * this function cannot fail */ - size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) - { -+ DEBUGLOG(4, "ZSTD_initDStream_usingDDict"); - FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); - return ZSTD_startingInputLength(dctx->format); -@@ -1599,6 +1692,7 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) - * this function cannot fail */ - size_t ZSTD_resetDStream(ZSTD_DStream* dctx) - { -+ DEBUGLOG(4, "ZSTD_resetDStream"); - FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); - return 
ZSTD_startingInputLength(dctx->format); - } -@@ -1670,6 +1764,15 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) - bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict; - bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts; - return bounds; -+ case ZSTD_d_disableHuffmanAssembly: -+ bounds.lowerBound = 0; -+ bounds.upperBound = 1; -+ return bounds; -+ case ZSTD_d_maxBlockSize: -+ bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN; -+ bounds.upperBound = ZSTD_BLOCKSIZE_MAX; -+ return bounds; -+ - default:; - } - bounds.error = ERROR(parameter_unsupported); -@@ -1710,6 +1813,12 @@ size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value - case ZSTD_d_refMultipleDDicts: - *value = (int)dctx->refMultipleDDicts; - return 0; -+ case ZSTD_d_disableHuffmanAssembly: -+ *value = (int)dctx->disableHufAsm; -+ return 0; -+ case ZSTD_d_maxBlockSize: -+ *value = dctx->maxBlockSizeParam; -+ return 0; - default:; - } - RETURN_ERROR(parameter_unsupported, ""); -@@ -1743,6 +1852,14 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value - } - dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value; - return 0; -+ case ZSTD_d_disableHuffmanAssembly: -+ CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value); -+ dctx->disableHufAsm = value != 0; -+ return 0; -+ case ZSTD_d_maxBlockSize: -+ if (value != 0) CHECK_DBOUNDS(ZSTD_d_maxBlockSize, value); -+ dctx->maxBlockSizeParam = value; -+ return 0; - default:; - } - RETURN_ERROR(parameter_unsupported, ""); -@@ -1754,6 +1871,7 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) - || (reset == ZSTD_reset_session_and_parameters) ) { - dctx->streamStage = zdss_init; - dctx->noForwardProgress = 0; -+ dctx->isFrameDecompression = 1; - } - if ( (reset == ZSTD_reset_parameters) - || (reset == ZSTD_reset_session_and_parameters) ) { -@@ -1770,11 +1888,17 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) - return ZSTD_sizeof_DCtx(dctx); - } - --size_t 
ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) -+static size_t ZSTD_decodingBufferSize_internal(unsigned long long windowSize, unsigned long long frameContentSize, size_t blockSizeMax) - { -- size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); -- /* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/ -- unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2); -+ size_t const blockSize = MIN((size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX), blockSizeMax); -+ /* We need blockSize + WILDCOPY_OVERLENGTH worth of buffer so that if a block -+ * ends at windowSize + WILDCOPY_OVERLENGTH + 1 bytes, we can start writing -+ * the block at the beginning of the output buffer, and maintain a full window. -+ * -+ * We need another blockSize worth of buffer so that we can store split -+ * literals at the end of the block without overwriting the extDict window. 
-+ */ -+ unsigned long long const neededRBSize = windowSize + (blockSize * 2) + (WILDCOPY_OVERLENGTH * 2); - unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); - size_t const minRBSize = (size_t) neededSize; - RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, -@@ -1782,6 +1906,11 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long - return minRBSize; - } - -+size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) -+{ -+ return ZSTD_decodingBufferSize_internal(windowSize, frameContentSize, ZSTD_BLOCKSIZE_MAX); -+} -+ - size_t ZSTD_estimateDStreamSize(size_t windowSize) - { - size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); -@@ -1918,7 +2047,6 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB - if (zds->refMultipleDDicts && zds->ddictSet) { - ZSTD_DCtx_selectFrameDDict(zds); - } -- DEBUGLOG(5, "header size : %u", (U32)hSize); - if (ZSTD_isError(hSize)) { - return hSize; /* error */ - } -@@ -1932,6 +2060,11 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB - zds->lhSize += remainingInput; - } - input->pos = input->size; -+ /* check first few bytes */ -+ FORWARD_IF_ERROR( -+ ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format), -+ "First few bytes detected incorrect" ); -+ /* return hint input size */ - return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ - } - assert(ip != NULL); -@@ -1943,14 +2076,15 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB - if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN - && zds->fParams.frameType != ZSTD_skippableFrame - && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { -- size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart)); 
-+ size_t const cSize = ZSTD_findFrameCompressedSize_advanced(istart, (size_t)(iend-istart), zds->format); - if (cSize <= (size_t)(iend-istart)) { - /* shortcut : using single-pass mode */ - size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); - if (ZSTD_isError(decompressedSize)) return decompressedSize; -- DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") -+ DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()"); -+ assert(istart != NULL); - ip = istart + cSize; -- op += decompressedSize; -+ op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */ - zds->expected = 0; - zds->streamStage = zdss_init; - someMoreWork = 0; -@@ -1969,7 +2103,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB - DEBUGLOG(4, "Consume header"); - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); - -- if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ -+ if (zds->format == ZSTD_f_zstd1 -+ && (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ - zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); - zds->stage = ZSTDds_skipFrame; - } else { -@@ -1985,11 +2120,13 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB - zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); - RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, - frameParameter_windowTooLarge, ""); -+ if (zds->maxBlockSizeParam != 0) -+ zds->fParams.blockSizeMax = MIN(zds->fParams.blockSizeMax, (unsigned)zds->maxBlockSizeParam); - - /* Adapt buffer sizes to frame header instructions */ - { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); - size_t const neededOutBuffSize = 
zds->outBufferMode == ZSTD_bm_buffered -- ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) -+ ? ZSTD_decodingBufferSize_internal(zds->fParams.windowSize, zds->fParams.frameContentSize, zds->fParams.blockSizeMax) - : 0; - - ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); -@@ -2034,6 +2171,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB - } - if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ - FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); -+ assert(ip != NULL); - ip += neededInSize; - /* Function modifies the stage so we must break */ - break; -@@ -2048,7 +2186,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB - int const isSkipFrame = ZSTD_isSkipFrame(zds); - size_t loadedSize; - /* At this point we shouldn't be decompressing a block that we can stream. */ -- assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip)); -+ assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip))); - if (isSkipFrame) { - loadedSize = MIN(toLoad, (size_t)(iend-ip)); - } else { -@@ -2057,8 +2195,11 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB - "should never happen"); - loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); - } -- ip += loadedSize; -- zds->inPos += loadedSize; -+ if (loadedSize != 0) { -+ /* ip may be NULL */ -+ ip += loadedSize; -+ zds->inPos += loadedSize; -+ } - if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ - - /* decode loaded input */ -@@ -2068,14 +2209,17 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB - break; - } - case zdss_flush: -- { size_t const toFlushSize = zds->outEnd - zds->outStart; -+ { -+ size_t const toFlushSize = zds->outEnd - zds->outStart; - 
size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize); -- op += flushedSize; -+ -+ op = op ? op + flushedSize : op; -+ - zds->outStart += flushedSize; - if (flushedSize == toFlushSize) { /* flush completed */ - zds->streamStage = zdss_read; - if ( (zds->outBuffSize < zds->fParams.frameContentSize) -- && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { -+ && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { - DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", - (int)(zds->outBuffSize - zds->outStart), - (U32)zds->fParams.blockSizeMax); -@@ -2089,7 +2233,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB - - default: - assert(0); /* impossible */ -- RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ -+ RETURN_ERROR(GENERIC, "impossible to reach"); /* some compilers require default to do something */ - } } - - /* result */ -@@ -2102,8 +2246,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB - if ((ip==istart) && (op==ostart)) { /* no forward progress */ - zds->noForwardProgress ++; - if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { -- RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); -- RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); -+ RETURN_ERROR_IF(op==oend, noForwardProgress_destFull, ""); -+ RETURN_ERROR_IF(ip==iend, noForwardProgress_inputEmpty, ""); - assert(0); - } - } else { -@@ -2140,11 +2284,17 @@ size_t ZSTD_decompressStream_simpleArgs ( - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos) - { -- ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; -- ZSTD_inBuffer input = { src, srcSize, *srcPos }; -- /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ -- size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); -- *dstPos = output.pos; -- 
*srcPos = input.pos; -- return cErr; -+ ZSTD_outBuffer output; -+ ZSTD_inBuffer input; -+ output.dst = dst; -+ output.size = dstCapacity; -+ output.pos = *dstPos; -+ input.src = src; -+ input.size = srcSize; -+ input.pos = *srcPos; -+ { size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); -+ *dstPos = output.pos; -+ *srcPos = input.pos; -+ return cErr; -+ } - } -diff --git a/lib/zstd/decompress/zstd_decompress_block.c b/lib/zstd/decompress/zstd_decompress_block.c -index c1913b8e7c89..9fe9a12c8a2c 100644 ---- a/lib/zstd/decompress/zstd_decompress_block.c -+++ b/lib/zstd/decompress/zstd_decompress_block.c -@@ -1,5 +1,6 @@ -+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -20,12 +21,12 @@ - #include "../common/mem.h" /* low level memory routines */ - #define FSE_STATIC_LINKING_ONLY - #include "../common/fse.h" --#define HUF_STATIC_LINKING_ONLY - #include "../common/huf.h" - #include "../common/zstd_internal.h" - #include "zstd_decompress_internal.h" /* ZSTD_DCtx */ - #include "zstd_ddict.h" /* ZSTD_DDictDictContent */ - #include "zstd_decompress_block.h" -+#include "../common/bits.h" /* ZSTD_highbit32 */ - - /*_******************************************************* - * Macros -@@ -51,6 +52,13 @@ static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } - * Block decoding - ***************************************************************/ - -+static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx) -+{ -+ size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX; -+ assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX); -+ return blockSizeMax; -+} -+ - /*! 
ZSTD_getcBlockSize() : - * Provides the size of compressed block from block header `src` */ - size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, -@@ -73,41 +81,49 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, - static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize, - const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately) - { -- if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) -- { -- /* room for litbuffer to fit without read faulting */ -- dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH; -+ size_t const blockSizeMax = ZSTD_blockSizeMax(dctx); -+ assert(litSize <= blockSizeMax); -+ assert(dctx->isFrameDecompression || streaming == not_streaming); -+ assert(expectedWriteSize <= blockSizeMax); -+ if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) { -+ /* If we aren't streaming, we can just put the literals after the output -+ * of the current block. We don't need to worry about overwriting the -+ * extDict of our window, because it doesn't exist. -+ * So if we have space after the end of the block, just put it there. -+ */ -+ dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH; - dctx->litBufferEnd = dctx->litBuffer + litSize; - dctx->litBufferLocation = ZSTD_in_dst; -- } -- else if (litSize > ZSTD_LITBUFFEREXTRASIZE) -- { -- /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ -+ } else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) { -+ /* Literals fit entirely within the extra buffer, put them there to avoid -+ * having to split the literals. 
-+ */ -+ dctx->litBuffer = dctx->litExtraBuffer; -+ dctx->litBufferEnd = dctx->litBuffer + litSize; -+ dctx->litBufferLocation = ZSTD_not_in_dst; -+ } else { -+ assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE); -+ /* Literals must be split between the output block and the extra lit -+ * buffer. We fill the extra lit buffer with the tail of the literals, -+ * and put the rest of the literals at the end of the block, with -+ * WILDCOPY_OVERLENGTH of buffer room to allow for overreads. -+ * This MUST not write more than our maxBlockSize beyond dst, because in -+ * streaming mode, that could overwrite part of our extDict window. -+ */ - if (splitImmediately) { - /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ - dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; - dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE; -- } -- else { -- /* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */ -+ } else { -+ /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */ - dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize; - dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize; - } - dctx->litBufferLocation = ZSTD_split; -- } -- else -- { -- /* fits entirely within litExtraBuffer, so no split is necessary */ -- dctx->litBuffer = dctx->litExtraBuffer; -- dctx->litBufferEnd = dctx->litBuffer + litSize; -- dctx->litBufferLocation = ZSTD_not_in_dst; -+ assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize); - } - } - --/* Hidden declaration for fullbench */ --size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, -- const void* src, size_t srcSize, -- void* dst, size_t dstCapacity, const streaming_operation streaming); - /*! 
ZSTD_decodeLiteralsBlock() : - * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored - * in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current -@@ -116,7 +132,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - * - * @return : nb of bytes read from src (< srcSize ) - * note : symbol not declared but exposed for fullbench */ --size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, -+static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */ - void* dst, size_t dstCapacity, const streaming_operation streaming) - { -@@ -125,6 +141,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - - { const BYTE* const istart = (const BYTE*) src; - symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); -+ size_t const blockSizeMax = ZSTD_blockSizeMax(dctx); - - switch(litEncType) - { -@@ -134,13 +151,16 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - ZSTD_FALLTHROUGH; - - case set_compressed: -- RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); -+ RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3"); - { size_t lhSize, litSize, litCSize; - U32 singleStream=0; - U32 const lhlCode = (istart[0] >> 2) & 3; - U32 const lhc = MEM_readLE32(istart); - size_t hufSuccess; -- size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); -+ size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); -+ int const flags = 0 -+ | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0) -+ | (dctx->disableHufAsm ? 
HUF_flags_disableAsm : 0); - switch(lhlCode) - { - case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ -@@ -164,7 +184,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - break; - } - RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); -- RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); -+ RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); -+ if (!singleStream) -+ RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong, -+ "Not enough literals (%zu) for the 4-streams mode (min %u)", -+ litSize, MIN_LITERALS_FOR_4_STREAMS); - RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); - RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, ""); - ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0); -@@ -176,13 +200,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - - if (litEncType==set_repeat) { - if (singleStream) { -- hufSuccess = HUF_decompress1X_usingDTable_bmi2( -+ hufSuccess = HUF_decompress1X_usingDTable( - dctx->litBuffer, litSize, istart+lhSize, litCSize, -- dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); -+ dctx->HUFptr, flags); - } else { -- hufSuccess = HUF_decompress4X_usingDTable_bmi2( -+ assert(litSize >= MIN_LITERALS_FOR_4_STREAMS); -+ hufSuccess = HUF_decompress4X_usingDTable( - dctx->litBuffer, litSize, istart+lhSize, litCSize, -- dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); -+ dctx->HUFptr, flags); - } - } else { - if (singleStream) { -@@ -190,26 +215,28 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - hufSuccess = HUF_decompress1X_DCtx_wksp( - dctx->entropy.hufTable, dctx->litBuffer, litSize, - istart+lhSize, litCSize, dctx->workspace, -- sizeof(dctx->workspace)); -+ sizeof(dctx->workspace), flags); - #else -- hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( -+ hufSuccess = HUF_decompress1X1_DCtx_wksp( - dctx->entropy.hufTable, dctx->litBuffer, litSize, - 
istart+lhSize, litCSize, dctx->workspace, -- sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); -+ sizeof(dctx->workspace), flags); - #endif - } else { -- hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( -+ hufSuccess = HUF_decompress4X_hufOnly_wksp( - dctx->entropy.hufTable, dctx->litBuffer, litSize, - istart+lhSize, litCSize, dctx->workspace, -- sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); -+ sizeof(dctx->workspace), flags); - } - } - if (dctx->litBufferLocation == ZSTD_split) - { -+ assert(litSize > ZSTD_LITBUFFEREXTRASIZE); - ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE); - ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE); - dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; - dctx->litBufferEnd -= WILDCOPY_OVERLENGTH; -+ assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax); - } - - RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); -@@ -224,7 +251,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - case set_basic: - { size_t litSize, lhSize; - U32 const lhlCode = ((istart[0]) >> 2) & 3; -- size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); -+ size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); - switch(lhlCode) - { - case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ -@@ -237,11 +264,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - break; - case 3: - lhSize = 3; -+ RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3"); - litSize = MEM_readLE24(istart) >> 4; - break; - } - - RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); -+ RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); - RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); - ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, 
streaming, expectedWriteSize, 1); - if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ -@@ -270,7 +299,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - case set_rle: - { U32 const lhlCode = ((istart[0]) >> 2) & 3; - size_t litSize, lhSize; -- size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); -+ size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); - switch(lhlCode) - { - case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ -@@ -279,16 +308,17 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - break; - case 1: - lhSize = 2; -+ RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3"); - litSize = MEM_readLE16(istart) >> 4; - break; - case 3: - lhSize = 3; -+ RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4"); - litSize = MEM_readLE24(istart) >> 4; -- RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); - break; - } - RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); -- RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); -+ RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); - RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); - ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1); - if (dctx->litBufferLocation == ZSTD_split) -@@ -310,6 +340,18 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - } - } - -+/* Hidden declaration for fullbench */ -+size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx, -+ const void* src, size_t srcSize, -+ void* dst, size_t dstCapacity); -+size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx, -+ const void* src, size_t srcSize, -+ void* dst, size_t dstCapacity) -+{ -+ dctx->isFrameDecompression = 0; -+ return 
ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming); -+} -+ - /* Default FSE distribution tables. - * These are pre-calculated FSE decoding tables using default distributions as defined in specification : - * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions -@@ -506,14 +548,15 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, - for (i = 8; i < n; i += 8) { - MEM_write64(spread + pos + i, sv); - } -- pos += n; -+ assert(n>=0); -+ pos += (size_t)n; - } - } - /* Now we spread those positions across the table. -- * The benefit of doing it in two stages is that we avoid the the -+ * The benefit of doing it in two stages is that we avoid the - * variable size inner loop, which caused lots of branch misses. - * Now we can run through all the positions without any branch misses. -- * We unroll the loop twice, since that is what emperically worked best. -+ * We unroll the loop twice, since that is what empirically worked best. 
- */ - { - size_t position = 0; -@@ -540,7 +583,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, - for (i=0; i highThreshold) position = (position + step) & tableMask; /* lowprob area */ -+ while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; /* lowprob area */ - } } - assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ - } -@@ -551,7 +594,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, - for (u=0; u 0x7F) { - if (nbSeq == 0xFF) { - RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); -@@ -681,8 +719,16 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, - } - *nbSeqPtr = nbSeq; - -+ if (nbSeq == 0) { -+ /* No sequence : section ends immediately */ -+ RETURN_ERROR_IF(ip != iend, corruption_detected, -+ "extraneous data present in the Sequences section"); -+ return (size_t)(ip - istart); -+ } -+ - /* FSE table descriptors */ - RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ -+ RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. 
*/ - { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); - symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); - symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); -@@ -829,7 +875,7 @@ static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, pt - /* ZSTD_safecopyDstBeforeSrc(): - * This version allows overlap with dst before src, or handles the non-overlap case with dst after src - * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */ --static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) { -+static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) { - ptrdiff_t const diff = op - ip; - BYTE* const oend = op + length; - -@@ -858,6 +904,7 @@ static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length - * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). - */ - FORCE_NOINLINE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_execSequenceEnd(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, -@@ -905,6 +952,7 @@ size_t ZSTD_execSequenceEnd(BYTE* op, - * This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case. 
- */ - FORCE_NOINLINE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op, - BYTE* const oend, const BYTE* const oend_w, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, -@@ -950,6 +998,7 @@ size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op, - } - - HINT_INLINE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_execSequence(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, -@@ -964,6 +1013,11 @@ size_t ZSTD_execSequence(BYTE* op, - - assert(op != NULL /* Precondition */); - assert(oend_w < oend /* No underflow */); -+ -+#if defined(__aarch64__) -+ /* prefetch sequence starting from match that will be used for copy later */ -+ PREFETCH_L1(match); -+#endif - /* Handle edge cases in a slow path: - * - Read beyond end of literals - * - Match end is within WILDCOPY_OVERLIMIT of oend -@@ -1043,6 +1097,7 @@ size_t ZSTD_execSequence(BYTE* op, - } - - HINT_INLINE -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op, - BYTE* const oend, const BYTE* const oend_w, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, -@@ -1154,7 +1209,7 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 - } - - /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum -- * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) -+ * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32 - * bits before reloading. This value is the maximum number of bytes we read - * after reloading when we are decoding long offsets. 
- */ -@@ -1165,13 +1220,37 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 - - typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; - -+/* -+ * ZSTD_decodeSequence(): -+ * @p longOffsets : tells the decoder to reload more bit while decoding large offsets -+ * only used in 32-bit mode -+ * @return : Sequence (litL + matchL + offset) -+ */ - FORCE_INLINE_TEMPLATE seq_t --ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) -+ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq) - { - seq_t seq; -+ /* -+ * ZSTD_seqSymbol is a 64 bits wide structure. -+ * It can be loaded in one operation -+ * and its fields extracted by simply shifting or bit-extracting on aarch64. -+ * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh -+ * operations that cause performance drop. This can be avoided by using this -+ * ZSTD_memcpy hack. -+ */ -+#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__)) -+ ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS; -+ ZSTD_seqSymbol* const llDInfo = &llDInfoS; -+ ZSTD_seqSymbol* const mlDInfo = &mlDInfoS; -+ ZSTD_seqSymbol* const ofDInfo = &ofDInfoS; -+ ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol)); -+ ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol)); -+ ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol)); -+#else - const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state; - const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state; - const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state; -+#endif - seq.matchLength = mlDInfo->baseValue; - seq.litLength = llDInfo->baseValue; - { U32 const ofBase = ofDInfo->baseValue; -@@ -1186,28 +1265,31 @@ 
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) - U32 const llnbBits = llDInfo->nbBits; - U32 const mlnbBits = mlDInfo->nbBits; - U32 const ofnbBits = ofDInfo->nbBits; -+ -+ assert(llBits <= MaxLLBits); -+ assert(mlBits <= MaxMLBits); -+ assert(ofBits <= MaxOff); - /* - * As gcc has better branch and block analyzers, sometimes it is only -- * valuable to mark likelyness for clang, it gives around 3-4% of -+ * valuable to mark likeliness for clang, it gives around 3-4% of - * performance. - */ - - /* sequence */ - { size_t offset; -- #if defined(__clang__) -- if (LIKELY(ofBits > 1)) { -- #else - if (ofBits > 1) { -- #endif - ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); - ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); -- assert(ofBits <= MaxOff); -+ ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32); -+ ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits); - if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { -- U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); -+ /* Always read extra bits, this keeps the logic simple, -+ * avoids branches, and avoids accidentally reading 0 bits. 
-+ */ -+ U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32; - offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); - BIT_reloadDStream(&seqState->DStream); -- if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); -- assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ -+ offset += BIT_readBitsFast(&seqState->DStream, extraBits); - } else { - offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); -@@ -1224,7 +1306,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) - } else { - offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); - { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; -- temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ -+ temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */ - if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; -@@ -1232,11 +1314,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) - seq.offset = offset; - } - -- #if defined(__clang__) -- if (UNLIKELY(mlBits > 0)) -- #else - if (mlBits > 0) -- #endif - seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); - - if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) -@@ -1246,11 +1324,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) - /* Ensure there are enough bits to read the rest of data in 64-bit mode. 
*/ - ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); - -- #if defined(__clang__) -- if (UNLIKELY(llBits > 0)) -- #else - if (llBits > 0) -- #endif - seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); - - if (MEM_32bits()) -@@ -1259,17 +1333,22 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) - DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", - (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - -- ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */ -- ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */ -- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ -- ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */ -+ if (!isLastSeq) { -+ /* don't update FSE state for last Sequence */ -+ ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */ -+ ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */ -+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ -+ ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */ -+ BIT_reloadDStream(&seqState->DStream); -+ } - } - - return seq; - } - --#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION --MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) -+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) -+#if DEBUGLEVEL >= 1 -+static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) - { - size_t const windowSize = dctx->fParams.windowSize; - /* No dictionary used. 
*/ -@@ -1283,30 +1362,33 @@ MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefix - /* Dictionary is active. */ - return 1; - } -+#endif - --MEM_STATIC void ZSTD_assertValidSequence( -+static void ZSTD_assertValidSequence( - ZSTD_DCtx const* dctx, - BYTE const* op, BYTE const* oend, - seq_t const seq, - BYTE const* prefixStart, BYTE const* virtualStart) - { - #if DEBUGLEVEL >= 1 -- size_t const windowSize = dctx->fParams.windowSize; -- size_t const sequenceSize = seq.litLength + seq.matchLength; -- BYTE const* const oLitEnd = op + seq.litLength; -- DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", -- (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); -- assert(op <= oend); -- assert((size_t)(oend - op) >= sequenceSize); -- assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); -- if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { -- size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); -- /* Offset must be within the dictionary. */ -- assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); -- assert(seq.offset <= windowSize + dictSize); -- } else { -- /* Offset must be within our window. */ -- assert(seq.offset <= windowSize); -+ if (dctx->isFrameDecompression) { -+ size_t const windowSize = dctx->fParams.windowSize; -+ size_t const sequenceSize = seq.litLength + seq.matchLength; -+ BYTE const* const oLitEnd = op + seq.litLength; -+ DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", -+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); -+ assert(op <= oend); -+ assert((size_t)(oend - op) >= sequenceSize); -+ assert(sequenceSize <= ZSTD_blockSizeMax(dctx)); -+ if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { -+ size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); -+ /* Offset must be within the dictionary. 
*/ -+ assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); -+ assert(seq.offset <= windowSize + dictSize); -+ } else { -+ /* Offset must be within our window. */ -+ assert(seq.offset <= windowSize); -+ } - } - #else - (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart; -@@ -1322,23 +1404,21 @@ DONT_VECTORIZE - ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { - const BYTE* ip = (const BYTE*)seqStart; - const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE*)dst; -- BYTE* const oend = ostart + maxDstSize; -+ BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize); - BYTE* op = ostart; - const BYTE* litPtr = dctx->litPtr; - const BYTE* litBufferEnd = dctx->litBufferEnd; - const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); - const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); - const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); -- DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer"); -- (void)frame; -+ DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq); - -- /* Regen sequences */ -+ /* Literals are split between internal buffer & output buffer */ - if (nbSeq) { - seqState_t seqState; - dctx->fseEntropy = 1; -@@ -1357,8 +1437,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, - BIT_DStream_completed < BIT_DStream_overflow); - - /* decompress without overrunning litPtr begins */ -- { -- seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset); -+ { seq_t sequence = {0,0,0}; /* some static analyzer believe that @sequence is not initialized (it necessarily is, since for(;;) loop as at least one iteration) */ - /* Align the decompression loop to 32 + 16 bytes. 
- * - * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression -@@ -1420,27 +1499,26 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, - #endif - - /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */ -- for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) { -- size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); -+ for ( ; nbSeq; nbSeq--) { -+ sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); -+ if (litPtr + sequence.litLength > dctx->litBufferEnd) break; -+ { size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); - #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) -- assert(!ZSTD_isError(oneSeqSize)); -- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); -+ assert(!ZSTD_isError(oneSeqSize)); -+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); - #endif -- if (UNLIKELY(ZSTD_isError(oneSeqSize))) -- return oneSeqSize; -- DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); -- op += oneSeqSize; -- if (UNLIKELY(!--nbSeq)) -- break; -- BIT_reloadDStream(&(seqState.DStream)); -- sequence = ZSTD_decodeSequence(&seqState, isLongOffset); -- } -+ if (UNLIKELY(ZSTD_isError(oneSeqSize))) -+ return oneSeqSize; -+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); -+ op += oneSeqSize; -+ } } -+ DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)"); - - /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */ - if (nbSeq > 0) { - const size_t leftoverLit = dctx->litBufferEnd - litPtr; -- if 
(leftoverLit) -- { -+ DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength); -+ if (leftoverLit) { - RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); - ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); - sequence.litLength -= leftoverLit; -@@ -1449,24 +1527,22 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, - litPtr = dctx->litExtraBuffer; - litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; - dctx->litBufferLocation = ZSTD_not_in_dst; -- { -- size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); -+ { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); - #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); -- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); -+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); - #endif - if (UNLIKELY(ZSTD_isError(oneSeqSize))) - return oneSeqSize; - DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); - op += oneSeqSize; -- if (--nbSeq) -- BIT_reloadDStream(&(seqState.DStream)); - } -+ nbSeq--; - } - } - -- if (nbSeq > 0) /* there is remaining lit from extra buffer */ -- { -+ if (nbSeq > 0) { -+ /* there is remaining lit from extra buffer */ - - #if defined(__x86_64__) - __asm__(".p2align 6"); -@@ -1485,35 +1561,34 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, - # endif - #endif - -- for (; ; ) { -- seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); -+ for ( ; nbSeq ; nbSeq--) { -+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, 
dictEnd); - #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); -- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); -+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); - #endif - if (UNLIKELY(ZSTD_isError(oneSeqSize))) - return oneSeqSize; - DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); - op += oneSeqSize; -- if (UNLIKELY(!--nbSeq)) -- break; -- BIT_reloadDStream(&(seqState.DStream)); - } - } - - /* check if reached exact end */ - DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq); - RETURN_ERROR_IF(nbSeq, corruption_detected, ""); -- RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); -+ DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed); -+ RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); - /* save reps for next block */ - { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } - } - - /* last literal segment */ -- if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */ -- { -- size_t const lastLLSize = litBufferEnd - litPtr; -+ if (dctx->litBufferLocation == ZSTD_split) { -+ /* split hasn't been reached yet, first get dst then copy litExtraBuffer */ -+ size_t const lastLLSize = (size_t)(litBufferEnd - litPtr); -+ DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize); - RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, ""); - if (op != NULL) { - ZSTD_memmove(op, litPtr, lastLLSize); -@@ -1523,15 +1598,17 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, - litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; - dctx->litBufferLocation = ZSTD_not_in_dst; - } 
-- { size_t const lastLLSize = litBufferEnd - litPtr; -+ /* copy last literals from internal buffer */ -+ { size_t const lastLLSize = (size_t)(litBufferEnd - litPtr); -+ DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize); - RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); - if (op != NULL) { - ZSTD_memcpy(op, litPtr, lastLLSize); - op += lastLLSize; -- } -- } -+ } } - -- return op-ostart; -+ DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart)); -+ return (size_t)(op - ostart); - } - - FORCE_INLINE_TEMPLATE size_t -@@ -1539,21 +1616,19 @@ DONT_VECTORIZE - ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { - const BYTE* ip = (const BYTE*)seqStart; - const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE*)dst; -- BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer; -+ BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? 
ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer; - BYTE* op = ostart; - const BYTE* litPtr = dctx->litPtr; - const BYTE* const litEnd = litPtr + dctx->litSize; - const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart); - const BYTE* const vBase = (const BYTE*)(dctx->virtualStart); - const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd); -- DEBUGLOG(5, "ZSTD_decompressSequences_body"); -- (void)frame; -+ DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq); - - /* Regen sequences */ - if (nbSeq) { -@@ -1568,11 +1643,6 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, - ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); - assert(dst != NULL); - -- ZSTD_STATIC_ASSERT( -- BIT_DStream_unfinished < BIT_DStream_completed && -- BIT_DStream_endOfBuffer < BIT_DStream_completed && -- BIT_DStream_completed < BIT_DStream_overflow); -- - #if defined(__x86_64__) - __asm__(".p2align 6"); - __asm__("nop"); -@@ -1587,73 +1657,70 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, - # endif - #endif - -- for ( ; ; ) { -- seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); -+ for ( ; nbSeq ; nbSeq--) { -+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); - #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); -- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); -+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); - #endif - if (UNLIKELY(ZSTD_isError(oneSeqSize))) - return oneSeqSize; - DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); - op += oneSeqSize; -- if (UNLIKELY(!--nbSeq)) -- break; -- BIT_reloadDStream(&(seqState.DStream)); - } - - /* check if reached exact end */ -- DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode 
loop, remaining nbSeq : %i", nbSeq); -- RETURN_ERROR_IF(nbSeq, corruption_detected, ""); -- RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); -+ assert(nbSeq == 0); -+ RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); - /* save reps for next block */ - { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } - } - - /* last literal segment */ -- { size_t const lastLLSize = litEnd - litPtr; -+ { size_t const lastLLSize = (size_t)(litEnd - litPtr); -+ DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize); - RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); - if (op != NULL) { - ZSTD_memcpy(op, litPtr, lastLLSize); - op += lastLLSize; -- } -- } -+ } } - -- return op-ostart; -+ DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart)); -+ return (size_t)(op - ostart); - } - - static size_t - ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { -- return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - - static size_t - ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { -- return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ - - #ifndef 
ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT - --FORCE_INLINE_TEMPLATE size_t --ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence, -+FORCE_INLINE_TEMPLATE -+ -+size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence, - const BYTE* const prefixStart, const BYTE* const dictEnd) - { - prefetchPos += sequence.litLength; - { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart; -- const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. -- * No consequence though : memory address is only used for prefetching, not for dereferencing */ -+ /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. -+ * No consequence though : memory address is only used for prefetching, not for dereferencing */ -+ const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset); - PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ - } - return prefetchPos + sequence.matchLength; -@@ -1668,20 +1735,18 @@ ZSTD_decompressSequencesLong_body( - ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { - const BYTE* ip = (const BYTE*)seqStart; - const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE*)dst; -- BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize; -+ BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? 
dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize); - BYTE* op = ostart; - const BYTE* litPtr = dctx->litPtr; - const BYTE* litBufferEnd = dctx->litBufferEnd; - const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); - const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); - const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); -- (void)frame; - - /* Regen sequences */ - if (nbSeq) { -@@ -1706,20 +1771,17 @@ ZSTD_decompressSequencesLong_body( - ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); - - /* prepare in advance */ -- for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNblitBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) -- { -+ if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) { - /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */ - const size_t leftoverLit = dctx->litBufferEnd - litPtr; - if (leftoverLit) -@@ -1732,26 +1794,26 @@ ZSTD_decompressSequencesLong_body( - litPtr = dctx->litExtraBuffer; - litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; - dctx->litBufferLocation = ZSTD_not_in_dst; -- oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); -+ { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); - #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) -- assert(!ZSTD_isError(oneSeqSize)); -- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); -+ assert(!ZSTD_isError(oneSeqSize)); -+ 
ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); - #endif -- if (ZSTD_isError(oneSeqSize)) return oneSeqSize; -+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - -- prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); -- sequences[seqNb & STORED_SEQS_MASK] = sequence; -- op += oneSeqSize; -- } -+ prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); -+ sequences[seqNb & STORED_SEQS_MASK] = sequence; -+ op += oneSeqSize; -+ } } - else - { - /* lit buffer is either wholly contained in first or second split, or not split at all*/ -- oneSeqSize = dctx->litBufferLocation == ZSTD_split ? -+ size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ? - ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) : - ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); - #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); -- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); -+ ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); - #endif - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - -@@ -1760,17 +1822,15 @@ ZSTD_decompressSequencesLong_body( - op += oneSeqSize; - } - } -- RETURN_ERROR_IF(seqNblitBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) -- { -+ if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) { - const size_t leftoverLit = dctx->litBufferEnd - litPtr; -- 
if (leftoverLit) -- { -+ if (leftoverLit) { - RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); - ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); - sequence->litLength -= leftoverLit; -@@ -1779,11 +1839,10 @@ ZSTD_decompressSequencesLong_body( - litPtr = dctx->litExtraBuffer; - litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; - dctx->litBufferLocation = ZSTD_not_in_dst; -- { -- size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); -+ { size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); - #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); -- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); -+ ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); - #endif - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - op += oneSeqSize; -@@ -1796,7 +1855,7 @@ ZSTD_decompressSequencesLong_body( - ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); - #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); -- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); -+ ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); - #endif - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - op += oneSeqSize; -@@ -1808,8 +1867,7 @@ ZSTD_decompressSequencesLong_body( - } - - /* last literal segment */ -- if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */ -- { -+ if (dctx->litBufferLocation == ZSTD_split) { /* 
first deplete literal buffer in dst, then copy litExtraBuffer */ - size_t const lastLLSize = litBufferEnd - litPtr; - RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, ""); - if (op != NULL) { -@@ -1827,17 +1885,16 @@ ZSTD_decompressSequencesLong_body( - } - } - -- return op-ostart; -+ return (size_t)(op - ostart); - } - - static size_t - ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { -- return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ - -@@ -1851,20 +1908,18 @@ DONT_VECTORIZE - ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { -- return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - static BMI2_TARGET_ATTRIBUTE size_t - DONT_VECTORIZE - ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { -- return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ - -@@ 
-1873,10 +1928,9 @@ static BMI2_TARGET_ATTRIBUTE size_t - ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { -- return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ - -@@ -1886,37 +1940,34 @@ typedef size_t (*ZSTD_decompressSequences_t)( - ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame); -+ const ZSTD_longOffset_e isLongOffset); - - #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG - static size_t - ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { - DEBUGLOG(5, "ZSTD_decompressSequences"); - #if DYNAMIC_BMI2 - if (ZSTD_DCtx_get_bmi2(dctx)) { -- return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - #endif -- return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - static size_t - ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { - DEBUGLOG(5, 
"ZSTD_decompressSequencesSplitLitBuffer"); - #if DYNAMIC_BMI2 - if (ZSTD_DCtx_get_bmi2(dctx)) { -- return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - #endif -- return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ - -@@ -1931,69 +1982,114 @@ static size_t - ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, -- const ZSTD_longOffset_e isLongOffset, -- const int frame) -+ const ZSTD_longOffset_e isLongOffset) - { - DEBUGLOG(5, "ZSTD_decompressSequencesLong"); - #if DYNAMIC_BMI2 - if (ZSTD_DCtx_get_bmi2(dctx)) { -- return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - #endif -- return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); - } - #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ - - -+/* -+ * @returns The total size of the history referenceable by zstd, including -+ * both the prefix and the extDict. At @p op any offset larger than this -+ * is invalid. 
-+ */ -+static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart) -+{ -+ return (size_t)(op - virtualStart); -+} -+ -+typedef struct { -+ unsigned longOffsetShare; -+ unsigned maxNbAdditionalBits; -+} ZSTD_OffsetInfo; - --#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ -- !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) --/* ZSTD_getLongOffsetsShare() : -+/* ZSTD_getOffsetInfo() : - * condition : offTable must be valid - * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) -- * compared to maximum possible of (1< 22) total += 1; -+ ZSTD_OffsetInfo info = {0, 0}; -+ /* If nbSeq == 0, then the offTable is uninitialized, but we have -+ * no sequences, so both values should be 0. -+ */ -+ if (nbSeq != 0) { -+ const void* ptr = offTable; -+ U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog; -+ const ZSTD_seqSymbol* table = offTable + 1; -+ U32 const max = 1 << tableLog; -+ U32 u; -+ DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog); -+ -+ assert(max <= (1 << OffFSELog)); /* max not too large */ -+ for (u=0; u 22) info.longOffsetShare += 1; -+ } -+ -+ assert(tableLog <= OffFSELog); -+ info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */ - } - -- assert(tableLog <= OffFSELog); -- total <<= (OffFSELog - tableLog); /* scale to OffFSELog */ -+ return info; -+} - -- return total; -+/* -+ * @returns The maximum offset we can decode in one read of our bitstream, without -+ * reloading more bits in the middle of the offset bits read. Any offsets larger -+ * than this must use the long offset decoder. -+ */ -+static size_t ZSTD_maxShortOffset(void) -+{ -+ if (MEM_64bits()) { -+ /* We can decode any offset without reloading bits. -+ * This might change if the max window size grows. -+ */ -+ ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); -+ return (size_t)-1; -+ } else { -+ /* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1. 
-+ * This offBase would require STREAM_ACCUMULATOR_MIN extra bits. -+ * Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset. -+ */ -+ size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1; -+ size_t const maxOffset = maxOffbase - ZSTD_REP_NUM; -+ assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN); -+ return maxOffset; -+ } - } --#endif - - size_t - ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, -- const void* src, size_t srcSize, const int frame, const streaming_operation streaming) -+ const void* src, size_t srcSize, const streaming_operation streaming) - { /* blockType == blockCompressed */ - const BYTE* ip = (const BYTE*)src; -- /* isLongOffset must be true if there are long offsets. -- * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN. -- * We don't expect that to be the case in 64-bit mode. -- * In block mode, window size is not known, so we have to be conservative. -- * (note: but it could be evaluated from current-lowLimit) -- */ -- ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); -- DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); -- -- RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); -+ DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize); -+ -+ /* Note : the wording of the specification -+ * allows compressed block to be sized exactly ZSTD_blockSizeMax(dctx). -+ * This generally does not happen, as it makes little sense, -+ * since an uncompressed block would feature same size and have no decompression cost. -+ * Also, note that decoder from reference libzstd before < v1.5.4 -+ * would consider this edge case as an error. 
-+ * As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx) -+ * for broader compatibility with the deployed ecosystem of zstd decoders */ -+ RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, ""); - - /* Decode literals section */ - { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming); -- DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); -+ DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize); - if (ZSTD_isError(litCSize)) return litCSize; - ip += litCSize; - srcSize -= litCSize; -@@ -2001,6 +2097,23 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, - - /* Build Decoding Tables */ - { -+ /* Compute the maximum block size, which must also work when !frame and fParams are unset. -+ * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t. -+ */ -+ size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx)); -+ size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart); -+ /* isLongOffset must be true if there are long offsets. -+ * Offsets are long if they are larger than ZSTD_maxShortOffset(). -+ * We don't expect that to be the case in 64-bit mode. -+ * -+ * We check here to see if our history is large enough to allow long offsets. -+ * If it isn't, then we can't possible have (valid) long offsets. If the offset -+ * is invalid, then it is okay to read it incorrectly. -+ * -+ * If isLongOffsets is true, then we will later check our decoding table to see -+ * if it is even possible to generate long offsets. -+ */ -+ ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset())); - /* These macros control at build-time which decompressor implementation - * we use. If neither is defined, we do some inspection and dispatch at - * runtime. 
-@@ -2008,6 +2121,11 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, - #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) - int usePrefetchDecoder = dctx->ddictIsCold; -+#else -+ /* Set to 1 to avoid computing offset info if we don't need to. -+ * Otherwise this value is ignored. -+ */ -+ int usePrefetchDecoder = 1; - #endif - int nbSeq; - size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); -@@ -2015,40 +2133,55 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, - ip += seqHSize; - srcSize -= seqHSize; - -- RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); -+ RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); -+ RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall, -+ "invalid dst"); - --#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ -- !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) -- if ( !usePrefetchDecoder -- && (!frame || (dctx->fParams.windowSize > (1<<24))) -- && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ -- U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); -- U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ -- usePrefetchDecoder = (shareLongOffsets >= minShare); -+ /* If we could potentially have long offsets, or we might want to use the prefetch decoder, -+ * compute information about the share of long offsets, and the maximum nbAdditionalBits. 
-+ * NOTE: could probably use a larger nbSeq limit -+ */ -+ if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) { -+ ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq); -+ if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) { -+ /* If isLongOffset, but the maximum number of additional bits that we see in our table is small -+ * enough, then we know it is impossible to have too long an offset in this block, so we can -+ * use the regular offset decoder. -+ */ -+ isLongOffset = ZSTD_lo_isRegularOffset; -+ } -+ if (!usePrefetchDecoder) { -+ U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ -+ usePrefetchDecoder = (info.longOffsetShare >= minShare); -+ } - } --#endif - - dctx->ddictIsCold = 0; - - #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) -- if (usePrefetchDecoder) -+ if (usePrefetchDecoder) { -+#else -+ (void)usePrefetchDecoder; -+ { - #endif - #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -- return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); - #endif -+ } - - #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG - /* else */ - if (dctx->litBufferLocation == ZSTD_split) -- return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); - else -- return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); -+ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); - #endif - } - } - - -+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR - void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize) - { - if (dst != 
dctx->previousDstEnd && dstSize > 0) { /* not contiguous */ -@@ -2060,13 +2193,24 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize) - } - - --size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, -- void* dst, size_t dstCapacity, -- const void* src, size_t srcSize) -+size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx, -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize) - { - size_t dSize; -+ dctx->isFrameDecompression = 0; - ZSTD_checkContinuity(dctx, dst, dstCapacity); -- dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming); -+ dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming); -+ FORWARD_IF_ERROR(dSize, ""); - dctx->previousDstEnd = (char*)dst + dSize; - return dSize; - } -+ -+ -+/* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */ -+size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize) -+{ -+ return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize); -+} -diff --git a/lib/zstd/decompress/zstd_decompress_block.h b/lib/zstd/decompress/zstd_decompress_block.h -index 3d2d57a5d25a..becffbd89364 100644 ---- a/lib/zstd/decompress/zstd_decompress_block.h -+++ b/lib/zstd/decompress/zstd_decompress_block.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -47,7 +48,7 @@ typedef enum { - */ - size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, -- const void* src, size_t srcSize, const int frame, const streaming_operation streaming); -+ const void* src, size_t srcSize, const streaming_operation streaming); - - /* ZSTD_buildFSETable() : - * generate FSE decoding table for one symbol (ll, ml or off) -@@ -64,5 +65,10 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, - unsigned tableLog, void* wksp, size_t wkspSize, - int bmi2); - -+/* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */ -+size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx, -+ void* dst, size_t dstCapacity, -+ const void* src, size_t srcSize); -+ - - #endif /* ZSTD_DEC_BLOCK_H */ -diff --git a/lib/zstd/decompress/zstd_decompress_internal.h b/lib/zstd/decompress/zstd_decompress_internal.h -index 98102edb6a83..0f02526be774 100644 ---- a/lib/zstd/decompress/zstd_decompress_internal.h -+++ b/lib/zstd/decompress/zstd_decompress_internal.h -@@ -1,5 +1,6 @@ -+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Yann Collet, Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -@@ -75,12 +76,13 @@ static UNUSED_ATTR const U32 ML_base[MaxML+1] = { - - #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) - #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) -+#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12 - - typedef struct { - ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ - ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ - ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ -- HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ -+ HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */ - U32 rep[ZSTD_REP_NUM]; - U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; - } ZSTD_entropyDTables_t; -@@ -152,6 +154,7 @@ struct ZSTD_DCtx_s - size_t litSize; - size_t rleSize; - size_t staticSize; -+ int isFrameDecompression; - #if DYNAMIC_BMI2 != 0 - int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ - #endif -@@ -164,6 +167,8 @@ struct ZSTD_DCtx_s - ZSTD_dictUses_e dictUses; - ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ - ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. 
Default == 0 (disabled) */ -+ int disableHufAsm; -+ int maxBlockSizeParam; - - /* streaming */ - ZSTD_dStreamStage streamStage; -diff --git a/lib/zstd/decompress_sources.h b/lib/zstd/decompress_sources.h -index a06ca187aab5..8a47eb2a4514 100644 ---- a/lib/zstd/decompress_sources.h -+++ b/lib/zstd/decompress_sources.h -@@ -1,6 +1,6 @@ - /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ - /* -- * Copyright (c) Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -diff --git a/lib/zstd/zstd_common_module.c b/lib/zstd/zstd_common_module.c -index 22686e367e6f..466828e35752 100644 ---- a/lib/zstd/zstd_common_module.c -+++ b/lib/zstd/zstd_common_module.c -@@ -1,6 +1,6 @@ - // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -24,9 +24,6 @@ EXPORT_SYMBOL_GPL(HUF_readStats_wksp); - EXPORT_SYMBOL_GPL(ZSTD_isError); - EXPORT_SYMBOL_GPL(ZSTD_getErrorName); - EXPORT_SYMBOL_GPL(ZSTD_getErrorCode); --EXPORT_SYMBOL_GPL(ZSTD_customMalloc); --EXPORT_SYMBOL_GPL(ZSTD_customCalloc); --EXPORT_SYMBOL_GPL(ZSTD_customFree); - - MODULE_LICENSE("Dual BSD/GPL"); - MODULE_DESCRIPTION("Zstd Common"); -diff --git a/lib/zstd/zstd_compress_module.c b/lib/zstd/zstd_compress_module.c -index 04e1b5c01d9b..8ecf43226af2 100644 ---- a/lib/zstd/zstd_compress_module.c -+++ b/lib/zstd/zstd_compress_module.c -@@ -1,6 +1,6 @@ - // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the -diff --git a/lib/zstd/zstd_decompress_module.c b/lib/zstd/zstd_decompress_module.c -index f4ed952ed485..7d31518e9d5a 100644 ---- a/lib/zstd/zstd_decompress_module.c -+++ b/lib/zstd/zstd_decompress_module.c -@@ -1,6 +1,6 @@ - // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause - /* -- * Copyright (c) Facebook, Inc. -+ * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the -@@ -77,7 +77,7 @@ EXPORT_SYMBOL(zstd_init_dstream); - - size_t zstd_reset_dstream(zstd_dstream *dstream) - { -- return ZSTD_resetDStream(dstream); -+ return ZSTD_DCtx_reset(dstream, ZSTD_reset_session_only); - } - EXPORT_SYMBOL(zstd_reset_dstream); - --- -2.46.0.rc1 diff --git a/patches/cachyos/0003-nvidia.patch b/patches/cachyos/0003-nvidia.patch deleted file mode 100644 index a06229e..0000000 --- a/patches/cachyos/0003-nvidia.patch +++ /dev/null @@ -1,761 +0,0 @@ -From eb7e13baaf58cdede50c060633bdb14bf9603a54 Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Mon, 3 Jun 2024 15:33:26 +0200 -Subject: [PATCH] Fix 6.10 NVIDIA - -Co Authord by Laio Oriel Seman - -Signed-off-by: Peter Jung ---- - include/linux/mm.h | 4 ++++ - mm/memory.c | 37 ++++++++++++++++++++++++++++++++++++- - mm/nommu.c | 21 +++++++++++++++++++++ - 3 files changed, 61 insertions(+), 1 deletion(-) - -diff --git a/include/linux/mm.h b/include/linux/mm.h -index 9849dfda44d43..adc5a252da02e 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -2438,6 +2438,10 @@ int - copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); - int follow_pte(struct vm_area_struct *vma, unsigned long address, - pte_t **ptepp, spinlock_t **ptlp); -+int follow_pfn(struct vm_area_struct *vma, unsigned long address, -+ unsigned long *pfn); -+//int follow_phys(struct vm_area_struct *vma, unsigned long address, -+// unsigned int 
flags, unsigned long *prot, resource_size_t *phys); - int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, - void *buf, int len, int write); - -diff --git a/mm/memory.c b/mm/memory.c -index 0f47a533014e4..0401d10b3d824 100644 ---- a/mm/memory.c -+++ b/mm/memory.c -@@ -5962,7 +5962,8 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) - * Only IO mappings and raw PFN mappings are allowed. The mmap semaphore - * should be taken for read. - * -- * This function must not be used to modify PTE content. -+ * KVM uses this function. While it is arguably less bad than ``follow_pfn``, -+ * it is not a good general-purpose API. - * - * Return: zero on success, -ve otherwise. - */ -@@ -6012,6 +6013,40 @@ int follow_pte(struct vm_area_struct *vma, unsigned long address, - } - EXPORT_SYMBOL_GPL(follow_pte); - -+/** -+ * follow_pfn - look up PFN at a user virtual address -+ * @vma: memory mapping -+ * @address: user virtual address -+ * @pfn: location to store found PFN -+ * -+ * Only IO mappings and raw PFN mappings are allowed. -+ * -+ * This function does not allow the caller to read the permissions -+ * of the PTE. Do not use it. -+ * -+ * Return: zero and the pfn at @pfn on success, -ve otherwise. 
-+ */ -+int follow_pfn(struct vm_area_struct *vma, unsigned long address, -+ unsigned long *pfn) -+{ -+ int ret = -EINVAL; -+ spinlock_t *ptl; -+ pte_t *ptep; -+ -+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) -+ return ret; -+ -+ //ret = follow_pte(vma->vm_mm, address, &ptep, &ptl); -+ ret = follow_pte(vma, address, &ptep, &ptl); -+ -+ if (ret) -+ return ret; -+ *pfn = pte_pfn(ptep_get(ptep)); -+ pte_unmap_unlock(ptep, ptl); -+ return 0; -+} -+EXPORT_SYMBOL(follow_pfn); -+ - #ifdef CONFIG_HAVE_IOREMAP_PROT - /** - * generic_access_phys - generic implementation for iomem mmap access -diff --git a/mm/nommu.c b/mm/nommu.c -index 7296e775e04e2..8e0deb733bfef 100644 ---- a/mm/nommu.c -+++ b/mm/nommu.c -@@ -110,6 +110,27 @@ unsigned int kobjsize(const void *objp) - return page_size(page); - } - -+/** -+ * follow_pfn - look up PFN at a user virtual address -+ * @vma: memory mapping -+ * @address: user virtual address -+ * @pfn: location to store found PFN -+ * -+ * Only IO mappings and raw PFN mappings are allowed. -+ * -+ * Returns zero and the pfn at @pfn on success, -ve otherwise. -+ */ -+int follow_pfn(struct vm_area_struct *vma, unsigned long address, -+ unsigned long *pfn) -+{ -+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) -+ return -EINVAL; -+ -+ *pfn = address >> PAGE_SHIFT; -+ return 0; -+} -+EXPORT_SYMBOL(follow_pfn); -+ - void vfree(const void *addr) - { - kfree(addr); --- -2.45.1 - ---- a/kernel/nvidia-drm/nvidia-drm-drv.c -+++ b/kernel/nvidia-drm/nvidia-drm-drv.c -@@ -480,6 +480,22 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags) - return -ENODEV; - } - -+#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) -+ /* -+ * If fbdev is enabled, take modeset ownership now before other DRM clients -+ * can take master (and thus NVKMS ownership). 
-+ */ -+ if (nv_drm_fbdev_module_param) { -+ if (!nvKms->grabOwnership(pDevice)) { -+ nvKms->freeDevice(pDevice); -+ NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to grab NVKMS modeset ownership"); -+ return -EBUSY; -+ } -+ -+ nv_dev->hasFramebufferConsole = NV_TRUE; -+ } -+#endif -+ - mutex_lock(&nv_dev->lock); - - /* Set NvKmsKapiDevice */ -@@ -590,6 +606,15 @@ static void __nv_drm_unload(struct drm_device *dev) - return; - } - -+ /* Release modeset ownership if fbdev is enabled */ -+ -+#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) -+ if (nv_dev->hasFramebufferConsole) { -+ drm_atomic_helper_shutdown(dev); -+ nvKms->releaseOwnership(nv_dev->pDevice); -+ } -+#endif -+ - cancel_delayed_work_sync(&nv_dev->hotplug_event_work); - mutex_lock(&nv_dev->lock); - -@@ -1768,14 +1793,7 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info) - } - - #if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) -- if (nv_drm_fbdev_module_param && -- drm_core_check_feature(dev, DRIVER_MODESET)) { -- -- if (!nvKms->grabOwnership(nv_dev->pDevice)) { -- NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to grab NVKMS modeset ownership"); -- goto failed_grab_ownership; -- } -- -+ if (nv_dev->hasFramebufferConsole) { - if (bus_is_pci) { - struct pci_dev *pdev = to_pci_dev(device); - -@@ -1786,8 +1804,6 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info) - #endif - } - drm_fbdev_generic_setup(dev, 32); -- -- nv_dev->hasFramebufferConsole = NV_TRUE; - } - #endif /* defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) */ - -@@ -1798,12 +1814,6 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info) - - return; /* Success */ - --#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) --failed_grab_ownership: -- -- drm_dev_unregister(dev); --#endif -- - failed_drm_register: - - nv_drm_dev_free(dev); -@@ -1870,12 +1880,6 @@ void nv_drm_remove_devices(void) - struct nv_drm_device *next = dev_list->next; - struct drm_device *dev = dev_list->dev; - --#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) -- if 
(dev_list->hasFramebufferConsole) { -- drm_atomic_helper_shutdown(dev); -- nvKms->releaseOwnership(dev_list->pDevice); -- } --#endif - drm_dev_unregister(dev); - nv_drm_dev_free(dev); - - From 612740b11c9645e0f0240b3ca5908ef225763bc8 Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Thu, 27 Jun 2024 19:46:51 +0200 -Subject: [PATCH] gsp-stutter-fix - -We've been having reports of stutter issues in 555 releases related to GSP enablement. On the proprietary driver, NVreg_EnableGpuFirmware=0 makes them go away; on the open driver that's not an option. - -So far, we've identified two possible causes here. One is fixed by commit 674c009 below. The other we can't fix/workaround in the kernel modules and requires usermode changes, but commit 8c1c49b should tell us if that path is actually being hit or not. - -I've also augmented the logs captured by nvidia-bug-report.sh with some of the info that we found severely lacking in the bug reports so far. - -My hope is that folks that have experienced these stutter issues can take these patches, try to reproduce the issue and report back with their findings (and their nvidia-bug-report logs). Many thanks in advance to anyone willing to go the extra mile(s) for us here! - -We've unfortunately missed beta2 / 555.52 with this stuff (security fixes can't wait), but here it is early so we don't have to wait on the next release. 
---- - kernel-open/nvidia/nv.c | 10 + - src/nvidia/arch/nvalloc/unix/include/osapi.h | 6 - - src/nvidia/arch/nvalloc/unix/src/escape.c | 46 ---- - src/nvidia/arch/nvalloc/unix/src/osapi.c | 230 ++++++++----------- - src/nvidia/exports_link_command.txt | 1 - - src/nvidia/src/kernel/disp/disp_sw.c | 23 ++ - 6 files changed, 132 insertions(+), 184 deletions(-) - -diff --git a/kernel-open/nvidia/nv.c b/kernel-open/nvidia/nv.c -index 99792de9..ccef3f29 100644 ---- a/kernel-open/nvidia/nv.c -+++ b/kernel-open/nvidia/nv.c -@@ -4042,6 +4042,16 @@ int NV_API_CALL nv_get_event( - nvidia_event_t *nvet; - unsigned long eflags; - -+ // -+ // Note that the head read/write is not atomic when done outside of the -+ // spinlock, so this might not be a valid pointer at all. But if we read -+ // NULL here that means that the value indeed was NULL and we can bail -+ // early since there's no events. Otherwise, we have to do a proper read -+ // under a spinlock. -+ // -+ if (nvlfp->event_data_head == NULL) -+ return NV_ERR_GENERIC; -+ - NV_SPIN_LOCK_IRQSAVE(&nvlfp->fp_lock, eflags); - - nvet = nvlfp->event_data_head; -diff --git a/src/nvidia/arch/nvalloc/unix/include/osapi.h b/src/nvidia/arch/nvalloc/unix/include/osapi.h -index f91e3aa5..640155e9 100644 ---- a/src/nvidia/arch/nvalloc/unix/include/osapi.h -+++ b/src/nvidia/arch/nvalloc/unix/include/osapi.h -@@ -121,9 +121,6 @@ NvBool RmGpuHasIOSpaceEnabled (nv_state_t *); - void RmFreeUnusedClients (nv_state_t *, nv_file_private_t *); - NV_STATUS RmIoctl (nv_state_t *, nv_file_private_t *, NvU32, void *, NvU32); - --NV_STATUS RmAllocOsEvent (NvHandle, nv_file_private_t *, NvU32); --NV_STATUS RmFreeOsEvent (NvHandle, NvU32); -- - void RmI2cAddGpuPorts(nv_state_t *); - - NV_STATUS RmInitX86EmuState(OBJGPU *); -@@ -141,9 +138,6 @@ int amd_msr_c0011022_incompatible(OBJOS *); - - NV_STATUS rm_get_adapter_status (nv_state_t *, NvU32 *); - --NV_STATUS rm_alloc_os_event (NvHandle, nv_file_private_t *, NvU32); --NV_STATUS rm_free_os_event 
(NvHandle, NvU32); --NV_STATUS rm_get_event_data (nv_file_private_t *, NvP64, NvU32 *); - void rm_client_free_os_events (NvHandle); - - NV_STATUS rm_create_mmap_context (NvHandle, NvHandle, NvHandle, NvP64, NvU64, NvU64, NvU32, NvU32); -diff --git a/src/nvidia/arch/nvalloc/unix/src/escape.c b/src/nvidia/arch/nvalloc/unix/src/escape.c -index de099513..1046b19f 100644 ---- a/src/nvidia/arch/nvalloc/unix/src/escape.c -+++ b/src/nvidia/arch/nvalloc/unix/src/escape.c -@@ -677,52 +677,6 @@ NV_STATUS RmIoctl( - break; - } - -- case NV_ESC_ALLOC_OS_EVENT: -- { -- nv_ioctl_alloc_os_event_t *pApi = data; -- -- if (dataSize != sizeof(nv_ioctl_alloc_os_event_t)) -- { -- rmStatus = NV_ERR_INVALID_ARGUMENT; -- goto done; -- } -- -- pApi->Status = rm_alloc_os_event(pApi->hClient, -- nvfp, -- pApi->fd); -- break; -- } -- -- case NV_ESC_FREE_OS_EVENT: -- { -- nv_ioctl_free_os_event_t *pApi = data; -- -- if (dataSize != sizeof(nv_ioctl_free_os_event_t)) -- { -- rmStatus = NV_ERR_INVALID_ARGUMENT; -- goto done; -- } -- -- pApi->Status = rm_free_os_event(pApi->hClient, pApi->fd); -- break; -- } -- -- case NV_ESC_RM_GET_EVENT_DATA: -- { -- NVOS41_PARAMETERS *pApi = data; -- -- if (dataSize != sizeof(NVOS41_PARAMETERS)) -- { -- rmStatus = NV_ERR_INVALID_ARGUMENT; -- goto done; -- } -- -- pApi->status = rm_get_event_data(nvfp, -- pApi->pEvent, -- &pApi->MoreEvents); -- break; -- } -- - case NV_ESC_STATUS_CODE: - { - nv_state_t *pNv; -diff --git a/src/nvidia/arch/nvalloc/unix/src/osapi.c b/src/nvidia/arch/nvalloc/unix/src/osapi.c -index fd312466..51249750 100644 ---- a/src/nvidia/arch/nvalloc/unix/src/osapi.c -+++ b/src/nvidia/arch/nvalloc/unix/src/osapi.c -@@ -25,6 +25,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -406,6 +407,39 @@ static void free_os_events( - portSyncSpinlockRelease(nv->event_spinlock); - } - -+static NV_STATUS get_os_event_data( -+ nv_file_private_t *nvfp, -+ NvP64 pEvent, -+ NvU32 *MoreEvents -+) -+{ -+ nv_event_t nv_event; -+ 
NvUnixEvent *nv_unix_event; -+ NV_STATUS status; -+ -+ status = os_alloc_mem((void**)&nv_unix_event, sizeof(NvUnixEvent)); -+ if (status != NV_OK) -+ return status; -+ -+ status = nv_get_event(nvfp, &nv_event, MoreEvents); -+ if (status != NV_OK) -+ { -+ status = NV_ERR_OPERATING_SYSTEM; -+ goto done; -+ } -+ -+ os_mem_set(nv_unix_event, 0, sizeof(NvUnixEvent)); -+ nv_unix_event->hObject = nv_event.hObject; -+ nv_unix_event->NotifyIndex = nv_event.index; -+ nv_unix_event->info32 = nv_event.info32; -+ nv_unix_event->info16 = nv_event.info16; -+ -+ status = os_memcpy_to_user(NvP64_VALUE(pEvent), nv_unix_event, sizeof(NvUnixEvent)); -+done: -+ os_free_mem(nv_unix_event); -+ return status; -+} -+ - void rm_client_free_os_events( - NvHandle client - ) -@@ -482,6 +516,12 @@ static NV_STATUS allocate_os_event( - goto done; - } - -+ new_event->hParent = hParent; -+ new_event->nvfp = nvfp; -+ new_event->fd = fd; -+ new_event->active = NV_TRUE; -+ new_event->refcount = 0; -+ - portSyncSpinlockAcquire(nv->event_spinlock); - for (event = nv->event_list; event; event = event->next) - { -@@ -496,45 +536,26 @@ static NV_STATUS allocate_os_event( - - new_event->next = nv->event_list; - nv->event_list = new_event; -+ nvfp->bCleanupRmapi = NV_TRUE; - portSyncSpinlockRelease(nv->event_spinlock); - - done: - if (status == NV_OK) - { -- new_event->hParent = hParent; -- new_event->nvfp = nvfp; -- new_event->fd = fd; -- new_event->active = NV_TRUE; -- new_event->refcount = 0; -- -- nvfp->bCleanupRmapi = NV_TRUE; -- - NV_PRINTF(LEVEL_INFO, "allocated OS event:\n"); - NV_PRINTF(LEVEL_INFO, " hParent: 0x%x\n", hParent); - NV_PRINTF(LEVEL_INFO, " fd: %d\n", fd); - } - else - { -+ NV_PRINTF(LEVEL_ERROR, "failed to allocate OS event: 0x%08x\n", status); -+ status = NV_ERR_INSUFFICIENT_RESOURCES; - portMemFree(new_event); - } - - return status; - } - --NV_STATUS RmAllocOsEvent( -- NvHandle hParent, -- nv_file_private_t *nvfp, -- NvU32 fd --) --{ -- if (NV_OK != allocate_os_event(hParent, nvfp, 
fd)) -- { -- NV_PRINTF(LEVEL_ERROR, "failed to allocate OS event\n"); -- return NV_ERR_INSUFFICIENT_RESOURCES; -- } -- return NV_OK; --} -- - static NV_STATUS free_os_event( - NvHandle hParent, - NvU32 fd -@@ -585,18 +606,6 @@ static NV_STATUS free_os_event( - return result; - } - --NV_STATUS RmFreeOsEvent( -- NvHandle hParent, -- NvU32 fd --) --{ -- if (NV_OK != free_os_event(hParent, fd)) -- { -- return NV_ERR_INVALID_EVENT; -- } -- return NV_OK; --} -- - static void RmExecuteWorkItem( - void *pWorkItem - ) -@@ -656,40 +665,6 @@ done: - portMemFree((void *)pWi); - } - --static NV_STATUS RmGetEventData( -- nv_file_private_t *nvfp, -- NvP64 pEvent, -- NvU32 *MoreEvents, -- NvBool bUserModeArgs --) --{ -- NV_STATUS RmStatus; -- NvUnixEvent *pKernelEvent = NULL; -- nv_event_t nv_event; -- RMAPI_PARAM_COPY paramCopy; -- -- RmStatus = nv_get_event(nvfp, &nv_event, MoreEvents); -- if (RmStatus != NV_OK) -- return NV_ERR_OPERATING_SYSTEM; -- -- // setup for access to client's parameters -- RMAPI_PARAM_COPY_INIT(paramCopy, pKernelEvent, pEvent, 1, sizeof(NvUnixEvent)); -- RmStatus = rmapiParamsAcquire(¶mCopy, bUserModeArgs); -- if (RmStatus != NV_OK) -- return NV_ERR_OPERATING_SYSTEM; -- -- pKernelEvent->hObject = nv_event.hObject; -- pKernelEvent->NotifyIndex = nv_event.index; -- pKernelEvent->info32 = nv_event.info32; -- pKernelEvent->info16 = nv_event.info16; -- -- // release client buffer access, with copyout as needed -- if (rmapiParamsRelease(¶mCopy) != NV_OK) -- return NV_ERR_OPERATING_SYSTEM; -- -- return NV_OK; --} -- - static NV_STATUS RmAccessRegistry( - NvHandle hClient, - NvHandle hObject, -@@ -2738,16 +2713,68 @@ NV_STATUS NV_API_CALL rm_ioctl( - NvU32 dataSize - ) - { -- NV_STATUS rmStatus; -+ NV_STATUS rmStatus = NV_OK; - THREAD_STATE_NODE threadState; - void *fp; - - NV_ENTER_RM_RUNTIME(sp,fp); -- threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); - -- rmStatus = RmIoctl(pNv, nvfp, Command, pData, dataSize); -+ // -+ // Some ioctls are handled 
entirely inside the OS layer and don't need to -+ // suffer the overhead of calling into RM core. -+ // -+ switch (Command) -+ { -+ case NV_ESC_ALLOC_OS_EVENT: -+ { -+ nv_ioctl_alloc_os_event_t *pApi = pData; -+ -+ if (dataSize != sizeof(nv_ioctl_alloc_os_event_t)) -+ { -+ rmStatus = NV_ERR_INVALID_ARGUMENT; -+ break; -+ } -+ -+ pApi->Status = allocate_os_event(pApi->hClient, nvfp, pApi->fd); -+ break; -+ } -+ case NV_ESC_FREE_OS_EVENT: -+ { -+ nv_ioctl_free_os_event_t *pApi = pData; -+ -+ if (dataSize != sizeof(nv_ioctl_free_os_event_t)) -+ { -+ rmStatus = NV_ERR_INVALID_ARGUMENT; -+ break; -+ } -+ -+ pApi->Status = free_os_event(pApi->hClient, pApi->fd); -+ break; -+ } -+ case NV_ESC_RM_GET_EVENT_DATA: -+ { -+ NVOS41_PARAMETERS *pApi = pData; -+ -+ if (dataSize != sizeof(NVOS41_PARAMETERS)) -+ { -+ rmStatus = NV_ERR_INVALID_ARGUMENT; -+ break; -+ } -+ -+ pApi->status = get_os_event_data(nvfp, -+ pApi->pEvent, -+ &pApi->MoreEvents); -+ break; -+ } -+ default: -+ { -+ threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); -+ rmStatus = RmIoctl(pNv, nvfp, Command, pData, dataSize); -+ threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); -+ break; -+ } -+ } - -- threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); - NV_EXIT_RM_RUNTIME(sp,fp); - - return rmStatus; -@@ -2882,65 +2909,6 @@ void NV_API_CALL rm_unbind_lock( - NV_EXIT_RM_RUNTIME(sp,fp); - } - --NV_STATUS rm_alloc_os_event( -- NvHandle hClient, -- nv_file_private_t *nvfp, -- NvU32 fd --) --{ -- NV_STATUS RmStatus; -- -- // LOCK: acquire API lock -- if ((RmStatus = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_EVENT)) == NV_OK) -- { -- RmStatus = RmAllocOsEvent(hClient, nvfp, fd); -- -- // UNLOCK: release API lock -- rmapiLockRelease(); -- } -- -- return RmStatus; --} -- --NV_STATUS rm_free_os_event( -- NvHandle hClient, -- NvU32 fd --) --{ -- NV_STATUS RmStatus; -- -- // LOCK: acquire API lock -- if ((RmStatus = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_EVENT)) == NV_OK) -- { 
-- RmStatus = RmFreeOsEvent(hClient, fd); -- -- // UNLOCK: release API lock -- rmapiLockRelease(); -- } -- -- return RmStatus; --} -- --NV_STATUS rm_get_event_data( -- nv_file_private_t *nvfp, -- NvP64 pEvent, -- NvU32 *MoreEvents --) --{ -- NV_STATUS RmStatus; -- -- // LOCK: acquire API lock -- if ((RmStatus = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_EVENT)) == NV_OK) -- { -- RmStatus = RmGetEventData(nvfp, pEvent, MoreEvents, NV_TRUE); -- -- // UNLOCK: release API lock -- rmapiLockRelease(); -- } -- -- return RmStatus; --} -- - NV_STATUS NV_API_CALL rm_read_registry_dword( - nvidia_stack_t *sp, - nv_state_t *nv, -diff --git a/src/nvidia/exports_link_command.txt b/src/nvidia/exports_link_command.txt -index de3cf86d..b92185de 100644 ---- a/src/nvidia/exports_link_command.txt -+++ b/src/nvidia/exports_link_command.txt -@@ -1,6 +1,5 @@ - --undefined=rm_disable_adapter - --undefined=rm_execute_work_item ----undefined=rm_free_os_event - --undefined=rm_free_private_state - --undefined=rm_cleanup_file_private - --undefined=rm_unbind_lock -diff --git a/src/nvidia/src/kernel/disp/disp_sw.c b/src/nvidia/src/kernel/disp/disp_sw.c -index 03ce58f7..bb7396b6 100644 ---- a/src/nvidia/src/kernel/disp/disp_sw.c -+++ b/src/nvidia/src/kernel/disp/disp_sw.c -@@ -141,8 +141,15 @@ NV_STATUS dispswReleaseSemaphoreAndNotifierFill - NvBool bFound = NV_FALSE; - NV_STATUS status; - -+#define PRINT_INTERVAL 3600 // At 60Hz, this will emit about once per minute. 
-+ - if (flags & F_SEMAPHORE_ADDR_VALID) - { -+ static NvU64 counter; -+ if ((++counter % PRINT_INTERVAL) == 0) { -+ NV_PRINTF(LEVEL_ERROR, "XXXMT: NVRM debugging - F_SEMAPHORE_ADDR_VALID = %llu\n", counter); -+ } -+ - bFound = CliGetDmaMappingInfo(RES_GET_CLIENT(pDevice), - RES_GET_HANDLE(pDevice), - vaSpace, -@@ -154,6 +161,11 @@ NV_STATUS dispswReleaseSemaphoreAndNotifierFill - } - else if (flags & F_SEMAPHORE_RELEASE) - { -+ static NvU64 counter; -+ if ((++counter % PRINT_INTERVAL) == 0) { -+ NV_PRINTF(LEVEL_ERROR, "XXXMT: NVRM debugging - F_SEMAPHORE_RELEASE = %llu\n", counter); -+ } -+ - status = semaphoreFillGPUVA(pGpu, - pDevice, - vaSpace, -@@ -165,6 +177,11 @@ NV_STATUS dispswReleaseSemaphoreAndNotifierFill - } - else if (flags & F_NOTIFIER_FILL) - { -+ static NvU64 counter; -+ if ((++counter % PRINT_INTERVAL) == 0) { -+ NV_PRINTF(LEVEL_ERROR, "XXXMT: NVRM debugging - F_NOTIFIER_FILL = %llu\n", counter); -+ } -+ - status = notifyFillNotifierGPUVA(pGpu, - pDevice, - vaSpace, -@@ -175,5 +192,11 @@ NV_STATUS dispswReleaseSemaphoreAndNotifierFill - NV9072_NOTIFIERS_NOTIFY_ON_VBLANK /* Index */); - return status; - } -+ else { -+ static NvU64 counter; -+ if ((++counter % PRINT_INTERVAL) == 0) { -+ NV_PRINTF(LEVEL_ERROR, "XXXMT: NVRM debugging - ??? 
0x%08x = %llu\n", flags, counter); -+ } -+ } - return NV9072_NOTIFICATION_STATUS_DONE_SUCCESS; - } --- -2.45.2 - - --- a/nvidia-drm/nvidia-drm-linux.c -+++ b/nvidia-drm/nvidia-drm-linux.c -@@ -31,13 +31,13 @@ - - MODULE_PARM_DESC( - modeset, -- "Enable atomic kernel modesetting (1 = enable, 0 = disable (default))"); -+ "Enable atomic kernel modesetting (1 = enable (default), 0 = disable)"); - module_param_named(modeset, nv_drm_modeset_module_param, bool, 0400); - - #if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) - MODULE_PARM_DESC( - fbdev, -- "Create a framebuffer device (1 = enable, 0 = disable (default)) (EXPERIMENTAL)"); -+ "Create a framebuffer device (1 = enable (default), 0 = disable) (EXPERIMENTAL)"); - module_param_named(fbdev, nv_drm_fbdev_module_param, bool, 0400); - #endif - ---- a/nvidia-drm/nvidia-drm-os-interface.c -+++ b/nvidia-drm/nvidia-drm-os-interface.c -@@ -41,8 +41,8 @@ - #include - #endif - --bool nv_drm_modeset_module_param = false; --bool nv_drm_fbdev_module_param = false; -+bool nv_drm_modeset_module_param = true; -+bool nv_drm_fbdev_module_param = true; - - void *nv_drm_calloc(size_t nmemb, size_t size) - { - ---- a/src/nvidia-modeset/Makefile -+++ b/src/nvidia-modeset/Makefile -@@ -142,6 +142,7 @@ ifeq ($(TARGET_ARCH),x86_64) - CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -fno-jump-tables) - CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mindirect-branch=thunk-extern) - CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mindirect-branch-register) -+ CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mharden-sls=all) - endif - - CFLAGS += $(CONDITIONAL_CFLAGS) \ No newline at end of file diff --git a/patches/nobara/0001-Allow-to-set-custom-USB-pollrate-for-specific-device.patch b/patches/nobara/0001-Allow-to-set-custom-USB-pollrate-for-specific-device.patch deleted file mode 100644 index ecd0304..0000000 --- a/patches/nobara/0001-Allow-to-set-custom-USB-pollrate-for-specific-device.patch +++ /dev/null @@ -1,258 +0,0 @@ -From 498e88ae626be4f523063c8a7027b4b02eca31d2 
Mon Sep 17 00:00:00 2001 -From: GloriousEggroll -Date: Tue, 17 Jan 2023 12:08:46 -0700 -Subject: [PATCH] Allow to set custom USB pollrate for specific devices like - so: usbcore.interrupt_interval_override=045e:00db:16,1bcf:0005:1 - ---- - .../admin-guide/kernel-parameters.txt | 8 + - drivers/usb/core/config.c | 170 +++++++++++++++++- - drivers/usb/core/usb.c | 1 + - drivers/usb/core/usb.h | 1 + - 4 files changed, 179 insertions(+), 1 deletion(-) - -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index dbd26fde4..c9b8b80af 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -6552,6 +6552,14 @@ - delay after resetting its port); - Example: quirks=0781:5580:bk,0a5c:5834:gij - -+ usbcore.interrupt_interval_override= -+ [USB] A list of USB devices for which a different polling -+ interval than the default shall be used on all interrupt-type -+ endpoints. The format is VendorID:ProductID:interval, with -+ the vendor and product ids specified hexadecimally, and the -+ interval decimally in milliseconds. -+ Example: interrupt_interval_override=045e:00db:16,1bcf:0005:2 -+ - usbhid.mousepoll= - [USBHID] The interval which mice are to be polled at. - -diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c -index 48bc8a481..84bd550ad 100644 ---- a/drivers/usb/core/config.c -+++ b/drivers/usb/core/config.c -@@ -19,6 +19,149 @@ - #define USB_MAXCONFIG 8 /* Arbitrary limit */ - - -+/* A struct associated with the interrupt_interval_override module parameter, representing -+ an user's choice to force a specific interrupt interval upon all interrupt endpoints of -+ a certain device. 
*/ -+struct interrupt_interval_override { -+ /* The vendor ID of the device of which the interrupt interval shall be overridden */ -+ u16 vendor; -+ /* The product ID of the device of which the interrupt interval shall be overridden */ -+ u16 product; -+ /* The new interval measured in milliseconds that shall be given to all endpoints of type interrupt on said device */ -+ unsigned int interval; -+}; -+ -+static DEFINE_MUTEX(interrupt_interval_override_mutex); -+static char interrupt_interval_override_param[128]; -+static struct interrupt_interval_override *interrupt_interval_override_list = NULL; -+static size_t interrupt_interval_override_count = 0; -+ -+static int interrupt_interval_override_param_set(const char *value, const struct kernel_param *kp) -+{ -+ const char *p; -+ unsigned short vendor, product; -+ unsigned int interval; -+ struct interrupt_interval_override* list; -+ struct interrupt_interval_override param; -+ size_t count, max_count, i, len; -+ int err, res; -+ -+ mutex_lock(&interrupt_interval_override_mutex); -+ -+ if (!value || !*value) { -+ /* Unset the current variable. */ -+ kfree(interrupt_interval_override_list); -+ interrupt_interval_override_list = NULL; -+ interrupt_interval_override_count = 0; -+ param_set_copystring(value, kp); /* Does not fail: the empty string is short enough to fit. */ -+ mutex_unlock(&interrupt_interval_override_mutex); -+ return 0; -+ } -+ -+ /* Compute an upper bound on the amount of entries we need. */ -+ for (max_count = 1, i = 0; value[i]; i++) { -+ if (value[i] == ',') -+ max_count++; -+ } -+ -+ /* Ensure we can allocate enough memory before overwriting the global variables. 
*/ -+ list = kcalloc(max_count, -+ sizeof(struct interrupt_interval_override), -+ GFP_KERNEL); -+ -+ if (!list) { -+ mutex_unlock(&interrupt_interval_override_mutex); -+ return -ENOMEM; -+ } -+ -+ err = param_set_copystring(value, kp); -+ if (err) { -+ kfree(list); -+ mutex_unlock(&interrupt_interval_override_mutex); -+ return err; -+ } -+ -+ /* Parse the parameter. Example of a valid parameter: 045e:00db:16,1bcf:0005:2 */ -+ for (count = 0, p = (const char*)value; p && *p;) { -+ res = sscanf(p, "%hx:%hx:%d%zn", &vendor, &product, &interval, &len); -+ -+ /* Check whether all variables (vendor, product, interval, len) were assigned. -+ %zn does not increase the assignment count, so we need to check for value 3 instead of 4. -+ %zn does not consume input either, so setting len shouldn't fail if interval has been properly set. */ -+ if (res != 3) { -+ pr_warn("Error while parsing USB interrupt interval override parameter %s.\n", value); -+ break; -+ } -+ -+ param.vendor = (u16)vendor; -+ param.product = (u16)product; -+ param.interval = interval; -+ list[count++] = param; -+ -+ p += len; -+ if (*p == ',' && *(p+1) != '\0') { -+ p++; -+ continue; -+ } else if(*p == '\0' || (*p == '\n' && *(p+1) == '\0')) { -+ break; -+ } else { -+ pr_warn("Error while parsing USB interrupt interval override parameter %s.\n", value); -+ break; -+ } -+ } -+ -+ /* Overwrite the global variables with the local ones. 
*/ -+ kfree(interrupt_interval_override_list); -+ interrupt_interval_override_list = list; -+ interrupt_interval_override_count = count; -+ mutex_unlock(&interrupt_interval_override_mutex); -+ return 0; -+} -+ -+static const struct kernel_param_ops interrupt_interval_override_param_ops = { -+ .set = interrupt_interval_override_param_set, -+ .get = param_get_string, -+}; -+ -+static struct kparam_string interrupt_interval_override_param_string = { -+ .maxlen = sizeof(interrupt_interval_override_param), -+ .string = interrupt_interval_override_param, -+}; -+ -+device_param_cb(interrupt_interval_override, -+ &interrupt_interval_override_param_ops, -+ &interrupt_interval_override_param_string, -+ 0644); -+MODULE_PARM_DESC(interrupt_interval_override, -+ "Override the polling interval of all interrupt-type endpoints of a specific USB" -+ " device by specifying interrupt_interval_override=vendorID:productID:interval."); -+ -+/* Given an USB device, this checks whether the user has specified they want to override the interrupt -+ polling interval on all interrupt-type endpoints of said device. -+ -+ This function returns the user-desired amount of milliseconds between interrupts on said endpoint. -+ If this function returns zero, the device-requested interrupt interval should be used. 
*/ -+static unsigned int usb_check_interrupt_interval_override(struct usb_device* udev) -+{ -+ size_t i; -+ unsigned int res; -+ u16 vendor = le16_to_cpu(udev->descriptor.idVendor); -+ u16 product = le16_to_cpu(udev->descriptor.idProduct); -+ -+ mutex_lock(&interrupt_interval_override_mutex); -+ for (i = 0; i < interrupt_interval_override_count; i++) { -+ if (interrupt_interval_override_list[i].vendor == vendor -+ && interrupt_interval_override_list[i].product == product) { -+ -+ res = interrupt_interval_override_list[i].interval; -+ mutex_unlock(&interrupt_interval_override_mutex); -+ return res; -+ } -+ } -+ mutex_unlock(&interrupt_interval_override_mutex); -+ return 0; -+} -+ - static inline const char *plural(int n) - { - return (n == 1 ? "" : "s"); -@@ -261,7 +404,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, - struct usb_endpoint_descriptor *d; - struct usb_host_endpoint *endpoint; - int n, i, j, retval; -- unsigned int maxp; -+ unsigned int maxp, ival; - const unsigned short *maxpacket_maxes; - - d = (struct usb_endpoint_descriptor *) buffer; -@@ -386,6 +529,23 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, - endpoint->desc.bInterval = n; - } - -+ /* Override the interrupt polling interval if a module parameter tells us to do so. */ -+ if (usb_endpoint_xfer_int(d)) { -+ ival = usb_check_interrupt_interval_override(udev); -+ if (ival > 0) { -+ switch (udev->speed) { -+ case USB_SPEED_SUPER_PLUS: -+ case USB_SPEED_SUPER: -+ case USB_SPEED_HIGH: -+ endpoint->desc.bInterval = fls(ival) + 3; -+ break; -+ default: /* USB_SPEED_FULL or _LOW */ -+ endpoint->desc.bInterval = ival; -+ break; -+ } -+ } -+ } -+ - /* Some buggy low-speed devices have Bulk endpoints, which is - * explicitly forbidden by the USB spec. In an attempt to make - * them usable, we will try treating them as Interrupt endpoints. 
-@@ -1092,3 +1252,11 @@ int usb_get_bos_descriptor(struct usb_device *dev) - usb_release_bos_descriptor(dev); - return ret; - } -+ -+void usb_release_interrupt_interval_override_list(void) -+{ -+ mutex_lock(&interrupt_interval_override_mutex); -+ kfree(interrupt_interval_override_list); -+ interrupt_interval_override_list = NULL; -+ mutex_unlock(&interrupt_interval_override_mutex); -+} -diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c -index 11b15d7b3..ec52c6322 100644 ---- a/drivers/usb/core/usb.c -+++ b/drivers/usb/core/usb.c -@@ -1066,6 +1066,7 @@ static void __exit usb_exit(void) - return; - - usb_release_quirk_list(); -+ usb_release_interrupt_interval_override_list(); - usb_deregister_device_driver(&usb_generic_driver); - usb_major_cleanup(); - usb_deregister(&usbfs_driver); -diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h -index 82538daac..b6faa897c 100644 ---- a/drivers/usb/core/usb.h -+++ b/drivers/usb/core/usb.h -@@ -37,6 +37,7 @@ extern void usb_authorize_interface(struct usb_interface *); - extern void usb_detect_quirks(struct usb_device *udev); - extern void usb_detect_interface_quirks(struct usb_device *udev); - extern void usb_release_quirk_list(void); -+extern void usb_release_interrupt_interval_override_list(void); - extern bool usb_endpoint_is_ignored(struct usb_device *udev, - struct usb_host_interface *intf, - struct usb_endpoint_descriptor *epd); --- -2.39.0 - diff --git a/patches/nobara/0001-Revert-PCI-Add-a-REBAR-size-quirk-for-Sapphire-RX-56.patch b/patches/nobara/0001-Revert-PCI-Add-a-REBAR-size-quirk-for-Sapphire-RX-56.patch deleted file mode 100644 index dd8f961..0000000 --- a/patches/nobara/0001-Revert-PCI-Add-a-REBAR-size-quirk-for-Sapphire-RX-56.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 4b4ce124699c160925e5fdeb147a78f79d38351f Mon Sep 17 00:00:00 2001 -From: Simon May -Date: Sun, 19 Sep 2021 23:45:59 +0200 -Subject: [PATCH] Revert "PCI: Add a REBAR size quirk for Sapphire RX 5600 XT - Pulse" - -This reverts 
commit 907830b0fc9e374d00f3c83de5e426157b482c01. ---- - drivers/pci/pci.c | 9 +-------- - 1 file changed, 1 insertion(+), 8 deletions(-) - -diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c -index a607f277c..3174fa871 100644 ---- a/drivers/pci/pci.c -+++ b/drivers/pci/pci.c -@@ -3755,14 +3755,8 @@ u32 pci_rebar_get_possible_sizes(struct pci_dev *pdev, int bar) - return 0; - - pci_read_config_dword(pdev, pos + PCI_REBAR_CAP, &cap); -- cap = FIELD_GET(PCI_REBAR_CAP_SIZES, cap); - -- /* Sapphire RX 5600 XT Pulse has an invalid cap dword for BAR 0 */ -- if (pdev->vendor == PCI_VENDOR_ID_ATI && pdev->device == 0x731f && -- bar == 0 && cap == 0x700) -- return 0x3f00; -- -- return cap; -+ return (cap & PCI_REBAR_CAP_SIZES) >> 4; - } - EXPORT_SYMBOL(pci_rebar_get_possible_sizes); - --- -2.30.2 - diff --git a/patches/nobara/0001-Revert-nvme-pci-drop-redundant-pci_enable_pcie_error.patch b/patches/nobara/0001-Revert-nvme-pci-drop-redundant-pci_enable_pcie_error.patch deleted file mode 100644 index 6eac856..0000000 --- a/patches/nobara/0001-Revert-nvme-pci-drop-redundant-pci_enable_pcie_error.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 7d86ca8db51f6b75b5c1470d6294c6f24221f560 Mon Sep 17 00:00:00 2001 -From: GloriousEggroll -Date: Mon, 30 Oct 2023 22:36:19 -0600 -Subject: [PATCH] Revert "nvme-pci: drop redundant - pci_enable_pcie_error_reporting()" - -This reverts commits: -1ad11eafc63ac16e667853bee4273879226d2d1b -7ec4b34be4234599cf1241ef807cdb7c3636f6fe -69b264df8a412820e98867dbab871c6526c5e5aa - ---- - drivers/nvme/host/pci.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c -index 3f0c9ee09a12..bc11bfe6f87a 100644 ---- a/drivers/nvme/host/pci.c -+++ b/drivers/nvme/host/pci.c -@@ -5,6 +5,7 @@ - */ - - #include -+#include - #include - #include - #include -@@ -2537,6 +2538,7 @@ static int nvme_pci_enable(struct nvme_dev *dev) - - nvme_map_cmb(dev); - -+ pci_enable_pcie_error_reporting(pdev); - 
pci_save_state(pdev); - - result = nvme_pci_configure_admin_queue(dev); -@@ -2601,8 +2603,10 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) - nvme_suspend_io_queues(dev); - nvme_suspend_queue(dev, 0); - pci_free_irq_vectors(pdev); -- if (pci_is_enabled(pdev)) -+ if (pci_is_enabled(pdev)) { -+ pci_disable_pcie_error_reporting(pdev); - pci_disable_device(pdev); -+ } - nvme_reap_pending_cqes(dev); - - nvme_cancel_tagset(&dev->ctrl); --- -2.41.0 -diff --git a/include/linux/aer.h b/include/linux/aer.h -index 29cc10220..94ce49a5f 100644 ---- a/include/linux/aer.h -+++ b/include/linux/aer.h -@@ -41,9 +41,20 @@ struct aer_capability_regs { - }; - - #if defined(CONFIG_PCIEAER) -+/* PCIe port driver needs this function to enable AER */ -+int pci_enable_pcie_error_reporting(struct pci_dev *dev); -+int pci_disable_pcie_error_reporting(struct pci_dev *dev); - int pci_aer_clear_nonfatal_status(struct pci_dev *dev); - int pcie_aer_is_native(struct pci_dev *dev); - #else -+static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev) -+{ -+ return -EINVAL; -+} -+static inline int pci_disable_pcie_error_reporting(struct pci_dev *dev) -+{ -+ return -EINVAL; -+} - static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) - { - return -EINVAL; - -diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c -index 9c8fd69ae..0dc7be481 100644 ---- a/drivers/pci/pcie/aer.c -+++ b/drivers/pci/pcie/aer.c -@@ -231,7 +231,7 @@ int pcie_aer_is_native(struct pci_dev *dev) - } - EXPORT_SYMBOL_NS_GPL(pcie_aer_is_native, CXL); - --static int pci_enable_pcie_error_reporting(struct pci_dev *dev) -+int pci_enable_pcie_error_reporting(struct pci_dev *dev) - { - int rc; - -@@ -241,6 +241,19 @@ static int pci_enable_pcie_error_reporting(struct pci_dev *dev) - rc = pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS); - return pcibios_err_to_errno(rc); - } -+EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting); -+ -+int 
pci_disable_pcie_error_reporting(struct pci_dev *dev) -+{ -+ int rc; -+ -+ if (!pcie_aer_is_native(dev)) -+ return -EIO; -+ -+ rc = pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS); -+ return pcibios_err_to_errno(rc); -+} -+EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting); - - int pci_aer_clear_nonfatal_status(struct pci_dev *dev) - { diff --git a/patches/nobara/0001-Set-amdgpu.ppfeaturemask-0xffffffff-as-default.patch b/patches/nobara/0001-Set-amdgpu.ppfeaturemask-0xffffffff-as-default.patch deleted file mode 100644 index 26e3ab7..0000000 --- a/patches/nobara/0001-Set-amdgpu.ppfeaturemask-0xffffffff-as-default.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 9179080ffaaf1d438db6e0a5a37bdf8dafe233a6 Mon Sep 17 00:00:00 2001 -From: Thomas Crider -Date: Mon, 27 Nov 2023 16:13:13 -0500 -Subject: [PATCH] Set amdgpu.ppfeaturemask=0xffffffff as default - ---- - drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -index e06009966..4e791eb8f 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -@@ -158,7 +158,7 @@ bool enforce_isolation; - * OverDrive(bit 14) disabled by default - * GFX DCS(bit 19) disabled by default - */ --uint amdgpu_pp_feature_mask = 0xfff7bfff; -+uint amdgpu_pp_feature_mask = 0xffffffff; - uint amdgpu_force_long_training; - int amdgpu_lbpw = -1; - int amdgpu_compute_multipipe = -1; --- -2.43.0 - diff --git a/patches/nobara/0001-acpi-proc-idle-skip-dummy-wait.patch b/patches/nobara/0001-acpi-proc-idle-skip-dummy-wait.patch deleted file mode 100644 index eb3cf54..0000000 --- a/patches/nobara/0001-acpi-proc-idle-skip-dummy-wait.patch +++ /dev/null @@ -1,125 +0,0 @@ -Processors based on the Zen microarchitecture support IOPORT based deeper -C-states. 
The idle driver reads the acpi_gbl_FADT.xpm_timer_block.address -in the IOPORT based C-state exit path which is claimed to be a -"Dummy wait op" and has been around since ACPI introduction to Linux -dating back to Andy Grover's Mar 14, 2002 posting [1]. -The comment above the dummy operation was elaborated by Andreas Mohr back -in 2006 in commit b488f02156d3d ("ACPI: restore comment justifying 'extra' -P_LVLx access") [2] where the commit log claims: -"this dummy read was about: STPCLK# doesn't get asserted in time on -(some) chipsets, which is why we need to have a dummy I/O read to delay -further instruction processing until the CPU is fully stopped." - -However, sampling certain workloads with IBS on AMD Zen3 system shows -that a significant amount of time is spent in the dummy op, which -incorrectly gets accounted as C-State residency. A large C-State -residency value can prime the cpuidle governor to recommend a deeper -C-State during the subsequent idle instances, starting a vicious cycle, -leading to performance degradation on workloads that rapidly switch -between busy and idle phases. - -One such workload is tbench where a massive performance degradation can -be observed during certain runs. 
Following are some statistics gathered -by running tbench with 128 clients, on a dual socket (2 x 64C/128T) Zen3 -system with the baseline kernel, baseline kernel keeping C2 disabled, -and baseline kernel with this patch applied keeping C2 enabled: - -baseline kernel was tip:sched/core at -commit f3dd3f674555 ("sched: Remove the limitation of WF_ON_CPU on -wakelist if wakee cpu is idle") - -Kernel : baseline baseline + C2 disabled baseline + patch - -Min (MB/s) : 2215.06 33072.10 (+1393.05%) 33016.10 (+1390.52%) -Max (MB/s) : 32938.80 34399.10 34774.50 -Median (MB/s) : 32191.80 33476.60 33805.70 -AMean (MB/s) : 22448.55 33649.27 (+49.89%) 33865.43 (+50.85%) -AMean Stddev : 17526.70 680.14 880.72 -AMean CoefVar : 78.07% 2.02% 2.60% - -The data shows there are edge cases that can cause massive regressions -in case of tbench. Profiling the bad runs with IBS shows a significant -amount of time being spent in acpi_idle_do_entry method: - -Overhead Command Shared Object Symbol - 74.76% swapper [kernel.kallsyms] [k] acpi_idle_do_entry - 0.71% tbench [kernel.kallsyms] [k] update_sd_lb_stats.constprop.0 - 0.69% tbench_srv [kernel.kallsyms] [k] update_sd_lb_stats.constprop.0 - 0.49% swapper [kernel.kallsyms] [k] psi_group_change - ... - -Annotation of acpi_idle_do_entry method reveals almost all the time in -acpi_idle_do_entry is spent on the port I/O in wait_for_freeze(): - - 0.14 │ in (%dx),%al # <------ First "in" corresponding to inb(cx->address) - 0.51 │ mov 0x144d64d(%rip),%rax - 0.00 │ test $0x80000000,%eax - │ ↓ jne 62 # <------ Skip if running in guest - 0.00 │ mov 0x19800c3(%rip),%rdx - 99.33 │ in (%dx),%eax # <------ Second "in" corresponding to inl(acpi_gbl_FADT.xpm_timer_block.address) - 0.00 │62: mov -0x8(%rbp),%r12 - 0.00 │ leave - 0.00 │ ← ret - -This overhead is reflected in the C2 residency on the test system where -C2 is an IOPORT based C-State. 
The total C-state residency reported by -"cpupower idle-info" on CPU0 for good and bad case over the 80s tbench -run is as follows (all numbers are in microseconds): - - Good Run Bad Run - (Baseline) - -POLL: 43338 6231 (-85.62%) -C1 (MWAIT Based): 23576156 363861 (-98.45%) -C2 (IOPORT Based): 10781218 77027280 (+614.45%) - -The larger residency value in bad case leads to the system recommending -C2 state again for subsequent idle instances. The pattern lasts till the -end of the tbench run. Following is the breakdown of "entry_method" -passed to acpi_idle_do_entry during good run and bad run: - - Good Run Bad Run - (Baseline) - -Number of times acpi_idle_do_entry was called: 6149573 6149050 (-0.01%) - |-> Number of times entry_method was "ACPI_CSTATE_FFH": 6141494 88144 (-98.56%) - |-> Number of times entry_method was "ACPI_CSTATE_HALT": 0 0 (+0.00%) - |-> Number of times entry_method was "ACPI_CSTATE_SYSTEMIO": 8079 6060906 (+74920.49%) - -For processors based on the Zen microarchitecture, this dummy wait op is -unnecessary and can be skipped when choosing IOPORT based C-States to -avoid polluting the C-state residency information. 
- -Link: https://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux-fullhistory.git/commit/?id=972c16130d9dc182cedcdd408408d9eacc7d6a2d [1] -Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b488f02156d3deb08f5ad7816d565c370a8cc6f1 [2] - -Suggested-by: Calvin Ong -Cc: stable@vger.kernel.org -Cc: regressions@lists.linux.dev -Signed-off-by: K Prateek Nayak ---- - drivers/acpi/processor_idle.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c -index 16a1663d02d4..18850aa2b79b 100644 ---- a/drivers/acpi/processor_idle.c -+++ b/drivers/acpi/processor_idle.c -@@ -529,9 +529,11 @@ static __cpuidle void io_idle(unsigned long addr) - inb(addr); - - #ifdef CONFIG_X86 -- /* No delay is needed if we are in guest */ -- if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) -- return; -+ /* -+ * No delay is needed if we are in guest or on a processor -+ * based on the Zen microarchitecture. -+ */ -+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR) || boot_cpu_has(X86_FEATURE_ZEN)) - /* - * Modern (>=Nehalem) Intel systems use ACPI via intel_idle, - * not this code. 
Assume that any Intel systems using this - --- -2.25.1 diff --git a/patches/nobara/0001-add-acpi_call.patch b/patches/nobara/0001-add-acpi_call.patch deleted file mode 100644 index b0a185a..0000000 --- a/patches/nobara/0001-add-acpi_call.patch +++ /dev/null @@ -1,506 +0,0 @@ -From 3f14226e2e90dba5d72c106da29e1876eb7b88ff Mon Sep 17 00:00:00 2001 -From: Denis -Date: Thu, 28 Sep 2023 03:40:53 +0200 -Subject: [PATCH] add acpi_call - ---- - drivers/platform/x86/Kconfig | 5 + - drivers/platform/x86/Makefile | 4 + - drivers/platform/x86/acpi_call.c | 449 +++++++++++++++++++++++++++++++ - 3 files changed, 458 insertions(+) - create mode 100644 drivers/platform/x86/acpi_call.c - -diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig -index 49c2c4cd8d00..fde791e51261 100644 ---- a/drivers/platform/x86/Kconfig -+++ b/drivers/platform/x86/Kconfig -@@ -170,6 +170,11 @@ config ACER_WIRELESS - If you choose to compile this driver as a module the module will be - called acer-wireless. 
- -+config ACPI_CALL -+ tristate "acpi_call module" -+ help -+ This embeds acpi_call module into the kernel -+ - config ACER_WMI - tristate "Acer WMI Laptop Extras" - depends on BACKLIGHT_CLASS_DEVICE -diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile -index 52dfdf574ac2..1e434fcb8273 100644 ---- a/drivers/platform/x86/Makefile -+++ b/drivers/platform/x86/Makefile -@@ -4,10 +4,14 @@ - # x86 Platform-Specific Drivers - # - -+# ACPI calls -+ - # Windows Management Interface - obj-$(CONFIG_ACPI_WMI) += wmi.o - obj-$(CONFIG_WMI_BMOF) += wmi-bmof.o - -+obj-$(CONFIG_ACPI_CALL) += acpi_call.o -+ - # WMI drivers - obj-$(CONFIG_HUAWEI_WMI) += huawei-wmi.o - obj-$(CONFIG_MXM_WMI) += mxm-wmi.o -diff --git a/drivers/platform/x86/acpi_call.c b/drivers/platform/x86/acpi_call.c -new file mode 100644 -index 000000000000..d7bc238e16da ---- /dev/null -+++ b/drivers/platform/x86/acpi_call.c -@@ -0,0 +1,449 @@ -+/* Copyright (c) 2010: Michal Kottman */ -+ -+#define BUILDING_ACPICA -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) -+#include -+#endif -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) -+#include -+#else -+#include -+#endif -+ -+MODULE_LICENSE("GPL"); -+ -+/* Uncomment the following line to enable debug messages */ -+/* -+#define DEBUG -+*/ -+ -+#define BUFFER_SIZE 4096 -+#define INPUT_BUFFER_SIZE (2 * BUFFER_SIZE) -+#define MAX_ACPI_ARGS 16 -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0) -+#define HAVE_PROC_CREATE -+#endif -+ -+extern struct proc_dir_entry *acpi_root_dir; -+ -+static char input_buffer[INPUT_BUFFER_SIZE]; -+static char result_buffer[BUFFER_SIZE]; -+static char not_called_message[11] = "not called"; -+ -+static u8 temporary_buffer[BUFFER_SIZE]; -+ -+static size_t get_avail_bytes(void) { -+ return BUFFER_SIZE - strlen(result_buffer); -+} -+static char *get_buffer_end(void) { -+ return result_buffer + strlen(result_buffer); -+} -+ -+/** Appends the 
contents of an acpi_object to the result buffer -+@param result An acpi object holding result data -+@returns 0 if the result could fully be saved, a higher value otherwise -+*/ -+static int acpi_result_to_string(union acpi_object *result) { -+ if (result->type == ACPI_TYPE_INTEGER) { -+ snprintf(get_buffer_end(), get_avail_bytes(), -+ "0x%x", (int)result->integer.value); -+ } else if (result->type == ACPI_TYPE_STRING) { -+ snprintf(get_buffer_end(), get_avail_bytes(), -+ "\"%*s\"", result->string.length, result->string.pointer); -+ } else if (result->type == ACPI_TYPE_BUFFER) { -+ int i; -+ // do not store more than data if it does not fit. The first element is -+ // just 4 chars, but there is also two bytes from the curly brackets -+ int show_values = min((size_t)result->buffer.length, get_avail_bytes() / 6); -+ -+ snprintf(get_buffer_end(), get_avail_bytes(), "{"); -+ for (i = 0; i < show_values; i++) -+ sprintf(get_buffer_end(), -+ i == 0 ? "0x%02x" : ", 0x%02x", result->buffer.pointer[i]); -+ -+ if (result->buffer.length > show_values) { -+ // if data was truncated, show a trailing comma if there is space -+ snprintf(get_buffer_end(), get_avail_bytes(), ","); -+ return 1; -+ } else { -+ // in case show_values == 0, but the buffer is too small to hold -+ // more values (i.e. 
the buffer cannot have anything more than "{") -+ snprintf(get_buffer_end(), get_avail_bytes(), "}"); -+ } -+ } else if (result->type == ACPI_TYPE_PACKAGE) { -+ int i; -+ snprintf(get_buffer_end(), get_avail_bytes(), "["); -+ for (i=0; ipackage.count; i++) { -+ if (i > 0) -+ snprintf(get_buffer_end(), get_avail_bytes(), ", "); -+ -+ // abort if there is no more space available -+ if (!get_avail_bytes() || acpi_result_to_string(&result->package.elements[i])) -+ return 1; -+ } -+ snprintf(get_buffer_end(), get_avail_bytes(), "]"); -+ } else { -+ snprintf(get_buffer_end(), get_avail_bytes(), -+ "Object type 0x%x\n", result->type); -+ } -+ -+ // return 0 if there are still bytes available, 1 otherwise -+ return !get_avail_bytes(); -+} -+ -+/** -+@param method The full name of ACPI method to call -+@param argc The number of parameters -+@param argv A pre-allocated array of arguments of type acpi_object -+*/ -+static void do_acpi_call(const char * method, int argc, union acpi_object *argv) -+{ -+ acpi_status status; -+ acpi_handle handle; -+ struct acpi_object_list arg; -+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; -+ -+#ifdef DEBUG -+ printk(KERN_INFO "acpi_call: Calling %s\n", method); -+#endif -+ -+ // get the handle of the method, must be a fully qualified path -+ status = acpi_get_handle(NULL, (acpi_string) method, &handle); -+ -+ if (ACPI_FAILURE(status)) -+ { -+ snprintf(result_buffer, BUFFER_SIZE, "Error: %s", acpi_format_exception(status)); -+ printk(KERN_ERR "acpi_call: Cannot get handle: %s\n", result_buffer); -+ return; -+ } -+ -+ // prepare parameters -+ arg.count = argc; -+ arg.pointer = argv; -+ -+ // call the method -+ status = acpi_evaluate_object(handle, NULL, &arg, &buffer); -+ if (ACPI_FAILURE(status)) -+ { -+ snprintf(result_buffer, BUFFER_SIZE, "Error: %s", acpi_format_exception(status)); -+ printk(KERN_ERR "acpi_call: Method call failed: %s\n", result_buffer); -+ return; -+ } -+ -+ // reset the result buffer -+ *result_buffer = 
'\0'; -+ acpi_result_to_string(buffer.pointer); -+ kfree(buffer.pointer); -+ -+#ifdef DEBUG -+ printk(KERN_INFO "acpi_call: Call successful: %s\n", result_buffer); -+#endif -+} -+ -+/** Decodes 2 hex characters to an u8 int -+*/ -+u8 decodeHex(char *hex) { -+ char buf[3] = { hex[0], hex[1], 0}; -+ return (u8) simple_strtoul(buf, NULL, 16); -+} -+ -+/** Parses method name and arguments -+@param input Input string to be parsed. Modified in the process. -+@param nargs Set to number of arguments parsed (output) -+@param args -+*/ -+static char *parse_acpi_args(char *input, int *nargs, union acpi_object **args) -+{ -+ char *s = input; -+ int i; -+ -+ *nargs = 0; -+ *args = NULL; -+ -+ // the method name is separated from the arguments by a space -+ while (*s && *s != ' ') -+ s++; -+ // if no space is found, return 0 arguments -+ if (*s == 0) -+ return input; -+ -+ *args = (union acpi_object *) kmalloc(MAX_ACPI_ARGS * sizeof(union acpi_object), GFP_KERNEL); -+ if (!*args) { -+ printk(KERN_ERR "acpi_call: unable to allocate buffer\n"); -+ return NULL; -+ } -+ -+ while (*s) { -+ if (*s == ' ') { -+ if (*nargs == 0) -+ *s = 0; // change first space to nul -+ ++ *nargs; -+ ++ s; -+ } else { -+ union acpi_object *arg = (*args) + (*nargs - 1); -+ if (*s == '"') { -+ // decode string -+ arg->type = ACPI_TYPE_STRING; -+ arg->string.pointer = ++s; -+ arg->string.length = 0; -+ while (*s && *s++ != '"') -+ arg->string.length ++; -+ // skip the last " -+ if (*s == '"') -+ ++s; -+ } else if (*s == 'b') { -+ // decode buffer - bXXXX -+ char *p = ++s; -+ int len = 0, i; -+ u8 *buf = NULL; -+ -+ while (*p && *p!=' ') -+ p++; -+ -+ len = p - s; -+ if (len % 2 == 1) { -+ printk(KERN_ERR "acpi_call: buffer arg%d is not multiple of 8 bits\n", *nargs); -+ --*nargs; -+ goto err; -+ } -+ len /= 2; -+ -+ buf = (u8*) kmalloc(len, GFP_KERNEL); -+ if (!buf) { -+ printk(KERN_ERR "acpi_call: unable to allocate buffer\n"); -+ --*nargs; -+ goto err; -+ } -+ for (i=0; itype = ACPI_TYPE_BUFFER; -+ 
arg->buffer.pointer = buf; -+ arg->buffer.length = len; -+ } else if (*s == '{') { -+ // decode buffer - { b1, b2 ...} -+ u8 *buf = temporary_buffer; -+ arg->type = ACPI_TYPE_BUFFER; -+ arg->buffer.pointer = buf; -+ arg->buffer.length = 0; -+ while (*s && *s++ != '}') { -+ if (buf >= temporary_buffer + sizeof(temporary_buffer)) { -+ printk(KERN_ERR "acpi_call: buffer arg%d is truncated because the buffer is full\n", *nargs); -+ // clear remaining arguments -+ while (*s && *s != '}') -+ ++s; -+ break; -+ } -+ else if (*s >= '0' && *s <= '9') { -+ // decode integer into buffer -+ arg->buffer.length ++; -+ if (s[0] == '0' && s[1] == 'x') -+ *buf++ = simple_strtol(s+2, 0, 16); -+ else -+ *buf++ = simple_strtol(s, 0, 10); -+ } -+ // skip until space or comma or '}' -+ while (*s && *s != ' ' && *s != ',' && *s != '}') -+ ++s; -+ } -+ // store the result in new allocated buffer -+ buf = (u8*) kmalloc(arg->buffer.length, GFP_KERNEL); -+ if (!buf) { -+ printk(KERN_ERR "acpi_call: unable to allocate buffer\n"); -+ --*nargs; -+ goto err; -+ } -+ memcpy(buf, temporary_buffer, arg->buffer.length); -+ arg->buffer.pointer = buf; -+ } else { -+ // decode integer, N or 0xN -+ arg->type = ACPI_TYPE_INTEGER; -+ if (s[0] == '0' && s[1] == 'x') { -+ arg->integer.value = simple_strtol(s+2, 0, 16); -+ } else { -+ arg->integer.value = simple_strtol(s, 0, 10); -+ } -+ while (*s && *s != ' ') { -+ ++s; -+ } -+ } -+ } -+ } -+ -+ return input; -+ -+err: -+ for (i=0; i<*nargs; i++) -+ if ((*args)[i].type == ACPI_TYPE_BUFFER && (*args)[i].buffer.pointer) -+ kfree((*args)[i].buffer.pointer); -+ kfree(*args); -+ return NULL; -+} -+ -+/** procfs write callback. Called when writing into /proc/acpi/call. 
-+*/ -+#ifdef HAVE_PROC_CREATE -+static ssize_t acpi_proc_write( struct file *filp, const char __user *buff, -+ size_t len, loff_t *data ) -+#else -+static int acpi_proc_write( struct file *filp, const char __user *buff, -+ unsigned long len, void *data ) -+#endif -+{ -+ union acpi_object *args; -+ int nargs, i; -+ char *method; -+ -+ memset(input_buffer, 0, INPUT_BUFFER_SIZE); -+ if (len > sizeof(input_buffer) - 1) { -+#ifdef HAVE_PROC_CREATE -+ printk(KERN_ERR "acpi_call: Input too long! (%zu)\n", len); -+#else -+ printk(KERN_ERR "acpi_call: Input too long! (%lu)\n", len); -+#endif -+ return -ENOSPC; -+ } -+ -+ if (copy_from_user( input_buffer, buff, len )) { -+ return -EFAULT; -+ } -+ input_buffer[len] = '\0'; -+ if (input_buffer[len-1] == '\n') -+ input_buffer[len-1] = '\0'; -+ -+ method = parse_acpi_args(input_buffer, &nargs, &args); -+ if (method) { -+ do_acpi_call(method, nargs, args); -+ if (args) { -+ for (i=0; i count) { -+ // user buffer is too small -+ ret = 0; -+ } else if(*off == len + 1) { -+ // we're done -+ ret = 0; -+ result_buffer[0] = '\0'; -+ } else { -+ // output the current result buffer -+ ret = simple_read_from_buffer(buff, count, off, result_buffer, len + 1); -+ *off = ret; -+ } -+ -+ return ret; -+} -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) -+static struct proc_ops proc_acpi_operations = { -+ .proc_read = acpi_proc_read, -+ .proc_write = acpi_proc_write, -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 13, 0) -+ .proc_lseek = default_llseek, -+#endif -+}; -+#else -+static struct file_operations proc_acpi_operations = { -+ .owner = THIS_MODULE, -+ .read = acpi_proc_read, -+ .write = acpi_proc_write, -+}; -+#endif -+ -+#else -+static int acpi_proc_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len = 0; -+ -+ if (off > 0) { -+ *eof = 1; -+ return 0; -+ } -+ -+ // output the current result buffer -+ len = strlen(result_buffer); -+ memcpy(page, result_buffer, len + 1); -+ -+ // initialize the 
result buffer for later -+ strcpy(result_buffer, "not called"); -+ -+ return len; -+} -+#endif -+ -+/** module initialization function */ -+static int __init init_acpi_call(void) -+{ -+#ifdef HAVE_PROC_CREATE -+ struct proc_dir_entry *acpi_entry = proc_create("call", -+ 0660, -+ acpi_root_dir, -+ &proc_acpi_operations); -+#else -+ struct proc_dir_entry *acpi_entry = create_proc_entry("call", 0660, acpi_root_dir); -+#endif -+ -+ strcpy(result_buffer, "not called"); -+ -+ if (acpi_entry == NULL) { -+ printk(KERN_ERR "acpi_call: Couldn't create proc entry\n"); -+ return -ENOMEM; -+ } -+ -+#ifndef HAVE_PROC_CREATE -+ acpi_entry->write_proc = acpi_proc_write; -+ acpi_entry->read_proc = acpi_proc_read; -+#endif -+ -+#ifdef DEBUG -+ printk(KERN_INFO "acpi_call: Module loaded successfully\n"); -+#endif -+ -+ return 0; -+} -+ -+static void __exit unload_acpi_call(void) -+{ -+ remove_proc_entry("call", acpi_root_dir); -+ -+#ifdef DEBUG -+ printk(KERN_INFO "acpi_call: Module unloaded successfully\n"); -+#endif -+} -+ -+module_init(init_acpi_call); -+module_exit(unload_acpi_call); -\ No newline at end of file --- -2.42.0 - diff --git a/patches/nobara/0001-amd-hdr.patch b/patches/nobara/0001-amd-hdr.patch deleted file mode 100644 index 030317f..0000000 --- a/patches/nobara/0001-amd-hdr.patch +++ /dev/null @@ -1,2042 +0,0 @@ -From af60f9afa522f5f337d9b4e24eef1fdcd0ab6c05 Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Mon, 11 Sep 2023 14:31:43 +0200 -Subject: [PATCH 1/7] amd-hdr - -Signed-off-by: Peter Jung ---- - drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 71 ++ - .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 34 +- - .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 100 +++ - .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 805 ++++++++++++++++-- - .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 72 ++ - .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 224 ++++- - .../amd/display/dc/dcn10/dcn10_cm_common.c | 95 ++- - .../drm/amd/display/dc/dcn30/dcn30_hwseq.c | 37 + - 
.../drm/amd/display/dc/dcn30/dcn30_hwseq.h | 3 + - .../drm/amd/display/dc/dcn301/dcn301_init.c | 2 +- - .../gpu/drm/amd/display/include/fixed31_32.h | 12 + - drivers/gpu/drm/arm/malidp_crtc.c | 2 +- - drivers/gpu/drm/drm_atomic.c | 1 + - drivers/gpu/drm/drm_atomic_state_helper.c | 1 + - drivers/gpu/drm/drm_property.c | 49 ++ - include/drm/drm_mode_object.h | 2 +- - include/drm/drm_plane.h | 7 + - include/drm/drm_property.h | 6 + - include/uapi/drm/drm_mode.h | 8 + - 19 files changed, 1441 insertions(+), 90 deletions(-) - -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h -index 32fe05c810c6..84bf501b02f4 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h -@@ -343,6 +343,77 @@ struct amdgpu_mode_info { - int disp_priority; - const struct amdgpu_display_funcs *funcs; - const enum drm_plane_type *plane_type; -+ -+ /* Driver-private color mgmt props */ -+ -+ /* @plane_degamma_lut_property: Plane property to set a degamma LUT to -+ * convert input space before blending. -+ */ -+ struct drm_property *plane_degamma_lut_property; -+ /* @plane_degamma_lut_size_property: Plane property to define the max -+ * size of degamma LUT as supported by the driver (read-only). -+ */ -+ struct drm_property *plane_degamma_lut_size_property; -+ /** -+ * @plane_degamma_tf_property: Plane pre-defined transfer function to -+ * to go from scanout/encoded values to linear values. -+ */ -+ struct drm_property *plane_degamma_tf_property; -+ /** -+ * @plane_hdr_mult_property: -+ */ -+ struct drm_property *plane_hdr_mult_property; -+ -+ struct drm_property *plane_ctm_property; -+ /** -+ * @shaper_lut_property: Plane property to set pre-blending shaper LUT -+ * that converts color content before 3D LUT. -+ */ -+ struct drm_property *plane_shaper_lut_property; -+ /** -+ * @shaper_lut_size_property: Plane property for the size of -+ * pre-blending shaper LUT as supported by the driver (read-only). 
-+ */ -+ struct drm_property *plane_shaper_lut_size_property; -+ /** -+ * @plane_shaper_tf_property: Plane property to set a predefined -+ * transfer function for pre-blending shaper (before applying 3D LUT) -+ * with or without LUT. -+ */ -+ struct drm_property *plane_shaper_tf_property; -+ /** -+ * @plane_lut3d_property: Plane property for gamma correction using a -+ * 3D LUT (pre-blending). -+ */ -+ struct drm_property *plane_lut3d_property; -+ /** -+ * @plane_degamma_lut_size_property: Plane property to define the max -+ * size of 3D LUT as supported by the driver (read-only). -+ */ -+ struct drm_property *plane_lut3d_size_property; -+ /** -+ * @plane_blend_lut_property: Plane property for output gamma before -+ * blending. Userspace set a blend LUT to convert colors after 3D LUT -+ * conversion. It works as a post-3D LUT 1D LUT, with shaper LUT, they -+ * are sandwiching 3D LUT with two 1D LUT. -+ */ -+ struct drm_property *plane_blend_lut_property; -+ /** -+ * @plane_blend_lut_size_property: Plane property to define the max -+ * size of blend LUT as supported by the driver (read-only). -+ */ -+ struct drm_property *plane_blend_lut_size_property; -+ /** -+ * @plane_blend_tf_property: Plane property to set a predefined -+ * transfer function for pre-blending blend (before applying 3D LUT) -+ * with or without LUT. -+ */ -+ struct drm_property *plane_blend_tf_property; -+ /* @regamma_tf_property: Transfer function for CRTC regamma -+ * (post-blending). Possible values are defined by `enum -+ * amdgpu_transfer_function`. 
-+ */ -+ struct drm_property *regamma_tf_property; - }; - - #define AMDGPU_MAX_BL_LEVEL 0xFF -diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -index 34f011cedd06..fb3400eff0b6 100644 ---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -@@ -4021,6 +4021,11 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) - return r; - } - -+#ifdef AMD_PRIVATE_COLOR -+ if (amdgpu_dm_create_color_properties(adev)) -+ return -ENOMEM; -+#endif -+ - r = amdgpu_dm_audio_init(adev); - if (r) { - dc_release_state(state->context); -@@ -5093,7 +5098,9 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, - * Always set input transfer function, since plane state is refreshed - * every time. - */ -- ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, dc_plane_state); -+ ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, -+ plane_state, -+ dc_plane_state); - if (ret) - return ret; - -@@ -8113,6 +8120,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, - bundle->surface_updates[planes_count].gamma = dc_plane->gamma_correction; - bundle->surface_updates[planes_count].in_transfer_func = dc_plane->in_transfer_func; - bundle->surface_updates[planes_count].gamut_remap_matrix = &dc_plane->gamut_remap_matrix; -+ bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult; -+ bundle->surface_updates[planes_count].func_shaper = dc_plane->in_shaper_func; -+ bundle->surface_updates[planes_count].lut3d_func = dc_plane->lut3d_func; -+ bundle->surface_updates[planes_count].blend_tf = dc_plane->blend_tf; - } - - amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state, -@@ -8324,6 +8335,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, - &acrtc_state->stream->csc_color_matrix; - bundle->stream_update.out_transfer_func = - acrtc_state->stream->out_transfer_func; -+ 
bundle->stream_update.lut3d_func = -+ (struct dc_3dlut *) acrtc_state->stream->lut3d_func; -+ bundle->stream_update.func_shaper = -+ (struct dc_transfer_func *) acrtc_state->stream->func_shaper; - } - - acrtc_state->stream->abm_level = acrtc_state->abm_level; -@@ -9512,6 +9527,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, - * when a modeset is needed, to ensure it gets reprogrammed. - */ - if (dm_new_crtc_state->base.color_mgmt_changed || -+ dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf || - drm_atomic_crtc_needs_modeset(new_crtc_state)) { - ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state); - if (ret) -@@ -9579,6 +9595,10 @@ static bool should_reset_plane(struct drm_atomic_state *state, - */ - for_each_oldnew_plane_in_state(state, other, old_other_state, new_other_state, i) { - struct amdgpu_framebuffer *old_afb, *new_afb; -+ struct dm_plane_state *dm_new_other_state, *dm_old_other_state; -+ -+ dm_new_other_state = to_dm_plane_state(new_other_state); -+ dm_old_other_state = to_dm_plane_state(old_other_state); - - if (other->type == DRM_PLANE_TYPE_CURSOR) - continue; -@@ -9615,6 +9635,18 @@ static bool should_reset_plane(struct drm_atomic_state *state, - old_other_state->color_encoding != new_other_state->color_encoding) - return true; - -+ /* HDR/Transfer Function changes. 
*/ -+ if (dm_old_other_state->degamma_tf != dm_new_other_state->degamma_tf || -+ dm_old_other_state->degamma_lut != dm_new_other_state->degamma_lut || -+ dm_old_other_state->hdr_mult != dm_new_other_state->hdr_mult || -+ dm_old_other_state->ctm != dm_new_other_state->ctm || -+ dm_old_other_state->shaper_lut != dm_new_other_state->shaper_lut || -+ dm_old_other_state->shaper_tf != dm_new_other_state->shaper_tf || -+ dm_old_other_state->lut3d != dm_new_other_state->lut3d || -+ dm_old_other_state->blend_lut != dm_new_other_state->blend_lut || -+ dm_old_other_state->blend_tf != dm_new_other_state->blend_tf) -+ return true; -+ - /* Framebuffer checks fall at the end. */ - if (!old_other_state->fb || !new_other_state->fb) - continue; -diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h -index 9e4cc5eeda76..24c87f425afb 100644 ---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h -+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h -@@ -33,6 +33,8 @@ - #include - #include "link_service_types.h" - -+#define AMDGPU_HDR_MULT_DEFAULT (0x100000000LL) -+ - /* - * This file contains the definition for amdgpu_display_manager - * and its API for amdgpu driver's use. 
-@@ -716,9 +718,91 @@ static inline void amdgpu_dm_set_mst_status(uint8_t *status, - - extern const struct amdgpu_ip_block_version dm_ip_block; - -+enum amdgpu_transfer_function { -+ AMDGPU_TRANSFER_FUNCTION_DEFAULT, -+ AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF, -+ AMDGPU_TRANSFER_FUNCTION_BT709_EOTF, -+ AMDGPU_TRANSFER_FUNCTION_PQ_EOTF, -+ AMDGPU_TRANSFER_FUNCTION_LINEAR, -+ AMDGPU_TRANSFER_FUNCTION_UNITY, -+ AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF, -+ AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF, -+ AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF, -+ AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF, -+ AMDGPU_TRANSFER_FUNCTION_BT709_INV_EOTF, -+ AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF, -+ AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF, -+ AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF, -+ AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF, -+ AMDGPU_TRANSFER_FUNCTION_COUNT -+}; -+ - struct dm_plane_state { - struct drm_plane_state base; - struct dc_plane_state *dc_state; -+ -+ /* Plane color mgmt */ -+ /** -+ * @degamma_lut: -+ * -+ * 1D LUT for mapping framebuffer/plane pixel data before sampling or -+ * blending operations. It's usually applied to linearize input space. -+ * The blob (if not NULL) is an array of &struct drm_color_lut. -+ */ -+ struct drm_property_blob *degamma_lut; -+ /** -+ * @degamma_tf: -+ * -+ * Predefined transfer function to tell DC driver the input space to -+ * linearize. -+ */ -+ enum amdgpu_transfer_function degamma_tf; -+ /** -+ * @hdr_mult: -+ * -+ * Multiplier to 'gain' the plane. When PQ is decoded using the fixed -+ * func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on -+ * AMD at least). When sRGB is decoded, 1.0 -> 1.0, obviously. -+ * Therefore, 1.0 multiplier = 80 nits for SDR content. So if you -+ * want, 203 nits for SDR content, pass in (203.0 / 80.0). Format is -+ * S31.32 sign-magnitude. -+ */ -+ __u64 hdr_mult; -+ /** -+ * @ctm: -+ * -+ * Color transformation matrix. See drm_crtc_enable_color_mgmt(). The -+ * blob (if not NULL) is a &struct drm_color_ctm. 
-+ */ -+ struct drm_property_blob *ctm; -+ /** -+ * @shaper_lut: shaper lookup table blob. The blob (if not NULL) is an -+ * array of &struct drm_color_lut. -+ */ -+ struct drm_property_blob *shaper_lut; -+ /** -+ * @shaper_tf: -+ * -+ * Predefined transfer function to delinearize color space. -+ */ -+ enum amdgpu_transfer_function shaper_tf; -+ /** -+ * @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of -+ * &struct drm_color_lut. -+ */ -+ struct drm_property_blob *lut3d; -+ /** -+ * @blend_lut: blend lut lookup table blob. The blob (if not NULL) is an -+ * array of &struct drm_color_lut. -+ */ -+ struct drm_property_blob *blend_lut; -+ /** -+ * @blend_tf: -+ * -+ * Pre-defined transfer function for converting plane pixel data before -+ * applying blend LUT. -+ */ -+ enum amdgpu_transfer_function blend_tf; - }; - - struct dm_crtc_state { -@@ -743,6 +827,14 @@ struct dm_crtc_state { - struct dc_info_packet vrr_infopacket; - - int abm_level; -+ -+ /** -+ * @regamma_tf: -+ * -+ * Pre-defined transfer function for converting internal FB -> wire -+ * encoding. 
-+ */ -+ enum amdgpu_transfer_function regamma_tf; - }; - - #define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base) -@@ -804,14 +896,22 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, - - void amdgpu_dm_trigger_timing_sync(struct drm_device *dev); - -+/* 3D LUT max size is 17x17x17 */ -+#define MAX_COLOR_3DLUT_ENTRIES 4913 -+#define MAX_COLOR_3DLUT_BITDEPTH 12 -+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev, -+ struct drm_plane_state *plane_state); -+/* 1D LUT size */ - #define MAX_COLOR_LUT_ENTRIES 4096 - /* Legacy gamm LUT users such as X doesn't like large LUT sizes */ - #define MAX_COLOR_LEGACY_LUT_ENTRIES 256 - - void amdgpu_dm_init_color_mod(void); -+int amdgpu_dm_create_color_properties(struct amdgpu_device *adev); - int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state); - int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc); - int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, -+ struct drm_plane_state *plane_state, - struct dc_plane_state *dc_plane_state); - - void amdgpu_dm_update_connector_after_detect( -diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c -index a4cb23d059bd..0442eeaa9763 100644 ---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c -+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c -@@ -72,6 +72,7 @@ - */ - - #define MAX_DRM_LUT_VALUE 0xFFFF -+#define SDR_WHITE_LEVEL_INIT_VALUE 80 - - /** - * amdgpu_dm_init_color_mod - Initialize the color module. -@@ -84,6 +85,213 @@ void amdgpu_dm_init_color_mod(void) - setup_x_points_distribution(); - } - -+#ifdef AMD_PRIVATE_COLOR -+/* Pre-defined Transfer Functions (TF) -+ * -+ * AMD driver supports pre-defined mathematical functions for transferring -+ * between encoded values and optical/linear space. Depending on HW color caps, -+ * ROMs and curves built by the AMD color module support these transforms. 
-+ * -+ * The driver-specific color implementation exposes properties for pre-blending -+ * degamma TF, shaper TF (before 3D LUT), and blend(dpp.ogam) TF and -+ * post-blending regamma (mpc.ogam) TF. However, only pre-blending degamma -+ * supports ROM curves. AMD color module uses pre-defined coefficients to build -+ * curves for the other blocks. What can be done by each color block is -+ * described by struct dpp_color_capsand struct mpc_color_caps. -+ * -+ * AMD driver-specific color API exposes the following pre-defined transfer -+ * functions: -+ * -+ * - Linear/Unity: linear/identity relationship between pixel value and -+ * luminance value; -+ * - Gamma 2.2, Gamma 2.4, Gamma 2.6: pure gamma functions; -+ * - sRGB: 2.4 gamma with small initial linear section as standardized by IEC -+ * 61966-2-1:1999; -+ * - BT.709 (BT.1886): 2.4 gamma with differences in the dark end of the scale. -+ * Used in HD-TV and standardized by ITU-R BT.1886; -+ * - PQ (Perceptual Quantizer): used for HDR display, allows luminance range -+ * capability of 0 to 10,000 nits; standardized by SMPTE ST 2084. -+ * -+ * In the driver-specific API, color block names attached to TF properties -+ * suggest the intention regarding non-linear encoding pixel's luminance -+ * values. As some newer encodings don't use gamma curve, we make encoding and -+ * decoding explicit by defining an enum list of transfer functions supported -+ * in terms of EOTF and inverse EOTF, where: -+ * -+ * - EOTF (electro-optical transfer function): is the transfer function to go -+ * from the encoded value to an optical (linear) value. De-gamma functions -+ * traditionally do this. -+ * - Inverse EOTF (simply the inverse of the EOTF): is usually intended to go -+ * from an optical/linear space (which might have been used for blending) -+ * back to the encoded values. Gamma functions traditionally do this. 
-+ */ -+static const char * const -+amdgpu_transfer_function_names[] = { -+ [AMDGPU_TRANSFER_FUNCTION_DEFAULT] = "Default", -+ [AMDGPU_TRANSFER_FUNCTION_LINEAR] = "Linear", -+ [AMDGPU_TRANSFER_FUNCTION_UNITY] = "Unity", -+ [AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF] = "sRGB EOTF", -+ [AMDGPU_TRANSFER_FUNCTION_BT709_EOTF] = "BT.709 EOTF", -+ [AMDGPU_TRANSFER_FUNCTION_PQ_EOTF] = "PQ EOTF", -+ [AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF] = "Gamma 2.2 EOTF", -+ [AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF] = "Gamma 2.4 EOTF", -+ [AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF] = "Gamma 2.6 EOTF", -+ [AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF] = "sRGB inv_EOTF", -+ [AMDGPU_TRANSFER_FUNCTION_BT709_INV_EOTF] = "BT.709 inv_EOTF", -+ [AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF] = "PQ inv_EOTF", -+ [AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF] = "Gamma 2.2 inv_EOTF", -+ [AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF] = "Gamma 2.4 inv_EOTF", -+ [AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF] = "Gamma 2.6 inv_EOTF", -+}; -+ -+static const u32 amdgpu_eotf = -+ BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF) | -+ BIT(AMDGPU_TRANSFER_FUNCTION_BT709_EOTF) | -+ BIT(AMDGPU_TRANSFER_FUNCTION_PQ_EOTF) | -+ BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF) | -+ BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF) | -+ BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF); -+ -+static const u32 amdgpu_inv_eotf = -+ BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF) | -+ BIT(AMDGPU_TRANSFER_FUNCTION_BT709_INV_EOTF) | -+ BIT(AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF) | -+ BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF) | -+ BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF) | -+ BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF); -+ -+static struct drm_property * -+amdgpu_create_tf_property(struct drm_device *dev, -+ const char *name, -+ u32 supported_tf) -+{ -+ u32 transfer_functions = supported_tf | -+ BIT(AMDGPU_TRANSFER_FUNCTION_DEFAULT) | -+ BIT(AMDGPU_TRANSFER_FUNCTION_LINEAR) | -+ BIT(AMDGPU_TRANSFER_FUNCTION_UNITY); -+ struct drm_prop_enum_list 
enum_list[AMDGPU_TRANSFER_FUNCTION_COUNT]; -+ int i, len; -+ -+ len = 0; -+ for (i = 0; i < AMDGPU_TRANSFER_FUNCTION_COUNT; i++) { -+ if ((transfer_functions & BIT(i)) == 0) -+ continue; -+ -+ enum_list[len].type = i; -+ enum_list[len].name = amdgpu_transfer_function_names[i]; -+ len++; -+ } -+ -+ return drm_property_create_enum(dev, DRM_MODE_PROP_ENUM, -+ name, enum_list, len); -+} -+ -+int -+amdgpu_dm_create_color_properties(struct amdgpu_device *adev) -+{ -+ struct drm_property *prop; -+ -+ prop = drm_property_create(adev_to_drm(adev), -+ DRM_MODE_PROP_BLOB, -+ "AMD_PLANE_DEGAMMA_LUT", 0); -+ if (!prop) -+ return -ENOMEM; -+ adev->mode_info.plane_degamma_lut_property = prop; -+ -+ prop = drm_property_create_range(adev_to_drm(adev), -+ DRM_MODE_PROP_IMMUTABLE, -+ "AMD_PLANE_DEGAMMA_LUT_SIZE", 0, UINT_MAX); -+ if (!prop) -+ return -ENOMEM; -+ adev->mode_info.plane_degamma_lut_size_property = prop; -+ -+ prop = amdgpu_create_tf_property(adev_to_drm(adev), -+ "AMD_PLANE_DEGAMMA_TF", -+ amdgpu_eotf); -+ if (!prop) -+ return -ENOMEM; -+ adev->mode_info.plane_degamma_tf_property = prop; -+ -+ prop = drm_property_create_range(adev_to_drm(adev), -+ 0, "AMD_PLANE_HDR_MULT", 0, U64_MAX); -+ if (!prop) -+ return -ENOMEM; -+ adev->mode_info.plane_hdr_mult_property = prop; -+ -+ prop = drm_property_create(adev_to_drm(adev), -+ DRM_MODE_PROP_BLOB, -+ "AMD_PLANE_CTM", 0); -+ if (!prop) -+ return -ENOMEM; -+ adev->mode_info.plane_ctm_property = prop; -+ -+ prop = drm_property_create(adev_to_drm(adev), -+ DRM_MODE_PROP_BLOB, -+ "AMD_PLANE_SHAPER_LUT", 0); -+ if (!prop) -+ return -ENOMEM; -+ adev->mode_info.plane_shaper_lut_property = prop; -+ -+ prop = drm_property_create_range(adev_to_drm(adev), -+ DRM_MODE_PROP_IMMUTABLE, -+ "AMD_PLANE_SHAPER_LUT_SIZE", 0, UINT_MAX); -+ if (!prop) -+ return -ENOMEM; -+ adev->mode_info.plane_shaper_lut_size_property = prop; -+ -+ prop = amdgpu_create_tf_property(adev_to_drm(adev), -+ "AMD_PLANE_SHAPER_TF", -+ amdgpu_inv_eotf); -+ if (!prop) -+ 
return -ENOMEM; -+ adev->mode_info.plane_shaper_tf_property = prop; -+ -+ prop = drm_property_create(adev_to_drm(adev), -+ DRM_MODE_PROP_BLOB, -+ "AMD_PLANE_LUT3D", 0); -+ if (!prop) -+ return -ENOMEM; -+ adev->mode_info.plane_lut3d_property = prop; -+ -+ prop = drm_property_create_range(adev_to_drm(adev), -+ DRM_MODE_PROP_IMMUTABLE, -+ "AMD_PLANE_LUT3D_SIZE", 0, UINT_MAX); -+ if (!prop) -+ return -ENOMEM; -+ adev->mode_info.plane_lut3d_size_property = prop; -+ -+ prop = drm_property_create(adev_to_drm(adev), -+ DRM_MODE_PROP_BLOB, -+ "AMD_PLANE_BLEND_LUT", 0); -+ if (!prop) -+ return -ENOMEM; -+ adev->mode_info.plane_blend_lut_property = prop; -+ -+ prop = drm_property_create_range(adev_to_drm(adev), -+ DRM_MODE_PROP_IMMUTABLE, -+ "AMD_PLANE_BLEND_LUT_SIZE", 0, UINT_MAX); -+ if (!prop) -+ return -ENOMEM; -+ adev->mode_info.plane_blend_lut_size_property = prop; -+ -+ prop = amdgpu_create_tf_property(adev_to_drm(adev), -+ "AMD_PLANE_BLEND_TF", -+ amdgpu_eotf); -+ if (!prop) -+ return -ENOMEM; -+ adev->mode_info.plane_blend_tf_property = prop; -+ -+ prop = amdgpu_create_tf_property(adev_to_drm(adev), -+ "AMD_CRTC_REGAMMA_TF", -+ amdgpu_inv_eotf); -+ if (!prop) -+ return -ENOMEM; -+ adev->mode_info.regamma_tf_property = prop; -+ -+ return 0; -+} -+#endif -+ - /** - * __extract_blob_lut - Extracts the DRM lut and lut size from a blob. - * @blob: DRM color mgmt property blob -@@ -182,7 +390,6 @@ static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut, - static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, - struct fixed31_32 *matrix) - { -- int64_t val; - int i; - - /* -@@ -201,12 +408,33 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, - } - - /* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */ -- val = ctm->matrix[i - (i / 4)]; -- /* If negative, convert to 2's complement. 
*/ -- if (val & (1ULL << 63)) -- val = -(val & ~(1ULL << 63)); -+ matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i - (i / 4)]); -+ } -+} - -- matrix[i].value = val; -+/** -+ * __drm_ctm2_to_dc_matrix - converts a DRM CTM2 to a DC CSC float matrix -+ * @ctm: DRM color transformation matrix -+ * @matrix: DC CSC float matrix -+ * -+ * The matrix needs to be a 3x4 (12 entry) matrix. -+ */ -+static void __drm_ctm2_to_dc_matrix(const struct drm_color_ctm2 *ctm, -+ struct fixed31_32 *matrix) -+{ -+ int i; -+ -+ /* -+ * DRM gives a 3x3 matrix, but DC wants 3x4. Assuming we're operating -+ * with homogeneous coordinates, augment the matrix with 0's. -+ * -+ * The format provided is S31.32, using signed-magnitude representation. -+ * Our fixed31_32 is also S31.32, but is using 2's complement. We have -+ * to convert from signed-magnitude to 2's complement. -+ */ -+ for (i = 0; i < 12; i++) { -+ /* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */ -+ matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i]); - } - } - -@@ -268,16 +496,18 @@ static int __set_output_tf(struct dc_transfer_func *func, - struct calculate_buffer cal_buffer = {0}; - bool res; - -- ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES); -- - cal_buffer.buffer_index = -1; - -- gamma = dc_create_gamma(); -- if (!gamma) -- return -ENOMEM; -+ if (lut_size) { -+ ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES); - -- gamma->num_entries = lut_size; -- __drm_lut_to_dc_gamma(lut, gamma, false); -+ gamma = dc_create_gamma(); -+ if (!gamma) -+ return -ENOMEM; -+ -+ gamma->num_entries = lut_size; -+ __drm_lut_to_dc_gamma(lut, gamma, false); -+ } - - if (func->tf == TRANSFER_FUNCTION_LINEAR) { - /* -@@ -285,27 +515,63 @@ static int __set_output_tf(struct dc_transfer_func *func, - * on top of a linear input. But degamma params can be used - * instead to simulate this. 
- */ -- gamma->type = GAMMA_CUSTOM; -+ if (gamma) -+ gamma->type = GAMMA_CUSTOM; - res = mod_color_calculate_degamma_params(NULL, func, -- gamma, true); -+ gamma, gamma != NULL); - } else { - /* - * Assume sRGB. The actual mapping will depend on whether the - * input was legacy or not. - */ -- gamma->type = GAMMA_CS_TFM_1D; -- res = mod_color_calculate_regamma_params(func, gamma, false, -+ if (gamma) -+ gamma->type = GAMMA_CS_TFM_1D; -+ res = mod_color_calculate_regamma_params(func, gamma, gamma != NULL, - has_rom, NULL, &cal_buffer); - } - -- dc_gamma_release(&gamma); -+ if (gamma) -+ dc_gamma_release(&gamma); - - return res ? 0 : -ENOMEM; - } - -+static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream, -+ const struct drm_color_lut *regamma_lut, -+ uint32_t regamma_size, bool has_rom, -+ enum dc_transfer_func_predefined tf) -+{ -+ struct dc_transfer_func *out_tf = stream->out_transfer_func; -+ int ret = 0; -+ -+ if (regamma_size || tf != TRANSFER_FUNCTION_LINEAR) { -+ /* CRTC RGM goes into RGM LUT. -+ * -+ * Note: there is no implicit sRGB regamma here. We are using -+ * degamma calculation from color module to calculate the curve -+ * from a linear base. -+ */ -+ out_tf->type = TF_TYPE_DISTRIBUTED_POINTS; -+ out_tf->tf = tf; -+ out_tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; -+ -+ ret = __set_output_tf(out_tf, regamma_lut, regamma_size, has_rom); -+ } else { -+ /* -+ * No CRTC RGM means we can just put the block into bypass -+ * since we don't have any plane level adjustments using it. -+ */ -+ out_tf->type = TF_TYPE_BYPASS; -+ out_tf->tf = TRANSFER_FUNCTION_LINEAR; -+ } -+ -+ return ret; -+} -+ - /** - * __set_input_tf - calculates the input transfer function based on expected - * input space. -+ * @caps: dc color capabilities - * @func: transfer function - * @lut: lookup table that defines the color space - * @lut_size: size of respective lut. 
-@@ -313,27 +579,249 @@ static int __set_output_tf(struct dc_transfer_func *func, - * Returns: - * 0 in case of success. -ENOMEM if fails. - */ --static int __set_input_tf(struct dc_transfer_func *func, -+static int __set_input_tf(struct dc_color_caps *caps, struct dc_transfer_func *func, - const struct drm_color_lut *lut, uint32_t lut_size) - { - struct dc_gamma *gamma = NULL; - bool res; - -- gamma = dc_create_gamma(); -- if (!gamma) -- return -ENOMEM; -+ if (lut_size) { -+ gamma = dc_create_gamma(); -+ if (!gamma) -+ return -ENOMEM; - -- gamma->type = GAMMA_CUSTOM; -- gamma->num_entries = lut_size; -+ gamma->type = GAMMA_CUSTOM; -+ gamma->num_entries = lut_size; - -- __drm_lut_to_dc_gamma(lut, gamma, false); -+ __drm_lut_to_dc_gamma(lut, gamma, false); -+ } - -- res = mod_color_calculate_degamma_params(NULL, func, gamma, true); -- dc_gamma_release(&gamma); -+ res = mod_color_calculate_degamma_params(caps, func, gamma, gamma != NULL); -+ -+ if (gamma) -+ dc_gamma_release(&gamma); - - return res ? 
0 : -ENOMEM; - } - -+static enum dc_transfer_func_predefined -+amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf) -+{ -+ switch (tf) -+ { -+ default: -+ case AMDGPU_TRANSFER_FUNCTION_DEFAULT: -+ case AMDGPU_TRANSFER_FUNCTION_LINEAR: -+ return TRANSFER_FUNCTION_LINEAR; -+ case AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF: -+ case AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF: -+ return TRANSFER_FUNCTION_SRGB; -+ case AMDGPU_TRANSFER_FUNCTION_BT709_EOTF: -+ case AMDGPU_TRANSFER_FUNCTION_BT709_INV_EOTF: -+ return TRANSFER_FUNCTION_BT709; -+ case AMDGPU_TRANSFER_FUNCTION_PQ_EOTF: -+ case AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF: -+ return TRANSFER_FUNCTION_PQ; -+ case AMDGPU_TRANSFER_FUNCTION_UNITY: -+ return TRANSFER_FUNCTION_UNITY; -+ case AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF: -+ case AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF: -+ return TRANSFER_FUNCTION_GAMMA22; -+ case AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF: -+ case AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF: -+ return TRANSFER_FUNCTION_GAMMA24; -+ case AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF: -+ case AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF: -+ return TRANSFER_FUNCTION_GAMMA26; -+ } -+} -+ -+static void __to_dc_lut3d_color(struct dc_rgb *rgb, -+ const struct drm_color_lut lut, -+ int bit_precision) -+{ -+ rgb->red = drm_color_lut_extract(lut.red, bit_precision); -+ rgb->green = drm_color_lut_extract(lut.green, bit_precision); -+ rgb->blue = drm_color_lut_extract(lut.blue, bit_precision); -+} -+ -+static void __drm_3dlut_to_dc_3dlut(const struct drm_color_lut *lut, -+ uint32_t lut3d_size, -+ struct tetrahedral_params *params, -+ bool use_tetrahedral_9, -+ int bit_depth) -+{ -+ struct dc_rgb *lut0; -+ struct dc_rgb *lut1; -+ struct dc_rgb *lut2; -+ struct dc_rgb *lut3; -+ int lut_i, i; -+ -+ -+ if (use_tetrahedral_9) { -+ lut0 = params->tetrahedral_9.lut0; -+ lut1 = params->tetrahedral_9.lut1; -+ lut2 = params->tetrahedral_9.lut2; -+ lut3 = params->tetrahedral_9.lut3; -+ } else { -+ lut0 = params->tetrahedral_17.lut0; -+ lut1 = 
params->tetrahedral_17.lut1; -+ lut2 = params->tetrahedral_17.lut2; -+ lut3 = params->tetrahedral_17.lut3; -+ } -+ -+ for (lut_i = 0, i = 0; i < lut3d_size - 4; lut_i++, i += 4) { -+ /* We should consider the 3dlut RGB values are distributed -+ * along four arrays lut0-3 where the first sizes 1229 and the -+ * other 1228. The bit depth supported for 3dlut channel is -+ * 12-bit, but DC also supports 10-bit. -+ * -+ * TODO: improve color pipeline API to enable the userspace set -+ * bit depth and 3D LUT size/stride, as specified by VA-API. -+ */ -+ __to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth); -+ __to_dc_lut3d_color(&lut1[lut_i], lut[i + 1], bit_depth); -+ __to_dc_lut3d_color(&lut2[lut_i], lut[i + 2], bit_depth); -+ __to_dc_lut3d_color(&lut3[lut_i], lut[i + 3], bit_depth); -+ } -+ /* lut0 has 1229 points (lut_size/4 + 1) */ -+ __to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth); -+} -+ -+/* amdgpu_dm_atomic_lut3d - set DRM 3D LUT to DC stream -+ * @drm_lut3d: DRM CRTC (user) 3D LUT -+ * @drm_lut3d_size: size of 3D LUT -+ * @lut3d: DC 3D LUT -+ * -+ * Map DRM CRTC 3D LUT to DC 3D LUT and all necessary bits to program it -+ * on DCN MPC accordingly. -+ */ -+static void amdgpu_dm_atomic_lut3d(const struct drm_color_lut *drm_lut, -+ uint32_t drm_lut3d_size, -+ struct dc_3dlut *lut) -+{ -+ if (!drm_lut3d_size) { -+ lut->state.bits.initialized = 0; -+ } else { -+ /* Stride and bit depth are not programmable by API yet. -+ * Therefore, only supports 17x17x17 3D LUT (12-bit). 
-+ */ -+ lut->lut_3d.use_tetrahedral_9 = false; -+ lut->lut_3d.use_12bits = true; -+ lut->state.bits.initialized = 1; -+ __drm_3dlut_to_dc_3dlut(drm_lut, drm_lut3d_size, &lut->lut_3d, -+ lut->lut_3d.use_tetrahedral_9, -+ MAX_COLOR_3DLUT_BITDEPTH); -+ } -+} -+ -+static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut, -+ bool has_rom, -+ enum dc_transfer_func_predefined tf, -+ uint32_t shaper_size, -+ struct dc_transfer_func *func_shaper) -+{ -+ int ret = 0; -+ -+ if (shaper_size || tf != TRANSFER_FUNCTION_LINEAR) { -+ /* If DRM shaper LUT is set, we assume a linear color space -+ * (linearized by DRM degamma 1D LUT or not) -+ */ -+ func_shaper->type = TF_TYPE_DISTRIBUTED_POINTS; -+ func_shaper->tf = tf; -+ func_shaper->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; -+ -+ ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, has_rom); -+ } else { -+ func_shaper->type = TF_TYPE_BYPASS; -+ func_shaper->tf = TRANSFER_FUNCTION_LINEAR; -+ } -+ -+ return ret; -+} -+ -+static int amdgpu_dm_atomic_blend_lut(const struct drm_color_lut *blend_lut, -+ bool has_rom, -+ enum dc_transfer_func_predefined tf, -+ uint32_t blend_size, -+ struct dc_transfer_func *func_blend) -+{ -+ int ret = 0; -+ -+ if (blend_size || tf != TRANSFER_FUNCTION_LINEAR) { -+ /* DRM plane gamma LUT or TF means we are linearizing color -+ * space before blending (similar to degamma programming). As -+ * we don't have hardcoded curve support, or we use AMD color -+ * module to fill the parameters that will be translated to HW -+ * points. 
-+ */ -+ func_blend->type = TF_TYPE_DISTRIBUTED_POINTS; -+ func_blend->tf = tf; -+ func_blend->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; -+ -+ ret = __set_input_tf(NULL, func_blend, blend_lut, blend_size); -+ } else { -+ func_blend->type = TF_TYPE_BYPASS; -+ func_blend->tf = TRANSFER_FUNCTION_LINEAR; -+ } -+ -+ return ret; -+} -+ -+/* amdgpu_dm_lut3d_size - get expected size according to hw color caps -+ * @adev: amdgpu device -+ * @lut_size: default size -+ * -+ * Return: -+ * lut_size if DC 3D LUT is supported, zero otherwise. -+ */ -+static uint32_t amdgpu_dm_get_lut3d_size(struct amdgpu_device *adev, -+ uint32_t lut_size) -+{ -+ return adev->dm.dc->caps.color.dpp.hw_3d_lut ? lut_size : 0; -+} -+ -+/** -+ * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if DRM 3D -+ * LUT matches the hw supported size -+ * @adev: amdgpu device -+ * @crtc_state: the DRM CRTC state -+ * -+ * Verifies if post-blending (MPC) 3D LUT is supported by the HW (DCN 3.0 or -+ * newer) and if the DRM 3D LUT matches the supported size. -+ * -+ * Returns: -+ * 0 on success. -EINVAL if lut size are invalid. -+ */ -+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev, -+ struct drm_plane_state *plane_state) -+{ -+ struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); -+ const struct drm_color_lut *shaper = NULL, *lut3d = NULL; -+ uint32_t exp_size, size; -+ -+ /* shaper LUT is only available if 3D LUT color caps*/ -+ exp_size = amdgpu_dm_get_lut3d_size(adev, MAX_COLOR_LUT_ENTRIES); -+ shaper = __extract_blob_lut(dm_plane_state->shaper_lut, &size); -+ -+ if (shaper && size != exp_size) { -+ drm_dbg(&adev->ddev, -+ "Invalid Shaper LUT size. Should be %u but got %u.\n", -+ exp_size, size); -+ } -+ -+ exp_size = amdgpu_dm_get_lut3d_size(adev, MAX_COLOR_3DLUT_ENTRIES); -+ lut3d = __extract_blob_lut(dm_plane_state->lut3d, &size); -+ -+ if (lut3d && size != exp_size) { -+ drm_dbg(&adev->ddev, "Invalid 3D LUT size. 
Should be %u but got %u.\n", -+ exp_size, size); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ - /** - * amdgpu_dm_verify_lut_sizes - verifies if DRM luts match the hw supported sizes - * @crtc_state: the DRM CRTC state -@@ -401,9 +889,12 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) - const struct drm_color_lut *degamma_lut, *regamma_lut; - uint32_t degamma_size, regamma_size; - bool has_regamma, has_degamma; -+ enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_LINEAR; - bool is_legacy; - int r; - -+ tf = amdgpu_tf_to_dc_tf(crtc->regamma_tf); -+ - r = amdgpu_dm_verify_lut_sizes(&crtc->base); - if (r) - return r; -@@ -440,26 +931,22 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) - stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; - stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB; - -+ /* Note: although we pass has_rom as parameter here, we never -+ * actually use ROM because the color module only takes the ROM -+ * path if transfer_func->type == PREDEFINED. -+ * -+ * See more in mod_color_calculate_regamma_params() -+ */ - r = __set_legacy_tf(stream->out_transfer_func, regamma_lut, - regamma_size, has_rom); - if (r) - return r; -- } else if (has_regamma) { -- /* If atomic regamma, CRTC RGM goes into RGM LUT. */ -- stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; -- stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; -- -- r = __set_output_tf(stream->out_transfer_func, regamma_lut, -- regamma_size, has_rom); -+ } else { -+ regamma_size = has_regamma ? regamma_size : 0; -+ r = amdgpu_dm_set_atomic_regamma(stream, regamma_lut, -+ regamma_size, has_rom, tf); - if (r) - return r; -- } else { -- /* -- * No CRTC RGM means we can just put the block into bypass -- * since we don't have any plane level adjustments using it. 
-- */ -- stream->out_transfer_func->type = TF_TYPE_BYPASS; -- stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; - } - - /* -@@ -495,20 +982,10 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) - return 0; - } - --/** -- * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane. -- * @crtc: amdgpu_dm crtc state -- * @dc_plane_state: target DC surface -- * -- * Update the underlying dc_stream_state's input transfer function (ITF) in -- * preparation for hardware commit. The transfer function used depends on -- * the preparation done on the stream for color management. -- * -- * Returns: -- * 0 on success. -ENOMEM if mem allocation fails. -- */ --int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, -- struct dc_plane_state *dc_plane_state) -+static int -+map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, -+ struct dc_plane_state *dc_plane_state, -+ struct dc_color_caps *caps) - { - const struct drm_color_lut *degamma_lut; - enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; -@@ -531,8 +1008,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, - °amma_size); - ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES); - -- dc_plane_state->in_transfer_func->type = -- TF_TYPE_DISTRIBUTED_POINTS; -+ dc_plane_state->in_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; - - /* - * This case isn't fully correct, but also fairly -@@ -564,11 +1040,11 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, - dc_plane_state->in_transfer_func->tf = - TRANSFER_FUNCTION_LINEAR; - -- r = __set_input_tf(dc_plane_state->in_transfer_func, -+ r = __set_input_tf(caps, dc_plane_state->in_transfer_func, - degamma_lut, degamma_size); - if (r) - return r; -- } else if (crtc->cm_is_degamma_srgb) { -+ } else { - /* - * For legacy gamma support we need the regamma input - * in linear space. Assume that the input is sRGB. 
-@@ -577,14 +1053,213 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, - dc_plane_state->in_transfer_func->tf = tf; - - if (tf != TRANSFER_FUNCTION_SRGB && -- !mod_color_calculate_degamma_params(NULL, -- dc_plane_state->in_transfer_func, NULL, false)) -+ !mod_color_calculate_degamma_params(caps, -+ dc_plane_state->in_transfer_func, -+ NULL, false)) -+ return -ENOMEM; -+ } -+ -+ return 0; -+} -+ -+static int -+__set_dm_plane_degamma(struct drm_plane_state *plane_state, -+ struct dc_plane_state *dc_plane_state, -+ struct dc_color_caps *color_caps) -+{ -+ struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); -+ const struct drm_color_lut *degamma_lut; -+ enum amdgpu_transfer_function tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; -+ uint32_t degamma_size; -+ bool has_degamma_lut; -+ int ret; -+ -+ degamma_lut = __extract_blob_lut(dm_plane_state->degamma_lut, -+ °amma_size); -+ -+ has_degamma_lut = degamma_lut && -+ !__is_lut_linear(degamma_lut, degamma_size); -+ -+ tf = dm_plane_state->degamma_tf; -+ -+ /* If we don't have plane degamma LUT nor TF to set on DC, we have -+ * nothing to do here, return. -+ */ -+ if (!has_degamma_lut && tf == AMDGPU_TRANSFER_FUNCTION_DEFAULT) -+ return -EINVAL; -+ -+ dc_plane_state->in_transfer_func->tf = amdgpu_tf_to_dc_tf(tf); -+ -+ if (has_degamma_lut) { -+ ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES); -+ -+ dc_plane_state->in_transfer_func->type = -+ TF_TYPE_DISTRIBUTED_POINTS; -+ -+ ret = __set_input_tf(color_caps, dc_plane_state->in_transfer_func, -+ degamma_lut, degamma_size); -+ if (ret) -+ return ret; -+ } else { -+ dc_plane_state->in_transfer_func->type = -+ TF_TYPE_PREDEFINED; -+ -+ if (!mod_color_calculate_degamma_params(color_caps, -+ dc_plane_state->in_transfer_func, NULL, false)) - return -ENOMEM; -- } else { -- /* ...Otherwise we can just bypass the DGM block. 
*/ -- dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS; -- dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; -+ } -+ return 0; -+} -+ -+static int -+amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, -+ struct dc_plane_state *dc_plane_state, -+ struct dc_color_caps *color_caps) -+{ -+ struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); -+ enum amdgpu_transfer_function shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; -+ enum amdgpu_transfer_function blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; -+ const struct drm_color_lut *shaper_lut, *lut3d, *blend_lut; -+ uint32_t shaper_size, lut3d_size, blend_size; -+ int ret; -+ -+ /* We have nothing to do here, return */ -+ if (!plane_state->color_mgmt_changed) -+ return 0; -+ -+ dc_plane_state->hdr_mult = dc_fixpt_from_s3132(dm_plane_state->hdr_mult); -+ -+ shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, &shaper_size); -+ shaper_size = shaper_lut != NULL ? shaper_size : 0; -+ shaper_tf = dm_plane_state->shaper_tf; -+ lut3d = __extract_blob_lut(dm_plane_state->lut3d, &lut3d_size); -+ lut3d_size = lut3d != NULL ? lut3d_size : 0; -+ -+ amdgpu_dm_atomic_lut3d(lut3d, lut3d_size, dc_plane_state->lut3d_func); -+ ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, false, -+ amdgpu_tf_to_dc_tf(shaper_tf), -+ shaper_size, -+ dc_plane_state->in_shaper_func); -+ if (ret) { -+ drm_dbg_kms(plane_state->plane->dev, -+ "setting plane %d shaper LUT failed.\n", -+ plane_state->plane->index); -+ -+ return ret; -+ } -+ -+ blend_tf = dm_plane_state->blend_tf; -+ blend_lut = __extract_blob_lut(dm_plane_state->blend_lut, &blend_size); -+ blend_size = blend_lut != NULL ? 
blend_size : 0; -+ -+ ret = amdgpu_dm_atomic_blend_lut(blend_lut, false, -+ amdgpu_tf_to_dc_tf(blend_tf), -+ blend_size, dc_plane_state->blend_tf); -+ if (ret) { -+ drm_dbg_kms(plane_state->plane->dev, -+ "setting plane %d gamma lut failed.\n", -+ plane_state->plane->index); -+ -+ return ret; - } - - return 0; - } -+ -+/** -+ * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane. -+ * @crtc: amdgpu_dm crtc state -+ * @plane_state: DRM plane state -+ * @dc_plane_state: target DC surface -+ * -+ * Update the underlying dc_stream_state's input transfer function (ITF) in -+ * preparation for hardware commit. The transfer function used depends on -+ * the preparation done on the stream for color management. -+ * -+ * Returns: -+ * 0 on success. -ENOMEM if mem allocation fails. -+ */ -+int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, -+ struct drm_plane_state *plane_state, -+ struct dc_plane_state *dc_plane_state) -+{ -+ struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev); -+ struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); -+ struct drm_color_ctm2 *ctm = NULL; -+ struct dc_color_caps *color_caps = NULL; -+ bool has_crtc_cm_degamma; -+ int ret; -+ -+ ret = amdgpu_dm_verify_lut3d_size(adev, plane_state); -+ if (ret) { -+ drm_dbg_driver(&adev->ddev, "amdgpu_dm_verify_lut3d_size() failed\n"); -+ return ret; -+ } -+ -+ if (dc_plane_state->ctx && dc_plane_state->ctx->dc) -+ color_caps = &dc_plane_state->ctx->dc->caps.color; -+ -+ /* Initially, we can just bypass the DGM block. 
*/ -+ dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS; -+ dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; -+ -+ /* After, we start to update values according to color props */ -+ has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb); -+ -+ ret = __set_dm_plane_degamma(plane_state, dc_plane_state, color_caps); -+ if (ret == -ENOMEM) -+ return ret; -+ -+ /* We only have one degamma block available (pre-blending) for the -+ * whole color correction pipeline, so that we can't actually perform -+ * plane and CRTC degamma at the same time. Explicitly reject atomic -+ * updates when userspace sets both plane and CRTC degamma properties. -+ */ -+ if (has_crtc_cm_degamma && ret != -EINVAL){ -+ drm_dbg_kms(crtc->base.crtc->dev, -+ "doesn't support plane and CRTC degamma at the same time\n"); -+ return -EINVAL; -+ } -+ -+ /* If we are here, it means we don't have plane degamma settings, check -+ * if we have CRTC degamma waiting for mapping to pre-blending degamma -+ * block -+ */ -+ if (has_crtc_cm_degamma) { -+ /* AMD HW doesn't have post-blending degamma caps. When DRM -+ * CRTC atomic degamma is set, we maps it to DPP degamma block -+ * (pre-blending) or, on legacy gamma, we use DPP degamma to -+ * linearize (implicit degamma) from sRGB/BT709 according to -+ * the input space. -+ */ -+ ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state, color_caps); -+ if (ret) -+ return ret; -+ } -+ -+ /* Setup CRTC CTM. */ -+ if (dm_plane_state->ctm) { -+ ctm = (struct drm_color_ctm2 *)dm_plane_state->ctm->data; -+ -+ /* -+ * So far, if we have both plane and CRTC CTM, plane CTM takes -+ * the priority and we discard data for CRTC CTM, as -+ * implemented in dcn10_program_gamut_remap(). However, we -+ * have MPC gamut_remap_matrix from DCN3 family, therefore we -+ * can remap MPC programing of the matrix to MPC block and -+ * provide support for both DPP and MPC matrix at the same -+ * time. 
-+ */ -+ __drm_ctm2_to_dc_matrix(ctm, dc_plane_state->gamut_remap_matrix.matrix); -+ -+ dc_plane_state->gamut_remap_matrix.enable_remap = true; -+ dc_plane_state->input_csc_color_matrix.enable_adjustment = false; -+ } else { -+ /* Bypass CTM. */ -+ dc_plane_state->gamut_remap_matrix.enable_remap = false; -+ dc_plane_state->input_csc_color_matrix.enable_adjustment = false; -+ } -+ -+ return amdgpu_dm_plane_set_color_properties(plane_state, -+ dc_plane_state, color_caps); -+} -diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c -index 97b7a0b8a1c2..a05c210754d4 100644 ---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c -+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c -@@ -260,6 +260,7 @@ static struct drm_crtc_state *dm_crtc_duplicate_state(struct drm_crtc *crtc) - state->freesync_config = cur->freesync_config; - state->cm_has_degamma = cur->cm_has_degamma; - state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb; -+ state->regamma_tf = cur->regamma_tf; - state->crc_skip_count = cur->crc_skip_count; - state->mpo_requested = cur->mpo_requested; - /* TODO Duplicate dc_stream after objects are stream object is flattened */ -@@ -296,6 +297,70 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) - } - #endif - -+#ifdef AMD_PRIVATE_COLOR -+/** -+ * drm_crtc_additional_color_mgmt - enable additional color properties -+ * @crtc: DRM CRTC -+ * -+ * This function lets the driver enable post-blending CRTC regamma transfer -+ * function property in addition to DRM CRTC gamma LUT. Default value means -+ * linear transfer function, which is the default CRTC gamma LUT behaviour -+ * without this property. 
-+ */ -+static void -+dm_crtc_additional_color_mgmt(struct drm_crtc *crtc) -+{ -+ struct amdgpu_device *adev = drm_to_adev(crtc->dev); -+ -+ if(adev->dm.dc->caps.color.mpc.ogam_ram) -+ drm_object_attach_property(&crtc->base, -+ adev->mode_info.regamma_tf_property, -+ AMDGPU_TRANSFER_FUNCTION_DEFAULT); -+} -+ -+static int -+amdgpu_dm_atomic_crtc_set_property(struct drm_crtc *crtc, -+ struct drm_crtc_state *state, -+ struct drm_property *property, -+ uint64_t val) -+{ -+ struct amdgpu_device *adev = drm_to_adev(crtc->dev); -+ struct dm_crtc_state *acrtc_state = to_dm_crtc_state(state); -+ -+ if (property == adev->mode_info.regamma_tf_property) { -+ if (acrtc_state->regamma_tf != val) { -+ acrtc_state->regamma_tf = val; -+ acrtc_state->base.color_mgmt_changed |= 1; -+ } -+ } else { -+ drm_dbg_atomic(crtc->dev, -+ "[CRTC:%d:%s] unknown property [PROP:%d:%s]]\n", -+ crtc->base.id, crtc->name, -+ property->base.id, property->name); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+static int -+amdgpu_dm_atomic_crtc_get_property(struct drm_crtc *crtc, -+ const struct drm_crtc_state *state, -+ struct drm_property *property, -+ uint64_t *val) -+{ -+ struct amdgpu_device *adev = drm_to_adev(crtc->dev); -+ struct dm_crtc_state *acrtc_state = to_dm_crtc_state(state); -+ -+ if (property == adev->mode_info.regamma_tf_property) -+ *val = acrtc_state->regamma_tf; -+ else -+ return -EINVAL; -+ -+ return 0; -+} -+#endif -+ - /* Implemented only the options currently available for the driver */ - static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { - .reset = dm_crtc_reset_state, -@@ -314,6 +379,10 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { - #if defined(CONFIG_DEBUG_FS) - .late_register = amdgpu_dm_crtc_late_register, - #endif -+#ifdef AMD_PRIVATE_COLOR -+ .atomic_set_property = amdgpu_dm_atomic_crtc_set_property, -+ .atomic_get_property = amdgpu_dm_atomic_crtc_get_property, -+#endif - }; - - static void dm_crtc_helper_disable(struct drm_crtc *crtc) -@@ 
-489,6 +558,9 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, - - drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); - -+#ifdef AMD_PRIVATE_COLOR -+ dm_crtc_additional_color_mgmt(&acrtc->base); -+#endif - return 0; - - fail: -diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c -index cc74dd69acf2..17719e15cbe5 100644 ---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c -+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c -@@ -1333,8 +1333,14 @@ static void dm_drm_plane_reset(struct drm_plane *plane) - amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL); - WARN_ON(amdgpu_state == NULL); - -- if (amdgpu_state) -- __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base); -+ if (!amdgpu_state) -+ return; -+ -+ __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base); -+ amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; -+ amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT; -+ amdgpu_state->shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; -+ amdgpu_state->blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; - } - - static struct drm_plane_state * -@@ -1354,6 +1360,22 @@ dm_drm_plane_duplicate_state(struct drm_plane *plane) - dc_plane_state_retain(dm_plane_state->dc_state); - } - -+ if (dm_plane_state->degamma_lut) -+ drm_property_blob_get(dm_plane_state->degamma_lut); -+ if (dm_plane_state->ctm) -+ drm_property_blob_get(dm_plane_state->ctm); -+ if (dm_plane_state->shaper_lut) -+ drm_property_blob_get(dm_plane_state->shaper_lut); -+ if (dm_plane_state->lut3d) -+ drm_property_blob_get(dm_plane_state->lut3d); -+ if (dm_plane_state->blend_lut) -+ drm_property_blob_get(dm_plane_state->blend_lut); -+ -+ dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf; -+ dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult; -+ dm_plane_state->shaper_tf = old_dm_plane_state->shaper_tf; -+ dm_plane_state->blend_tf = 
old_dm_plane_state->blend_tf; -+ - return &dm_plane_state->base; - } - -@@ -1421,12 +1443,203 @@ static void dm_drm_plane_destroy_state(struct drm_plane *plane, - { - struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); - -+ if (dm_plane_state->degamma_lut) -+ drm_property_blob_put(dm_plane_state->degamma_lut); -+ if (dm_plane_state->ctm) -+ drm_property_blob_put(dm_plane_state->ctm); -+ if (dm_plane_state->lut3d) -+ drm_property_blob_put(dm_plane_state->lut3d); -+ if (dm_plane_state->shaper_lut) -+ drm_property_blob_put(dm_plane_state->shaper_lut); -+ if (dm_plane_state->blend_lut) -+ drm_property_blob_put(dm_plane_state->blend_lut); -+ - if (dm_plane_state->dc_state) - dc_plane_state_release(dm_plane_state->dc_state); - - drm_atomic_helper_plane_destroy_state(plane, state); - } - -+#ifdef AMD_PRIVATE_COLOR -+static void -+dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, -+ struct drm_plane *plane) -+{ -+ struct amdgpu_mode_info mode_info = dm->adev->mode_info; -+ struct dpp_color_caps dpp_color_caps = dm->dc->caps.color.dpp; -+ -+ /* Check HW color pipeline capabilities for DPP (pre-blending) before expose*/ -+ if (dpp_color_caps.dgam_ram || dpp_color_caps.gamma_corr) { -+ drm_object_attach_property(&plane->base, -+ mode_info.plane_degamma_lut_property, 0); -+ drm_object_attach_property(&plane->base, -+ mode_info.plane_degamma_lut_size_property, -+ MAX_COLOR_LUT_ENTRIES); -+ drm_object_attach_property(&plane->base, -+ dm->adev->mode_info.plane_degamma_tf_property, -+ AMDGPU_TRANSFER_FUNCTION_DEFAULT); -+ } -+ /* HDR MULT is always available */ -+ drm_object_attach_property(&plane->base, -+ dm->adev->mode_info.plane_hdr_mult_property, -+ AMDGPU_HDR_MULT_DEFAULT); -+ -+ /* Only enable plane CTM if both DPP and MPC gamut remap is available. 
*/ -+ if (dm->dc->caps.color.mpc.gamut_remap) -+ drm_object_attach_property(&plane->base, -+ dm->adev->mode_info.plane_ctm_property, 0); -+ -+ if (dpp_color_caps.hw_3d_lut) { -+ drm_object_attach_property(&plane->base, -+ mode_info.plane_shaper_lut_property, 0); -+ drm_object_attach_property(&plane->base, -+ mode_info.plane_shaper_lut_size_property, -+ MAX_COLOR_LUT_ENTRIES); -+ drm_object_attach_property(&plane->base, -+ mode_info.plane_shaper_tf_property, -+ AMDGPU_TRANSFER_FUNCTION_DEFAULT); -+ drm_object_attach_property(&plane->base, -+ mode_info.plane_lut3d_property, 0); -+ drm_object_attach_property(&plane->base, -+ mode_info.plane_lut3d_size_property, -+ MAX_COLOR_3DLUT_ENTRIES); -+ } -+ -+ if (dpp_color_caps.ogam_ram) { -+ drm_object_attach_property(&plane->base, -+ mode_info.plane_blend_lut_property, 0); -+ drm_object_attach_property(&plane->base, -+ mode_info.plane_blend_lut_size_property, -+ MAX_COLOR_LUT_ENTRIES); -+ drm_object_attach_property(&plane->base, -+ mode_info.plane_blend_tf_property, -+ AMDGPU_TRANSFER_FUNCTION_DEFAULT); -+ } -+} -+ -+static int -+dm_atomic_plane_set_property(struct drm_plane *plane, -+ struct drm_plane_state *state, -+ struct drm_property *property, -+ uint64_t val) -+{ -+ struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); -+ struct amdgpu_device *adev = drm_to_adev(plane->dev); -+ bool replaced = false; -+ int ret; -+ -+ if (property == adev->mode_info.plane_degamma_lut_property) { -+ ret = drm_property_replace_blob_from_id(plane->dev, -+ &dm_plane_state->degamma_lut, -+ val, -+ -1, sizeof(struct drm_color_lut), -+ &replaced); -+ dm_plane_state->base.color_mgmt_changed |= replaced; -+ return ret; -+ } else if (property == adev->mode_info.plane_degamma_tf_property) { -+ if (dm_plane_state->degamma_tf != val) { -+ dm_plane_state->degamma_tf = val; -+ dm_plane_state->base.color_mgmt_changed = 1; -+ } -+ } else if (property == adev->mode_info.plane_hdr_mult_property) { -+ if (dm_plane_state->hdr_mult != val) { -+ 
dm_plane_state->hdr_mult = val; -+ dm_plane_state->base.color_mgmt_changed = 1; -+ } -+ } else if (property == adev->mode_info.plane_ctm_property) { -+ ret = drm_property_replace_blob_from_id(plane->dev, -+ &dm_plane_state->ctm, -+ val, -+ sizeof(struct drm_color_ctm2), -1, -+ &replaced); -+ dm_plane_state->base.color_mgmt_changed |= replaced; -+ return ret; -+ } else if (property == adev->mode_info.plane_shaper_lut_property) { -+ ret = drm_property_replace_blob_from_id(plane->dev, -+ &dm_plane_state->shaper_lut, -+ val, -1, -+ sizeof(struct drm_color_lut), -+ &replaced); -+ dm_plane_state->base.color_mgmt_changed |= replaced; -+ return ret; -+ } else if (property == adev->mode_info.plane_shaper_tf_property) { -+ if (dm_plane_state->shaper_tf != val) { -+ dm_plane_state->shaper_tf = val; -+ dm_plane_state->base.color_mgmt_changed = 1; -+ } -+ } else if (property == adev->mode_info.plane_lut3d_property) { -+ ret = drm_property_replace_blob_from_id(plane->dev, -+ &dm_plane_state->lut3d, -+ val, -1, -+ sizeof(struct drm_color_lut), -+ &replaced); -+ dm_plane_state->base.color_mgmt_changed |= replaced; -+ return ret; -+ } else if (property == adev->mode_info.plane_blend_lut_property) { -+ ret = drm_property_replace_blob_from_id(plane->dev, -+ &dm_plane_state->blend_lut, -+ val, -1, -+ sizeof(struct drm_color_lut), -+ &replaced); -+ dm_plane_state->base.color_mgmt_changed |= replaced; -+ return ret; -+ } else if (property == adev->mode_info.plane_blend_tf_property) { -+ if (dm_plane_state->blend_tf != val) { -+ dm_plane_state->blend_tf = val; -+ dm_plane_state->base.color_mgmt_changed = 1; -+ } -+ } else { -+ drm_dbg_atomic(plane->dev, -+ "[PLANE:%d:%s] unknown property [PROP:%d:%s]]\n", -+ plane->base.id, plane->name, -+ property->base.id, property->name); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+static int -+dm_atomic_plane_get_property(struct drm_plane *plane, -+ const struct drm_plane_state *state, -+ struct drm_property *property, -+ uint64_t *val) -+{ -+ 
struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); -+ struct amdgpu_device *adev = drm_to_adev(plane->dev); -+ -+ if (property == adev->mode_info.plane_degamma_lut_property) { -+ *val = (dm_plane_state->degamma_lut) ? -+ dm_plane_state->degamma_lut->base.id : 0; -+ } else if (property == adev->mode_info.plane_degamma_tf_property) { -+ *val = dm_plane_state->degamma_tf; -+ } else if (property == adev->mode_info.plane_hdr_mult_property) { -+ *val = dm_plane_state->hdr_mult; -+ } else if (property == adev->mode_info.plane_ctm_property) { -+ *val = (dm_plane_state->ctm) ? -+ dm_plane_state->ctm->base.id : 0; -+ } else if (property == adev->mode_info.plane_shaper_lut_property) { -+ *val = (dm_plane_state->shaper_lut) ? -+ dm_plane_state->shaper_lut->base.id : 0; -+ } else if (property == adev->mode_info.plane_shaper_tf_property) { -+ *val = dm_plane_state->shaper_tf; -+ } else if (property == adev->mode_info.plane_lut3d_property) { -+ *val = (dm_plane_state->lut3d) ? -+ dm_plane_state->lut3d->base.id : 0; -+ } else if (property == adev->mode_info.plane_blend_lut_property) { -+ *val = (dm_plane_state->blend_lut) ? 
-+ dm_plane_state->blend_lut->base.id : 0; -+ } else if (property == adev->mode_info.plane_blend_tf_property) { -+ *val = dm_plane_state->blend_tf; -+ -+ } else { -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+#endif -+ - static const struct drm_plane_funcs dm_plane_funcs = { - .update_plane = drm_atomic_helper_update_plane, - .disable_plane = drm_atomic_helper_disable_plane, -@@ -1435,6 +1648,10 @@ static const struct drm_plane_funcs dm_plane_funcs = { - .atomic_duplicate_state = dm_drm_plane_duplicate_state, - .atomic_destroy_state = dm_drm_plane_destroy_state, - .format_mod_supported = dm_plane_format_mod_supported, -+#ifdef AMD_PRIVATE_COLOR -+ .atomic_set_property = dm_atomic_plane_set_property, -+ .atomic_get_property = dm_atomic_plane_get_property, -+#endif - }; - - int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, -@@ -1514,6 +1731,9 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, - - drm_plane_helper_add(plane, &dm_plane_helper_funcs); - -+#ifdef AMD_PRIVATE_COLOR -+ dm_atomic_plane_attach_color_mgmt_properties(dm, plane); -+#endif - /* Create (reset) the plane state */ - if (plane->funcs->reset) - plane->funcs->reset(plane); -diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c -index 3538973bd0c6..04b2e04b68f3 100644 ---- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c -+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c -@@ -349,20 +349,37 @@ bool cm_helper_translate_curve_to_hw_format(struct dc_context *ctx, - * segment is from 2^-10 to 2^1 - * There are less than 256 points, for optimization - */ -- seg_distr[0] = 3; -- seg_distr[1] = 4; -- seg_distr[2] = 4; -- seg_distr[3] = 4; -- seg_distr[4] = 4; -- seg_distr[5] = 4; -- seg_distr[6] = 4; -- seg_distr[7] = 4; -- seg_distr[8] = 4; -- seg_distr[9] = 4; -- seg_distr[10] = 1; -- -- region_start = -10; -- region_end = 1; -+ if (output_tf->tf == TRANSFER_FUNCTION_LINEAR) { -+ seg_distr[0] = 0; 
/* 2 */ -+ seg_distr[1] = 1; /* 4 */ -+ seg_distr[2] = 2; /* 4 */ -+ seg_distr[3] = 3; /* 8 */ -+ seg_distr[4] = 4; /* 16 */ -+ seg_distr[5] = 5; /* 32 */ -+ seg_distr[6] = 6; /* 64 */ -+ seg_distr[7] = 7; /* 128 */ -+ -+ region_start = -8; -+ region_end = 1; -+ } else { -+ seg_distr[0] = 3; /* 8 */ -+ seg_distr[1] = 4; /* 16 */ -+ seg_distr[2] = 4; -+ seg_distr[3] = 4; -+ seg_distr[4] = 4; -+ seg_distr[5] = 4; -+ seg_distr[6] = 4; -+ seg_distr[7] = 4; -+ seg_distr[8] = 4; -+ seg_distr[9] = 4; -+ seg_distr[10] = 1; /* 2 */ -+ /* total = 8*16 + 8 + 64 + 2 = */ -+ -+ region_start = -10; -+ region_end = 1; -+ } -+ -+ - } - - for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++) -@@ -375,16 +392,56 @@ bool cm_helper_translate_curve_to_hw_format(struct dc_context *ctx, - - j = 0; - for (k = 0; k < (region_end - region_start); k++) { -- increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]); -+ /* -+ * We're using an ugly-ish hack here. Our HW allows for -+ * 256 segments per region but SW_SEGMENTS is 16. -+ * SW_SEGMENTS has some undocumented relationship to -+ * the number of points in the tf_pts struct, which -+ * is 512, unlike what's suggested TRANSFER_FUNC_POINTS. -+ * -+ * In order to work past this dilemma we'll scale our -+ * increment by (1 << 4) and then do the inverse (1 >> 4) -+ * when accessing the elements in tf_pts. 
-+ * -+ * TODO: find a better way using SW_SEGMENTS and -+ * TRANSFER_FUNC_POINTS definitions -+ */ -+ increment = (NUMBER_SW_SEGMENTS << 4) / (1 << seg_distr[k]); - start_index = (region_start + k + MAX_LOW_POINT) * - NUMBER_SW_SEGMENTS; -- for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS; -+ for (i = (start_index << 4); i < (start_index << 4) + (NUMBER_SW_SEGMENTS << 4); - i += increment) { -+ struct fixed31_32 in_plus_one, in; -+ struct fixed31_32 value, red_value, green_value, blue_value; -+ uint32_t t = i & 0xf; -+ - if (j == hw_points - 1) - break; -- rgb_resulted[j].red = output_tf->tf_pts.red[i]; -- rgb_resulted[j].green = output_tf->tf_pts.green[i]; -- rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; -+ -+ in_plus_one = output_tf->tf_pts.red[(i >> 4) + 1]; -+ in = output_tf->tf_pts.red[i >> 4]; -+ value = dc_fixpt_sub(in_plus_one, in); -+ value = dc_fixpt_shr(dc_fixpt_mul_int(value, t), 4); -+ value = dc_fixpt_add(in, value); -+ red_value = value; -+ -+ in_plus_one = output_tf->tf_pts.green[(i >> 4) + 1]; -+ in = output_tf->tf_pts.green[i >> 4]; -+ value = dc_fixpt_sub(in_plus_one, in); -+ value = dc_fixpt_shr(dc_fixpt_mul_int(value, t), 4); -+ value = dc_fixpt_add(in, value); -+ green_value = value; -+ -+ in_plus_one = output_tf->tf_pts.blue[(i >> 4) + 1]; -+ in = output_tf->tf_pts.blue[i >> 4]; -+ value = dc_fixpt_sub(in_plus_one, in); -+ value = dc_fixpt_shr(dc_fixpt_mul_int(value, t), 4); -+ value = dc_fixpt_add(in, value); -+ blue_value = value; -+ -+ rgb_resulted[j].red = red_value; -+ rgb_resulted[j].green = green_value; -+ rgb_resulted[j].blue = blue_value; - j++; - } - } -diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c -index 255713ec29bb..fce9b33c0f88 100644 ---- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c -+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c -@@ -186,6 +186,43 @@ bool dcn30_set_input_transfer_func(struct dc *dc, - return result; - } - 
-+void dcn30_program_gamut_remap(struct pipe_ctx *pipe_ctx) -+{ -+ int i = 0; -+ struct dpp_grph_csc_adjustment dpp_adjust; -+ struct mpc_grph_gamut_adjustment mpc_adjust; -+ int mpcc_id = pipe_ctx->plane_res.hubp->inst; -+ struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc; -+ -+ memset(&dpp_adjust, 0, sizeof(dpp_adjust)); -+ dpp_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; -+ -+ if (pipe_ctx->plane_state && -+ pipe_ctx->plane_state->gamut_remap_matrix.enable_remap == true) { -+ dpp_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW; -+ for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++) -+ dpp_adjust.temperature_matrix[i] = -+ pipe_ctx->plane_state->gamut_remap_matrix.matrix[i]; -+ } -+ -+ pipe_ctx->plane_res.dpp->funcs->dpp_set_gamut_remap(pipe_ctx->plane_res.dpp, -+ &dpp_adjust); -+ -+ memset(&mpc_adjust, 0, sizeof(mpc_adjust)); -+ mpc_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; -+ -+ if (pipe_ctx->top_pipe == NULL) { -+ if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) { -+ mpc_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW; -+ for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++) -+ mpc_adjust.temperature_matrix[i] = -+ pipe_ctx->stream->gamut_remap_matrix.matrix[i]; -+ } -+ } -+ -+ mpc->funcs->set_gamut_remap(mpc, mpcc_id, &mpc_adjust); -+} -+ - bool dcn30_set_output_transfer_func(struct dc *dc, - struct pipe_ctx *pipe_ctx, - const struct dc_stream_state *stream) -diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h -index ce19c54097f8..e557e2b98618 100644 ---- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h -+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h -@@ -58,6 +58,9 @@ bool dcn30_set_blend_lut(struct pipe_ctx *pipe_ctx, - bool dcn30_set_input_transfer_func(struct dc *dc, - struct pipe_ctx *pipe_ctx, - const struct dc_plane_state *plane_state); -+ -+void dcn30_program_gamut_remap(struct pipe_ctx 
*pipe_ctx); -+ - bool dcn30_set_output_transfer_func(struct dc *dc, - struct pipe_ctx *pipe_ctx, - const struct dc_stream_state *stream); -diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c -index 61205cdbe2d5..fdbe3d42cd7b 100644 ---- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c -+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c -@@ -33,7 +33,7 @@ - #include "dcn301_init.h" - - static const struct hw_sequencer_funcs dcn301_funcs = { -- .program_gamut_remap = dcn10_program_gamut_remap, -+ .program_gamut_remap = dcn30_program_gamut_remap, - .init_hw = dcn10_init_hw, - .power_down_on_boot = dcn10_power_down_on_boot, - .apply_ctx_to_hw = dce110_apply_ctx_to_hw, -diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h -index d4cf7ead1d87..84da1dd34efd 100644 ---- a/drivers/gpu/drm/amd/display/include/fixed31_32.h -+++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h -@@ -69,6 +69,18 @@ static const struct fixed31_32 dc_fixpt_epsilon = { 1LL }; - static const struct fixed31_32 dc_fixpt_half = { 0x80000000LL }; - static const struct fixed31_32 dc_fixpt_one = { 0x100000000LL }; - -+static inline struct fixed31_32 dc_fixpt_from_s3132(__u64 x) -+{ -+ struct fixed31_32 val; -+ -+ /* If negative, convert to 2's complement. */ -+ if (x & (1ULL << 63)) -+ x = -(x & ~(1ULL << 63)); -+ -+ val.value = x; -+ return val; -+} -+ - /* - * @brief - * Initialization routines -diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c -index dc01c43f6193..d72c22dcf685 100644 ---- a/drivers/gpu/drm/arm/malidp_crtc.c -+++ b/drivers/gpu/drm/arm/malidp_crtc.c -@@ -221,7 +221,7 @@ static int malidp_crtc_atomic_check_ctm(struct drm_crtc *crtc, - - /* - * The size of the ctm is checked in -- * drm_atomic_replace_property_blob_from_id. -+ * drm_property_replace_blob_from_id. 
- */ - ctm = (struct drm_color_ctm *)state->ctm->data; - for (i = 0; i < ARRAY_SIZE(ctm->matrix); ++i) { -diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c -index c277b198fa3f..c3df45f90145 100644 ---- a/drivers/gpu/drm/drm_atomic.c -+++ b/drivers/gpu/drm/drm_atomic.c -@@ -733,6 +733,7 @@ static void drm_atomic_plane_print_state(struct drm_printer *p, - drm_get_color_encoding_name(state->color_encoding)); - drm_printf(p, "\tcolor-range=%s\n", - drm_get_color_range_name(state->color_range)); -+ drm_printf(p, "\tcolor_mgmt_changed=%d\n", state->color_mgmt_changed); - - if (plane->funcs->atomic_print_state) - plane->funcs->atomic_print_state(p, state); -diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c -index 784e63d70a42..25bb0859fda7 100644 ---- a/drivers/gpu/drm/drm_atomic_state_helper.c -+++ b/drivers/gpu/drm/drm_atomic_state_helper.c -@@ -338,6 +338,7 @@ void __drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane, - state->fence = NULL; - state->commit = NULL; - state->fb_damage_clips = NULL; -+ state->color_mgmt_changed = false; - } - EXPORT_SYMBOL(__drm_atomic_helper_plane_duplicate_state); - -diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c -index dfec479830e4..f72ef6493340 100644 ---- a/drivers/gpu/drm/drm_property.c -+++ b/drivers/gpu/drm/drm_property.c -@@ -751,6 +751,55 @@ bool drm_property_replace_blob(struct drm_property_blob **blob, - } - EXPORT_SYMBOL(drm_property_replace_blob); - -+/** -+ * drm_property_replace_blob_from_id - replace a blob property taking a reference -+ * @dev: DRM device -+ * @blob: a pointer to the member blob to be replaced -+ * @blob_id: the id of the new blob to replace with -+ * @expected_size: expected size of the blob property -+ * @expected_elem_size: expected size of an element in the blob property -+ * @replaced: if the blob was in fact replaced -+ * -+ * Look up the new blob from id, take its reference, check 
expected sizes of -+ * the blob and its element and replace the old blob by the new one. Advertise -+ * if the replacement operation was successful. -+ * -+ * Return: true if the blob was in fact replaced. -EINVAL if the new blob was -+ * not found or sizes don't match. -+ */ -+int drm_property_replace_blob_from_id(struct drm_device *dev, -+ struct drm_property_blob **blob, -+ uint64_t blob_id, -+ ssize_t expected_size, -+ ssize_t expected_elem_size, -+ bool *replaced) -+{ -+ struct drm_property_blob *new_blob = NULL; -+ -+ if (blob_id != 0) { -+ new_blob = drm_property_lookup_blob(dev, blob_id); -+ if (new_blob == NULL) -+ return -EINVAL; -+ -+ if (expected_size > 0 && -+ new_blob->length != expected_size) { -+ drm_property_blob_put(new_blob); -+ return -EINVAL; -+ } -+ if (expected_elem_size > 0 && -+ new_blob->length % expected_elem_size != 0) { -+ drm_property_blob_put(new_blob); -+ return -EINVAL; -+ } -+ } -+ -+ *replaced |= drm_property_replace_blob(blob, new_blob); -+ drm_property_blob_put(new_blob); -+ -+ return 0; -+} -+EXPORT_SYMBOL(drm_property_replace_blob_from_id); -+ - int drm_mode_getblob_ioctl(struct drm_device *dev, - void *data, struct drm_file *file_priv) - { -diff --git a/include/drm/drm_mode_object.h b/include/drm/drm_mode_object.h -index 912f1e415685..08d7a7f0188f 100644 ---- a/include/drm/drm_mode_object.h -+++ b/include/drm/drm_mode_object.h -@@ -60,7 +60,7 @@ struct drm_mode_object { - void (*free_cb)(struct kref *kref); - }; - --#define DRM_OBJECT_MAX_PROPERTY 24 -+#define DRM_OBJECT_MAX_PROPERTY 64 - /** - * struct drm_object_properties - property tracking for &drm_mode_object - */ -diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h -index 79d62856defb..4f87803b3ea1 100644 ---- a/include/drm/drm_plane.h -+++ b/include/drm/drm_plane.h -@@ -237,6 +237,13 @@ struct drm_plane_state { - - /** @state: backpointer to global drm_atomic_state */ - struct drm_atomic_state *state; -+ -+ /** -+ * @color_mgmt_changed: Color management 
properties have changed. Used -+ * by the atomic helpers and drivers to steer the atomic commit control -+ * flow. -+ */ -+ bool color_mgmt_changed : 1; - }; - - static inline struct drm_rect -diff --git a/include/drm/drm_property.h b/include/drm/drm_property.h -index 65bc9710a470..082f29156b3e 100644 ---- a/include/drm/drm_property.h -+++ b/include/drm/drm_property.h -@@ -279,6 +279,12 @@ struct drm_property_blob *drm_property_create_blob(struct drm_device *dev, - const void *data); - struct drm_property_blob *drm_property_lookup_blob(struct drm_device *dev, - uint32_t id); -+int drm_property_replace_blob_from_id(struct drm_device *dev, -+ struct drm_property_blob **blob, -+ uint64_t blob_id, -+ ssize_t expected_size, -+ ssize_t expected_elem_size, -+ bool *replaced); - int drm_property_replace_global_blob(struct drm_device *dev, - struct drm_property_blob **replace, - size_t length, -diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h -index ea1b639bcb28..cea5653e4020 100644 ---- a/include/uapi/drm/drm_mode.h -+++ b/include/uapi/drm/drm_mode.h -@@ -846,6 +846,14 @@ struct drm_color_ctm { - __u64 matrix[9]; - }; - -+struct drm_color_ctm2 { -+ /* -+ * Conversion matrix in S31.32 sign-magnitude -+ * (not two's complement!) format. 
-+ */ -+ __u64 matrix[12]; -+}; -+ - struct drm_color_lut { - /* - * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and --- -2.43.0.rc2 - diff --git a/patches/nobara/0001-drm-i915-quirks-disable-async-flipping-on-specific-d.patch b/patches/nobara/0001-drm-i915-quirks-disable-async-flipping-on-specific-d.patch deleted file mode 100644 index 757f777..0000000 --- a/patches/nobara/0001-drm-i915-quirks-disable-async-flipping-on-specific-d.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Jan200101 -Date: Mon, 14 Nov 2022 20:13:53 +0100 -Subject: [PATCH] drm/i915/quirks: disable async flipping on specific devices - -Signed-off-by: Jan200101 ---- - drivers/gpu/drm/i915/display/intel_quirks.c | 20 ++++++++++++++++++++ - 1 file changed, 20 insertions(+) - -diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c -index a280448df771..1596114dd9ae 100644 ---- a/drivers/gpu/drm/i915/display/intel_quirks.c -+++ b/drivers/gpu/drm/i915/display/intel_quirks.c -@@ -14,6 +14,12 @@ static void intel_set_quirk(struct drm_i915_private *i915, enum intel_quirk_id q - i915->display.quirks.mask |= BIT(quirk); - } - -+static void quirk_async_page_flips_force_disable(struct drm_i915_private *i915) -+{ -+ i915->drm.mode_config.async_page_flip = false; -+ drm_info(&i915->drm, "applying async flip disable quirk\n"); -+} -+ - /* - * Some machines (Lenovo U160) do not work with SSC on LVDS for some reason - */ -@@ -136,6 +142,20 @@ static const struct intel_dmi_quirk intel_dmi_quirks[] = { - }, - .hook = quirk_no_pps_backlight_power_hook, - }, -+ { -+ .dmi_id_list = &(const struct dmi_system_id[]) { -+ { -+ .callback = NULL, -+ .ident = "ASUS TUF DASH F15", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), -+ DMI_MATCH(DMI_PRODUCT_NAME, "ASUS TUF Dash F15 FX516PC_FX516PC"), -+ }, -+ }, -+ { } -+ }, -+ .hook = quirk_async_page_flips_force_disable, -+ 
}, - }; - - static struct intel_quirk intel_quirks[] = { diff --git a/patches/nobara/0001-hid-asus-nero-patches-rogue.patch b/patches/nobara/0001-hid-asus-nero-patches-rogue.patch deleted file mode 100644 index 2ca98bd..0000000 --- a/patches/nobara/0001-hid-asus-nero-patches-rogue.patch +++ /dev/null @@ -1,972 +0,0 @@ -diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c -index fd61dba88..3220d96fc 100644 ---- a/drivers/hid/hid-asus.c -+++ b/drivers/hid/hid-asus.c -@@ -26,7 +26,9 @@ - #include - #include - #include -+#include - #include -+#include - #include - #include /* For to_usb_interface for T100 touchpad intf check */ - #include -@@ -94,6 +96,435 @@ MODULE_DESCRIPTION("Asus HID Keyboard and TouchPad"); - - #define TRKID_SGN ((TRKID_MAX + 1) >> 1) - -+/* -+ * USB buffers to be used in a control transfer to make the joystick change buttons mode and scancodes -+ * 0 is default (game_mode with back buttons sending F17 and F18 instead of F15 for both as when unconfigured) -+ * 1 is mouse mode: back buttons still are F17 and F18 -+ * 2 is macro mode -+ */ -+static const u8 rc71l_mode_switch_commands[][23][64] = { -+ { -+ { -+ 0x5A, 0xD1, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x01, 0x2C, 0x01, 
0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x05, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x0A, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x03, 0x8C, 0x88, 0x76, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x02, 0x2C, 0x01, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x23, 0x00, 0x00, 0x00, 0x01, 0x0C, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x0D, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x03, 0x2C, 0x01, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x08, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 
0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x04, 0x2C, 0x01, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x06, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x05, 0x2C, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x05, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x31, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x06, 0x2C, 0x01, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x4D, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x07, 0x2C, 0x01, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x12, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x08, 0x2C, 0x02, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x30, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x09, 0x2C, 0x01, 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x0E, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0F, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x06, 0x02, 0x64, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x04, 0x04, 0x00, 0x64, 0x00, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x05, 0x04, 0x00, 0x64, 0x00, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ } -+ }, -+ { -+ { -+ 0x5A, 0xD1, 0x01, 0x01, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x01, 0x2C, 0x02, 0x00, 0x98, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x05, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x99, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x03, 0x8C, 0x88, 0x76, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x02, 0x2C, 0x02, 0x00, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x23, 0x00, 0x00, 0x00, 0x02, 0x00, 0x9B, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x0D, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x03, 0x2C, 0x02, 0x00, 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x04, 0x2C, 0x02, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x05, 0x2C, 0x02, 0x00, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x05, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x31, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x06, 0x2C, 0x02, 0x00, 0x97, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x4D, 0x00, 0x00, 0x00, 0x02, 0x00, 0x96, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x07, 0x2C, 0x01, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x12, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x08, 0x2C, 0x02, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x30, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x09, 0x2C, 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x88, 0x0D, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 
0x00, 0x02, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0F, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x06, 0x02, 0x64, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x04, 0x04, 0x00, 0x64, 0x00, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x05, 0x04, 0x00, 0x64, 0x00, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ } -+ }, -+ { -+ { -+ 0x5A, 0xD1, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x01, 0x2C, 0x01, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x05, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x0A, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x03, 0x8C, 0x88, 0x76, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x02, 0x2C, 0x01, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x23, 0x00, 0x00, 0x00, 0x01, 0x0C, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x0D, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x03, 0x2C, 0x01, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x08, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x04, 0x2C, 0x01, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x06, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x05, 0x2C, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, -+ 0x05, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x31, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x06, 0x2C, 0x01, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x4D, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x07, 0x2C, 0x01, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x12, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x08, 0x2C, 0x02, 0x00, 0x8E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x02, 0x00, 0x8E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x8F, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x8F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x02, 0x09, 0x2C, 0x01, 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x0E, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x0F, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x06, 
0x02, 0x64, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x04, 0x04, 0x00, 0x64, 0x00, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ }, -+ { -+ 0x5A, 0xD1, 0x05, 0x04, 0x00, 0x64, 0x00, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+ } -+ } -+}; -+ - struct asus_kbd_leds { - struct led_classdev cdev; - struct hid_device *hdev; -@@ -103,6 +534,25 @@ struct asus_kbd_leds { - bool removed; - }; - -+enum rc71l_controller_mode { -+ rc71l_gamepad_mode, -+ rc71l_mouse_mode, -+ rc71l_macro_mode, -+}; -+ -+struct asus_rc71l { -+ unsigned int usb_pipe; -+ -+ struct platform_device *mcu_dev; -+ -+ struct mutex mutex; /* Mutex that protects everything below it */ -+ -+ enum rc71l_controller_mode mode; -+ -+ u8 usb_in_buf[32]; -+ u8 usb_out_buf[64]; /* A temporary buffer to hold data that gets sent over USB (must be accessed upon locking the appropriate mutex) */ -+}; -+ - struct asus_touchpad_info { - int max_x; - int max_y; -@@ -127,6 +577,7 @@ struct asus_drvdata { - int battery_stat; - bool battery_in_query; - unsigned long 
battery_next_query; -+ struct asus_rc71l *rc71l_data; - }; - - static int asus_report_battery(struct asus_drvdata *, u8 *, int); -@@ -189,6 +640,245 @@ static const struct asus_touchpad_info medion_e1239t_tp = { - .report_size = 32 /* 2 byte header + 5 * 5 + 5 byte footer */, - }; - -+/** -+ * This function reads data over the USB device on the ROG Ally. -+ * Unlike outgoing traffic the inbound always performs 32-bytes transfers. -+ * -+ * PRE: -+ * - rc71l internal mutex MUST be locked -+ */ -+static int rc71l_usb_read(struct hid_device * hdev) { -+ struct asus_drvdata *drvdata = (struct asus_drvdata*)hid_get_drvdata(hdev); -+ if (drvdata == NULL) { -+ return -EINVAL; -+ } -+ -+ struct asus_rc71l *rc71l_drvdata = drvdata->rc71l_data; -+ if (rc71l_drvdata == NULL) { -+ return -EINVAL; -+ } -+ -+ struct usb_interface *intf = to_usb_interface(hdev->dev.parent); -+ struct usb_device *dev = interface_to_usbdev(intf); -+ -+ const int retval = usb_control_msg_recv(dev, 0x80, 0x01, 0xa1, 0x035A, 0x0002, (void*)&rc71l_drvdata->usb_in_buf[0], 32, 250, GFP_KERNEL); -+ -+ if (retval < 0) { -+ hid_err(hdev, "Ally read failed performing control read, error %d\n", retval); -+ goto rc71l_usb_read_err; -+ } -+ -+ const char* b = (const u8*)&rc71l_drvdata->usb_in_buf[0]; -+ hid_info(hdev, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x", -+ b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9], -+ b[10], b[11], b[12], b[13], b[14], b[15], b[16], b[17], b[18], b[19], -+ b[20], b[21], b[22], b[23], b[24], b[25], b[26], b[27], b[28], b[29], -+ b[30], b[31] -+ ); -+ -+rc71l_usb_read_err: -+ return retval; -+} -+ -+/** -+ * This function writes a command over the USB device on the ROG Ally. -+ * The ROG Ally accepts 64-bytes long messages as commands: as such at most 64-bytes will be sent -+ * and unused bytes will be zeroed out. 
-+ * -+ * PRE: -+ * - rc71l internal mutex MUST be locked -+ */ -+static int rc71l_usb_write(struct hid_device * hdev, const void* buf, size_t buf_sz) { -+ struct asus_drvdata *drvdata = (struct asus_drvdata*)hid_get_drvdata(hdev); -+ -+ if (drvdata == NULL) { -+ return -EINVAL; -+ } -+ -+ struct asus_rc71l *rc71l_drvdata = drvdata->rc71l_data; -+ if (rc71l_drvdata == NULL) { -+ return -EINVAL; -+ } -+ -+ struct usb_interface *intf = to_usb_interface(hdev->dev.parent); -+ struct usb_device *dev = interface_to_usbdev(intf); -+ -+ if (buf_sz > 64) { -+ hid_err(hdev, "Bug in the kernel: cannot write more than 64-bytes\n"); -+ -+ return -EINVAL; -+ } -+ -+ // make sure bytes in excess will be zeroes and copy the user-provided buffer -+ memset((void*)&rc71l_drvdata->usb_out_buf[0], 0, 64); -+ memcpy((void*)&rc71l_drvdata->usb_out_buf[0], buf, buf_sz); -+ -+ /* send the data out the bulk port */ -+ const int retval = usb_control_msg(dev, rc71l_drvdata->usb_pipe, 0x09, 0x21, 0x035A, 0x0002, (void*)&rc71l_drvdata->usb_out_buf[0], 64, 250); -+ if (retval < 0) { -+ hid_err(hdev, -+ "Failed submitting control write error %d\n", retval); -+ -+ goto rc71l_usb_write_err; -+ } -+ -+rc71l_usb_write_err: -+ return retval < 0 ? 
retval : 0; -+} -+ -+static int rc71l_mode_change(struct hid_device * hdev, enum rc71l_controller_mode new_mode) { -+ struct asus_drvdata *drvdata = (struct asus_drvdata*)hid_get_drvdata(hdev); -+ if (drvdata == NULL) { -+ return -EINVAL; -+ } -+ -+ struct asus_rc71l *rc71l_drvdata = drvdata->rc71l_data; -+ if (rc71l_drvdata == NULL) { -+ return -EINVAL; -+ } -+ -+ int ret = 0; -+ -+ size_t packets_group = 0; -+ switch (new_mode) { -+ case rc71l_gamepad_mode: -+ packets_group = 0; -+ break; -+ -+ case rc71l_mouse_mode: -+ packets_group = 1; -+ break; -+ -+ case rc71l_macro_mode: -+ packets_group = 2; -+ break; -+ -+ default: -+ return -EINVAL; -+ } -+ -+ for (int i = 0; (i < 23) && (ret == 0); ++i) { -+ ret = rc71l_usb_write(hdev, (const void*)&rc71l_mode_switch_commands[packets_group][i][0], 64); -+ if (ret > 0) { -+ hid_err(hdev, "Ally controller mode switch %d/23 error %d\n", i, ret); -+ goto rc71l_mode_change_err; -+ } -+ } -+ -+ // controller mode has been switched successfully: change that in driver data -+ if (ret == 0) { -+ hid_info(hdev, "ROG Ally [RC71L] controller mode switch succeeded\n"); -+ rc71l_drvdata->mode = new_mode; -+ } -+ -+rc71l_mode_change_err: -+ return ret; -+} -+ -+static ssize_t __maybe_unused mode_show(struct device *raw_dev, struct device_attribute *attr, char *buf) { -+ struct platform_device *const pdev = to_platform_device(raw_dev); -+ struct hid_device *const hdev = platform_get_drvdata(pdev); -+ if (hdev == NULL) { -+ return -EINVAL; -+ } -+ -+ struct asus_drvdata *const drvdata = (struct asus_drvdata*)hid_get_drvdata(hdev); -+ if (drvdata == NULL) { -+ return -EINVAL; -+ } -+ -+ struct asus_rc71l *const rc71l_drvdata = drvdata->rc71l_data; -+ if (rc71l_drvdata == NULL) { -+ return -EINVAL; -+ } -+ -+ mutex_lock(&rc71l_drvdata->mutex); -+ int current_mode = 0; -+ switch (rc71l_drvdata->mode) { -+ case rc71l_gamepad_mode: -+ current_mode = 0; -+ break; -+ -+ case rc71l_mouse_mode: -+ current_mode = 1; -+ break; -+ -+ case 
rc71l_macro_mode: -+ current_mode = 2; -+ break; -+ -+ default: -+ mutex_unlock(&rc71l_drvdata->mutex); -+ return -EINVAL; -+ } -+ mutex_unlock(&rc71l_drvdata->mutex); -+ -+ return sysfs_emit(buf, "%d\n", (int)current_mode); -+} -+ -+static ssize_t __maybe_unused mode_store(struct device *raw_dev, struct device_attribute *attr, const char *buf, size_t count) { -+ struct platform_device *const pdev = to_platform_device(raw_dev); -+ struct hid_device *const hdev = platform_get_drvdata(pdev); -+ if (hdev == NULL) { -+ return -EINVAL; -+ } -+ -+ struct asus_drvdata *const drvdata = (struct asus_drvdata*)hid_get_drvdata(hdev); -+ if (drvdata == NULL) { -+ return -EINVAL; -+ } -+ -+ struct asus_rc71l *const rc71l_drvdata = drvdata->rc71l_data; -+ if (rc71l_drvdata == NULL) { -+ return -EINVAL; -+ } -+ -+ int res = -EINVAL; -+ int val = -EINVAL; -+ res = kstrtoint(buf, 0, &val); -+ if (res) -+ return res; -+ -+ switch (val) { -+ case 0: -+ mutex_lock(&rc71l_drvdata->mutex); -+ res = rc71l_mode_change(hdev, rc71l_gamepad_mode); -+ mutex_unlock(&rc71l_drvdata->mutex); -+ break; -+ -+ case 1: -+ mutex_lock(&rc71l_drvdata->mutex); -+ res = rc71l_mode_change(hdev, rc71l_mouse_mode); -+ mutex_unlock(&rc71l_drvdata->mutex); -+ break; -+ -+ case 2: -+ mutex_lock(&rc71l_drvdata->mutex); -+ res = rc71l_mode_change(hdev, rc71l_macro_mode); -+ mutex_unlock(&rc71l_drvdata->mutex); -+ break; -+ -+ default: -+ return -EINVAL; -+ } -+ -+ hid_err(hdev, "Ally controller mode switch to %d mode op result: %d\n", val, res); -+ -+ return count; -+} -+ -+DEVICE_ATTR_RW(mode); -+ -+static struct attribute *rc71l_input_attrs[] = { -+ &dev_attr_mode.attr, -+ NULL -+}; -+ -+static const struct attribute_group mcu_attr_group = { -+ .name = "input", -+ .attrs = rc71l_input_attrs, -+}; -+ - static void asus_report_contact_down(struct asus_drvdata *drvdat, - int toolType, u8 *data) - { -@@ -386,7 +1076,7 @@ static int asus_kbd_set_report(struct hid_device *hdev, u8 *buf, size_t buf_size - unsigned char 
*dmabuf; - int ret; - -- dmabuf = kmemdup(buf, buf_size, GFP_KERNEL); -+ dmabuf = kmemdup((const void*)buf, buf_size, GFP_KERNEL); - if (!dmabuf) - return -ENOMEM; - -@@ -897,6 +1587,10 @@ static int asus_input_mapping(struct hid_device *hdev, - case 0xb3: asus_map_key_clear(KEY_PROG3); break; /* Fn+Left next aura */ - case 0x6a: asus_map_key_clear(KEY_F13); break; /* Screenpad toggle */ - case 0x4b: asus_map_key_clear(KEY_F14); break; /* Arrows/Pg-Up/Dn toggle */ -+ case 0xa5: asus_map_key_clear(KEY_F15); break; /* ROG Ally left back */ -+ case 0xa6: asus_map_key_clear(KEY_F16); break; /* ROG Ally QAM button */ -+ case 0xa7: asus_map_key_clear(KEY_F17); break; /* ROG Ally ROG long-press */ -+ case 0xa8: asus_map_key_clear(KEY_F18); break; /* ROG Ally ROG long-press-release */ - - - default: -@@ -1000,16 +1694,108 @@ static int asus_start_multitouch(struct hid_device *hdev) - return 0; - } - -+#ifdef CONFIG_PM - static int __maybe_unused asus_reset_resume(struct hid_device *hdev) - { -+ int ret = 0; -+ - struct asus_drvdata *drvdata = hid_get_drvdata(hdev); -+ if (drvdata != NULL) { -+ return -EINVAL; -+ } - - if (drvdata->tp) - return asus_start_multitouch(hdev); - -- return 0; -+ return ret; - } - -+static int __maybe_unused asus_resume(struct hid_device *hdev) -+{ -+ int ret = 0; -+ struct asus_drvdata *drvdata = hid_get_drvdata(hdev); -+/* -+ // Controller mode is kept on device sleep -+ if (dmi_match(DMI_PRODUCT_NAME, "ROG Ally RC71L_RC71L")) -+ { -+ // Apply the joystick mode switch -+ ret = rog_ally_controller_mode_change(hdev, game_mode); -+ -+ hid_err(hdev, "Asus wake, restore controller %d\n", ret); -+ } -+*/ -+ -+ struct asus_rc71l *rc71l_drvdata = drvdata->rc71l_data; -+ if (rc71l_drvdata != NULL) { -+ mutex_lock(&rc71l_drvdata->mutex); -+ ret = rc71l_mode_change(hdev, rc71l_drvdata->mode); -+ mutex_unlock(&rc71l_drvdata->mutex); -+ -+ if (ret < 0) { -+ hid_err(hdev, "ROG Ally [RC71L] failed to reset controller mode: %d\n", ret); -+ goto 
asus_resume_err; -+ } -+ } -+ -+ -+ /* -+ * On some devices such as the Asus RC71L leds are reset to default after sleep and sysfs attribute will report -+ * something that won't be true: resetting the user-provided value is necessary to maintain coherency and avoid -+ * flashing full brightness leds in face of the user. -+ */ -+ if (drvdata->kbd_backlight) { -+ const u8 buf[] = { FEATURE_KBD_REPORT_ID, 0xba, 0xc5, 0xc4, drvdata->kbd_backlight->cdev.brightness }; -+ ret = asus_kbd_set_report(hdev, buf, sizeof(buf)); -+ if (ret < 0) { -+ hid_err(hdev, "Asus failed to set keyboard backlight: %d\n", ret); -+ goto asus_resume_err; -+ } -+ -+ hid_err(hdev, "Asus ROG Ally asus_reset_resume, leds reset: %d at brightness %d\n", ret, (int)drvdata->kbd_backlight->cdev.brightness); -+ } -+ -+ asus_resume_err: -+ return ret; -+} -+ -+static int __maybe_unused asus_suspend(struct hid_device *hdev, struct pm_message) -+ { -+ struct asus_drvdata *drvdata = hid_get_drvdata(hdev); -+ -+ if (drvdata == NULL) { -+ return 0; -+ } -+ -+ struct usb_interface *intf = to_usb_interface(hdev->dev.parent); -+ struct usb_device *dev = interface_to_usbdev(intf); -+ -+ int ret = 0; -+ -+ if (dmi_match(DMI_PRODUCT_NAME, "ROG Ally RC71L_RC71L")) { -+ // Send the USB ABORT_PIPE command -+ int result = usb_control_msg( -+ dev, usb_sndctrlpipe(dev, 0), USB_REQ_SET_FEATURE, -+ USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_ENDPOINT, -+ USB_ENDPOINT_HALT, 0x02, NULL, 0, 1000); -+ -+ if (result < 0) { -+ printk("USB ABORT_PIPE failed: %d\n", result); -+ } else { -+ printk("USB ABORT_PIPE succeeded\n"); -+ } -+ } -+ -+ struct asus_rc71l *rc71l_drvdata = drvdata->rc71l_data; -+ if (rc71l_drvdata != NULL) { -+ mutex_lock(&rc71l_drvdata->mutex); -+ // TODO: send ABORT_PIPE here -+ mutex_unlock(&rc71l_drvdata->mutex); -+ } -+ -+ return ret; -+} -+#endif -+ - static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id) - { - int ret; -@@ -1021,6 +1807,8 @@ static int asus_probe(struct 
hid_device *hdev, const struct hid_device_id *id) - return -ENOMEM; - } - -+ drvdata->rc71l_data = NULL; -+ - hid_set_drvdata(hdev, drvdata); - - drvdata->quirks = id->driver_data; -@@ -1109,6 +1897,51 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id) - goto err_stop_hw; - } - -+ if ((dmi_match(DMI_PRODUCT_NAME, "ROG Ally RC71L_RC71L")) && (hdev->rsize > 9) && (hdev->rdesc[7] == 0x85) && (hdev->rdesc[8] == 0x5a)) -+ { -+ drvdata->rc71l_data = devm_kzalloc(&hdev->dev, sizeof(*drvdata->rc71l_data), GFP_KERNEL); -+ if (drvdata->rc71l_data == NULL) { -+ hid_err(hdev, "Can't alloc Asus ROG Ally [RC71L] descriptor\n"); -+ ret = -ENOMEM; -+ goto err_stop_hw; -+ } -+ -+ mutex_init(&drvdata->rc71l_data->mutex); -+ -+ struct usb_interface *intf = to_usb_interface(hdev->dev.parent); -+ struct usb_device *dev = interface_to_usbdev(intf); -+ -+ // default controller mode -+ drvdata->rc71l_data->mode = rc71l_gamepad_mode; -+ -+ // usb_device and endpoint -+ drvdata->rc71l_data->usb_pipe = usb_sndctrlpipe(dev, 0); -+ -+ // apply the default controller mode -+ mutex_lock(&drvdata->rc71l_data->mutex); -+ ret = rc71l_mode_change(hdev, drvdata->rc71l_data->mode); -+ mutex_unlock(&drvdata->rc71l_data->mutex); -+ -+ if (ret < 0) { -+ hid_err(hdev, "Asus ROG Ally [RC71L] error setting the default controller mode: %d\n", ret); -+ goto err_stop_hw; -+ } -+ -+ drvdata->rc71l_data->mcu_dev = platform_device_register_simple("asus-mcu", 0, NULL, 0); -+ if (IS_ERR(drvdata->rc71l_data->mcu_dev)) { -+ hid_err(hdev, "Error registering MCU platform device: %ld\n", PTR_ERR(drvdata->rc71l_data->mcu_dev)); -+ goto err_stop_hw; -+ } -+ -+ platform_set_drvdata(drvdata->rc71l_data->mcu_dev, hdev); -+ -+ ret = devm_device_add_group(&drvdata->rc71l_data->mcu_dev->dev, &mcu_attr_group); -+ if (ret != 0) { -+ platform_device_unregister(drvdata->rc71l_data->mcu_dev); -+ goto err_stop_hw; -+ } -+ } -+ - if (drvdata->tp) { - drvdata->input->name = "Asus TouchPad"; - } else { -@@ 
-1140,6 +1973,16 @@ static void asus_remove(struct hid_device *hdev) - cancel_work_sync(&drvdata->kbd_backlight->work); - } - -+ struct asus_rc71l *rc71l_drvdata = drvdata->rc71l_data; -+ if (rc71l_drvdata != NULL) { -+ platform_device_unregister(rc71l_drvdata->mcu_dev); -+ -+ mutex_lock(&rc71l_drvdata->mutex); -+ platform_device_unregister(rc71l_drvdata->mcu_dev); -+ // TODO: perform cleanup operations -+ mutex_unlock(&rc71l_drvdata->mutex); -+ } -+ - hid_hw_stop(hdev); - } - -@@ -1258,6 +2101,9 @@ static const struct hid_device_id asus_devices[] = { - { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, - USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD3), - QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD }, -+ { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, -+ USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY), -+ QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD }, - { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, - USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD), - QUIRK_ROG_CLAYMORE_II_KEYBOARD }, -@@ -1294,6 +2140,8 @@ static struct hid_driver asus_driver = { - .input_configured = asus_input_configured, - #ifdef CONFIG_PM - .reset_resume = asus_reset_resume, -+ .resume = asus_resume, -+ .suspend = asus_suspend, - #endif - .event = asus_event, - .raw_event = asus_raw_event -diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h -index d10ccfa17..213492ee8 100644 ---- a/drivers/hid/hid-ids.h -+++ b/drivers/hid/hid-ids.h -@@ -208,6 +208,7 @@ - #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD 0x1866 - #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD2 0x19b6 - #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD3 0x1a30 -+#define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY 0x1abe - #define USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD 0x196b - #define USB_DEVICE_ID_ASUSTEK_FX503VD_KEYBOARD 0x1869 - --- -2.43.0 - diff --git a/patches/nobara/0002-drm-i915-add-kernel-parameter-to-disable-async-page-.patch b/patches/nobara/0002-drm-i915-add-kernel-parameter-to-disable-async-page-.patch deleted file mode 100644 index 
24f6807..0000000 --- a/patches/nobara/0002-drm-i915-add-kernel-parameter-to-disable-async-page-.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Jan200101 -Date: Wed, 8 Mar 2023 20:51:16 +0100 -Subject: [PATCH] drm/i915: add kernel parameter to disable async page flipping - -Signed-off-by: Jan200101 ---- - drivers/gpu/drm/i915/display/intel_display_driver.c | 2 +- - drivers/gpu/drm/i915/i915_params.c | 4 ++++ - drivers/gpu/drm/i915/i915_params.h | 3 ++- - 3 files changed, 7 insertions(+), 2 deletions(-) - -diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c -index ade744cccfea..119be26b5641 100644 ---- a/drivers/gpu/drm/i915/i915_params.c -+++ b/drivers/gpu/drm/i915/i915_params.c -@@ -222,6 +222,10 @@ i915_param_named_unsafe(lmem_size, uint, 0400, - i915_param_named_unsafe(lmem_bar_size, uint, 0400, - "Set the lmem bar size(in MiB)."); - -+i915_param_named_unsafe(disable_async_page_flip, bool, 0400, -+ "Disable async page flipping" -+ "(0=disabled [default], 1=enabled)"); -+ - static void _param_print_bool(struct drm_printer *p, const char *name, - bool val) - { -diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h -index 3f51f90145b6..37f25ec1b874 100644 ---- a/drivers/gpu/drm/i915/i915_params.h -+++ b/drivers/gpu/drm/i915/i915_params.h -@@ -85,7 +85,8 @@ struct drm_printer; - param(bool, verbose_state_checks, true, 0) \ - param(bool, nuclear_pageflip, false, 0400) \ - param(bool, enable_dp_mst, true, 0600) \ -- param(bool, enable_gvt, false, IS_ENABLED(CONFIG_DRM_I915_GVT) ? 0400 : 0) -+ param(bool, enable_gvt, false, IS_ENABLED(CONFIG_DRM_I915_GVT) ? 0400 : 0) \ -+ param(bool, disable_async_page_flip, false, 0400) - - #define MEMBER(T, member, ...) 
T member; - struct i915_params { -diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c -index b909814ae..918b8b589 100644 ---- a/drivers/gpu/drm/i915/display/intel_display_driver.c -+++ b/drivers/gpu/drm/i915/display/intel_display_driver.c -@@ -121,7 +121,7 @@ static void intel_mode_config_init(struct drm_i915_private *i915) - mode_config->funcs = &intel_mode_funcs; - mode_config->helper_private = &intel_mode_config_funcs; - -- mode_config->async_page_flip = HAS_ASYNC_FLIPS(i915); -+ mode_config->async_page_flip = HAS_ASYNC_FLIPS(i915) && !i915->params.disable_async_page_flip; - - /* - * Maximum framebuffer dimensions, chosen to match diff --git a/patches/nobara/OpenRGB.patch b/patches/nobara/OpenRGB.patch deleted file mode 100644 index 3ddf50e..0000000 --- a/patches/nobara/OpenRGB.patch +++ /dev/null @@ -1,703 +0,0 @@ -diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig -index 2ddca08f8a76..72647850f08e 100644 ---- a/drivers/i2c/busses/Kconfig -+++ b/drivers/i2c/busses/Kconfig -@@ -217,6 +217,15 @@ config I2C_CHT_WC - combined with a FUSB302 Type-C port-controller as such it is advised - to also select CONFIG_TYPEC_FUSB302=m. - -+config I2C_NCT6775 -+ tristate "Nuvoton NCT6775 and compatible SMBus controller" -+ help -+ If you say yes to this option, support will be included for the -+ Nuvoton NCT6775 and compatible SMBus controllers. -+ -+ This driver can also be built as a module. If so, the module -+ will be called i2c-nct6775. 
-+ - config I2C_NFORCE2 - tristate "Nvidia nForce2, nForce3 and nForce4" - depends on PCI -diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile -index 25d60889713c..3c2a9b237ac6 100644 ---- a/drivers/i2c/busses/Makefile -+++ b/drivers/i2c/busses/Makefile -@@ -17,6 +17,7 @@ obj-$(CONFIG_I2C_CHT_WC) += i2c-cht-wc.o - obj-$(CONFIG_I2C_I801) += i2c-i801.o - obj-$(CONFIG_I2C_ISCH) += i2c-isch.o - obj-$(CONFIG_I2C_ISMT) += i2c-ismt.o -+obj-$(CONFIG_I2C_NCT6775) += i2c-nct6775.o - obj-$(CONFIG_I2C_NFORCE2) += i2c-nforce2.o - obj-$(CONFIG_I2C_NFORCE2_S4985) += i2c-nforce2-s4985.o - obj-$(CONFIG_I2C_NVIDIA_GPU) += i2c-nvidia-gpu.o -diff --git a/drivers/i2c/busses/i2c-nct6775.c b/drivers/i2c/busses/i2c-nct6775.c -new file mode 100644 -index 000000000000..0462f0952043 ---- /dev/null -+++ b/drivers/i2c/busses/i2c-nct6775.c -@@ -0,0 +1,647 @@ -+/* -+ * i2c-nct6775 - Driver for the SMBus master functionality of -+ * Nuvoton NCT677x Super-I/O chips -+ * -+ * Copyright (C) 2019 Adam Honse -+ * -+ * Derived from nct6775 hwmon driver -+ * Copyright (C) 2012 Guenter Roeck -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-+ * -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define DRVNAME "i2c-nct6775" -+ -+/* Nuvoton SMBus address offsets */ -+#define SMBHSTDAT (0 + nuvoton_nct6793d_smba) -+#define SMBBLKSZ (1 + nuvoton_nct6793d_smba) -+#define SMBHSTCMD (2 + nuvoton_nct6793d_smba) -+#define SMBHSTIDX (3 + nuvoton_nct6793d_smba) //Index field is the Command field on other controllers -+#define SMBHSTCTL (4 + nuvoton_nct6793d_smba) -+#define SMBHSTADD (5 + nuvoton_nct6793d_smba) -+#define SMBHSTERR (9 + nuvoton_nct6793d_smba) -+#define SMBHSTSTS (0xE + nuvoton_nct6793d_smba) -+ -+/* Command register */ -+#define NCT6793D_READ_BYTE 0 -+#define NCT6793D_READ_WORD 1 -+#define NCT6793D_READ_BLOCK 2 -+#define NCT6793D_BLOCK_WRITE_READ_PROC_CALL 3 -+#define NCT6793D_PROC_CALL 4 -+#define NCT6793D_WRITE_BYTE 8 -+#define NCT6793D_WRITE_WORD 9 -+#define NCT6793D_WRITE_BLOCK 10 -+ -+/* Control register */ -+#define NCT6793D_MANUAL_START 128 -+#define NCT6793D_SOFT_RESET 64 -+ -+/* Error register */ -+#define NCT6793D_NO_ACK 32 -+ -+/* Status register */ -+#define NCT6793D_FIFO_EMPTY 1 -+#define NCT6793D_FIFO_FULL 2 -+#define NCT6793D_MANUAL_ACTIVE 4 -+ -+#define NCT6775_LD_SMBUS 0x0B -+ -+/* Other settings */ -+#define MAX_RETRIES 400 -+ -+enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792, nct6793, -+ nct6795, nct6796, nct6798 }; -+ -+struct nct6775_sio_data { -+ int sioreg; -+ enum kinds kind; -+}; -+ -+/* used to set data->name = nct6775_device_names[data->sio_kind] */ -+static const char * const nct6775_device_names[] = { -+ "nct6106", -+ "nct6775", -+ "nct6776", -+ "nct6779", -+ "nct6791", -+ "nct6792", -+ "nct6793", -+ "nct6795", -+ "nct6796", -+ "nct6798", -+}; -+ -+static const char * const nct6775_sio_names[] __initconst = { -+ "NCT6106D", -+ "NCT6775F", -+ "NCT6776D/F", -+ "NCT6779D", -+ "NCT6791D", -+ 
"NCT6792D", -+ "NCT6793D", -+ "NCT6795D", -+ "NCT6796D", -+ "NCT6798D", -+}; -+ -+#define SIO_REG_LDSEL 0x07 /* Logical device select */ -+#define SIO_REG_DEVID 0x20 /* Device ID (2 bytes) */ -+#define SIO_REG_SMBA 0x62 /* SMBus base address register */ -+ -+#define SIO_NCT6106_ID 0xc450 -+#define SIO_NCT6775_ID 0xb470 -+#define SIO_NCT6776_ID 0xc330 -+#define SIO_NCT6779_ID 0xc560 -+#define SIO_NCT6791_ID 0xc800 -+#define SIO_NCT6792_ID 0xc910 -+#define SIO_NCT6793_ID 0xd120 -+#define SIO_NCT6795_ID 0xd350 -+#define SIO_NCT6796_ID 0xd420 -+#define SIO_NCT6798_ID 0xd428 -+#define SIO_ID_MASK 0xFFF0 -+ -+static inline void -+superio_outb(int ioreg, int reg, int val) -+{ -+ outb(reg, ioreg); -+ outb(val, ioreg + 1); -+} -+ -+static inline int -+superio_inb(int ioreg, int reg) -+{ -+ outb(reg, ioreg); -+ return inb(ioreg + 1); -+} -+ -+static inline void -+superio_select(int ioreg, int ld) -+{ -+ outb(SIO_REG_LDSEL, ioreg); -+ outb(ld, ioreg + 1); -+} -+ -+static inline int -+superio_enter(int ioreg) -+{ -+ /* -+ * Try to reserve and for exclusive access. -+ */ -+ if (!request_muxed_region(ioreg, 2, DRVNAME)) -+ return -EBUSY; -+ -+ outb(0x87, ioreg); -+ outb(0x87, ioreg); -+ -+ return 0; -+} -+ -+static inline void -+superio_exit(int ioreg) -+{ -+ outb(0xaa, ioreg); -+ outb(0x02, ioreg); -+ outb(0x02, ioreg + 1); -+ release_region(ioreg, 2); -+} -+ -+/* -+ * ISA constants -+ */ -+ -+#define IOREGION_ALIGNMENT (~7) -+#define IOREGION_LENGTH 2 -+#define ADDR_REG_OFFSET 0 -+#define DATA_REG_OFFSET 1 -+ -+#define NCT6775_REG_BANK 0x4E -+#define NCT6775_REG_CONFIG 0x40 -+ -+static struct i2c_adapter *nct6775_adapter; -+ -+struct i2c_nct6775_adapdata { -+ unsigned short smba; -+}; -+ -+/* Return negative errno on error. 
*/ -+static s32 nct6775_access(struct i2c_adapter * adap, u16 addr, -+ unsigned short flags, char read_write, -+ u8 command, int size, union i2c_smbus_data * data) -+{ -+ struct i2c_nct6775_adapdata *adapdata = i2c_get_adapdata(adap); -+ unsigned short nuvoton_nct6793d_smba = adapdata->smba; -+ int i, len, cnt; -+ union i2c_smbus_data tmp_data; -+ int timeout = 0; -+ -+ tmp_data.word = 0; -+ cnt = 0; -+ len = 0; -+ -+ outb_p(NCT6793D_SOFT_RESET, SMBHSTCTL); -+ -+ switch (size) { -+ case I2C_SMBUS_QUICK: -+ outb_p((addr << 1) | read_write, -+ SMBHSTADD); -+ break; -+ case I2C_SMBUS_BYTE_DATA: -+ tmp_data.byte = data->byte; -+ case I2C_SMBUS_BYTE: -+ outb_p((addr << 1) | read_write, -+ SMBHSTADD); -+ outb_p(command, SMBHSTIDX); -+ if (read_write == I2C_SMBUS_WRITE) { -+ outb_p(tmp_data.byte, SMBHSTDAT); -+ outb_p(NCT6793D_WRITE_BYTE, SMBHSTCMD); -+ } -+ else { -+ outb_p(NCT6793D_READ_BYTE, SMBHSTCMD); -+ } -+ break; -+ case I2C_SMBUS_WORD_DATA: -+ outb_p((addr << 1) | read_write, -+ SMBHSTADD); -+ outb_p(command, SMBHSTIDX); -+ if (read_write == I2C_SMBUS_WRITE) { -+ outb_p(data->word & 0xff, SMBHSTDAT); -+ outb_p((data->word & 0xff00) >> 8, SMBHSTDAT); -+ outb_p(NCT6793D_WRITE_WORD, SMBHSTCMD); -+ } -+ else { -+ outb_p(NCT6793D_READ_WORD, SMBHSTCMD); -+ } -+ break; -+ case I2C_SMBUS_BLOCK_DATA: -+ outb_p((addr << 1) | read_write, -+ SMBHSTADD); -+ outb_p(command, SMBHSTIDX); -+ if (read_write == I2C_SMBUS_WRITE) { -+ len = data->block[0]; -+ if (len == 0 || len > I2C_SMBUS_BLOCK_MAX) -+ return -EINVAL; -+ outb_p(len, SMBBLKSZ); -+ -+ cnt = 1; -+ if (len >= 4) { -+ for (i = cnt; i <= 4; i++) { -+ outb_p(data->block[i], SMBHSTDAT); -+ } -+ -+ len -= 4; -+ cnt += 4; -+ } -+ else { -+ for (i = cnt; i <= len; i++ ) { -+ outb_p(data->block[i], SMBHSTDAT); -+ } -+ -+ len = 0; -+ } -+ -+ outb_p(NCT6793D_WRITE_BLOCK, SMBHSTCMD); -+ } -+ else { -+ return -ENOTSUPP; -+ } -+ break; -+ default: -+ dev_warn(&adap->dev, "Unsupported transaction %d\n", size); -+ return -EOPNOTSUPP; 
-+ } -+ -+ outb_p(NCT6793D_MANUAL_START, SMBHSTCTL); -+ -+ while ((size == I2C_SMBUS_BLOCK_DATA) && (len > 0)) { -+ if (read_write == I2C_SMBUS_WRITE) { -+ timeout = 0; -+ while ((inb_p(SMBHSTSTS) & NCT6793D_FIFO_EMPTY) == 0) -+ { -+ if(timeout > MAX_RETRIES) -+ { -+ return -ETIMEDOUT; -+ } -+ usleep_range(250, 500); -+ timeout++; -+ } -+ -+ //Load more bytes into FIFO -+ if (len >= 4) { -+ for (i = cnt; i <= (cnt + 4); i++) { -+ outb_p(data->block[i], SMBHSTDAT); -+ } -+ -+ len -= 4; -+ cnt += 4; -+ } -+ else { -+ for (i = cnt; i <= (cnt + len); i++) { -+ outb_p(data->block[i], SMBHSTDAT); -+ } -+ -+ len = 0; -+ } -+ } -+ else { -+ return -ENOTSUPP; -+ } -+ -+ } -+ -+ //wait for manual mode to complete -+ timeout = 0; -+ while ((inb_p(SMBHSTSTS) & NCT6793D_MANUAL_ACTIVE) != 0) -+ { -+ if(timeout > MAX_RETRIES) -+ { -+ return -ETIMEDOUT; -+ } -+ usleep_range(250, 500); -+ timeout++; -+ } -+ -+ if ((inb_p(SMBHSTERR) & NCT6793D_NO_ACK) != 0) { -+ return -ENXIO; -+ } -+ else if ((read_write == I2C_SMBUS_WRITE) || (size == I2C_SMBUS_QUICK)) { -+ return 0; -+ } -+ -+ switch (size) { -+ case I2C_SMBUS_QUICK: -+ case I2C_SMBUS_BYTE_DATA: -+ data->byte = inb_p(SMBHSTDAT); -+ break; -+ case I2C_SMBUS_WORD_DATA: -+ data->word = inb_p(SMBHSTDAT) + (inb_p(SMBHSTDAT) << 8); -+ break; -+ } -+ return 0; -+} -+ -+static u32 nct6775_func(struct i2c_adapter *adapter) -+{ -+ return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE | -+ I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA | -+ I2C_FUNC_SMBUS_BLOCK_DATA; -+} -+ -+static const struct i2c_algorithm smbus_algorithm = { -+ .smbus_xfer = nct6775_access, -+ .functionality = nct6775_func, -+}; -+ -+static int nct6775_add_adapter(unsigned short smba, const char *name, struct i2c_adapter **padap) -+{ -+ struct i2c_adapter *adap; -+ struct i2c_nct6775_adapdata *adapdata; -+ int retval; -+ -+ adap = kzalloc(sizeof(*adap), GFP_KERNEL); -+ if (adap == NULL) { -+ return -ENOMEM; -+ } -+ -+ adap->owner = THIS_MODULE; -+ adap->class = 
I2C_CLASS_HWMON | I2C_CLASS_SPD; -+ adap->algo = &smbus_algorithm; -+ -+ adapdata = kzalloc(sizeof(*adapdata), GFP_KERNEL); -+ if (adapdata == NULL) { -+ kfree(adap); -+ return -ENOMEM; -+ } -+ -+ adapdata->smba = smba; -+ -+ snprintf(adap->name, sizeof(adap->name), -+ "SMBus NCT67xx adapter%s at %04x", name, smba); -+ -+ i2c_set_adapdata(adap, adapdata); -+ -+ retval = i2c_add_adapter(adap); -+ if (retval) { -+ kfree(adapdata); -+ kfree(adap); -+ return retval; -+ } -+ -+ *padap = adap; -+ return 0; -+} -+ -+static void nct6775_remove_adapter(struct i2c_adapter *adap) -+{ -+ struct i2c_nct6775_adapdata *adapdata = i2c_get_adapdata(adap); -+ -+ if (adapdata->smba) { -+ i2c_del_adapter(adap); -+ kfree(adapdata); -+ kfree(adap); -+ } -+} -+ -+//static SIMPLE_DEV_PM_OPS(nct6775_dev_pm_ops, nct6775_suspend, nct6775_resume); -+ -+/* -+ * when Super-I/O functions move to a separate file, the Super-I/O -+ * bus will manage the lifetime of the device and this module will only keep -+ * track of the nct6775 driver. 
But since we use platform_device_alloc(), we -+ * must keep track of the device -+ */ -+static struct platform_device *pdev[2]; -+ -+static int nct6775_probe(struct platform_device *pdev) -+{ -+ struct device *dev = &pdev->dev; -+ struct nct6775_sio_data *sio_data = dev_get_platdata(dev); -+ struct resource *res; -+ -+ res = platform_get_resource(pdev, IORESOURCE_IO, 0); -+ if (!devm_request_region(&pdev->dev, res->start, IOREGION_LENGTH, -+ DRVNAME)) -+ return -EBUSY; -+ -+ switch (sio_data->kind) { -+ case nct6791: -+ case nct6792: -+ case nct6793: -+ case nct6795: -+ case nct6796: -+ case nct6798: -+ nct6775_add_adapter(res->start, "", &nct6775_adapter); -+ break; -+ default: -+ return -ENODEV; -+ } -+ -+ return 0; -+} -+/* -+static void nct6791_enable_io_mapping(int sioaddr) -+{ -+ int val; -+ -+ val = superio_inb(sioaddr, NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE); -+ if (val & 0x10) { -+ pr_info("Enabling hardware monitor logical device mappings.\n"); -+ superio_outb(sioaddr, NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE, -+ val & ~0x10); -+ } -+}*/ -+ -+static struct platform_driver i2c_nct6775_driver = { -+ .driver = { -+ .name = DRVNAME, -+// .pm = &nct6775_dev_pm_ops, -+ }, -+ .probe = nct6775_probe, -+}; -+ -+static void __exit i2c_nct6775_exit(void) -+{ -+ int i; -+ -+ if(nct6775_adapter) -+ nct6775_remove_adapter(nct6775_adapter); -+ -+ for (i = 0; i < ARRAY_SIZE(pdev); i++) { -+ if (pdev[i]) -+ platform_device_unregister(pdev[i]); -+ } -+ platform_driver_unregister(&i2c_nct6775_driver); -+} -+ -+/* nct6775_find() looks for a '627 in the Super-I/O config space */ -+static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data) -+{ -+ u16 val; -+ int err; -+ int addr; -+ -+ err = superio_enter(sioaddr); -+ if (err) -+ return err; -+ -+ val = (superio_inb(sioaddr, SIO_REG_DEVID) << 8) | -+ superio_inb(sioaddr, SIO_REG_DEVID + 1); -+ -+ switch (val & SIO_ID_MASK) { -+ case SIO_NCT6106_ID: -+ sio_data->kind = nct6106; -+ break; -+ case SIO_NCT6775_ID: -+ 
sio_data->kind = nct6775; -+ break; -+ case SIO_NCT6776_ID: -+ sio_data->kind = nct6776; -+ break; -+ case SIO_NCT6779_ID: -+ sio_data->kind = nct6779; -+ break; -+ case SIO_NCT6791_ID: -+ sio_data->kind = nct6791; -+ break; -+ case SIO_NCT6792_ID: -+ sio_data->kind = nct6792; -+ break; -+ case SIO_NCT6793_ID: -+ sio_data->kind = nct6793; -+ break; -+ case SIO_NCT6795_ID: -+ sio_data->kind = nct6795; -+ break; -+ case SIO_NCT6796_ID: -+ sio_data->kind = nct6796; -+ break; -+ case SIO_NCT6798_ID: -+ sio_data->kind = nct6798; -+ break; -+ default: -+ if (val != 0xffff) -+ pr_debug("unsupported chip ID: 0x%04x\n", val); -+ superio_exit(sioaddr); -+ return -ENODEV; -+ } -+ -+ /* We have a known chip, find the SMBus I/O address */ -+ superio_select(sioaddr, NCT6775_LD_SMBUS); -+ val = (superio_inb(sioaddr, SIO_REG_SMBA) << 8) -+ | superio_inb(sioaddr, SIO_REG_SMBA + 1); -+ addr = val & IOREGION_ALIGNMENT; -+ if (addr == 0) { -+ pr_err("Refusing to enable a Super-I/O device with a base I/O port 0\n"); -+ superio_exit(sioaddr); -+ return -ENODEV; -+ } -+ -+ //if (sio_data->kind == nct6791 || sio_data->kind == nct6792 || -+ // sio_data->kind == nct6793 || sio_data->kind == nct6795 || -+ // sio_data->kind == nct6796) -+ // nct6791_enable_io_mapping(sioaddr); -+ -+ superio_exit(sioaddr); -+ pr_info("Found %s or compatible chip at %#x:%#x\n", -+ nct6775_sio_names[sio_data->kind], sioaddr, addr); -+ sio_data->sioreg = sioaddr; -+ -+ return addr; -+} -+ -+static int __init i2c_nct6775_init(void) -+{ -+ int i, err; -+ bool found = false; -+ int address; -+ struct resource res; -+ struct nct6775_sio_data sio_data; -+ int sioaddr[2] = { 0x2e, 0x4e }; -+ -+ err = platform_driver_register(&i2c_nct6775_driver); -+ if (err) -+ return err; -+ -+ /* -+ * initialize sio_data->kind and sio_data->sioreg. 
-+ * -+ * when Super-I/O functions move to a separate file, the Super-I/O -+ * driver will probe 0x2e and 0x4e and auto-detect the presence of a -+ * nct6775 hardware monitor, and call probe() -+ */ -+ for (i = 0; i < ARRAY_SIZE(pdev); i++) { -+ address = nct6775_find(sioaddr[i], &sio_data); -+ if (address <= 0) -+ continue; -+ -+ found = true; -+ -+ pdev[i] = platform_device_alloc(DRVNAME, address); -+ if (!pdev[i]) { -+ err = -ENOMEM; -+ goto exit_device_unregister; -+ } -+ -+ err = platform_device_add_data(pdev[i], &sio_data, -+ sizeof(struct nct6775_sio_data)); -+ if (err) -+ goto exit_device_put; -+ -+ memset(&res, 0, sizeof(res)); -+ res.name = DRVNAME; -+ res.start = address; -+ res.end = address + IOREGION_LENGTH - 1; -+ res.flags = IORESOURCE_IO; -+ -+ err = acpi_check_resource_conflict(&res); -+ if (err) { -+ platform_device_put(pdev[i]); -+ pdev[i] = NULL; -+ continue; -+ } -+ -+ err = platform_device_add_resources(pdev[i], &res, 1); -+ if (err) -+ goto exit_device_put; -+ -+ /* platform_device_add calls probe() */ -+ err = platform_device_add(pdev[i]); -+ if (err) -+ goto exit_device_put; -+ } -+ if (!found) { -+ err = -ENODEV; -+ goto exit_unregister; -+ } -+ -+ return 0; -+ -+exit_device_put: -+ platform_device_put(pdev[i]); -+exit_device_unregister: -+ while (--i >= 0) { -+ if (pdev[i]) -+ platform_device_unregister(pdev[i]); -+ } -+exit_unregister: -+ platform_driver_unregister(&i2c_nct6775_driver); -+ return err; -+} -+ -+MODULE_AUTHOR("Adam Honse "); -+MODULE_DESCRIPTION("SMBus driver for NCT6775F and compatible chips"); -+MODULE_LICENSE("GPL"); -+ -+module_init(i2c_nct6775_init); -+module_exit(i2c_nct6775_exit); -diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c -index 30ded6422e7b..e25ce84c26af 100644 ---- a/drivers/i2c/busses/i2c-piix4.c -+++ b/drivers/i2c/busses/i2c-piix4.c -@@ -467,11 +467,11 @@ static int piix4_transaction(struct i2c_adapter *piix4_adapter) - if (srvrworks_csb5_delay) /* Extra delay for 
SERVERWORKS_CSB5 */ - usleep_range(2000, 2100); - else -- usleep_range(250, 500); -+ usleep_range(25, 50); - - while ((++timeout < MAX_TIMEOUT) && - ((temp = inb_p(SMBHSTSTS)) & 0x01)) -- usleep_range(250, 500); -+ usleep_range(25, 50); - - /* If the SMBus is still busy, we give up */ - if (timeout == MAX_TIMEOUT) { diff --git a/patches/nobara/amdgpu-si-cik-default.patch b/patches/nobara/amdgpu-si-cik-default.patch deleted file mode 100644 index d2d3178..0000000 --- a/patches/nobara/amdgpu-si-cik-default.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Jan200101 -Date: Mon, 27 Nov 2023 09:53:59 +0100 -Subject: [PATCH] drm/amdgpu: enable SI and CIK support by default - -Signed-off-by: Jan200101 ---- - drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 ---------- - drivers/gpu/drm/radeon/radeon_drv.c | 10 ++++++++++ - 2 files changed, 10 insertions(+), 10 deletions(-) - -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -index 81edf66dbea8..5021d03089ff 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -@@ -582,13 +582,8 @@ module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644); - */ - #ifdef CONFIG_DRM_AMDGPU_SI - --#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE) --int amdgpu_si_support = 0; --MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))"); --#else - int amdgpu_si_support = 1; - MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)"); --#endif - - module_param_named(si_support, amdgpu_si_support, int, 0444); - #endif -@@ -601,13 +596,8 @@ module_param_named(si_support, amdgpu_si_support, int, 0444); - */ - #ifdef CONFIG_DRM_AMDGPU_CIK - --#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE) --int amdgpu_cik_support = 0; --MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = 
disabled (default))"); --#else - int amdgpu_cik_support = 1; - MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)"); --#endif - - module_param_named(cik_support, amdgpu_cik_support, int, 0444); - #endif -diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c -index 7bf08164140e..865f186f48c4 100644 ---- a/drivers/gpu/drm/radeon/radeon_drv.c -+++ b/drivers/gpu/drm/radeon/radeon_drv.c -@@ -239,12 +239,22 @@ module_param_named(uvd, radeon_uvd, int, 0444); - MODULE_PARM_DESC(vce, "vce enable/disable vce support (1 = enable, 0 = disable)"); - module_param_named(vce, radeon_vce, int, 0444); - -+#ifdef CONFIG_DRM_AMDGPU_SI -+int radeon_si_support = 0; -+MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))"); -+#else - int radeon_si_support = 1; - MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)"); -+#endif - module_param_named(si_support, radeon_si_support, int, 0444); - -+#ifdef CONFIG_DRM_AMDGPU_CIK -+int radeon_cik_support = 0; -+MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))"); -+#else - int radeon_cik_support = 1; - MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)"); -+#endif - module_param_named(cik_support, radeon_cik_support, int, 0444); - - static struct pci_device_id pciidlist[] = { diff --git a/patches/nobara/lenovo-legion-laptop.patch b/patches/nobara/lenovo-legion-laptop.patch deleted file mode 100644 index a2bb5c8..0000000 --- a/patches/nobara/lenovo-legion-laptop.patch +++ /dev/null @@ -1,6143 +0,0 @@ -From 26077d270f462eaf3da592ed047956df3436ed36 Mon Sep 17 00:00:00 2001 -From: John Martens -Date: Fri, 29 Mar 2024 20:18:47 +0000 -Subject: [PATCH] Add legion-laptop v0.0.12 - -Add extra support for Lenovo Legion laptops. 
---- - drivers/platform/x86/Kconfig | 10 + - drivers/platform/x86/Makefile | 1 + - drivers/platform/x86/legion-laptop.c | 6089 ++++++++++++++++++++++++++ - 3 files changed, 6100 insertions(+) - create mode 100644 drivers/platform/x86/legion-laptop.c - -diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig -index 49c2c4cd8..b7d70c20e 100644 ---- a/drivers/platform/x86/Kconfig -+++ b/drivers/platform/x86/Kconfig -@@ -643,6 +643,16 @@ config THINKPAD_LMI - To compile this driver as a module, choose M here: the module will - be called think-lmi. - -+config LEGION_LAPTOP -+ tristate "Lenovo Legion Laptop Extras" -+ depends on ACPI -+ depends on ACPI_WMI || ACPI_WMI = n -+ depends on HWMON || HWMON = n -+ select ACPI_PLATFORM_PROFILE -+ help -+ This is a driver for Lenovo Legion laptops and contains drivers for -+ hotkey, fan control, and power mode. -+ - source "drivers/platform/x86/intel/Kconfig" - - config MSI_EC -diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile -index 52dfdf574..5f32dd9df 100644 ---- a/drivers/platform/x86/Makefile -+++ b/drivers/platform/x86/Makefile -@@ -65,6 +65,7 @@ obj-$(CONFIG_LENOVO_YMC) += lenovo-ymc.o - obj-$(CONFIG_SENSORS_HDAPS) += hdaps.o - obj-$(CONFIG_THINKPAD_ACPI) += thinkpad_acpi.o - obj-$(CONFIG_THINKPAD_LMI) += think-lmi.o -+obj-$(CONFIG_LEGION_LAPTOP) += legion-laptop.o - obj-$(CONFIG_YOGABOOK) += lenovo-yogabook.o - - # Intel -diff --git a/drivers/platform/x86/legion-laptop.c b/drivers/platform/x86/legion-laptop.c -new file mode 100644 -index 000000000..5ec0a518f ---- /dev/null -+++ b/drivers/platform/x86/legion-laptop.c -@@ -0,0 +1,6089 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * legion-laptop.c - Extra Lenovo Legion laptop support, in -+ * particular for fan curve control and power mode. -+ * -+ * Copyright (C) 2022 johnfan -+ * -+ * -+ * This driver might work on other Lenovo Legion models. 
If you -+ * want to try it you can pass force=1 as argument -+ * to the module which will force it to load even when the DMI -+ * data doesn't match the model AND FIRMWARE. -+ * -+ * Support for other hardware of this model is already partially -+ * provided by the module ideapad-laptop. -+ * -+ * The development page for this driver is located at -+ * https://github.com/johnfanv2/LenovoLegionLinux -+ * -+ * This driver exports the files: -+ * - /sys/kernel/debug/legion/fancurve (ro) -+ * The fan curve stored in the firmware in the form of a -+ * human readable table. -+ * -+ * - /sys/module/legion_laptop/drivers/platform\:legion/PNP0C09\:00/powermode (rw) -+ * 0: balanced mode (white) -+ * 1: performance mode (red) -+ * 2: quiet mode (blue) -+ * ?: custom mode (pink) -+ * -+ * NOTE: Writing to this will load the default fan curve from -+ * the firmware for this mode, so the fan curve might -+ * have to be reconfigured if needed. -+ * -+ * It implements the usual hwmon interface to monitor fan speed and temmperature -+ * and allows to set the fan curve inside the firware. -+ * -+ * - /sys/class/hwmon/X/fan1_input or /sys/class/hwmon/X/fan2_input (ro) -+ * Current fan speed of fan1/fan2. -+ * - /sys/class/hwmon/X/temp1_input (ro) -+ * - /sys/class/hwmon/X/temp2_input (ro) -+ * - /sys/class/hwmon/X/temp3_input (ro) -+ * Temperature (Celsius) of CPU, GPU, and IC used for fan control. -+ * - /sys/class/hwmon/X/pwmY_auto_pointZ_pwm (rw) -+ * PWM (0-255) of the fan at the Y-level in the fan curve -+ * - /sys/class/hwmon/X/pwmY_auto_pointZ_temp (rw) -+ * upper temperature of tempZ (CPU, GPU, or IC) at the Y-level in the fan curve -+ * - /sys/class/hwmon/X/pwmY_auto_pointZ_temp_hyst (rw) -+ * hysteris (CPU, GPU, or IC) at the Y-level in the fan curve. 
The lower -+ * temperatue of the level is the upper temperature minus the hysteris -+ * -+ * -+ * Credits for reverse engineering the firmware to: -+ * - David Woodhouse: heavily inspired by lenovo_laptop.c -+ * - Luke Cama: Windows version "LegionFanControl" -+ * - SmokelessCPU: reverse engineering of custom registers in EC -+ * and commincation method with EC via ports -+ * - 0x1F9F1: additional reverse engineering for complete fan curve -+ */ -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("johnfan"); -+MODULE_DESCRIPTION("Lenovo Legion laptop extras"); -+ -+static bool force; -+module_param(force, bool, 0440); -+MODULE_PARM_DESC( -+ force, -+ "Force loading this module even if model or BIOS does not match."); -+ -+static bool ec_readonly; -+module_param(ec_readonly, bool, 0440); -+MODULE_PARM_DESC( -+ ec_readonly, -+ "Only read from embedded controller but do not write or change settings."); -+ -+static bool enable_platformprofile = true; -+module_param(enable_platformprofile, bool, 0440); -+MODULE_PARM_DESC( -+ enable_platformprofile, -+ "Enable the platform profile sysfs API to read and write the power mode."); -+ -+#define LEGIONFEATURES \ -+ "fancurve powermode platformprofile platformprofilenotify minifancurve" -+ -+//Size of fancurve stored in embedded controller -+#define MAXFANCURVESIZE 10 -+ -+#define LEGION_DRVR_SHORTNAME "legion" -+#define LEGION_HWMON_NAME LEGION_DRVR_SHORTNAME "_hwmon" -+ -+struct legion_private; -+ -+/* =============================== */ -+/* Embedded Controller Description */ -+/* =============================== */ -+ -+/* The configuration and registers to access the embedded controller -+ * depending on different the version of the software on the -+ * embedded controller or and the BIOS/UEFI firmware. 
-+ * -+ * To control fan curve in the embedded controller (EC) one has to -+ * write to its "RAM". There are different possibilities: -+ * - EC RAM is memory mapped (write to it with ioremap) -+ * - access EC RAM via ported mapped IO (outb/inb) -+ * - access EC RAM via ACPI methods. It is only possible to write -+ * to part of it (first 0xFF bytes?) -+ * -+ * In later models the firmware directly exposes ACPI methods to -+ * set the fan curve directly, without writing to EC RAM. This -+ * is done inside the ACPI method. -+ */ -+ -+/** -+ * Offsets for interesting values inside the EC RAM (0 = start of -+ * EC RAM) These might change depending on the software inside of -+ * the EC, which can be updated by a BIOS update from Lenovo. -+ */ -+// TODO: same order as in initialization -+struct ec_register_offsets { -+ // Super I/O Configuration Registers -+ // 7.15 General Control (GCTRL) -+ // General Control (GCTRL) -+ // (see EC Interface Registers and 6.2 Plug and Play Configuration (PNPCFG)) in datasheet -+ // note: these are in two places saved -+ // in EC Interface Registers and in super io configuration registers -+ // Chip ID -+ u16 ECHIPID1; -+ u16 ECHIPID2; -+ // Chip Version -+ u16 ECHIPVER; -+ u16 ECDEBUG; -+ -+ // Lenovo Custom OEM extension -+ // Firmware of ITE can be extended by -+ // custom program using its own "variables" -+ // These are the offsets to these "variables" -+ u16 EXT_FAN_CUR_POINT; -+ u16 EXT_FAN_POINTS_SIZE; -+ u16 EXT_FAN1_BASE; -+ u16 EXT_FAN2_BASE; -+ u16 EXT_FAN_ACC_BASE; -+ u16 EXT_FAN_DEC_BASE; -+ u16 EXT_CPU_TEMP; -+ u16 EXT_CPU_TEMP_HYST; -+ u16 EXT_GPU_TEMP; -+ u16 EXT_GPU_TEMP_HYST; -+ u16 EXT_VRM_TEMP; -+ u16 EXT_VRM_TEMP_HYST; -+ u16 EXT_FAN1_RPM_LSB; -+ u16 EXT_FAN1_RPM_MSB; -+ u16 EXT_FAN2_RPM_LSB; -+ u16 EXT_FAN2_RPM_MSB; -+ u16 EXT_FAN1_TARGET_RPM; -+ u16 EXT_FAN2_TARGET_RPM; -+ u16 EXT_POWERMODE; -+ u16 EXT_MINIFANCURVE_ON_COOL; -+ // values -+ // 0x04: enable mini fan curve if left for too long on cool level -+ // - 
this might be due to potential temp failure -+ // - or just because of really cool temps -+ // 0xA0: disable it -+ u16 EXT_LOCKFANCONTROLLER; -+ u16 EXT_MAXIMUMFANSPEED; -+ u16 EXT_WHITE_KEYBOARD_BACKLIGHT; -+ u16 EXT_IC_TEMP_INPUT; -+ u16 EXT_CPU_TEMP_INPUT; -+ u16 EXT_GPU_TEMP_INPUT; -+}; -+ -+enum access_method { -+ ACCESS_METHOD_NO_ACCESS = 0, -+ ACCESS_METHOD_EC = 1, -+ ACCESS_METHOD_ACPI = 2, -+ ACCESS_METHOD_WMI = 3, -+ ACCESS_METHOD_WMI2 = 4, -+ ACCESS_METHOD_WMI3 = 5, -+ ACCESS_METHOD_EC2 = 10, // ideapad fancurve method -+ ACCESS_METHOD_EC3 = 11, // loq -+}; -+ -+struct model_config { -+ const struct ec_register_offsets *registers; -+ bool check_embedded_controller_id; -+ u16 embedded_controller_id; -+ -+ // first addr in EC we access/scan -+ phys_addr_t memoryio_physical_ec_start; -+ size_t memoryio_size; -+ -+ // TODO: maybe use bitfield -+ bool has_minifancurve; -+ bool has_custom_powermode; -+ enum access_method access_method_powermode; -+ -+ enum access_method access_method_keyboard; -+ enum access_method access_method_temperature; -+ enum access_method access_method_fanspeed; -+ enum access_method access_method_fancurve; -+ enum access_method access_method_fanfullspeed; -+ bool three_state_keyboard; -+ -+ bool acpi_check_dev; -+ -+ phys_addr_t ramio_physical_start; -+ size_t ramio_size; -+}; -+ -+/* =================================== */ -+/* Configuration for different models */ -+/* =================================== */ -+ -+// Idea by SmokelesssCPU (modified) -+// - all default names and register addresses are supported by datasheet -+// - register addresses for custom firmware by SmokelesssCPU -+static const struct ec_register_offsets ec_register_offsets_v0 = { -+ .ECHIPID1 = 0x2000, -+ .ECHIPID2 = 0x2001, -+ .ECHIPVER = 0x2002, -+ .ECDEBUG = 0x2003, -+ .EXT_FAN_CUR_POINT = 0xC534, -+ .EXT_FAN_POINTS_SIZE = 0xC535, -+ .EXT_FAN1_BASE = 0xC540, -+ .EXT_FAN2_BASE = 0xC550, -+ .EXT_FAN_ACC_BASE = 0xC560, -+ .EXT_FAN_DEC_BASE = 0xC570, -+ 
.EXT_CPU_TEMP = 0xC580, -+ .EXT_CPU_TEMP_HYST = 0xC590, -+ .EXT_GPU_TEMP = 0xC5A0, -+ .EXT_GPU_TEMP_HYST = 0xC5B0, -+ .EXT_VRM_TEMP = 0xC5C0, -+ .EXT_VRM_TEMP_HYST = 0xC5D0, -+ .EXT_FAN1_RPM_LSB = 0xC5E0, -+ .EXT_FAN1_RPM_MSB = 0xC5E1, -+ .EXT_FAN2_RPM_LSB = 0xC5E2, -+ .EXT_FAN2_RPM_MSB = 0xC5E3, -+ .EXT_MINIFANCURVE_ON_COOL = 0xC536, -+ .EXT_LOCKFANCONTROLLER = 0xc4AB, -+ .EXT_CPU_TEMP_INPUT = 0xc538, -+ .EXT_GPU_TEMP_INPUT = 0xc539, -+ .EXT_IC_TEMP_INPUT = 0xC5E8, -+ .EXT_POWERMODE = 0xc420, -+ .EXT_FAN1_TARGET_RPM = 0xc600, -+ .EXT_FAN2_TARGET_RPM = 0xc601, -+ .EXT_MAXIMUMFANSPEED = 0xBD, -+ .EXT_WHITE_KEYBOARD_BACKLIGHT = (0x3B + 0xC400) -+}; -+ -+static const struct ec_register_offsets ec_register_offsets_v1 = { -+ .ECHIPID1 = 0x2000, -+ .ECHIPID2 = 0x2001, -+ .ECHIPVER = 0x2002, -+ .ECDEBUG = 0x2003, -+ .EXT_FAN_CUR_POINT = 0xC534, -+ .EXT_FAN_POINTS_SIZE = 0xC535, -+ .EXT_FAN1_BASE = 0xC540, -+ .EXT_FAN2_BASE = 0xC550, -+ .EXT_FAN_ACC_BASE = 0xC560, -+ .EXT_FAN_DEC_BASE = 0xC570, -+ .EXT_CPU_TEMP = 0xC580, -+ .EXT_CPU_TEMP_HYST = 0xC590, -+ .EXT_GPU_TEMP = 0xC5A0, -+ .EXT_GPU_TEMP_HYST = 0xC5B0, -+ .EXT_VRM_TEMP = 0xC5C0, -+ .EXT_VRM_TEMP_HYST = 0xC5D0, -+ .EXT_FAN1_RPM_LSB = 0xC5E0, -+ .EXT_FAN1_RPM_MSB = 0xC5E1, -+ .EXT_FAN2_RPM_LSB = 0xC5E2, -+ .EXT_FAN2_RPM_MSB = 0xC5E3, -+ .EXT_MINIFANCURVE_ON_COOL = 0xC536, -+ .EXT_LOCKFANCONTROLLER = 0xc4AB, -+ .EXT_CPU_TEMP_INPUT = 0xc538, -+ .EXT_GPU_TEMP_INPUT = 0xc539, -+ .EXT_IC_TEMP_INPUT = 0xC5E8, -+ .EXT_POWERMODE = 0xc41D, -+ .EXT_FAN1_TARGET_RPM = 0xc600, -+ .EXT_FAN2_TARGET_RPM = 0xc601, -+ .EXT_MAXIMUMFANSPEED = 0xBD, -+ .EXT_WHITE_KEYBOARD_BACKLIGHT = (0x3B + 0xC400) -+}; -+ -+static const struct ec_register_offsets ec_register_offsets_ideapad_v0 = { -+ .ECHIPID1 = 0x2000, -+ .ECHIPID2 = 0x2001, -+ .ECHIPVER = 0x2002, -+ .ECDEBUG = 0x2003, -+ .EXT_FAN_CUR_POINT = 0xC5a0, // not found yet -+ .EXT_FAN_POINTS_SIZE = 0xC5a0, // constant 0 -+ .EXT_FAN1_BASE = 0xC5a0, -+ .EXT_FAN2_BASE = 0xC5a8, -+ 
.EXT_FAN_ACC_BASE = 0xC5a0, // not found yet -+ .EXT_FAN_DEC_BASE = 0xC5a0, // not found yet -+ .EXT_CPU_TEMP = 0xC550, // and repeated after 8 bytes -+ .EXT_CPU_TEMP_HYST = 0xC590, // and repeated after 8 bytes -+ .EXT_GPU_TEMP = 0xC5C0, // and repeated after 8 bytes -+ .EXT_GPU_TEMP_HYST = 0xC5D0, // and repeated after 8 bytes -+ .EXT_VRM_TEMP = 0xC5a0, // does not exists or not found -+ .EXT_VRM_TEMP_HYST = 0xC5a0, // does not exists ot not found yet -+ .EXT_FAN1_RPM_LSB = 0xC5a0, // not found yet -+ .EXT_FAN1_RPM_MSB = 0xC5a0, // not found yet -+ .EXT_FAN2_RPM_LSB = 0xC5a0, // not found yet -+ .EXT_FAN2_RPM_MSB = 0xC5a0, // not found yet -+ .EXT_MINIFANCURVE_ON_COOL = 0xC5a0, // does not exists or not found -+ .EXT_LOCKFANCONTROLLER = 0xC5a0, // does not exists or not found -+ .EXT_CPU_TEMP_INPUT = 0xC5a0, // not found yet -+ .EXT_GPU_TEMP_INPUT = 0xC5a0, // not found yet -+ .EXT_IC_TEMP_INPUT = 0xC5a0, // not found yet -+ .EXT_POWERMODE = 0xC5a0, // not found yet -+ .EXT_FAN1_TARGET_RPM = 0xC5a0, // not found yet -+ .EXT_FAN2_TARGET_RPM = 0xC5a0, // not found yet -+ .EXT_MAXIMUMFANSPEED = 0xC5a0, // not found yet -+ .EXT_WHITE_KEYBOARD_BACKLIGHT = 0xC5a0 // not found yet -+}; -+ -+static const struct ec_register_offsets ec_register_offsets_ideapad_v1 = { -+ .ECHIPID1 = 0x2000, -+ .ECHIPID2 = 0x2001, -+ .ECHIPVER = 0x2002, -+ .ECDEBUG = 0x2003, -+ .EXT_FAN_CUR_POINT = 0xC5a0, // not found yet -+ .EXT_FAN_POINTS_SIZE = 0xC5a0, // constant 0 -+ .EXT_FAN1_BASE = 0xC5a0, -+ .EXT_FAN2_BASE = 0xC5a8, -+ .EXT_FAN_ACC_BASE = 0xC5a0, // not found yet -+ .EXT_FAN_DEC_BASE = 0xC5a0, // not found yet -+ .EXT_CPU_TEMP = 0xC550, // and repeated after 8 bytes -+ .EXT_CPU_TEMP_HYST = 0xC590, // and repeated after 8 bytes -+ .EXT_GPU_TEMP = 0xC5C0, // and repeated after 8 bytes -+ .EXT_GPU_TEMP_HYST = 0xC5D0, // and repeated after 8 bytes -+ .EXT_VRM_TEMP = 0xC5a0, // does not exists or not found -+ .EXT_VRM_TEMP_HYST = 0xC5a0, // does not exists ot not found yet -+ 
.EXT_FAN1_RPM_LSB = 0xC5a0, // not found yet -+ .EXT_FAN1_RPM_MSB = 0xC5a0, // not found yet -+ .EXT_FAN2_RPM_LSB = 0xC5a0, // not found yet -+ .EXT_FAN2_RPM_MSB = 0xC5a0, // not found yet -+ .EXT_MINIFANCURVE_ON_COOL = 0xC5a0, // does not exists or not found -+ .EXT_LOCKFANCONTROLLER = 0xC5a0, // does not exists or not found -+ .EXT_CPU_TEMP_INPUT = 0xC5a0, // not found yet -+ .EXT_GPU_TEMP_INPUT = 0xC5a0, // not found yet -+ .EXT_IC_TEMP_INPUT = 0xC5a0, // not found yet -+ .EXT_POWERMODE = 0xC5a0, // not found yet -+ .EXT_FAN1_TARGET_RPM = 0xC5a0, // not found yet -+ .EXT_FAN2_TARGET_RPM = 0xC5a0, // not found yet -+ .EXT_MAXIMUMFANSPEED = 0xC5a0, // not found yet -+ .EXT_WHITE_KEYBOARD_BACKLIGHT = 0xC5a0 // not found yet -+}; -+ -+static const struct ec_register_offsets ec_register_offsets_loq_v0 = { -+ .ECHIPID1 = 0x2000, -+ .ECHIPID2 = 0x2001, -+ .ECHIPVER = 0x2002, -+ .ECDEBUG = 0x2003, -+ .EXT_FAN_CUR_POINT = 0xC5a0, -+ .EXT_FAN_POINTS_SIZE = 0xC5a0, // constant 0 -+ .EXT_FAN1_BASE = 0xC530, -+ .EXT_FAN2_BASE = 0xC530, // same rpm as cpu -+ .EXT_FAN_ACC_BASE = 0xC5a0, // not found yet -+ .EXT_FAN_DEC_BASE = 0xC5a0, // not found yet -+ .EXT_CPU_TEMP = 0xC52F, -+ .EXT_CPU_TEMP_HYST = 0xC5a0, // not found yet -+ .EXT_GPU_TEMP = 0xC531, -+ .EXT_GPU_TEMP_HYST = 0xC5a0, // not found yet -+ .EXT_VRM_TEMP = 0xC5a0, // not found yet -+ .EXT_VRM_TEMP_HYST = 0xC5a0, // not found yet -+ .EXT_FAN1_RPM_LSB = 0xC5a0, // not found yet -+ .EXT_FAN1_RPM_MSB = 0xC5a0, // not found yet -+ .EXT_FAN2_RPM_LSB = 0xC5a0, // not found yet -+ .EXT_FAN2_RPM_MSB = 0xC5a0, // not found yet -+ .EXT_MINIFANCURVE_ON_COOL = 0xC5a0, // not found yet -+ .EXT_LOCKFANCONTROLLER = 0xC5a0, // not found yet -+ .EXT_CPU_TEMP_INPUT = 0xC5a0, // not found yet -+ .EXT_GPU_TEMP_INPUT = 0xC5a0, // not found yet -+ .EXT_IC_TEMP_INPUT = 0xC5a0, // not found yet -+ .EXT_POWERMODE = 0xc41D, -+ .EXT_FAN1_TARGET_RPM = 0xC5a0, // not found yet -+ .EXT_FAN2_TARGET_RPM = 0xC5a0, // not found yet -+ 
.EXT_MAXIMUMFANSPEED = 0xC5a0, // not found yet -+ .EXT_WHITE_KEYBOARD_BACKLIGHT = 0xC5a0 // not found yet -+}; -+ -+static const struct model_config model_v0 = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = true, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_EC, -+ .access_method_temperature = ACCESS_METHOD_EC, -+ .access_method_fancurve = ACCESS_METHOD_EC, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = true, -+ .ramio_physical_start = 0xFE00D400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_j2cn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = true, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_EC, -+ .access_method_temperature = ACCESS_METHOD_EC, -+ .access_method_fancurve = ACCESS_METHOD_EC, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = true, -+ .ramio_physical_start = 0xFE00D400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_9vcn = { -+ .registers = &ec_register_offsets_ideapad_v1, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x8226, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = true, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_WMI, -+ .access_method_temperature = ACCESS_METHOD_WMI, -+ 
.access_method_fancurve = ACCESS_METHOD_EC2, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = false, -+ .ramio_physical_start = 0xFE00D400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_v2022 = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = true, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_EC, -+ .access_method_temperature = ACCESS_METHOD_EC, -+ .access_method_fancurve = ACCESS_METHOD_EC, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = true, -+ .ramio_physical_start = 0xFE00D400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_4gcn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x8226, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = true, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_EC, -+ .access_method_temperature = ACCESS_METHOD_EC, -+ .access_method_fancurve = ACCESS_METHOD_EC, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = true, -+ .ramio_physical_start = 0xFE00D400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_bvcn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = false, -+ .embedded_controller_id = 0x8226, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = true, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = 
ACCESS_METHOD_WMI, -+ .access_method_temperature = ACCESS_METHOD_WMI, -+ .access_method_fancurve = ACCESS_METHOD_NO_ACCESS, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = false, -+ .ramio_physical_start = 0xFC7E0800, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_bhcn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x8226, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = true, -+ .has_custom_powermode = false, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_ACPI, -+ .access_method_fanspeed = ACCESS_METHOD_WMI, -+ .access_method_temperature = ACCESS_METHOD_ACPI, -+ .access_method_fancurve = ACCESS_METHOD_EC, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = true, -+ .ramio_physical_start = 0xFF00D400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_kwcn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x5507, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = false, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_WMI3, -+ .access_method_temperature = ACCESS_METHOD_WMI3, -+ .access_method_fancurve = ACCESS_METHOD_WMI3, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = true, -+ .ramio_physical_start = 0xFE0B0400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_m0cn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x5507, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = true, -+ .has_custom_powermode = true, -+ .access_method_powermode = 
ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_WMI3, -+ .access_method_temperature = ACCESS_METHOD_WMI3, -+ .access_method_fancurve = ACCESS_METHOD_WMI3, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = false, -+ .ramio_physical_start = 0xFE0B0400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_m1cn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x5507, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = true, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_WMI3, -+ .access_method_temperature = ACCESS_METHOD_WMI3, -+ .access_method_fancurve = ACCESS_METHOD_WMI3, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = false, -+ .ramio_physical_start = 0xFE0B0400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_m2cn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = true, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_WMI3, -+ .access_method_temperature = ACCESS_METHOD_WMI3, -+ .access_method_fancurve = ACCESS_METHOD_WMI3, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = false, -+ .ramio_physical_start = 0xFE0B0400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_m6cn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ 
.has_minifancurve = true, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_WMI3, -+ .access_method_temperature = ACCESS_METHOD_WMI3, -+ .access_method_fancurve = ACCESS_METHOD_WMI3, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = false, -+ .ramio_physical_start = 0xFE0B0400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_k1cn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x5263, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = false, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_WMI3, -+ .access_method_temperature = ACCESS_METHOD_WMI3, -+ .access_method_fancurve = ACCESS_METHOD_WMI3, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = true, -+ .ramio_physical_start = 0xFE0B0400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_lpcn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x5507, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = false, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_WMI3, -+ .access_method_temperature = ACCESS_METHOD_WMI3, -+ .access_method_fancurve = ACCESS_METHOD_WMI3, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = true, -+ .ramio_physical_start = 0xFE0B0400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_kfcn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = true, -+ 
.embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = false, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_EC, -+ .access_method_temperature = ACCESS_METHOD_EC, -+ .access_method_fancurve = ACCESS_METHOD_EC, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = true, -+ .ramio_physical_start = 0xFE00D400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_hacn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = false, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = true, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_EC, -+ .access_method_temperature = ACCESS_METHOD_EC, -+ .access_method_fancurve = ACCESS_METHOD_EC, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = true, -+ .ramio_physical_start = 0xFE00D400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_k9cn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = false, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, // or replace 0xC400 by 0x0400 ? 
-+ .memoryio_size = 0x300, -+ .has_minifancurve = true, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_EC, -+ .access_method_temperature = ACCESS_METHOD_EC, -+ .access_method_fancurve = ACCESS_METHOD_EC, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = true, -+ .ramio_physical_start = 0xFE00D400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_eucn = { -+ .registers = &ec_register_offsets_v1, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = true, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_EC, -+ .access_method_temperature = ACCESS_METHOD_EC, -+ .access_method_fancurve = ACCESS_METHOD_EC, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = true, -+ .ramio_physical_start = 0xFE00D400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_fccn = { -+ .registers = &ec_register_offsets_ideapad_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = false, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_WMI, -+ .access_method_temperature = ACCESS_METHOD_ACPI, -+ .access_method_fancurve = ACCESS_METHOD_EC2, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = true, -+ .ramio_physical_start = 0xFE00D400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_h3cn = { -+ //0xFE0B0800 -+ .registers = &ec_register_offsets_v1, -+ 
.check_embedded_controller_id = false, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = false, -+ .has_custom_powermode = false, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ // not implemented (properly) in WMI, RGB conrolled by USB -+ .access_method_keyboard = ACCESS_METHOD_NO_ACCESS, -+ // accessing fan speed is not implemented in ACPI -+ // a variable in the operation region (or not found) -+ // and not per WMI (methods returns constant 0) -+ .access_method_fanspeed = ACCESS_METHOD_NO_ACCESS, -+ .access_method_temperature = ACCESS_METHOD_WMI, -+ .access_method_fancurve = ACCESS_METHOD_NO_ACCESS, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = false, -+ .ramio_physical_start = 0xFE0B0800, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_e9cn = { -+ //0xFE0B0800 -+ .registers = &ec_register_offsets_v1, -+ .check_embedded_controller_id = false, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, //0xFC7E0800 -+ .memoryio_size = 0x300, -+ .has_minifancurve = false, -+ .has_custom_powermode = false, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ // not implemented (properly) in WMI, RGB conrolled by USB -+ .access_method_keyboard = ACCESS_METHOD_NO_ACCESS, -+ // accessing fan speed is not implemented in ACPI -+ // a variable in the operation region (or not found) -+ // and not per WMI (methods returns constant 0) -+ .access_method_fanspeed = ACCESS_METHOD_WMI, -+ .access_method_temperature = ACCESS_METHOD_WMI, -+ .access_method_fancurve = ACCESS_METHOD_NO_ACCESS, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = false, -+ .ramio_physical_start = 0xFC7E0800, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_8jcn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x8226, -+ 
.memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = true, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_WMI, -+ .access_method_temperature = ACCESS_METHOD_WMI, -+ .access_method_fancurve = ACCESS_METHOD_EC, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = false, -+ .ramio_physical_start = 0xFE00D400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct model_config model_jncn = { -+ .registers = &ec_register_offsets_v1, -+ .check_embedded_controller_id = false, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = false, -+ .has_custom_powermode = false, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_NO_ACCESS, -+ .access_method_fanspeed = ACCESS_METHOD_WMI, -+ .access_method_temperature = ACCESS_METHOD_WMI, -+ .access_method_fancurve = ACCESS_METHOD_NO_ACCESS, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = false, -+ .ramio_physical_start = 0xFC7E0800, -+ .ramio_size = 0x600 -+}; -+ -+// Yoga Model! -+static const struct model_config model_j1cn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = true, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_EC, -+ .access_method_temperature = ACCESS_METHOD_EC, -+ .access_method_fancurve = ACCESS_METHOD_EC, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = true, -+ .ramio_physical_start = 0xFE0B0400, -+ .ramio_size = 0x600 -+}; -+ -+// Yoga Model! 
-+static const struct model_config model_dmcn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = true, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_EC, -+ .access_method_temperature = ACCESS_METHOD_EC, -+ .access_method_fancurve = ACCESS_METHOD_EC, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = true, -+ .ramio_physical_start = 0xFE700D00, -+ .ramio_size = 0x600 -+}; -+ -+// Yoga Model! -+static const struct model_config model_khcn = { -+ .registers = &ec_register_offsets_v0, -+ .check_embedded_controller_id = false, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = true, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_EC, -+ .access_method_keyboard = ACCESS_METHOD_WMI, -+ .access_method_fanspeed = ACCESS_METHOD_EC, -+ .access_method_temperature = ACCESS_METHOD_EC, -+ .access_method_fancurve = ACCESS_METHOD_EC, -+ .access_method_fanfullspeed = ACCESS_METHOD_WMI, -+ .acpi_check_dev = false, -+ .ramio_physical_start = 0xFE0B0400, -+ .ramio_size = 0x600 -+}; -+ -+// LOQ Model -+static const struct model_config model_lzcn = { -+ .registers = &ec_register_offsets_loq_v0, -+ .check_embedded_controller_id = true, -+ .embedded_controller_id = 0x8227, -+ .memoryio_physical_ec_start = 0xC400, -+ .memoryio_size = 0x300, -+ .has_minifancurve = true, -+ .has_custom_powermode = true, -+ .access_method_powermode = ACCESS_METHOD_WMI, -+ .access_method_keyboard = ACCESS_METHOD_WMI2, -+ .access_method_fanspeed = ACCESS_METHOD_WMI3, -+ .access_method_temperature = ACCESS_METHOD_WMI3, -+ .access_method_fancurve = ACCESS_METHOD_EC3, -+ .access_method_fanfullspeed = 
ACCESS_METHOD_WMI3, -+ .acpi_check_dev = false, -+ .ramio_physical_start = 0xFE0B0400, -+ .ramio_size = 0x600 -+}; -+ -+static const struct dmi_system_id denylist[] = { {} }; -+ -+static const struct dmi_system_id optimistic_allowlist[] = { -+ { -+ // Release year: 2021 -+ // Generation: 6 -+ // Name: Legion 5, Legion 5 pro, Legion 7 -+ // Family: Legion 5 15ACH6H, ... -+ .ident = "GKCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "GKCN"), -+ }, -+ .driver_data = (void *)&model_v0 -+ }, -+ { -+ // Release year: 2020 -+ .ident = "EUCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "EUCN"), -+ }, -+ .driver_data = (void *)&model_eucn -+ }, -+ { -+ // Release year: 2020 -+ .ident = "EFCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "EFCN"), -+ }, -+ .driver_data = (void *)&model_v0 -+ }, -+ { -+ // Release year: 2020 -+ .ident = "FSCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "FSCN"), -+ }, -+ .driver_data = (void *)&model_v0 -+ }, -+ { -+ // Release year: 2021 -+ .ident = "HHCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "HHCN"), -+ }, -+ .driver_data = (void *)&model_v0 -+ }, -+ { -+ // Release year: 2022 -+ .ident = "H1CN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "H1CN"), -+ }, -+ .driver_data = (void *)&model_v0 -+ }, -+ { -+ // Release year: 2022 -+ .ident = "J2CN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "J2CN"), -+ }, -+ .driver_data = (void *)&model_v0 -+ }, -+ { -+ // Release year: 2022 -+ .ident = "JUCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "JUCN"), -+ }, -+ .driver_data = (void *)&model_v0 -+ }, -+ { -+ // Release year: 2022 -+ .ident = "KFCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, 
"LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "KFCN"), -+ }, -+ .driver_data = (void *)&model_kfcn -+ }, -+ { -+ // Release year: 2021 -+ .ident = "HACN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "HACN"), -+ }, -+ .driver_data = (void *)&model_hacn -+ }, -+ { -+ // Release year: 2021 -+ .ident = "G9CN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "G9CN"), -+ }, -+ .driver_data = (void *)&model_v0 -+ }, -+ { -+ // Release year: 2022 -+ .ident = "K9CN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "K9CN"), -+ }, -+ .driver_data = (void *)&model_k9cn -+ }, -+ { -+ // e.g. IdeaPad Gaming 3 15ARH05 -+ .ident = "FCCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "FCCN"), -+ }, -+ .driver_data = (void *)&model_fccn -+ }, -+ { -+ // e.g. IdeaPad Gaming 3 15ARH05 (8K21) -+ .ident = "H4CN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "H4CN"), -+ }, -+ .driver_data = (void *)&model_fccn -+ }, -+ { -+ // e.g. Ideapad Gaming 3 15ACH6 -+ .ident = "H3CN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "H3CN"), -+ }, -+ .driver_data = (void *)&model_h3cn -+ }, -+ { -+ // e.g. IdeaPad Gaming 3 15ARH7 (2022) -+ .ident = "JNCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "JNCN"), -+ }, -+ .driver_data = (void *)&model_jncn -+ }, -+ { -+ // 2020, seems very different in ACPI dissassembly -+ .ident = "E9CN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "E9CN"), -+ }, -+ .driver_data = (void *)&model_e9cn -+ }, -+ { -+ // e.g. Legion Y7000 (older version) -+ .ident = "8JCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "8JCN"), -+ }, -+ .driver_data = (void *)&model_8jcn -+ }, -+ { -+ // e.g. 
Legion 7i Pro 2023 -+ .ident = "KWCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "KWCN"), -+ }, -+ .driver_data = (void *)&model_kwcn -+ }, -+ { -+ // e.g. Legion Pro 5 2023 or R9000P -+ .ident = "LPCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "LPCN"), -+ }, -+ .driver_data = (void *)&model_lpcn -+ }, -+ { -+ // e.g. Lenovo Legion 5i/Y7000 2019 PG0 -+ .ident = "BHCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "BHCN"), -+ }, -+ .driver_data = (void *)&model_bhcn -+ }, -+ { -+ // e.g. Lenovo 7 16IAX7 -+ .ident = "K1CN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "K1CN"), -+ }, -+ .driver_data = (void *)&model_k1cn -+ }, -+ { -+ // e.g. Legion Y720 -+ .ident = "4GCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "4GCN"), -+ }, -+ .driver_data = (void *)&model_4gcn -+ }, -+ { -+ // e.g. Legion Slim 5 16APH8 2023 -+ .ident = "M3CN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "M3CN"), -+ }, -+ .driver_data = (void *)&model_lpcn -+ }, -+ { -+ // e.g. Legion Y7000p-1060 -+ .ident = "9VCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "9VCN"), -+ }, -+ .driver_data = (void *)&model_9vcn -+ }, -+ { -+ // e.g. Legion Y9000X -+ .ident = "JYCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "JYCN"), -+ }, -+ .driver_data = (void *)&model_v2022 -+ }, -+ { -+ // e.g. Legion Y740-15IRH, older model e.g. with GTX 1660 -+ .ident = "BVCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "BVCN"), -+ }, -+ .driver_data = (void *)&model_bvcn -+ }, -+ { -+ // e.g. 
Legion 5 Pro 16IAH7H with a RTX 3070 Ti -+ .ident = "J2CN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "J2CN"), -+ }, -+ .driver_data = (void *)&model_j2cn -+ }, -+ { -+ // e.g. Lenovo Yoga 7 16IAH7 with GPU Intel DG2 Arc A370M -+ .ident = "J1CN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "J1CN"), -+ }, -+ .driver_data = (void *)&model_j1cn -+ }, -+ { -+ // e.g. Legion Slim 7 16IRH8 (2023) with RTX 4070 -+ .ident = "M0CN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "M0CN"), -+ }, -+ .driver_data = (void *)&model_m0cn -+ }, -+ { -+ // e.g. Legion Slim 7 16IRH8 (2023) AMD Ryzen 7 7840HS with RTX 4060 -+ .ident = "M1CN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "M1CN"), -+ }, -+ .driver_data = (void *)&model_m1cn -+ }, -+ { -+ // e.g. Legion Slim 5 16IRH8 (2023) with RTX 4070 -+ .ident = "M2CN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "M2CN"), -+ }, -+ .driver_data = (void *)&model_m2cn -+ }, -+ { -+ // e.g. Lenovo Yoga Slim 7 gen 8 (2023) -+ .ident = "M6CN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "M6CN"), -+ }, -+ .driver_data = (void *)&model_m6cn -+ }, -+ { -+ // e.g. Yoga Slim 7-14ARE05 -+ .ident = "DMCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "DMCN"), -+ }, -+ .driver_data = (void *)&model_dmcn -+ }, -+ { -+ // e.g. Yoga Slim 7 Pro 14ARH7 -+ .ident = "KHCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "KHCN"), -+ }, -+ .driver_data = (void *)&model_khcn -+ }, -+ { -+ // e.g. 
LOQ 15IRH8 -+ .ident = "LZCN", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -+ DMI_MATCH(DMI_BIOS_VERSION, "LZCN"), -+ }, -+ .driver_data = (void *)&model_lzcn -+ }, -+ {} -+}; -+ -+/* ================================= */ -+/* ACPI and WMI access */ -+/* ================================= */ -+ -+// function from ideapad-laptop.c -+static int eval_int(acpi_handle handle, const char *name, unsigned long *res) -+{ -+ unsigned long long result; -+ acpi_status status; -+ -+ status = acpi_evaluate_integer(handle, (char *)name, NULL, &result); -+ if (ACPI_FAILURE(status)) -+ return -EIO; -+ -+ *res = result; -+ -+ return 0; -+} -+ -+// function from ideapad-laptop.c -+static int exec_simple_method(acpi_handle handle, const char *name, -+ unsigned long arg) -+{ -+ acpi_status status = -+ acpi_execute_simple_method(handle, (char *)name, arg); -+ -+ return ACPI_FAILURE(status) ? -EIO : 0; -+} -+ -+// function from ideapad-laptop.c -+static int exec_sbmc(acpi_handle handle, unsigned long arg) -+{ -+ // \_SB.PCI0.LPC0.EC0.VPC0.SBMC -+ return exec_simple_method(handle, "VPC0.SBMC", arg); -+} -+ -+//static int eval_qcho(acpi_handle handle, unsigned long *res) -+//{ -+// // \_SB.PCI0.LPC0.EC0.QCHO -+// return eval_int(handle, "QCHO", res); -+//} -+ -+static int eval_gbmd(acpi_handle handle, unsigned long *res) -+{ -+ return eval_int(handle, "VPC0.GBMD", res); -+} -+ -+static int eval_spmo(acpi_handle handle, unsigned long *res) -+{ -+ // \_SB.PCI0.LPC0.EC0.QCHO -+ return eval_int(handle, "VPC0.BTSM", res); -+} -+ -+static int acpi_process_buffer_to_ints(const char *id_name, int id_nr, -+ acpi_status status, -+ struct acpi_buffer *out_buffer, u8 *res, -+ size_t ressize) -+{ -+ // seto to NULL call kfree on NULL if next function call fails -+ union acpi_object *out = NULL; -+ size_t i; -+ int error = 0; -+ -+ if (ACPI_FAILURE(status)) { -+ pr_info("ACPI evaluation error for: %s:%d\n", id_name, id_nr); -+ error = -EFAULT; -+ goto err; -+ } -+ -+ out = out_buffer->pointer; 
-+ if (!out) { -+ pr_info("Unexpected ACPI result for %s:%d\n", id_name, id_nr); -+ error = -AE_ERROR; -+ goto err; -+ } -+ -+ if (out->type != ACPI_TYPE_BUFFER || out->buffer.length != ressize) { -+ pr_info("Unexpected ACPI result for %s:%d: expected type %d but got %d; expected length %lu but got %u;\n", -+ id_name, id_nr, ACPI_TYPE_BUFFER, out->type, ressize, -+ out->buffer.length); -+ error = -AE_ERROR; -+ goto err; -+ } -+ -+// Reduced verbosity (only printing when ACPI result have bad parameters) -+// pr_info("ACPI result for %s:%d: ACPI buffer length: %u\n", id_name, -+// id_nr, out->buffer.length); -+ -+ for (i = 0; i < ressize; ++i) -+ res[i] = out->buffer.pointer[i]; -+ error = 0; -+ -+err: -+ kfree(out); -+ return error; -+} -+ -+//static int exec_ints(acpi_handle handle, const char *method_name, -+// struct acpi_object_list *params, u8 *res, size_t ressize) -+//{ -+// acpi_status status; -+// struct acpi_buffer out_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; -+ -+// status = acpi_evaluate_object(handle, (acpi_string)method_name, params, -+// &out_buffer); -+ -+// return acpi_process_buffer_to_ints(method_name, 0, status, &out_buffer, -+// res, ressize); -+//} -+ -+static int wmi_exec_ints(const char *guid, u8 instance, u32 method_id, -+ const struct acpi_buffer *params, u8 *res, -+ size_t ressize) -+{ -+ acpi_status status; -+ struct acpi_buffer out_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; -+ -+ status = wmi_evaluate_method(guid, instance, method_id, params, -+ &out_buffer); -+ return acpi_process_buffer_to_ints(guid, method_id, status, &out_buffer, -+ res, ressize); -+} -+ -+static int wmi_exec_int(const char *guid, u8 instance, u32 method_id, -+ const struct acpi_buffer *params, unsigned long *res) -+{ -+ acpi_status status; -+ struct acpi_buffer out_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; -+ // set to NULL and call kfree on NULL if next function call fails -+ union acpi_object *out = NULL; -+ int error = 0; -+ -+ status = wmi_evaluate_method(guid, 
instance, method_id, params, -+ &out_buffer); -+ -+ if (ACPI_FAILURE(status)) { -+ pr_info("WMI evaluation error for: %s:%d\n", guid, method_id); -+ error = -EFAULT; -+ goto err; -+ } -+ -+ out = out_buffer.pointer; -+ if (!out) { -+ pr_info("Unexpected ACPI result for %s:%d", guid, method_id); -+ error = -AE_ERROR; -+ goto err; -+ } -+ -+ if (out->type != ACPI_TYPE_INTEGER) { -+ pr_info("Unexpected ACPI result for %s:%d: expected type %d but got %d\n", -+ guid, method_id, ACPI_TYPE_INTEGER, out->type); -+ error = -AE_ERROR; -+ goto err; -+ } -+ -+ *res = out->integer.value; -+ error = 0; -+ -+err: -+ kfree(out); -+ return error; -+} -+ -+static int wmi_exec_noarg_int(const char *guid, u8 instance, u32 method_id, -+ unsigned long *res) -+{ -+ struct acpi_buffer params; -+ -+ params.length = 0; -+ params.pointer = NULL; -+ return wmi_exec_int(guid, instance, method_id, ¶ms, res); -+} -+ -+static int wmi_exec_noarg_ints(const char *guid, u8 instance, u32 method_id, -+ u8 *res, size_t ressize) -+{ -+ struct acpi_buffer params; -+ -+ params.length = 0; -+ params.pointer = NULL; -+ return wmi_exec_ints(guid, instance, method_id, ¶ms, res, ressize); -+} -+ -+static int wmi_exec_arg(const char *guid, u8 instance, u32 method_id, void *arg, -+ size_t arg_size) -+{ -+ struct acpi_buffer params; -+ acpi_status status; -+ -+ params.length = arg_size; -+ params.pointer = arg; -+ status = wmi_evaluate_method(guid, instance, method_id, ¶ms, NULL); -+ -+ if (ACPI_FAILURE(status)) -+ return -EIO; -+ return 0; -+} -+ -+/* ================================= */ -+/* Lenovo WMI config */ -+/* ================================= */ -+#define LEGION_WMI_GAMEZONE_GUID "887B54E3-DDDC-4B2C-8B88-68A26A8835D0" -+// GPU over clock -+#define WMI_METHOD_ID_ISSUPPORTGPUOC 4 -+ -+// Fan speed -+// only fully implemented for some models here -+// often implemented in other classes and methods too -+// below -+#define WMI_METHOD_ID_GETFAN1SPEED 8 -+#define WMI_METHOD_ID_GETFAN2SPEED 9 -+ -+// Version 
of ACPI -+#define WMI_METHOD_ID_GETVERSION 11 -+// Does it support CPU overclock? -+#define WMI_METHOD_ID_ISSUPPORTCPUOC 14 -+// Temperatures -+// only fully implemented for some models here -+// often implemented in other classes and methods too -+// below -+#define WMI_METHOD_ID_GETCPUTEMP 18 -+#define WMI_METHOD_ID_GETGPUTEMP 19 -+ -+// two state keyboard light -+#define WMI_METHOD_ID_GETKEYBOARDLIGHT 37 -+#define WMI_METHOD_ID_SETKEYBOARDLIGHT 36 -+// toggle win key -+// 0 = win key enabled; 1 = win key disabled -+#define WMI_METHOD_ID_ISSUPPORTDISABLEWINKEY 21 -+#define WMI_METHOD_ID_GETWINKEYSTATUS 23 -+#define WMI_METHOD_ID_SETWINKEYSTATUS 22 -+// toggle touchpad -+//0 = touchpad enabled; 1 = touchpad disabled -+#define WMI_METHOD_ID_ISSUPPORTDISABLETP 24 -+#define WMI_METHOD_ID_GETTPSTATUS 26 -+#define WMI_METHOD_ID_SETTPSTATUS 25 -+// GSync -+#define WMI_METHOD_ID_ISSUPPORTGSYNC 40 -+#define WMI_METHOD_ID_GETGSYNCSTATUS 41 -+#define WMI_METHOD_ID_SETGSYNCSTATUS 42 -+//smartFanMode = powermode -+#define WMI_METHOD_ID_ISSUPPORTSMARTFAN 49 -+#define WMI_METHOD_ID_GETSMARTFANMODE 45 -+#define WMI_METHOD_ID_SETSMARTFANMODE 44 -+// power charge mode -+#define WMI_METHOD_ID_GETPOWERCHARGEMODE 47 -+// overdrive of display to reduce latency -+// 0=off, 1=on -+#define WMI_METHOD_ID_ISSUPPORTOD 49 -+#define WMI_METHOD_ID_GETODSTATUS 50 -+#define WMI_METHOD_ID_SETODSTATUS 51 -+// thermal mode = power mode used for cooling -+#define WMI_METHOD_ID_GETTHERMALMODE 55 -+// get max frequency of core 0 -+#define WMI_METHOD_ID_GETCPUMAXFREQUENCY 60 -+// check if AC adapter has enough power to overclock -+#define WMI_METHOD_ID_ISACFITFOROC 62 -+// set iGPU (GPU packaged with CPU) state -+#define WMI_METHOD_ID_ISSUPPORTIGPUMODE 63 -+#define WMI_METHOD_ID_GETIGPUMODESTATUS 64 -+#define WMI_METHOD_ID_SETIGPUMODESTATUS 65 -+#define WMI_METHOD_ID_NOTIFYDGPUSTATUS 66 -+enum IGPUState { -+ IGPUState_default = 0, -+ IGPUState_iGPUOnly = 1, -+ IGPUState_auto = 2 -+}; -+ -+#define 
WMI_GUID_LENOVO_CPU_METHOD "14afd777-106f-4c9b-b334-d388dc7809be" -+#define WMI_METHOD_ID_CPU_GET_SUPPORT_OC_STATUS 15 -+#define WMI_METHOD_ID_CPU_GET_OC_STATUS 1 -+#define WMI_METHOD_ID_CPU_SET_OC_STATUS 2 -+ -+// ppt limit slow -+#define WMI_METHOD_ID_CPU_GET_SHORTTERM_POWERLIMIT 3 -+#define WMI_METHOD_ID_CPU_SET_SHORTTERM_POWERLIMIT 4 -+// ppt stapm -+#define WMI_METHOD_ID_CPU_GET_LONGTERM_POWERLIMIT 5 -+#define WMI_METHOD_ID_CPU_SET_LONGTERM_POWERLIMIT 6 -+// default power limit -+#define WMI_METHOD_ID_CPU_GET_DEFAULT_POWERLIMIT 7 -+// peak power limit -+#define WMI_METHOD_ID_CPU_GET_PEAK_POWERLIMIT 8 -+#define WMI_METHOD_ID_CPU_SET_PEAK_POWERLIMIT 9 -+// apu sppt powerlimit -+#define WMI_METHOD_ID_CPU_GET_APU_SPPT_POWERLIMIT 12 -+#define WMI_METHOD_ID_CPU_SET_APU_SPPT_POWERLIMIT 13 -+// cross loading powerlimit -+#define WMI_METHOD_ID_CPU_GET_CROSS_LOADING_POWERLIMIT 16 -+#define WMI_METHOD_ID_CPU_SET_CROSS_LOADING_POWERLIMIT 17 -+ -+#define WMI_GUID_LENOVO_GPU_METHOD "da7547f1-824d-405f-be79-d9903e29ced7" -+// overclock GPU possible -+#define WMI_METHOD_ID_GPU_GET_OC_STATUS 1 -+#define WMI_METHOD_ID_GPU_SET_OC_STATUS 2 -+// dynamic boost power -+#define WMI_METHOD_ID_GPU_GET_PPAB_POWERLIMIT 3 -+#define WMI_METHOD_ID_GPU_SET_PPAB_POWERLIMIT 4 -+// configurable TGP (power) -+#define WMI_METHOD_ID_GPU_GET_CTGP_POWERLIMIT 5 -+#define WMI_METHOD_ID_GPU_SET_CTGP_POWERLIMIT 6 -+// ppab/ctgp powerlimit -+#define WMI_METHOD_ID_GPU_GET_DEFAULT_PPAB_CTGP_POWERLIMIT 7 -+// temperature limit -+#define WMI_METHOD_ID_GPU_GET_TEMPERATURE_LIMIT 8 -+#define WMI_METHOD_ID_GPU_SET_TEMPERATURE_LIMIT 9 -+// boost clock -+#define WMI_METHOD_ID_GPU_GET_BOOST_CLOCK 10 -+ -+#define WMI_GUID_LENOVO_FAN_METHOD "92549549-4bde-4f06-ac04-ce8bf898dbaa" -+// set fan to maximal speed; dust cleaning mode -+// only works in custom power mode -+#define WMI_METHOD_ID_FAN_GET_FULLSPEED 1 -+#define WMI_METHOD_ID_FAN_SET_FULLSPEED 2 -+// max speed of fan -+#define WMI_METHOD_ID_FAN_GET_MAXSPEED 3 
-+#define WMI_METHOD_ID_FAN_SET_MAXSPEED 4 -+// fan table in custom mode -+#define WMI_METHOD_ID_FAN_GET_TABLE 5 -+#define WMI_METHOD_ID_FAN_SET_TABLE 6 -+// get speed of fans -+#define WMI_METHOD_ID_FAN_GETCURRENTFANSPEED 7 -+// get temperatures of CPU and GPU used for controlling cooling -+#define WMI_METHOD_ID_FAN_GETCURRENTSENSORTEMPERATURE 8 -+ -+// do not implement following -+// #define WMI_METHOD_ID_Fan_SetCurrentFanSpeed 9 -+ -+#define LEGION_WMI_KBBACKLIGHT_GUID "8C5B9127-ECD4-4657-980F-851019F99CA5" -+// access the keyboard backlight with 3 states -+#define WMI_METHOD_ID_KBBACKLIGHTGET 0x1 -+#define WMI_METHOD_ID_KBBACKLIGHTSET 0x2 -+ -+// new method in newer methods to get or set most of the values -+// with the two methods GetFeatureValue or SetFeatureValue. -+// They are called like GetFeatureValue(feature_id) where -+// feature_id is a id for the feature -+#define LEGION_WMI_LENOVO_OTHER_METHOD_GUID \ -+ "dc2a8805-3a8c-41ba-a6f7-092e0089cd3b" -+#define WMI_METHOD_ID_GET_FEATURE_VALUE 17 -+#define WMI_METHOD_ID_SET_FEATURE_VALUE 18 -+ -+enum OtherMethodFeature { -+ OtherMethodFeature_U1 = 0x010000, //->PC00.LPCB.EC0.REJF -+ OtherMethodFeature_U2 = 0x0F0000, //->C00.PEG1.PXP._STA? -+ OtherMethodFeature_U3 = 0x030000, //->PC00.LPCB.EC0.FLBT? 
-+ OtherMethodFeature_CPU_SHORT_TERM_POWER_LIMIT = 0x01010000, -+ OtherMethodFeature_CPU_LONG_TERM_POWER_LIMIT = 0x01020000, -+ OtherMethodFeature_CPU_PEAK_POWER_LIMIT = 0x01030000, -+ OtherMethodFeature_CPU_TEMPERATURE_LIMIT = 0x01040000, -+ -+ OtherMethodFeature_APU_PPT_POWER_LIMIT = 0x01050000, -+ -+ OtherMethodFeature_CPU_CROSS_LOAD_POWER_LIMIT = 0x01060000, -+ OtherMethodFeature_CPU_L1_TAU = 0x01070000, -+ -+ OtherMethodFeature_GPU_POWER_BOOST = 0x02010000, -+ OtherMethodFeature_GPU_cTGP = 0x02020000, -+ OtherMethodFeature_GPU_TEMPERATURE_LIMIT = 0x02030000, -+ OtherMethodFeature_GPU_POWER_TARGET_ON_AC_OFFSET_FROM_BASELINE = -+ 0x02040000, -+ -+ OtherMethodFeature_FAN_SPEED_1 = 0x04030001, -+ OtherMethodFeature_FAN_SPEED_2 = 0x04030002, -+ -+ OtherMethodFeature_C_U1 = 0x05010000, -+ OtherMethodFeature_TEMP_CPU = 0x05040000, -+ OtherMethodFeature_TEMP_GPU = 0x05050000, -+}; -+ -+static ssize_t wmi_other_method_get_value(enum OtherMethodFeature feature_id, -+ int *value) -+{ -+ struct acpi_buffer params; -+ int error; -+ unsigned long res; -+ u32 param1 = feature_id; -+ -+ params.length = sizeof(param1); -+ params.pointer = ¶m1; -+ error = wmi_exec_int(LEGION_WMI_LENOVO_OTHER_METHOD_GUID, 0, -+ WMI_METHOD_ID_GET_FEATURE_VALUE, ¶ms, &res); -+ if (!error) -+ *value = res; -+ return error; -+} -+ -+/* =================================== */ -+/* EC RAM Access with memory mapped IO */ -+/* =================================== */ -+ -+struct ecram_memoryio { -+ // TODO: start of remapped memory in EC RAM is assumed to be 0 -+ // u16 ecram_start; -+ -+ // physical address of remapped IO, depends on model and firmware -+ phys_addr_t physical_start; -+ // start adress of region in ec memory -+ phys_addr_t physical_ec_start; -+ // virtual address of remapped IO -+ u8 *virtual_start; -+ // size of remapped access -+ size_t size; -+}; -+ -+/** -+ * physical_start : corresponds to EC RAM 0 inside EC -+ * size: size of remapped region -+ * -+ * strong exception safety -+ */ 
-+static ssize_t ecram_memoryio_init(struct ecram_memoryio *ec_memoryio, -+ phys_addr_t physical_start, -+ phys_addr_t physical_ec_start, size_t size) -+{ -+ void *virtual_start = ioremap(physical_start, size); -+ -+ if (!IS_ERR_OR_NULL(virtual_start)) { -+ ec_memoryio->virtual_start = virtual_start; -+ ec_memoryio->physical_start = physical_start; -+ ec_memoryio->physical_ec_start = physical_ec_start; -+ ec_memoryio->size = size; -+ pr_info("Successfully mapped embedded controller: 0x%llx (in RAM)/0x%llx (in EC) to virtual 0x%p\n", -+ ec_memoryio->physical_start, -+ ec_memoryio->physical_ec_start, -+ ec_memoryio->virtual_start); -+ } else { -+ pr_info("Error mapping embedded controller memory at 0x%llx\n", -+ physical_start); -+ return -ENOMEM; -+ } -+ return 0; -+} -+ -+static void ecram_memoryio_exit(struct ecram_memoryio *ec_memoryio) -+{ -+ if (ec_memoryio->virtual_start != NULL) { -+ pr_info("Unmapping embedded controller memory at 0x%llx (in RAM)/0x%llx (in EC) at virtual 0x%p\n", -+ ec_memoryio->physical_start, -+ ec_memoryio->physical_ec_start, -+ ec_memoryio->virtual_start); -+ iounmap(ec_memoryio->virtual_start); -+ ec_memoryio->virtual_start = NULL; -+ } -+} -+ -+/* Read a byte from the EC RAM. -+ * -+ * Return status because of commong signature for alle -+ * methods to access EC RAM. -+ */ -+static ssize_t ecram_memoryio_read(const struct ecram_memoryio *ec_memoryio, -+ u16 ec_offset, u8 *value) -+{ -+ if (ec_offset < ec_memoryio->physical_ec_start) { -+ pr_info("Unexpected read at offset %d into EC RAM\n", -+ ec_offset); -+ return -1; -+ } -+ *value = *(ec_memoryio->virtual_start + -+ (ec_offset - ec_memoryio->physical_ec_start)); -+ return 0; -+} -+ -+/* Write a byte to the EC RAM. -+ * -+ * Return status because of commong signature for alle -+ * methods to access EC RAM. 
-+ */ -+ssize_t ecram_memoryio_write(const struct ecram_memoryio *ec_memoryio, -+ u16 ec_offset, u8 value) -+{ -+ if (ec_offset < ec_memoryio->physical_ec_start) { -+ pr_info("Unexpected write at offset %d into EC RAM\n", -+ ec_offset); -+ return -1; -+ } -+ *(ec_memoryio->virtual_start + -+ (ec_offset - ec_memoryio->physical_ec_start)) = value; -+ return 0; -+} -+ -+/* ================================= */ -+/* EC RAM Access with port-mapped IO */ -+/* ================================= */ -+ -+/* -+ * See datasheet of e.g. IT8502E/F/G, e.g. -+ * 6.2 Plug and Play Configuration (PNPCFG) -+ * -+ * Depending on configured BARDSEL register -+ * the ports -+ * ECRAM_PORTIO_ADDR_PORT and -+ * ECRAM_PORTIO_DATA_PORT -+ * are configured. -+ * -+ * By performing IO on these ports one can -+ * read/write to registers in the EC. -+ * -+ * "To access a register of PNPCFG, write target index to -+ * address port and access this PNPCFG register via -+ * data port" [datasheet, 6.2 Plug and Play Configuration] -+ */ -+ -+// IO ports used to write to communicate with embedded controller -+// Start of used ports -+#define ECRAM_PORTIO_START_PORT 0x4E -+// Number of used ports -+#define ECRAM_PORTIO_PORTS_SIZE 2 -+// Port used to specify address in EC RAM to read/write -+// 0x4E/0x4F is the usual port for IO super controller -+// 0x2E/0x2F also common (ITE can also be configured to use these) -+#define ECRAM_PORTIO_ADDR_PORT 0x4E -+// Port to send/receive the value to write/read -+#define ECRAM_PORTIO_DATA_PORT 0x4F -+// Name used to request ports -+#define ECRAM_PORTIO_NAME "legion" -+ -+struct ecram_portio { -+ /* protects read/write to EC RAM performed -+ * as a certain sequence of outb, inb -+ * commands on the IO ports. There can -+ * be at most one. 
-+ */ -+ struct mutex io_port_mutex; -+}; -+ -+static ssize_t ecram_portio_init(struct ecram_portio *ec_portio) -+{ -+ if (!request_region(ECRAM_PORTIO_START_PORT, ECRAM_PORTIO_PORTS_SIZE, -+ ECRAM_PORTIO_NAME)) { -+ pr_info("Cannot init ecram_portio the %x ports starting at %x\n", -+ ECRAM_PORTIO_PORTS_SIZE, ECRAM_PORTIO_START_PORT); -+ return -ENODEV; -+ } -+ //pr_info("Reserved %x ports starting at %x\n", ECRAM_PORTIO_PORTS_SIZE, ECRAM_PORTIO_START_PORT); -+ mutex_init(&ec_portio->io_port_mutex); -+ return 0; -+} -+ -+static void ecram_portio_exit(struct ecram_portio *ec_portio) -+{ -+ release_region(ECRAM_PORTIO_START_PORT, ECRAM_PORTIO_PORTS_SIZE); -+} -+ -+/* Read a byte from the EC RAM. -+ * -+ * Return status because of commong signature for alle -+ * methods to access EC RAM. -+ */ -+static ssize_t ecram_portio_read(struct ecram_portio *ec_portio, u16 offset, -+ u8 *value) -+{ -+ mutex_lock(&ec_portio->io_port_mutex); -+ -+ outb(0x2E, ECRAM_PORTIO_ADDR_PORT); -+ outb(0x11, ECRAM_PORTIO_DATA_PORT); -+ outb(0x2F, ECRAM_PORTIO_ADDR_PORT); -+ // TODO: no explicit cast between types seems to be sometimes -+ // done and sometimes not -+ outb((u8)((offset >> 8) & 0xFF), ECRAM_PORTIO_DATA_PORT); -+ -+ outb(0x2E, ECRAM_PORTIO_ADDR_PORT); -+ outb(0x10, ECRAM_PORTIO_DATA_PORT); -+ outb(0x2F, ECRAM_PORTIO_ADDR_PORT); -+ outb((u8)(offset & 0xFF), ECRAM_PORTIO_DATA_PORT); -+ -+ outb(0x2E, ECRAM_PORTIO_ADDR_PORT); -+ outb(0x12, ECRAM_PORTIO_DATA_PORT); -+ outb(0x2F, ECRAM_PORTIO_ADDR_PORT); -+ *value = inb(ECRAM_PORTIO_DATA_PORT); -+ -+ mutex_unlock(&ec_portio->io_port_mutex); -+ return 0; -+} -+ -+/* Write a byte to the EC RAM. -+ * -+ * Return status because of commong signature for alle -+ * methods to access EC RAM. 
-+ */ -+static ssize_t ecram_portio_write(struct ecram_portio *ec_portio, u16 offset, -+ u8 value) -+{ -+ mutex_lock(&ec_portio->io_port_mutex); -+ -+ outb(0x2E, ECRAM_PORTIO_ADDR_PORT); -+ outb(0x11, ECRAM_PORTIO_DATA_PORT); -+ outb(0x2F, ECRAM_PORTIO_ADDR_PORT); -+ // TODO: no explicit cast between types seems to be sometimes -+ // done and sometimes not -+ outb((u8)((offset >> 8) & 0xFF), ECRAM_PORTIO_DATA_PORT); -+ -+ outb(0x2E, ECRAM_PORTIO_ADDR_PORT); -+ outb(0x10, ECRAM_PORTIO_DATA_PORT); -+ outb(0x2F, ECRAM_PORTIO_ADDR_PORT); -+ outb((u8)(offset & 0xFF), ECRAM_PORTIO_DATA_PORT); -+ -+ outb(0x2E, ECRAM_PORTIO_ADDR_PORT); -+ outb(0x12, ECRAM_PORTIO_DATA_PORT); -+ outb(0x2F, ECRAM_PORTIO_ADDR_PORT); -+ outb(value, ECRAM_PORTIO_DATA_PORT); -+ -+ mutex_unlock(&ec_portio->io_port_mutex); -+ // TODO: remove this -+ //pr_info("Writing %d to addr %x\n", value, offset); -+ return 0; -+} -+ -+/* =================================== */ -+/* EC RAM Access */ -+/* =================================== */ -+ -+struct ecram { -+ struct ecram_portio portio; -+}; -+ -+static ssize_t ecram_init(struct ecram *ecram, -+ phys_addr_t memoryio_ec_physical_start, -+ size_t region_size) -+{ -+ ssize_t err; -+ -+ err = ecram_portio_init(&ecram->portio); -+ if (err) { -+ pr_info("Failed ecram_portio_init\n"); -+ goto err_ecram_portio_init; -+ } -+ -+ return 0; -+ -+err_ecram_portio_init: -+ return err; -+} -+ -+static void ecram_exit(struct ecram *ecram) -+{ -+ pr_info("Unloading legion ecram\n"); -+ ecram_portio_exit(&ecram->portio); -+ pr_info("Unloading legion ecram done\n"); -+} -+ -+/** Read from EC RAM -+ * ecram_offset address on the EC -+ */ -+static u8 ecram_read(struct ecram *ecram, u16 ecram_offset) -+{ -+ u8 value; -+ int err; -+ -+ err = ecram_portio_read(&ecram->portio, ecram_offset, &value); -+ if (err) -+ pr_info("Error reading EC RAM at 0x%x.\n", ecram_offset); -+ return value; -+} -+ -+static void ecram_write(struct ecram *ecram, u16 ecram_offset, u8 value) -+{ -+ int 
err; -+ -+ if (ec_readonly) { -+ pr_info("Skipping writing EC RAM to 0x%x: Read-Only.\n", -+ ecram_offset); -+ return; -+ } -+ err = ecram_portio_write(&ecram->portio, ecram_offset, value); -+ if (err) -+ pr_info("Error writing EC RAM to 0x%x: Read-Only.\n", ecram_offset); -+} -+ -+/* =============================== */ -+/* Reads from EC */ -+/* =============================== */ -+ -+static u16 read_ec_id(struct ecram *ecram, const struct model_config *model) -+{ -+ u8 id1 = ecram_read(ecram, model->registers->ECHIPID1); -+ u8 id2 = ecram_read(ecram, model->registers->ECHIPID2); -+ -+ return (id1 << 8) + id2; -+} -+ -+static u16 read_ec_version(struct ecram *ecram, -+ const struct model_config *model) -+{ -+ u8 vers = ecram_read(ecram, model->registers->ECHIPVER); -+ u8 debug = ecram_read(ecram, model->registers->ECDEBUG); -+ -+ return (vers << 8) + debug; -+} -+ -+/* ============================= */ -+/* Data model for sensor values */ -+/* ============================= */ -+ -+struct sensor_values { -+ u16 fan1_rpm; // current speed in rpm of fan 1 -+ u16 fan2_rpm; // current speed in rpm of fan2 -+ u16 fan1_target_rpm; // target speed in rpm of fan 1 -+ u16 fan2_target_rpm; // target speed in rpm of fan 2 -+ u8 cpu_temp_celsius; // cpu temperature in celcius -+ u8 gpu_temp_celsius; // gpu temperature in celcius -+ u8 ic_temp_celsius; // ic temperature in celcius -+}; -+ -+enum SENSOR_ATTR { -+ SENSOR_CPU_TEMP_ID = 1, -+ SENSOR_GPU_TEMP_ID = 2, -+ SENSOR_IC_TEMP_ID = 3, -+ SENSOR_FAN1_RPM_ID = 4, -+ SENSOR_FAN2_RPM_ID = 5, -+ SENSOR_FAN1_TARGET_RPM_ID = 6, -+ SENSOR_FAN2_TARGET_RPM_ID = 7 -+}; -+ -+/* ============================= */ -+/* Data model for fan curve */ -+/* ============================= */ -+ -+struct fancurve_point { -+ // rpm1 devided by 100 -+ u8 rpm1_raw; -+ // rpm2 devided by 100 -+ u8 rpm2_raw; -+ // >=2 , <=5 (lower is faster); must increase by level -+ u8 accel; -+ // >=2 , <=5 (lower is faster); must increase by level -+ u8 decel; -+ -+ // 
min must be lower than or equal to max -+ // last level max must be 127 -+ // <=127 cpu max temp for this level; must increase by level -+ u8 cpu_max_temp_celsius; -+ // <=127 cpu min temp for this level; must increase by level -+ u8 cpu_min_temp_celsius; -+ // <=127 gpu min temp for this level; must increase by level -+ u8 gpu_max_temp_celsius; -+ // <=127 gpu max temp for this level; must increase by level -+ u8 gpu_min_temp_celsius; -+ // <=127 ic max temp for this level; must increase by level -+ u8 ic_max_temp_celsius; -+ // <=127 ic max temp for this level; must increase by level -+ u8 ic_min_temp_celsius; -+}; -+ -+enum FANCURVE_ATTR { -+ FANCURVE_ATTR_PWM1 = 1, -+ FANCURVE_ATTR_PWM2 = 2, -+ FANCURVE_ATTR_CPU_TEMP = 3, -+ FANCURVE_ATTR_CPU_HYST = 4, -+ FANCURVE_ATTR_GPU_TEMP = 5, -+ FANCURVE_ATTR_GPU_HYST = 6, -+ FANCURVE_ATTR_IC_TEMP = 7, -+ FANCURVE_ATTR_IC_HYST = 8, -+ FANCURVE_ATTR_ACCEL = 9, -+ FANCURVE_ATTR_DECEL = 10, -+ FANCURVE_SIZE = 11, -+ FANCURVE_MINIFANCURVE_ON_COOL = 12 -+}; -+ -+// used for clearing table entries -+static const struct fancurve_point fancurve_point_zero = { 0, 0, 0, 0, 0, -+ 0, 0, 0, 0, 0 }; -+ -+struct fancurve { -+ struct fancurve_point points[MAXFANCURVESIZE]; -+ // number of points used; must be <= MAXFANCURVESIZE -+ size_t size; -+ // the point at which fans are run currently -+ size_t current_point_i; -+}; -+ -+// validation functions -+ -+static bool fancurve_is_valid_min_temp(int min_temp) -+{ -+ return min_temp >= 0 && min_temp <= 127; -+} -+ -+static bool fancurve_is_valid_max_temp(int max_temp) -+{ -+ return max_temp >= 0 && max_temp <= 127; -+} -+ -+// setters with validation -+// - make hwmon implementation easier -+// - keep fancurve valid, otherwise EC will not properly control fan -+ -+static bool fancurve_set_rpm1(struct fancurve *fancurve, int point_id, int rpm) -+{ -+ bool valid = point_id == 0 ? 
rpm == 0 : (rpm >= 0 && rpm <= 4500); -+ -+ if (valid) -+ fancurve->points[point_id].rpm1_raw = rpm / 100; -+ return valid; -+} -+ -+static bool fancurve_set_rpm2(struct fancurve *fancurve, int point_id, int rpm) -+{ -+ bool valid = point_id == 0 ? rpm == 0 : (rpm >= 0 && rpm <= 4500); -+ -+ if (valid) -+ fancurve->points[point_id].rpm2_raw = rpm / 100; -+ return valid; -+} -+ -+// TODO: remove { ... } from single line if body -+ -+static bool fancurve_set_accel(struct fancurve *fancurve, int point_id, -+ int accel) -+{ -+ bool valid = accel >= 2 && accel <= 5; -+ -+ if (valid) -+ fancurve->points[point_id].accel = accel; -+ return valid; -+} -+ -+static bool fancurve_set_decel(struct fancurve *fancurve, int point_id, -+ int decel) -+{ -+ bool valid = decel >= 2 && decel <= 5; -+ -+ if (valid) -+ fancurve->points[point_id].decel = decel; -+ return valid; -+} -+ -+static bool fancurve_set_cpu_temp_max(struct fancurve *fancurve, int point_id, -+ int value) -+{ -+ bool valid = fancurve_is_valid_max_temp(value); -+ -+ if (valid) -+ fancurve->points[point_id].cpu_max_temp_celsius = value; -+ -+ return valid; -+} -+ -+static bool fancurve_set_gpu_temp_max(struct fancurve *fancurve, int point_id, -+ int value) -+{ -+ bool valid = fancurve_is_valid_max_temp(value); -+ -+ if (valid) -+ fancurve->points[point_id].gpu_max_temp_celsius = value; -+ return valid; -+} -+ -+static bool fancurve_set_ic_temp_max(struct fancurve *fancurve, int point_id, -+ int value) -+{ -+ bool valid = fancurve_is_valid_max_temp(value); -+ -+ if (valid) -+ fancurve->points[point_id].ic_max_temp_celsius = value; -+ return valid; -+} -+ -+static bool fancurve_set_cpu_temp_min(struct fancurve *fancurve, int point_id, -+ int value) -+{ -+ bool valid = fancurve_is_valid_max_temp(value); -+ -+ if (valid) -+ fancurve->points[point_id].cpu_min_temp_celsius = value; -+ return valid; -+} -+ -+static bool fancurve_set_gpu_temp_min(struct fancurve *fancurve, int point_id, -+ int value) -+{ -+ bool valid = 
fancurve_is_valid_min_temp(value); -+ -+ if (valid) -+ fancurve->points[point_id].gpu_min_temp_celsius = value; -+ return valid; -+} -+ -+static bool fancurve_set_ic_temp_min(struct fancurve *fancurve, int point_id, -+ int value) -+{ -+ bool valid = fancurve_is_valid_min_temp(value); -+ -+ if (valid) -+ fancurve->points[point_id].ic_min_temp_celsius = value; -+ return valid; -+} -+ -+static bool fancurve_set_size(struct fancurve *fancurve, int size, -+ bool init_values) -+{ -+ bool valid = size >= 1 && size <= MAXFANCURVESIZE; -+ -+ if (!valid) -+ return false; -+ if (init_values && size < fancurve->size) { -+ // fancurve size is decreased, but last entry always needs 127 temperatures -+ // Note: size >=1 -+ fancurve->points[size - 1].cpu_max_temp_celsius = 127; -+ fancurve->points[size - 1].ic_max_temp_celsius = 127; -+ fancurve->points[size - 1].gpu_max_temp_celsius = 127; -+ } -+ if (init_values && size > fancurve->size) { -+ // fancurve increased, so new entries need valid values -+ int i; -+ int last = fancurve->size > 0 ? 
fancurve->size - 1 : 0; -+ -+ for (i = fancurve->size; i < size; ++i) -+ fancurve->points[i] = fancurve->points[last]; -+ } -+ return true; -+} -+ -+static ssize_t fancurve_print_seqfile(const struct fancurve *fancurve, -+ struct seq_file *s) -+{ -+ int i; -+ -+ seq_printf( -+ s, -+ "rpm1|rpm2|acceleration|deceleration|cpu_min_temp|cpu_max_temp|gpu_min_temp|gpu_max_temp|ic_min_temp|ic_max_temp\n"); -+ for (i = 0; i < fancurve->size; ++i) { -+ const struct fancurve_point *point = &fancurve->points[i]; -+ -+ seq_printf( -+ s, "%d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\n", -+ point->rpm1_raw * 100, point->rpm2_raw * 100, -+ point->accel, point->decel, point->cpu_min_temp_celsius, -+ point->cpu_max_temp_celsius, -+ point->gpu_min_temp_celsius, -+ point->gpu_max_temp_celsius, point->ic_min_temp_celsius, -+ point->ic_max_temp_celsius); -+ } -+ return 0; -+} -+ -+struct light { -+ bool initialized; -+ struct led_classdev led; -+ unsigned int last_brightness; -+ u8 light_id; -+ unsigned int lower_limit; -+ unsigned int upper_limit; -+}; -+ -+/* ============================= */ -+/* Global and shared data between */ -+/* all calls to this module */ -+/* ============================= */ -+// Implemented like ideapad-laptop.c but currently still -+// without dynamic memory allocation (instead global _priv) -+struct legion_private { -+ struct platform_device *platform_device; -+ // TODO: remove or keep? init? 
-+ struct acpi_device *adev; -+ -+ // Method to access ECRAM -+ struct ecram ecram; -+ // Configuration with registers and ECRAM access method -+ const struct model_config *conf; -+ -+ // TODO: maybe refactor and keep only local to each function -+ // last known fan curve -+ struct fancurve fancurve; -+ // configured fan curve from user space -+ struct fancurve fancurve_configured; -+ -+ // update lock, when partial values of fancurve are changed -+ struct mutex fancurve_mutex; -+ -+ //interfaces -+ struct dentry *debugfs_dir; -+ struct device *hwmon_dev; -+ struct platform_profile_handler platform_profile_handler; -+ -+ struct light kbd_bl; -+ struct light ylogo_light; -+ struct light iport_light; -+ -+ // TODO: remove? -+ bool loaded; -+ -+ // TODO: remove, only for reverse enginnering -+ struct ecram_memoryio ec_memoryio; -+}; -+ -+// shared between different drivers: WMI, platform and protected by mutex -+static struct legion_private *legion_shared; -+static struct legion_private _priv; -+static DEFINE_MUTEX(legion_shared_mutex); -+ -+static int legion_shared_init(struct legion_private *priv) -+{ -+ int ret; -+ -+ mutex_lock(&legion_shared_mutex); -+ -+ if (!legion_shared) { -+ legion_shared = priv; -+ mutex_init(&legion_shared->fancurve_mutex); -+ ret = 0; -+ } else { -+ pr_warn("Found multiple platform devices\n"); -+ ret = -EINVAL; -+ } -+ -+ priv->loaded = true; -+ mutex_unlock(&legion_shared_mutex); -+ -+ return ret; -+} -+ -+static void legion_shared_exit(struct legion_private *priv) -+{ -+ pr_info("Unloading legion shared\n"); -+ mutex_lock(&legion_shared_mutex); -+ -+ if (legion_shared == priv) -+ legion_shared = NULL; -+ -+ mutex_unlock(&legion_shared_mutex); -+ pr_info("Unloading legion shared done\n"); -+} -+ -+static int get_simple_wmi_attribute(struct legion_private *priv, -+ const char *guid, u8 instance, -+ u32 method_id, bool invert, -+ unsigned long scale, unsigned long *value) -+{ -+ unsigned long state = 0; -+ int err; -+ -+ if (scale == 0) { 
-+ pr_info("Scale cannot be 0\n"); -+ return -EINVAL; -+ } -+ err = wmi_exec_noarg_int(guid, instance, method_id, &state); -+ if (err) -+ return -EINVAL; -+ -+ // TODO: remove later -+ pr_info("%swith raw value: %ld\n", __func__, state); -+ -+ state = state * scale; -+ -+ if (invert) -+ state = !state; -+ *value = state; -+ return 0; -+} -+ -+static int get_simple_wmi_attribute_bool(struct legion_private *priv, -+ const char *guid, u8 instance, -+ u32 method_id, bool invert, -+ unsigned long scale, bool *value) -+{ -+ unsigned long int_val = *value; -+ int err = get_simple_wmi_attribute(priv, guid, instance, method_id, -+ invert, scale, &int_val); -+ *value = int_val; -+ return err; -+} -+ -+static int set_simple_wmi_attribute(struct legion_private *priv, -+ const char *guid, u8 instance, -+ u32 method_id, bool invert, int scale, -+ int state) -+{ -+ int err; -+ u8 in_param; -+ -+ if (scale == 0) { -+ pr_info("Scale cannot be 0\n"); -+ return -EINVAL; -+ } -+ -+ if (invert) -+ state = !state; -+ -+ in_param = state / scale; -+ -+ err = wmi_exec_arg(guid, instance, method_id, &in_param, -+ sizeof(in_param)); -+ return err; -+} -+ -+/* ============================= */ -+/* Sensor value reading/writing */ -+/* ============================= */ -+ -+static int ec_read_sensor_values(struct ecram *ecram, -+ const struct model_config *model, -+ struct sensor_values *values) -+{ -+ values->fan1_target_rpm = -+ 100 * ecram_read(ecram, model->registers->EXT_FAN1_TARGET_RPM); -+ values->fan2_target_rpm = -+ 100 * ecram_read(ecram, model->registers->EXT_FAN2_TARGET_RPM); -+ -+ values->fan1_rpm = -+ ecram_read(ecram, model->registers->EXT_FAN1_RPM_LSB) + -+ (((int)ecram_read(ecram, model->registers->EXT_FAN1_RPM_MSB)) -+ << 8); -+ values->fan2_rpm = -+ ecram_read(ecram, model->registers->EXT_FAN2_RPM_LSB) + -+ (((int)ecram_read(ecram, model->registers->EXT_FAN2_RPM_MSB)) -+ << 8); -+ -+ values->cpu_temp_celsius = -+ ecram_read(ecram, model->registers->EXT_CPU_TEMP_INPUT); -+ 
values->gpu_temp_celsius = -+ ecram_read(ecram, model->registers->EXT_GPU_TEMP_INPUT); -+ values->ic_temp_celsius = -+ ecram_read(ecram, model->registers->EXT_IC_TEMP_INPUT); -+ -+ values->cpu_temp_celsius = ecram_read(ecram, 0xC5E6); -+ values->gpu_temp_celsius = ecram_read(ecram, 0xC5E7); -+ values->ic_temp_celsius = ecram_read(ecram, 0xC5E8); -+ -+ return 0; -+} -+ -+static ssize_t ec_read_temperature(struct ecram *ecram, -+ const struct model_config *model, -+ int sensor_id, int *temperature) -+{ -+ int err = 0; -+ unsigned long res; -+ -+ if (sensor_id == 0) { -+ res = ecram_read(ecram, 0xC5E6); -+ } else if (sensor_id == 1) { -+ res = ecram_read(ecram, 0xC5E7); -+ } else { -+ // TODO: use all correct error codes -+ return -EEXIST; -+ } -+ if (!err) -+ *temperature = res; -+ return err; -+} -+ -+static ssize_t ec_read_fanspeed(struct ecram *ecram, -+ const struct model_config *model, int fan_id, -+ int *fanspeed_rpm) -+{ -+ int err = 0; -+ unsigned long res; -+ -+ if (fan_id == 0) { -+ res = ecram_read(ecram, model->registers->EXT_FAN1_RPM_LSB) + -+ (((int)ecram_read(ecram, -+ model->registers->EXT_FAN1_RPM_MSB)) -+ << 8); -+ } else if (fan_id == 1) { -+ res = ecram_read(ecram, model->registers->EXT_FAN2_RPM_LSB) + -+ (((int)ecram_read(ecram, -+ model->registers->EXT_FAN2_RPM_MSB)) -+ << 8); -+ } else { -+ // TODO: use all correct error codes -+ return -EEXIST; -+ } -+ if (!err) -+ *fanspeed_rpm = res; -+ return err; -+} -+ -+// '\_SB.PCI0.LPC0.EC0.FANS -+#define ACPI_PATH_FAN_SPEED1 "FANS" -+// '\_SB.PCI0.LPC0.EC0.FA2S -+#define ACPI_PATH_FAN_SPEED2 "FA2S" -+ -+static ssize_t acpi_read_fanspeed(struct legion_private *priv, int fan_id, -+ int *value) -+{ -+ int err; -+ unsigned long acpi_value; -+ const char *acpi_path; -+ -+ if (fan_id == 0) { -+ acpi_path = ACPI_PATH_FAN_SPEED1; -+ } else if (fan_id == 1) { -+ acpi_path = ACPI_PATH_FAN_SPEED2; -+ } else { -+ // TODO: use all correct error codes -+ return -EEXIST; -+ } -+ err = eval_int(priv->adev->handle, 
acpi_path, &acpi_value); -+ if (!err) -+ *value = (int)acpi_value * 100; -+ return err; -+} -+ -+// '\_SB.PCI0.LPC0.EC0.CPUT -+#define ACPI_PATH_CPU_TEMP "CPUT" -+// '\_SB.PCI0.LPC0.EC0.GPUT -+#define ACPI_PATH_GPU_TEMP "GPUT" -+ -+static ssize_t acpi_read_temperature(struct legion_private *priv, int fan_id, -+ int *value) -+{ -+ int err; -+ unsigned long acpi_value; -+ const char *acpi_path; -+ -+ if (fan_id == 0) { -+ acpi_path = ACPI_PATH_CPU_TEMP; -+ } else if (fan_id == 1) { -+ acpi_path = ACPI_PATH_GPU_TEMP; -+ } else { -+ // TODO: use all correct error codes -+ return -EEXIST; -+ } -+ err = eval_int(priv->adev->handle, acpi_path, &acpi_value); -+ if (!err) -+ *value = (int)acpi_value; -+ return err; -+} -+ -+// fan_id: 0 or 1 -+static ssize_t wmi_read_fanspeed(int fan_id, int *fanspeed_rpm) -+{ -+ int err; -+ unsigned long res; -+ struct acpi_buffer params; -+ -+ params.length = 1; -+ params.pointer = &fan_id; -+ -+ err = wmi_exec_int(WMI_GUID_LENOVO_FAN_METHOD, 0, -+ WMI_METHOD_ID_FAN_GETCURRENTFANSPEED, ¶ms, &res); -+ -+ if (!err) -+ *fanspeed_rpm = res; -+ return err; -+} -+ -+//sensor_id: cpu = 0, gpu = 1 -+static ssize_t wmi_read_temperature(int sensor_id, int *temperature) -+{ -+ int err; -+ unsigned long res; -+ struct acpi_buffer params; -+ -+ if (sensor_id == 0) -+ sensor_id = 0x03; -+ else if (sensor_id == 1) -+ sensor_id = 0x04; -+ else { -+ // TODO: use all correct error codes -+ return -EEXIST; -+ } -+ -+ params.length = 1; -+ params.pointer = &sensor_id; -+ -+ err = wmi_exec_int(WMI_GUID_LENOVO_FAN_METHOD, 0, -+ WMI_METHOD_ID_FAN_GETCURRENTSENSORTEMPERATURE, -+ ¶ms, &res); -+ -+ if (!err) -+ *temperature = res; -+ return err; -+} -+ -+// fan_id: 0 or 1 -+static ssize_t wmi_read_fanspeed_gz(int fan_id, int *fanspeed_rpm) -+{ -+ int err; -+ u32 method_id; -+ unsigned long res; -+ -+ if (fan_id == 0) -+ method_id = WMI_METHOD_ID_GETFAN1SPEED; -+ else if (fan_id == 1) -+ method_id = WMI_METHOD_ID_GETFAN2SPEED; -+ else { -+ // TODO: use all correct 
error codes -+ return -EEXIST; -+ } -+ err = wmi_exec_noarg_int(LEGION_WMI_GAMEZONE_GUID, 0, method_id, &res); -+ -+ if (!err) -+ *fanspeed_rpm = res; -+ return err; -+} -+ -+//sensor_id: cpu = 0, gpu = 1 -+static ssize_t wmi_read_temperature_gz(int sensor_id, int *temperature) -+{ -+ int err; -+ u32 method_id; -+ unsigned long res; -+ -+ if (sensor_id == 0) -+ method_id = WMI_METHOD_ID_GETCPUTEMP; -+ else if (sensor_id == 1) -+ method_id = WMI_METHOD_ID_GETGPUTEMP; -+ else { -+ // TODO: use all correct error codes -+ return -EEXIST; -+ } -+ -+ err = wmi_exec_noarg_int(LEGION_WMI_GAMEZONE_GUID, 0, method_id, &res); -+ -+ if (!err) -+ *temperature = res; -+ return err; -+} -+ -+// fan_id: 0 or 1 -+static ssize_t wmi_read_fanspeed_other(int fan_id, int *fanspeed_rpm) -+{ -+ int err; -+ enum OtherMethodFeature featured_id; -+ int res; -+ -+ if (fan_id == 0) -+ featured_id = OtherMethodFeature_FAN_SPEED_1; -+ else if (fan_id == 1) -+ featured_id = OtherMethodFeature_FAN_SPEED_2; -+ else { -+ // TODO: use all correct error codes -+ return -EEXIST; -+ } -+ -+ err = wmi_other_method_get_value(featured_id, &res); -+ -+ if (!err) -+ *fanspeed_rpm = res; -+ return err; -+} -+ -+//sensor_id: cpu = 0, gpu = 1 -+static ssize_t wmi_read_temperature_other(int sensor_id, int *temperature) -+{ -+ int err; -+ enum OtherMethodFeature featured_id; -+ int res; -+ -+ if (sensor_id == 0) -+ featured_id = OtherMethodFeature_TEMP_CPU; -+ else if (sensor_id == 1) -+ featured_id = OtherMethodFeature_TEMP_GPU; -+ else { -+ // TODO: use all correct error codes -+ return -EEXIST; -+ } -+ -+ err = wmi_other_method_get_value(featured_id, &res); -+ if (!err) -+ *temperature = res; -+ return err; -+} -+ -+static ssize_t read_fanspeed(struct legion_private *priv, int fan_id, -+ int *speed_rpm) -+{ -+ // TODO: use enums or function pointers? 
-+ switch (priv->conf->access_method_fanspeed) { -+ case ACCESS_METHOD_EC: -+ return ec_read_fanspeed(&priv->ecram, priv->conf, fan_id, -+ speed_rpm); -+ case ACCESS_METHOD_ACPI: -+ return acpi_read_fanspeed(priv, fan_id, speed_rpm); -+ case ACCESS_METHOD_WMI: -+ return wmi_read_fanspeed_gz(fan_id, speed_rpm); -+ case ACCESS_METHOD_WMI2: -+ return wmi_read_fanspeed(fan_id, speed_rpm); -+ case ACCESS_METHOD_WMI3: -+ return wmi_read_fanspeed_other(fan_id, speed_rpm); -+ default: -+ pr_info("No access method for fanspeed: %d\n", -+ priv->conf->access_method_fanspeed); -+ return -EINVAL; -+ } -+} -+ -+static ssize_t read_temperature(struct legion_private *priv, int sensor_id, -+ int *temperature) -+{ -+ // TODO: use enums or function pointers? -+ switch (priv->conf->access_method_temperature) { -+ case ACCESS_METHOD_EC: -+ return ec_read_temperature(&priv->ecram, priv->conf, sensor_id, -+ temperature); -+ case ACCESS_METHOD_ACPI: -+ return acpi_read_temperature(priv, sensor_id, temperature); -+ case ACCESS_METHOD_WMI: -+ return wmi_read_temperature_gz(sensor_id, temperature); -+ case ACCESS_METHOD_WMI2: -+ return wmi_read_temperature(sensor_id, temperature); -+ case ACCESS_METHOD_WMI3: -+ return wmi_read_temperature_other(sensor_id, temperature); -+ default: -+ pr_info("No access method for temperature: %d\n", -+ priv->conf->access_method_temperature); -+ return -EINVAL; -+ } -+} -+ -+/* ============================= */ -+/* Fancurve reading/writing */ -+/* ============================= */ -+ -+/* Fancurve from WMI -+ * This allows changing fewer parameters. -+ * It is only available on newer models. 
-+ */ -+ -+struct WMIFanTable { -+ u8 FSTM; //FSMD -+ u8 FSID; -+ u32 FSTL; //FSST -+ u16 FSS0; -+ u16 FSS1; -+ u16 FSS2; -+ u16 FSS3; -+ u16 FSS4; -+ u16 FSS5; -+ u16 FSS6; -+ u16 FSS7; -+ u16 FSS8; -+ u16 FSS9; -+} __packed; -+ -+struct WMIFanTableRead { -+ u32 FSFL; -+ u32 FSS0; -+ u32 FSS1; -+ u32 FSS2; -+ u32 FSS3; -+ u32 FSS4; -+ u32 FSS5; -+ u32 FSS6; -+ u32 FSS7; -+ u32 FSS8; -+ u32 FSS9; -+ u32 FSSA; -+} __packed; -+ -+static ssize_t wmi_read_fancurve_custom(const struct model_config *model, -+ struct fancurve *fancurve) -+{ -+ u8 buffer[88]; -+ int err; -+ -+ // The output buffer from the ACPI call is 88 bytes and larger -+ // than the returned object -+ pr_info("Size of object: %lu\n", sizeof(struct WMIFanTableRead)); -+ err = wmi_exec_noarg_ints(WMI_GUID_LENOVO_FAN_METHOD, 0, -+ WMI_METHOD_ID_FAN_GET_TABLE, buffer, -+ sizeof(buffer)); -+ print_hex_dump(KERN_INFO, "legion_laptop fan table wmi buffer", -+ DUMP_PREFIX_ADDRESS, 16, 1, buffer, sizeof(buffer), -+ true); -+ if (!err) { -+ struct WMIFanTableRead *fantable = -+ (struct WMIFanTableRead *)&buffer[0]; -+ fancurve->current_point_i = 0; -+ fancurve->size = 10; -+ fancurve->points[0].rpm1_raw = fantable->FSS0; -+ fancurve->points[1].rpm1_raw = fantable->FSS1; -+ fancurve->points[2].rpm1_raw = fantable->FSS2; -+ fancurve->points[3].rpm1_raw = fantable->FSS3; -+ fancurve->points[4].rpm1_raw = fantable->FSS4; -+ fancurve->points[5].rpm1_raw = fantable->FSS5; -+ fancurve->points[6].rpm1_raw = fantable->FSS6; -+ fancurve->points[7].rpm1_raw = fantable->FSS7; -+ fancurve->points[8].rpm1_raw = fantable->FSS8; -+ fancurve->points[9].rpm1_raw = fantable->FSS9; -+ //fancurve->points[10].rpm1_raw = fantable->FSSA; -+ } -+ return err; -+} -+ -+static ssize_t wmi_write_fancurve_custom(const struct model_config *model, -+ const struct fancurve *fancurve) -+{ -+ u8 buffer[0x20]; -+ int err; -+ -+ // The buffer is read like this in ACPI firmware -+ // -+ // CreateByteField (Arg2, Zero, FSTM) -+ // CreateByteField 
(Arg2, One, FSID) -+ // CreateDWordField (Arg2, 0x02, FSTL) -+ // CreateByteField (Arg2, 0x06, FSS0) -+ // CreateByteField (Arg2, 0x08, FSS1) -+ // CreateByteField (Arg2, 0x0A, FSS2) -+ // CreateByteField (Arg2, 0x0C, FSS3) -+ // CreateByteField (Arg2, 0x0E, FSS4) -+ // CreateByteField (Arg2, 0x10, FSS5) -+ // CreateByteField (Arg2, 0x12, FSS6) -+ // CreateByteField (Arg2, 0x14, FSS7) -+ // CreateByteField (Arg2, 0x16, FSS8) -+ // CreateByteField (Arg2, 0x18, FSS9) -+ -+ memset(buffer, 0, sizeof(buffer)); -+ buffer[0x06] = fancurve->points[0].rpm1_raw; -+ buffer[0x08] = fancurve->points[1].rpm1_raw; -+ buffer[0x0A] = fancurve->points[2].rpm1_raw; -+ buffer[0x0C] = fancurve->points[3].rpm1_raw; -+ buffer[0x0E] = fancurve->points[4].rpm1_raw; -+ buffer[0x10] = fancurve->points[5].rpm1_raw; -+ buffer[0x12] = fancurve->points[6].rpm1_raw; -+ buffer[0x14] = fancurve->points[7].rpm1_raw; -+ buffer[0x16] = fancurve->points[8].rpm1_raw; -+ buffer[0x18] = fancurve->points[9].rpm1_raw; -+ -+ print_hex_dump(KERN_INFO, "legion_laptop fan table wmi write buffer", -+ DUMP_PREFIX_ADDRESS, 16, 1, buffer, sizeof(buffer), -+ true); -+ err = wmi_exec_arg(WMI_GUID_LENOVO_FAN_METHOD, 0, -+ WMI_METHOD_ID_FAN_SET_TABLE, buffer, sizeof(buffer)); -+ return err; -+} -+ -+/* Read the fan curve from the EC. -+ * -+ * In newer models (>=2022) there is an ACPI/WMI to read fan curve as -+ * a whole. So read/write fan table as a whole to use the -+ * same interface for both cases. -+ * -+ * It reads all points from EC memory, even if stored fancurve is smaller, so -+ * it can contain 0 entries. 
-+ */ -+static int ec_read_fancurve_legion(struct ecram *ecram, -+ const struct model_config *model, -+ struct fancurve *fancurve) -+{ -+ size_t i = 0; -+ -+ for (i = 0; i < MAXFANCURVESIZE; ++i) { -+ struct fancurve_point *point = &fancurve->points[i]; -+ -+ point->rpm1_raw = -+ ecram_read(ecram, model->registers->EXT_FAN1_BASE + i); -+ point->rpm2_raw = -+ ecram_read(ecram, model->registers->EXT_FAN2_BASE + i); -+ -+ point->accel = ecram_read( -+ ecram, model->registers->EXT_FAN_ACC_BASE + i); -+ point->decel = ecram_read( -+ ecram, model->registers->EXT_FAN_DEC_BASE + i); -+ point->cpu_max_temp_celsius = -+ ecram_read(ecram, model->registers->EXT_CPU_TEMP + i); -+ point->cpu_min_temp_celsius = ecram_read( -+ ecram, model->registers->EXT_CPU_TEMP_HYST + i); -+ point->gpu_max_temp_celsius = -+ ecram_read(ecram, model->registers->EXT_GPU_TEMP + i); -+ point->gpu_min_temp_celsius = ecram_read( -+ ecram, model->registers->EXT_GPU_TEMP_HYST + i); -+ point->ic_max_temp_celsius = -+ ecram_read(ecram, model->registers->EXT_VRM_TEMP + i); -+ point->ic_min_temp_celsius = ecram_read( -+ ecram, model->registers->EXT_VRM_TEMP_HYST + i); -+ } -+ -+ // Do not trust that hardware; It might suddenly report -+ // a larger size, so clamp it. 
-+ fancurve->size = -+ ecram_read(ecram, model->registers->EXT_FAN_POINTS_SIZE); -+ fancurve->size = -+ min(fancurve->size, (typeof(fancurve->size))(MAXFANCURVESIZE)); -+ fancurve->current_point_i = -+ ecram_read(ecram, model->registers->EXT_FAN_CUR_POINT); -+ fancurve->current_point_i = -+ min(fancurve->current_point_i, fancurve->size); -+ return 0; -+} -+ -+static int ec_write_fancurve_legion(struct ecram *ecram, -+ const struct model_config *model, -+ const struct fancurve *fancurve, -+ bool write_size) -+{ -+ size_t i; -+ -+ // Reset fan update counters (try to avoid any race conditions) -+ ecram_write(ecram, 0xC5FE, 0); -+ ecram_write(ecram, 0xC5FF, 0); -+ for (i = 0; i < MAXFANCURVESIZE; ++i) { -+ // Entries for points larger than fancurve size should be cleared -+ // to 0 -+ const struct fancurve_point *point = -+ i < fancurve->size ? &fancurve->points[i] : -+ &fancurve_point_zero; -+ -+ ecram_write(ecram, model->registers->EXT_FAN1_BASE + i, -+ point->rpm1_raw); -+ ecram_write(ecram, model->registers->EXT_FAN2_BASE + i, -+ point->rpm2_raw); -+ -+ ecram_write(ecram, model->registers->EXT_FAN_ACC_BASE + i, -+ point->accel); -+ ecram_write(ecram, model->registers->EXT_FAN_DEC_BASE + i, -+ point->decel); -+ -+ ecram_write(ecram, model->registers->EXT_CPU_TEMP + i, -+ point->cpu_max_temp_celsius); -+ ecram_write(ecram, model->registers->EXT_CPU_TEMP_HYST + i, -+ point->cpu_min_temp_celsius); -+ ecram_write(ecram, model->registers->EXT_GPU_TEMP + i, -+ point->gpu_max_temp_celsius); -+ ecram_write(ecram, model->registers->EXT_GPU_TEMP_HYST + i, -+ point->gpu_min_temp_celsius); -+ ecram_write(ecram, model->registers->EXT_VRM_TEMP + i, -+ point->ic_max_temp_celsius); -+ ecram_write(ecram, model->registers->EXT_VRM_TEMP_HYST + i, -+ point->ic_min_temp_celsius); -+ } -+ -+ if (write_size) { -+ ecram_write(ecram, model->registers->EXT_FAN_POINTS_SIZE, -+ fancurve->size); -+ } -+ -+ // Reset current fan level to 0, so algorithm in EC -+ // selects fan curve point again 
and resetting hysterisis -+ // effects -+ ecram_write(ecram, model->registers->EXT_FAN_CUR_POINT, 0); -+ -+ // Reset internal fan levels -+ ecram_write(ecram, 0xC634, 0); // CPU -+ ecram_write(ecram, 0xC635, 0); // GPU -+ ecram_write(ecram, 0xC636, 0); // SENSOR -+ -+ return 0; -+} -+ -+#define FANCURVESIZE_IDEAPDAD 8 -+ -+static int ec_read_fancurve_ideapad(struct ecram *ecram, -+ const struct model_config *model, -+ struct fancurve *fancurve) -+{ -+ size_t i = 0; -+ -+ for (i = 0; i < FANCURVESIZE_IDEAPDAD; ++i) { -+ struct fancurve_point *point = &fancurve->points[i]; -+ -+ point->rpm1_raw = -+ ecram_read(ecram, model->registers->EXT_FAN1_BASE + i); -+ point->rpm2_raw = -+ ecram_read(ecram, model->registers->EXT_FAN2_BASE + i); -+ -+ point->accel = 0; -+ point->decel = 0; -+ point->cpu_max_temp_celsius = -+ ecram_read(ecram, model->registers->EXT_CPU_TEMP + i); -+ point->cpu_min_temp_celsius = ecram_read( -+ ecram, model->registers->EXT_CPU_TEMP_HYST + i); -+ point->gpu_max_temp_celsius = -+ ecram_read(ecram, model->registers->EXT_GPU_TEMP + i); -+ point->gpu_min_temp_celsius = ecram_read( -+ ecram, model->registers->EXT_GPU_TEMP_HYST + i); -+ point->ic_max_temp_celsius = 0; -+ point->ic_min_temp_celsius = 0; -+ } -+ -+ // Do not trust that hardware; It might suddenly report -+ // a larger size, so clamp it. 
-+ fancurve->size = FANCURVESIZE_IDEAPDAD; -+ fancurve->current_point_i = -+ ecram_read(ecram, model->registers->EXT_FAN_CUR_POINT); -+ fancurve->current_point_i = -+ min(fancurve->current_point_i, fancurve->size); -+ return 0; -+} -+ -+static int ec_write_fancurve_ideapad(struct ecram *ecram, -+ const struct model_config *model, -+ const struct fancurve *fancurve) -+{ -+ size_t i; -+ int valr1; -+ int valr2; -+ -+ // add this later: maybe other addresses needed -+ // therefore, fan curve might not be effective immediately but -+ // only after temp change -+ // Reset fan update counters (try to avoid any race conditions) -+ ecram_write(ecram, 0xC5FE, 0); -+ ecram_write(ecram, 0xC5FF, 0); -+ for (i = 0; i < FANCURVESIZE_IDEAPDAD; ++i) { -+ const struct fancurve_point *point = &fancurve->points[i]; -+ -+ ecram_write(ecram, model->registers->EXT_FAN1_BASE + i, -+ point->rpm1_raw); -+ valr1 = ecram_read(ecram, model->registers->EXT_FAN1_BASE + i); -+ ecram_write(ecram, model->registers->EXT_FAN2_BASE + i, -+ point->rpm2_raw); -+ valr2 = ecram_read(ecram, model->registers->EXT_FAN2_BASE + i); -+ pr_info("Writing fan1: %d; reading fan1: %d\n", point->rpm1_raw, -+ valr1); -+ pr_info("Writing fan2: %d; reading fan2: %d\n", point->rpm2_raw, -+ valr2); -+ -+ // write to memory and repeat 8 bytes later again -+ ecram_write(ecram, model->registers->EXT_CPU_TEMP + i, -+ point->cpu_max_temp_celsius); -+ ecram_write(ecram, model->registers->EXT_CPU_TEMP + 8 + i, -+ point->cpu_max_temp_celsius); -+ // write to memory and repeat 8 bytes later again -+ ecram_write(ecram, model->registers->EXT_CPU_TEMP_HYST + i, -+ point->cpu_min_temp_celsius); -+ ecram_write(ecram, model->registers->EXT_CPU_TEMP_HYST + 8 + i, -+ point->cpu_min_temp_celsius); -+ // write to memory and repeat 8 bytes later again -+ ecram_write(ecram, model->registers->EXT_GPU_TEMP + i, -+ point->gpu_max_temp_celsius); -+ ecram_write(ecram, model->registers->EXT_GPU_TEMP + 8 + i, -+ point->gpu_max_temp_celsius); -+ // 
write to memory and repeat 8 bytes later again -+ ecram_write(ecram, model->registers->EXT_GPU_TEMP_HYST + i, -+ point->gpu_min_temp_celsius); -+ ecram_write(ecram, model->registers->EXT_GPU_TEMP_HYST + 8 + i, -+ point->gpu_min_temp_celsius); -+ } -+ -+ // add this later: maybe other addresses needed -+ // therefore, fan curve might not be effective immediately but -+ // only after temp change -+ // // Reset current fan level to 0, so algorithm in EC -+ // // selects fan curve point again and resetting hysterisis -+ // // effects -+ // ecram_write(ecram, model->registers->EXT_FAN_CUR_POINT, 0); -+ -+ // // Reset internal fan levels -+ // ecram_write(ecram, 0xC634, 0); // CPU -+ // ecram_write(ecram, 0xC635, 0); // GPU -+ // ecram_write(ecram, 0xC636, 0); // SENSOR -+ -+ return 0; -+} -+ -+#define FANCURVESIZE_LOQ 10 -+ -+static int ec_read_fancurve_loq(struct ecram *ecram, -+ const struct model_config *model, -+ struct fancurve *fancurve) -+{ -+ size_t i = 0; -+ size_t struct_offset = 3; // {cpu_temp: u8, rpm: u8, gpu_temp?: u8} -+ -+ for (i = 0; i < FANCURVESIZE_LOQ; ++i) { -+ struct fancurve_point *point = &fancurve->points[i]; -+ -+ point->rpm1_raw = -+ ecram_read(ecram, model->registers->EXT_FAN1_BASE + (i * struct_offset)); -+ point->rpm2_raw = -+ ecram_read(ecram, model->registers->EXT_FAN2_BASE + (i * struct_offset)); -+ -+ point->accel = 0; -+ point->decel = 0; -+ point->cpu_max_temp_celsius = -+ ecram_read(ecram, model->registers->EXT_CPU_TEMP + (i * struct_offset)); -+ point->gpu_max_temp_celsius = -+ ecram_read(ecram, model->registers->EXT_GPU_TEMP + (i * struct_offset)); -+ point->cpu_min_temp_celsius = 0; -+ point->gpu_min_temp_celsius = 0; -+ point->ic_max_temp_celsius = 0; -+ point->ic_min_temp_celsius = 0; -+ } -+ -+ fancurve->size = FANCURVESIZE_LOQ; -+ fancurve->current_point_i = -+ ecram_read(ecram, model->registers->EXT_FAN_CUR_POINT); -+ fancurve->current_point_i = -+ min(fancurve->current_point_i, fancurve->size); -+ return 0; -+} -+ -+static 
int ec_write_fancurve_loq(struct ecram *ecram, -+ const struct model_config *model, -+ const struct fancurve *fancurve) -+{ -+ size_t i; -+ int valr1; -+ int valr2; -+ size_t struct_offset = 3; // {cpu_temp: u8, rpm: u8, gpu_temp?: u8} -+ -+ for (i = 0; i < FANCURVESIZE_LOQ; ++i) { -+ const struct fancurve_point *point = &fancurve->points[i]; -+ -+ ecram_write(ecram, model->registers->EXT_FAN1_BASE + (i * struct_offset), -+ point->rpm1_raw); -+ valr1 = ecram_read(ecram, model->registers->EXT_FAN1_BASE + (i * struct_offset)); -+ ecram_write(ecram, model->registers->EXT_FAN2_BASE + (i * struct_offset), -+ point->rpm2_raw); -+ valr2 = ecram_read(ecram, model->registers->EXT_FAN2_BASE + (i * struct_offset)); -+ pr_info("Writing fan1: %d; reading fan1: %d\n", point->rpm1_raw, -+ valr1); -+ pr_info("Writing fan2: %d; reading fan2: %d\n", point->rpm2_raw, -+ valr2); -+ -+ // write to memory and repeat 8 bytes later again -+ ecram_write(ecram, model->registers->EXT_CPU_TEMP + (i * struct_offset), -+ point->cpu_max_temp_celsius); -+ // write to memory and repeat 8 bytes later again -+ ecram_write(ecram, model->registers->EXT_GPU_TEMP + (i * struct_offset), -+ point->gpu_max_temp_celsius); -+ } -+ -+ return 0; -+} -+ -+static int read_fancurve(struct legion_private *priv, struct fancurve *fancurve) -+{ -+ // TODO: use enums or function pointers? 
-+ switch (priv->conf->access_method_fancurve) { -+ case ACCESS_METHOD_EC: -+ return ec_read_fancurve_legion(&priv->ecram, priv->conf, -+ fancurve); -+ case ACCESS_METHOD_EC2: -+ return ec_read_fancurve_ideapad(&priv->ecram, priv->conf, -+ fancurve); -+ case ACCESS_METHOD_EC3: -+ return ec_read_fancurve_loq(&priv->ecram, priv->conf, -+ fancurve); -+ case ACCESS_METHOD_WMI3: -+ return wmi_read_fancurve_custom(priv->conf, fancurve); -+ default: -+ pr_info("No access method for fancurve: %d\n", -+ priv->conf->access_method_fancurve); -+ return -EINVAL; -+ } -+} -+ -+static int write_fancurve(struct legion_private *priv, -+ const struct fancurve *fancurve, bool write_size) -+{ -+ // TODO: use enums or function pointers? -+ switch (priv->conf->access_method_fancurve) { -+ case ACCESS_METHOD_EC: -+ return ec_write_fancurve_legion(&priv->ecram, priv->conf, -+ fancurve, write_size); -+ case ACCESS_METHOD_EC2: -+ return ec_write_fancurve_ideapad(&priv->ecram, priv->conf, -+ fancurve); -+ case ACCESS_METHOD_EC3: -+ return ec_write_fancurve_loq(&priv->ecram, priv->conf, -+ fancurve); -+ case ACCESS_METHOD_WMI3: -+ return wmi_write_fancurve_custom(priv->conf, fancurve); -+ default: -+ pr_info("No access method for fancurve: %d\n", -+ priv->conf->access_method_fancurve); -+ return -EINVAL; -+ } -+} -+ -+#define MINIFANCUVE_ON_COOL_ON 0x04 -+#define MINIFANCUVE_ON_COOL_OFF 0xA0 -+ -+static int ec_read_minifancurve(struct ecram *ecram, -+ const struct model_config *model, bool *state) -+{ -+ int value = -+ ecram_read(ecram, model->registers->EXT_MINIFANCURVE_ON_COOL); -+ -+ switch (value) { -+ case MINIFANCUVE_ON_COOL_ON: -+ *state = true; -+ break; -+ case MINIFANCUVE_ON_COOL_OFF: -+ *state = false; -+ break; -+ default: -+ pr_info("Unexpected value in MINIFANCURVE register: %d\n", -+ value); -+ return -1; -+ } -+ return 0; -+} -+ -+static ssize_t ec_write_minifancurve(struct ecram *ecram, -+ const struct model_config *model, -+ bool state) -+{ -+ u8 val = state ? 
MINIFANCUVE_ON_COOL_ON : MINIFANCUVE_ON_COOL_OFF; -+ -+ ecram_write(ecram, model->registers->EXT_MINIFANCURVE_ON_COOL, val); -+ return 0; -+} -+ -+#define EC_LOCKFANCONTROLLER_ON 8 -+#define EC_LOCKFANCONTROLLER_OFF 0 -+ -+static ssize_t ec_write_lockfancontroller(struct ecram *ecram, -+ const struct model_config *model, -+ bool state) -+{ -+ u8 val = state ? EC_LOCKFANCONTROLLER_ON : EC_LOCKFANCONTROLLER_OFF; -+ -+ ecram_write(ecram, model->registers->EXT_LOCKFANCONTROLLER, val); -+ return 0; -+} -+ -+static int ec_read_lockfancontroller(struct ecram *ecram, -+ const struct model_config *model, -+ bool *state) -+{ -+ int value = ecram_read(ecram, model->registers->EXT_LOCKFANCONTROLLER); -+ -+ switch (value) { -+ case EC_LOCKFANCONTROLLER_ON: -+ *state = true; -+ break; -+ case EC_LOCKFANCONTROLLER_OFF: -+ *state = false; -+ break; -+ default: -+ pr_info("Unexpected value in lockfanspeed register: %d\n", -+ value); -+ return -1; -+ } -+ return 0; -+} -+ -+#define EC_FANFULLSPEED_ON 0x40 -+#define EC_FANFULLSPEED_OFF 0x00 -+ -+static int ec_read_fanfullspeed(struct ecram *ecram, -+ const struct model_config *model, bool *state) -+{ -+ int value = ecram_read(ecram, model->registers->EXT_MAXIMUMFANSPEED); -+ -+ switch (value) { -+ case EC_FANFULLSPEED_ON: -+ *state = true; -+ break; -+ case EC_FANFULLSPEED_OFF: -+ *state = false; -+ break; -+ default: -+ pr_info("Unexpected value in maximumfanspeed register: %d\n", -+ value); -+ return -1; -+ } -+ return 0; -+} -+ -+static ssize_t ec_write_fanfullspeed(struct ecram *ecram, -+ const struct model_config *model, -+ bool state) -+{ -+ u8 val = state ? 
EC_FANFULLSPEED_ON : EC_FANFULLSPEED_OFF; -+ -+ ecram_write(ecram, model->registers->EXT_MAXIMUMFANSPEED, val); -+ return 0; -+} -+ -+static ssize_t wmi_read_fanfullspeed(struct legion_private *priv, bool *state) -+{ -+ return get_simple_wmi_attribute_bool(priv, WMI_GUID_LENOVO_FAN_METHOD, -+ 0, WMI_METHOD_ID_FAN_GET_FULLSPEED, -+ false, 1, state); -+} -+ -+static ssize_t wmi_write_fanfullspeed(struct legion_private *priv, bool state) -+{ -+ return set_simple_wmi_attribute(priv, WMI_GUID_LENOVO_FAN_METHOD, 0, -+ WMI_METHOD_ID_FAN_SET_FULLSPEED, false, -+ 1, state); -+} -+ -+static ssize_t read_fanfullspeed(struct legion_private *priv, bool *state) -+{ -+ // TODO: use enums or function pointers? -+ switch (priv->conf->access_method_fanfullspeed) { -+ case ACCESS_METHOD_EC: -+ return ec_read_fanfullspeed(&priv->ecram, priv->conf, state); -+ case ACCESS_METHOD_WMI: -+ return wmi_read_fanfullspeed(priv, state); -+ default: -+ pr_info("No access method for fan full speed: %d\n", -+ priv->conf->access_method_fanfullspeed); -+ return -EINVAL; -+ } -+} -+ -+static ssize_t write_fanfullspeed(struct legion_private *priv, bool state) -+{ -+ ssize_t res; -+ -+ switch (priv->conf->access_method_fanfullspeed) { -+ case ACCESS_METHOD_EC: -+ res = ec_write_fanfullspeed(&priv->ecram, priv->conf, state); -+ return res; -+ case ACCESS_METHOD_WMI: -+ return wmi_write_fanfullspeed(priv, state); -+ default: -+ pr_info("No access method for fan full speed: %d\n", -+ priv->conf->access_method_fanfullspeed); -+ return -EINVAL; -+ } -+} -+ -+/* ============================= */ -+/* Power mode reading/writing */ -+/* ============================= */ -+ -+enum legion_ec_powermode { -+ LEGION_EC_POWERMODE_QUIET = 2, -+ LEGION_EC_POWERMODE_BALANCED = 0, -+ LEGION_EC_POWERMODE_PERFORMANCE = 1, -+ LEGION_EC_POWERMODE_CUSTOM = 3 -+}; -+ -+enum legion_wmi_powermode { -+ LEGION_WMI_POWERMODE_QUIET = 1, -+ LEGION_WMI_POWERMODE_BALANCED = 2, -+ LEGION_WMI_POWERMODE_PERFORMANCE = 3, -+ 
LEGION_WMI_POWERMODE_CUSTOM = 255 -+}; -+ -+enum legion_wmi_powermode ec_to_wmi_powermode(int ec_mode) -+{ -+ switch (ec_mode) { -+ case LEGION_EC_POWERMODE_QUIET: -+ return LEGION_WMI_POWERMODE_QUIET; -+ case LEGION_EC_POWERMODE_BALANCED: -+ return LEGION_WMI_POWERMODE_BALANCED; -+ case LEGION_EC_POWERMODE_PERFORMANCE: -+ return LEGION_WMI_POWERMODE_PERFORMANCE; -+ case LEGION_EC_POWERMODE_CUSTOM: -+ return LEGION_WMI_POWERMODE_CUSTOM; -+ default: -+ return LEGION_WMI_POWERMODE_BALANCED; -+ } -+} -+ -+enum legion_ec_powermode wmi_to_ec_powermode(enum legion_wmi_powermode wmi_mode) -+{ -+ switch (wmi_mode) { -+ case LEGION_WMI_POWERMODE_QUIET: -+ return LEGION_EC_POWERMODE_QUIET; -+ case LEGION_WMI_POWERMODE_BALANCED: -+ return LEGION_EC_POWERMODE_BALANCED; -+ case LEGION_WMI_POWERMODE_PERFORMANCE: -+ return LEGION_EC_POWERMODE_PERFORMANCE; -+ case LEGION_WMI_POWERMODE_CUSTOM: -+ return LEGION_EC_POWERMODE_CUSTOM; -+ default: -+ return LEGION_EC_POWERMODE_BALANCED; -+ } -+} -+ -+static ssize_t ec_read_powermode(struct legion_private *priv, int *powermode) -+{ -+ *powermode = -+ ecram_read(&priv->ecram, priv->conf->registers->EXT_POWERMODE); -+ return 0; -+} -+ -+static ssize_t ec_write_powermode(struct legion_private *priv, u8 value) -+{ -+ if (!((value >= 0 && value <= 2) || value == 255)) { -+ pr_info("Unexpected power mode value ignored: %d\n", value); -+ return -ENOMEM; -+ } -+ ecram_write(&priv->ecram, priv->conf->registers->EXT_POWERMODE, value); -+ return 0; -+} -+ -+static ssize_t acpi_read_powermode(struct legion_private *priv, int *powermode) -+{ -+ unsigned long acpi_powermode; -+ int err; -+ -+ // spmo method not always available -+ // \_SB.PCI0.LPC0.EC0.SPMO -+ err = eval_spmo(priv->adev->handle, &acpi_powermode); -+ *powermode = (int)acpi_powermode; -+ return err; -+} -+ -+static ssize_t wmi_read_powermode(int *powermode) -+{ -+ int err; -+ unsigned long res; -+ -+ err = wmi_exec_noarg_int(LEGION_WMI_GAMEZONE_GUID, 0, -+ WMI_METHOD_ID_GETSMARTFANMODE, 
&res); -+ -+ if (!err) -+ *powermode = res; -+ return err; -+} -+ -+static ssize_t wmi_write_powermode(u8 value) -+{ -+ if (!((value >= LEGION_WMI_POWERMODE_QUIET && -+ value <= LEGION_WMI_POWERMODE_PERFORMANCE) || -+ value == LEGION_WMI_POWERMODE_CUSTOM)) { -+ pr_info("Unexpected power mode value ignored: %d\n", value); -+ return -ENOMEM; -+ } -+ return wmi_exec_arg(LEGION_WMI_GAMEZONE_GUID, 0, -+ WMI_METHOD_ID_SETSMARTFANMODE, &value, -+ sizeof(value)); -+} -+ -+static ssize_t read_powermode(struct legion_private *priv, int *powermode) -+{ -+ ssize_t res; -+ -+ switch (priv->conf->access_method_powermode) { -+ case ACCESS_METHOD_EC: -+ res = ec_read_powermode(priv, powermode); -+ *powermode = ec_to_wmi_powermode(*powermode); -+ return res; -+ case ACCESS_METHOD_ACPI: -+ return acpi_read_powermode(priv, powermode); -+ case ACCESS_METHOD_WMI: -+ return wmi_read_powermode(powermode); -+ default: -+ pr_info("No access method for powermode: %d\n", -+ priv->conf->access_method_powermode); -+ return -EINVAL; -+ } -+} -+ -+static ssize_t write_powermode(struct legion_private *priv, -+ enum legion_wmi_powermode value) -+{ -+ ssize_t res; -+ -+ //TODO: remove again -+ pr_info("Set powermode\n"); -+ -+ switch (priv->conf->access_method_powermode) { -+ case ACCESS_METHOD_EC: -+ res = ec_write_powermode(priv, wmi_to_ec_powermode(value)); -+ return res; -+ case ACCESS_METHOD_WMI: -+ return wmi_write_powermode(value); -+ default: -+ pr_info("No access method for powermode: %d\n", -+ priv->conf->access_method_powermode); -+ return -EINVAL; -+ } -+} -+ -+/** -+ * Shortly toggle powermode to a different mode -+ * and switch back, e.g. to reset fan curve. -+ */ -+static void toggle_powermode(struct legion_private *priv) -+{ -+ int old_powermode; -+ int next_powermode; -+ -+ read_powermode(priv, &old_powermode); -+ next_powermode = old_powermode == 0 ? 
1 : 0; -+ -+ write_powermode(priv, next_powermode); -+ mdelay(1500); -+ write_powermode(priv, old_powermode); -+} -+ -+/* ============================= */ -+/* Charging mode reading/writing */ -+/* ============================- */ -+ -+#define FCT_RAPID_CHARGE_ON 0x07 -+#define FCT_RAPID_CHARGE_OFF 0x08 -+#define RAPID_CHARGE_ON 0x0 -+#define RAPID_CHARGE_OFF 0x1 -+ -+static int acpi_read_rapidcharge(struct acpi_device *adev, bool *state) -+{ -+ unsigned long result; -+ int err; -+ -+ //also works? which one is better? -+ /* -+ * err = eval_qcho(adev->handle, &result); -+ * if (err) -+ * return err; -+ * state = result; -+ * return 0; -+ */ -+ -+ err = eval_gbmd(adev->handle, &result); -+ if (err) -+ return err; -+ -+ *state = result & 0x04; -+ return 0; -+} -+ -+static int acpi_write_rapidcharge(struct acpi_device *adev, bool state) -+{ -+ int err; -+ unsigned long fct_nr = state > 0 ? FCT_RAPID_CHARGE_ON : -+ FCT_RAPID_CHARGE_OFF; -+ -+ err = exec_sbmc(adev->handle, fct_nr); -+ pr_info("Set rapidcharge to %d by calling %lu: result: %d\n", state, -+ fct_nr, err); -+ return err; -+} -+ -+/* ============================= */ -+/* Keyboard backlight read/write */ -+/* ============================= */ -+ -+static ssize_t legion_kbd_bl2_brightness_get(struct legion_private *priv) -+{ -+ unsigned long state = 0; -+ int err; -+ -+ err = wmi_exec_noarg_int(LEGION_WMI_GAMEZONE_GUID, 0, -+ WMI_METHOD_ID_GETKEYBOARDLIGHT, &state); -+ if (err) -+ return -EINVAL; -+ -+ return state; -+} -+ -+//static int legion_kbd_bl2_brightness_set(struct legion_private *priv, -+// unsigned int brightness) -+//{ -+// u8 in_param = brightness; -+ -+// return wmi_exec_arg(LEGION_WMI_GAMEZONE_GUID, 0, -+// WMI_METHOD_ID_SETKEYBOARDLIGHT, &in_param, -+// sizeof(in_param)); -+//} -+ -+//min: 1, max: 3 -+#define LIGHT_ID_KEYBOARD 0x00 -+//min: 0, max: 1 -+#define LIGHT_ID_YLOGO 0x03 -+//min: 1, max: 2 -+#define LIGHT_ID_IOPORT 0x05 -+ -+static int legion_wmi_light_get(struct legion_private *priv, 
u8 light_id, -+ unsigned int min_value, unsigned int max_value) -+{ -+ struct acpi_buffer params; -+ u8 in; -+ u8 result[2]; -+ u8 value; -+ int err; -+ -+ params.length = 1; -+ params.pointer = ∈ -+ in = light_id; -+ err = wmi_exec_ints(LEGION_WMI_KBBACKLIGHT_GUID, 0, -+ WMI_METHOD_ID_KBBACKLIGHTGET, ¶ms, result, -+ ARRAY_SIZE(result)); -+ if (err) { -+ pr_info("Error for WMI method call to get brightness\n"); -+ return -EIO; -+ } -+ -+ value = result[1]; -+ if (!(value >= min_value && value <= max_value)) { -+ pr_info("Error WMI call for reading brightness: expected a value between %u and %u, but got %d\n", -+ min_value, max_value, value); -+ return -EFAULT; -+ } -+ -+ return value - min_value; -+} -+ -+static int legion_wmi_light_set(struct legion_private *priv, u8 light_id, -+ unsigned int min_value, unsigned int max_value, -+ unsigned int brightness) -+{ -+ struct acpi_buffer buffer; -+ u8 in_buffer_param[8]; -+ unsigned long result; -+ int err; -+ -+ buffer.length = 3; -+ buffer.pointer = &in_buffer_param[0]; -+ in_buffer_param[0] = light_id; -+ in_buffer_param[1] = 0x01; -+ in_buffer_param[2] = -+ clamp(brightness + min_value, min_value, max_value); -+ -+ err = wmi_exec_int(LEGION_WMI_KBBACKLIGHT_GUID, 0, -+ WMI_METHOD_ID_KBBACKLIGHTSET, &buffer, &result); -+ if (err) { -+ pr_info("Error for WMI method call to set brightness on light: %d\n", -+ light_id); -+ return -EIO; -+ } -+ -+ return 0; -+} -+ -+static int legion_kbd_bl_brightness_get(struct legion_private *priv) -+{ -+ return legion_wmi_light_get(priv, LIGHT_ID_KEYBOARD, 1, 3); -+} -+ -+static int legion_kbd_bl_brightness_set(struct legion_private *priv, -+ unsigned int brightness) -+{ -+ return legion_wmi_light_set(priv, LIGHT_ID_KEYBOARD, 1, 3, brightness); -+} -+ -+/* ============================= */ -+/* debugfs interface */ -+/* ============================ */ -+ -+static int debugfs_ecmemory_show(struct seq_file *s, void *unused) -+{ -+ struct legion_private *priv = s->private; -+ size_t offset; 
-+ -+ for (offset = 0; offset < priv->conf->memoryio_size; ++offset) { -+ char value = ecram_read(&priv->ecram, -+ priv->conf->memoryio_physical_ec_start + -+ offset); -+ -+ seq_write(s, &value, 1); -+ } -+ return 0; -+} -+ -+DEFINE_SHOW_ATTRIBUTE(debugfs_ecmemory); -+ -+static int debugfs_ecmemoryram_show(struct seq_file *s, void *unused) -+{ -+ struct legion_private *priv = s->private; -+ size_t offset; -+ ssize_t err; -+ u8 value; -+ -+ for (offset = 0; offset < priv->conf->ramio_size; ++offset) { -+ err = ecram_memoryio_read(&priv->ec_memoryio, offset, &value); -+ if (!err) -+ seq_write(s, &value, 1); -+ else -+ return -EACCES; -+ } -+ return 0; -+} -+ -+DEFINE_SHOW_ATTRIBUTE(debugfs_ecmemoryram); -+ -+//TODO: make (almost) all methods static -+ -+static void seq_file_print_with_error(struct seq_file *s, const char *name, -+ ssize_t err, int value) -+{ -+ seq_printf(s, "%s error: %ld\n", name, err); -+ seq_printf(s, "%s: %d\n", name, value); -+} -+ -+static int debugfs_fancurve_show(struct seq_file *s, void *unused) -+{ -+ struct legion_private *priv = s->private; -+ bool is_minifancurve; -+ bool is_lockfancontroller; -+ bool is_maximumfanspeed; -+ bool is_rapidcharge = false; -+ int powermode; -+ int temperature; -+ int fanspeed; -+ int err; -+ unsigned long cfg; -+ struct fancurve wmi_fancurve; -+ //int kb_backlight; -+ -+ mutex_lock(&priv->fancurve_mutex); -+ -+ seq_printf(s, "EC Chip ID: %x\n", read_ec_id(&priv->ecram, priv->conf)); -+ seq_printf(s, "EC Chip Version: %x\n", -+ read_ec_version(&priv->ecram, priv->conf)); -+ seq_printf(s, "legion_laptop features: %s\n", LEGIONFEATURES); -+ seq_printf(s, "legion_laptop ec_readonly: %d\n", ec_readonly); -+ -+ err = eval_int(priv->adev->handle, "VPC0._CFG", &cfg); -+ seq_printf(s, "ACPI CFG error: %d\n", err); -+ seq_printf(s, "ACPI CFG: %lu\n", cfg); -+ -+ seq_printf(s, "temperature access method: %d\n", -+ priv->conf->access_method_temperature); -+ err = read_temperature(priv, 0, &temperature); -+ 
seq_file_print_with_error(s, "CPU temperature", err, temperature); -+ err = ec_read_temperature(&priv->ecram, priv->conf, 0, &temperature); -+ seq_file_print_with_error(s, "CPU temperature EC", err, temperature); -+ err = acpi_read_temperature(priv, 0, &temperature); -+ seq_file_print_with_error(s, "CPU temperature ACPI", err, temperature); -+ err = wmi_read_temperature_gz(0, &temperature); -+ seq_file_print_with_error(s, "CPU temperature WMI", err, temperature); -+ err = wmi_read_temperature(0, &temperature); -+ seq_file_print_with_error(s, "CPU temperature WMI2", err, temperature); -+ err = wmi_read_temperature_other(0, &temperature); -+ seq_file_print_with_error(s, "CPU temperature WMI3", err, temperature); -+ -+ err = read_temperature(priv, 1, &temperature); -+ seq_file_print_with_error(s, "GPU temperature", err, temperature); -+ err = ec_read_temperature(&priv->ecram, priv->conf, 1, &temperature); -+ seq_file_print_with_error(s, "GPU temperature EC", err, temperature); -+ err = acpi_read_temperature(priv, 1, &temperature); -+ seq_file_print_with_error(s, "GPU temperature ACPI", err, temperature); -+ err = wmi_read_temperature_gz(1, &temperature); -+ seq_file_print_with_error(s, "GPU temperature WMI", err, temperature); -+ err = wmi_read_temperature(1, &temperature); -+ seq_file_print_with_error(s, "GPU temperature WMI2", err, temperature); -+ err = wmi_read_temperature_other(1, &temperature); -+ seq_file_print_with_error(s, "GPU temperature WMI3", err, temperature); -+ -+ seq_printf(s, "fan speed access method: %d\n", -+ priv->conf->access_method_fanspeed); -+ err = read_fanspeed(priv, 0, &fanspeed); -+ seq_file_print_with_error(s, "1 fanspeed", err, fanspeed); -+ err = ec_read_fanspeed(&priv->ecram, priv->conf, 0, &fanspeed); -+ seq_file_print_with_error(s, "1 fanspeed EC", err, fanspeed); -+ err = acpi_read_fanspeed(priv, 0, &fanspeed); -+ seq_file_print_with_error(s, "1 fanspeed ACPI", err, fanspeed); -+ err = wmi_read_fanspeed_gz(0, &fanspeed); -+ 
seq_file_print_with_error(s, "1 fanspeed WMI", err, fanspeed); -+ err = wmi_read_fanspeed(0, &fanspeed); -+ seq_file_print_with_error(s, "1 fanspeed WMI2", err, fanspeed); -+ err = wmi_read_fanspeed_other(0, &fanspeed); -+ seq_file_print_with_error(s, "1 fanspeed WMI3", err, fanspeed); -+ -+ err = read_fanspeed(priv, 1, &fanspeed); -+ seq_file_print_with_error(s, "2 fanspeed", err, fanspeed); -+ err = ec_read_fanspeed(&priv->ecram, priv->conf, 1, &fanspeed); -+ seq_file_print_with_error(s, "2 fanspeed EC", err, fanspeed); -+ err = acpi_read_fanspeed(priv, 1, &fanspeed); -+ seq_file_print_with_error(s, "2 fanspeed ACPI", err, fanspeed); -+ err = wmi_read_fanspeed_gz(1, &fanspeed); -+ seq_file_print_with_error(s, "2 fanspeed WMI", err, fanspeed); -+ err = wmi_read_fanspeed(1, &fanspeed); -+ seq_file_print_with_error(s, "2 fanspeed WMI2", err, fanspeed); -+ err = wmi_read_fanspeed_other(1, &fanspeed); -+ seq_file_print_with_error(s, "2 fanspeed WMI3", err, fanspeed); -+ -+ seq_printf(s, "powermode access method: %d\n", -+ priv->conf->access_method_powermode); -+ err = read_powermode(priv, &powermode); -+ seq_file_print_with_error(s, "powermode", err, powermode); -+ err = ec_read_powermode(priv, &powermode); -+ seq_file_print_with_error(s, "powermode EC", err, powermode); -+ err = acpi_read_powermode(priv, &powermode); -+ seq_file_print_with_error(s, "powermode ACPI", err, powermode); -+ err = wmi_read_powermode(&powermode); -+ seq_file_print_with_error(s, "powermode WMI", err, powermode); -+ seq_printf(s, "has custom powermode: %d\n", -+ priv->conf->has_custom_powermode); -+ -+ err = acpi_read_rapidcharge(priv->adev, &is_rapidcharge); -+ seq_printf(s, "ACPI rapidcharge error: %d\n", err); -+ seq_printf(s, "ACPI rapidcharge: %d\n", is_rapidcharge); -+ -+ seq_printf(s, "WMI backlight 2 state: %ld\n", -+ legion_kbd_bl2_brightness_get(priv)); -+ seq_printf(s, "WMI backlight 3 state: %d\n", -+ legion_kbd_bl_brightness_get(priv)); -+ -+ seq_printf(s, "WMI light IO port: 
%d\n", -+ legion_wmi_light_get(priv, LIGHT_ID_IOPORT, 0, 4)); -+ -+ seq_printf(s, "WMI light Y logo/lid: %d\n", -+ legion_wmi_light_get(priv, LIGHT_ID_YLOGO, 0, 4)); -+ -+ seq_printf(s, "EC minifancurve feature enabled: %d\n", -+ priv->conf->has_minifancurve); -+ err = ec_read_minifancurve(&priv->ecram, priv->conf, &is_minifancurve); -+ seq_printf(s, "EC minifancurve on cool: %s\n", -+ err ? "error" : (is_minifancurve ? "true" : "false")); -+ -+ err = ec_read_lockfancontroller(&priv->ecram, priv->conf, -+ &is_lockfancontroller); -+ seq_printf(s, "EC lockfancontroller error: %d\n", err); -+ seq_printf(s, "EC lockfancontroller: %s\n", -+ err ? "error" : (is_lockfancontroller ? "true" : "false")); -+ -+ err = read_fanfullspeed(priv, &is_maximumfanspeed); -+ seq_file_print_with_error(s, "fanfullspeed", err, is_maximumfanspeed); -+ -+ err = ec_read_fanfullspeed(&priv->ecram, priv->conf, -+ &is_maximumfanspeed); -+ seq_file_print_with_error(s, "fanfullspeed EC", err, -+ is_maximumfanspeed); -+ -+ read_fancurve(priv, &priv->fancurve); -+ seq_printf(s, "EC fan curve current point id: %ld\n", -+ priv->fancurve.current_point_i); -+ seq_printf(s, "EC fan curve points size: %ld\n", priv->fancurve.size); -+ -+ seq_puts(s, "Current fan curve in hardware:\n"); -+ fancurve_print_seqfile(&priv->fancurve, s); -+ seq_puts(s, "=====================\n"); -+ mutex_unlock(&priv->fancurve_mutex); -+ -+ seq_puts(s, "Current fan curve in hardware (WMI; might be empty)\n"); -+ wmi_fancurve.size = 0; -+ err = wmi_read_fancurve_custom(priv->conf, &wmi_fancurve); -+ fancurve_print_seqfile(&wmi_fancurve, s); -+ seq_puts(s, "=====================\n"); -+ return 0; -+} -+ -+DEFINE_SHOW_ATTRIBUTE(debugfs_fancurve); -+ -+static void legion_debugfs_init(struct legion_private *priv) -+{ -+ struct dentry *dir; -+ -+ // TODO: remove this note -+ // Note: like other kernel modules, do not catch errors here -+ // because if kernel is build without debugfs this -+ // will return an error but module still 
has to -+ // work, just without debugfs -+ // TODO: what permissions; some modules do 400 -+ // other do 444 -+ dir = debugfs_create_dir(LEGION_DRVR_SHORTNAME, NULL); -+ debugfs_create_file("fancurve", 0444, dir, priv, -+ &debugfs_fancurve_fops); -+ debugfs_create_file("ecmemory", 0444, dir, priv, -+ &debugfs_ecmemory_fops); -+ debugfs_create_file("ecmemoryram", 0444, dir, priv, -+ &debugfs_ecmemoryram_fops); -+ -+ priv->debugfs_dir = dir; -+} -+ -+static void legion_debugfs_exit(struct legion_private *priv) -+{ -+ pr_info("Unloading legion dubugfs\n"); -+ // The following is does nothing if pointer is NULL -+ debugfs_remove_recursive(priv->debugfs_dir); -+ priv->debugfs_dir = NULL; -+ pr_info("Unloading legion dubugfs done\n"); -+} -+ -+/* ============================= */ -+/* sysfs interface */ -+/* ============================ */ -+ -+static int show_simple_wmi_attribute(struct device *dev, -+ struct device_attribute *attr, char *buf, -+ const char *guid, u8 instance, -+ u32 method_id, bool invert, -+ unsigned long scale) -+{ -+ unsigned long state = 0; -+ int err; -+ struct legion_private *priv = dev_get_drvdata(dev); -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = get_simple_wmi_attribute(priv, guid, instance, method_id, invert, -+ scale, &state); -+ mutex_unlock(&priv->fancurve_mutex); -+ -+ if (err) -+ return -EINVAL; -+ -+ return sysfs_emit(buf, "%lu\n", state); -+} -+ -+static int show_simple_wmi_attribute_from_buffer(struct device *dev, -+ struct device_attribute *attr, -+ char *buf, const char *guid, -+ u8 instance, u32 method_id, -+ size_t ressize, size_t i, -+ int scale) -+{ -+ u8 res[16]; -+ int err; -+ int out; -+ struct legion_private *priv = dev_get_drvdata(dev); -+ -+ if (ressize > ARRAY_SIZE(res)) { -+ pr_info("Buffer too small for WMI result\n"); -+ return -EINVAL; -+ } -+ if (i >= ressize) { -+ pr_info("Index not within buffer size\n"); -+ return -EINVAL; -+ } -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = wmi_exec_noarg_ints(guid, 
instance, method_id, res, ressize); -+ mutex_unlock(&priv->fancurve_mutex); -+ if (err) -+ return -EINVAL; -+ -+ out = scale * res[i]; -+ return sysfs_emit(buf, "%d\n", out); -+} -+ -+static int store_simple_wmi_attribute(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count, -+ const char *guid, u8 instance, -+ u32 method_id, bool invert, int scale) -+{ -+ int state; -+ int err; -+ struct legion_private *priv = dev_get_drvdata(dev); -+ -+ err = kstrtouint(buf, 0, &state); -+ if (err) -+ return err; -+ err = set_simple_wmi_attribute(priv, guid, instance, method_id, invert, -+ scale, state); -+ if (err) -+ return err; -+ return count; -+} -+ -+static ssize_t lockfancontroller_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ struct legion_private *priv = dev_get_drvdata(dev); -+ bool is_lockfancontroller; -+ int err; -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = ec_read_lockfancontroller(&priv->ecram, priv->conf, -+ &is_lockfancontroller); -+ mutex_unlock(&priv->fancurve_mutex); -+ if (err) -+ return -EINVAL; -+ -+ return sysfs_emit(buf, "%d\n", is_lockfancontroller); -+} -+ -+static ssize_t lockfancontroller_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ struct legion_private *priv = dev_get_drvdata(dev); -+ bool is_lockfancontroller; -+ int err; -+ -+ err = kstrtobool(buf, &is_lockfancontroller); -+ if (err) -+ return err; -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = ec_write_lockfancontroller(&priv->ecram, priv->conf, -+ is_lockfancontroller); -+ mutex_unlock(&priv->fancurve_mutex); -+ if (err) -+ return -EINVAL; -+ -+ return count; -+} -+ -+static DEVICE_ATTR_RW(lockfancontroller); -+ -+static ssize_t rapidcharge_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ bool state = false; -+ int err; -+ struct legion_private *priv = dev_get_drvdata(dev); -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = 
acpi_read_rapidcharge(priv->adev, &state); -+ mutex_unlock(&priv->fancurve_mutex); -+ if (err) -+ return -EINVAL; -+ -+ return sysfs_emit(buf, "%d\n", state); -+} -+ -+static ssize_t rapidcharge_store(struct device *dev, -+ struct device_attribute *attr, const char *buf, -+ size_t count) -+{ -+ struct legion_private *priv = dev_get_drvdata(dev); -+ int state; -+ int err; -+ -+ err = kstrtouint(buf, 0, &state); -+ if (err) -+ return err; -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = acpi_write_rapidcharge(priv->adev, state); -+ mutex_unlock(&priv->fancurve_mutex); -+ if (err) -+ return -EINVAL; -+ -+ return count; -+} -+ -+static DEVICE_ATTR_RW(rapidcharge); -+ -+static ssize_t issupportgpuoc_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ return show_simple_wmi_attribute(dev, attr, buf, -+ LEGION_WMI_GAMEZONE_GUID, 0, -+ WMI_METHOD_ID_ISSUPPORTGPUOC, false, -+ 1); -+} -+ -+static DEVICE_ATTR_RO(issupportgpuoc); -+ -+static ssize_t aslcodeversion_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ return show_simple_wmi_attribute(dev, attr, buf, -+ LEGION_WMI_GAMEZONE_GUID, 0, -+ WMI_METHOD_ID_GETVERSION, false, 1); -+} -+ -+static DEVICE_ATTR_RO(aslcodeversion); -+ -+static ssize_t issupportcpuoc_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ return show_simple_wmi_attribute(dev, attr, buf, -+ LEGION_WMI_GAMEZONE_GUID, 0, -+ WMI_METHOD_ID_ISSUPPORTCPUOC, false, -+ 1); -+} -+ -+static DEVICE_ATTR_RO(issupportcpuoc); -+ -+static ssize_t winkey_show(struct device *dev, struct device_attribute *attr, -+ char *buf) -+{ -+ return show_simple_wmi_attribute(dev, attr, buf, -+ LEGION_WMI_GAMEZONE_GUID, 0, -+ WMI_METHOD_ID_GETWINKEYSTATUS, true, -+ 1); -+} -+ -+static ssize_t winkey_store(struct device *dev, struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ return store_simple_wmi_attribute(dev, attr, buf, count, -+ LEGION_WMI_GAMEZONE_GUID, 0, -+ 
WMI_METHOD_ID_SETWINKEYSTATUS, true, -+ 1); -+} -+ -+static DEVICE_ATTR_RW(winkey); -+ -+// on newer models the touchpad feature in ideapad does not work anymore, so -+// we need this -+static ssize_t touchpad_show(struct device *dev, struct device_attribute *attr, -+ char *buf) -+{ -+ return show_simple_wmi_attribute(dev, attr, buf, -+ LEGION_WMI_GAMEZONE_GUID, 0, -+ WMI_METHOD_ID_GETTPSTATUS, true, 1); -+} -+ -+static ssize_t touchpad_store(struct device *dev, struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ return store_simple_wmi_attribute(dev, attr, buf, count, -+ LEGION_WMI_GAMEZONE_GUID, 0, -+ WMI_METHOD_ID_SETTPSTATUS, true, 1); -+} -+ -+static DEVICE_ATTR_RW(touchpad); -+ -+static ssize_t gsync_show(struct device *dev, struct device_attribute *attr, -+ char *buf) -+{ -+ return show_simple_wmi_attribute(dev, attr, buf, -+ LEGION_WMI_GAMEZONE_GUID, 0, -+ WMI_METHOD_ID_GETGSYNCSTATUS, true, 1); -+} -+ -+static ssize_t gsync_store(struct device *dev, struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ return store_simple_wmi_attribute(dev, attr, buf, count, -+ LEGION_WMI_GAMEZONE_GUID, 0, -+ WMI_METHOD_ID_SETGSYNCSTATUS, true, -+ 1); -+} -+ -+static DEVICE_ATTR_RW(gsync); -+ -+static ssize_t powerchargemode_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ return show_simple_wmi_attribute(dev, attr, buf, -+ LEGION_WMI_GAMEZONE_GUID, 0, -+ WMI_METHOD_ID_GETPOWERCHARGEMODE, -+ false, 1); -+} -+static DEVICE_ATTR_RO(powerchargemode); -+ -+static ssize_t overdrive_show(struct device *dev, struct device_attribute *attr, -+ char *buf) -+{ -+ return show_simple_wmi_attribute(dev, attr, buf, -+ LEGION_WMI_GAMEZONE_GUID, 0, -+ WMI_METHOD_ID_GETODSTATUS, false, 1); -+} -+ -+static ssize_t overdrive_store(struct device *dev, -+ struct device_attribute *attr, const char *buf, -+ size_t count) -+{ -+ return store_simple_wmi_attribute(dev, attr, buf, count, -+ LEGION_WMI_GAMEZONE_GUID, 0, -+ 
WMI_METHOD_ID_SETODSTATUS, false, 1); -+} -+ -+static DEVICE_ATTR_RW(overdrive); -+ -+static ssize_t thermalmode_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ return show_simple_wmi_attribute(dev, attr, buf, -+ LEGION_WMI_GAMEZONE_GUID, 0, -+ WMI_METHOD_ID_GETTHERMALMODE, false, -+ 1); -+} -+static DEVICE_ATTR_RO(thermalmode); -+ -+// TOOD: probably remove again because provided by other means; only useful for overclocking -+static ssize_t cpumaxfrequency_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ return show_simple_wmi_attribute(dev, attr, buf, -+ LEGION_WMI_GAMEZONE_GUID, 0, -+ WMI_METHOD_ID_GETCPUMAXFREQUENCY, -+ false, 1); -+} -+static DEVICE_ATTR_RO(cpumaxfrequency); -+ -+static ssize_t isacfitforoc_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ return show_simple_wmi_attribute(dev, attr, buf, -+ LEGION_WMI_GAMEZONE_GUID, 0, -+ WMI_METHOD_ID_ISACFITFOROC, false, 1); -+} -+static DEVICE_ATTR_RO(isacfitforoc); -+ -+static ssize_t igpumode_show(struct device *dev, struct device_attribute *attr, -+ char *buf) -+{ -+ return show_simple_wmi_attribute(dev, attr, buf, -+ LEGION_WMI_GAMEZONE_GUID, 0, -+ WMI_METHOD_ID_GETIGPUMODESTATUS, false, -+ 1); -+} -+ -+static ssize_t igpumode_store(struct device *dev, struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ return store_simple_wmi_attribute(dev, attr, buf, count, -+ LEGION_WMI_GAMEZONE_GUID, 0, -+ WMI_METHOD_ID_SETIGPUMODESTATUS, -+ false, 1); -+} -+ -+static DEVICE_ATTR_RW(igpumode); -+ -+static ssize_t cpu_oc_show(struct device *dev, struct device_attribute *attr, -+ char *buf) -+{ -+ return show_simple_wmi_attribute_from_buffer( -+ dev, attr, buf, WMI_GUID_LENOVO_CPU_METHOD, 0, -+ WMI_METHOD_ID_CPU_GET_OC_STATUS, 16, 0, 1); -+} -+ -+static ssize_t cpu_oc_store(struct device *dev, struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ return store_simple_wmi_attribute(dev, attr, buf, 
count, -+ WMI_GUID_LENOVO_CPU_METHOD, 0, -+ WMI_METHOD_ID_CPU_SET_OC_STATUS, -+ false, 1); -+} -+ -+static DEVICE_ATTR_RW(cpu_oc); -+ -+static ssize_t cpu_shortterm_powerlimit_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ return show_simple_wmi_attribute_from_buffer( -+ dev, attr, buf, WMI_GUID_LENOVO_CPU_METHOD, 0, -+ WMI_METHOD_ID_CPU_GET_SHORTTERM_POWERLIMIT, 16, 0, 1); -+} -+ -+static ssize_t cpu_shortterm_powerlimit_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ return store_simple_wmi_attribute( -+ dev, attr, buf, count, WMI_GUID_LENOVO_CPU_METHOD, 0, -+ WMI_METHOD_ID_CPU_SET_SHORTTERM_POWERLIMIT, false, 1); -+} -+ -+static DEVICE_ATTR_RW(cpu_shortterm_powerlimit); -+ -+static ssize_t cpu_longterm_powerlimit_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ return show_simple_wmi_attribute_from_buffer( -+ dev, attr, buf, WMI_GUID_LENOVO_CPU_METHOD, 0, -+ WMI_METHOD_ID_CPU_GET_LONGTERM_POWERLIMIT, 16, 0, 1); -+} -+ -+static ssize_t cpu_longterm_powerlimit_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ return store_simple_wmi_attribute( -+ dev, attr, buf, count, WMI_GUID_LENOVO_CPU_METHOD, 0, -+ WMI_METHOD_ID_CPU_SET_LONGTERM_POWERLIMIT, false, 1); -+} -+ -+static DEVICE_ATTR_RW(cpu_longterm_powerlimit); -+ -+static ssize_t cpu_default_powerlimit_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ return show_simple_wmi_attribute( -+ dev, attr, buf, WMI_GUID_LENOVO_CPU_METHOD, 0, -+ WMI_METHOD_ID_CPU_GET_DEFAULT_POWERLIMIT, false, 1); -+} -+ -+static DEVICE_ATTR_RO(cpu_default_powerlimit); -+ -+static ssize_t cpu_peak_powerlimit_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ return show_simple_wmi_attribute(dev, attr, buf, -+ WMI_GUID_LENOVO_GPU_METHOD, 0, -+ WMI_METHOD_ID_CPU_GET_PEAK_POWERLIMIT, -+ false, 1); -+} -+ -+static 
ssize_t cpu_peak_powerlimit_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ return store_simple_wmi_attribute(dev, attr, buf, count, -+ WMI_GUID_LENOVO_GPU_METHOD, 0, -+ WMI_METHOD_ID_CPU_SET_PEAK_POWERLIMIT, -+ false, 1); -+} -+ -+static DEVICE_ATTR_RW(cpu_peak_powerlimit); -+ -+static ssize_t cpu_apu_sppt_powerlimit_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ return show_simple_wmi_attribute( -+ dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0, -+ WMI_METHOD_ID_CPU_GET_APU_SPPT_POWERLIMIT, false, 1); -+} -+ -+static ssize_t cpu_apu_sppt_powerlimit_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ return store_simple_wmi_attribute( -+ dev, attr, buf, count, WMI_GUID_LENOVO_GPU_METHOD, 0, -+ WMI_METHOD_ID_CPU_SET_APU_SPPT_POWERLIMIT, false, 1); -+} -+ -+static DEVICE_ATTR_RW(cpu_apu_sppt_powerlimit); -+ -+static ssize_t cpu_cross_loading_powerlimit_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ return show_simple_wmi_attribute( -+ dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0, -+ WMI_METHOD_ID_CPU_GET_CROSS_LOADING_POWERLIMIT, false, 1); -+} -+ -+static ssize_t cpu_cross_loading_powerlimit_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ return store_simple_wmi_attribute( -+ dev, attr, buf, count, WMI_GUID_LENOVO_GPU_METHOD, 0, -+ WMI_METHOD_ID_CPU_SET_CROSS_LOADING_POWERLIMIT, false, 1); -+} -+ -+static DEVICE_ATTR_RW(cpu_cross_loading_powerlimit); -+ -+static ssize_t gpu_oc_show(struct device *dev, struct device_attribute *attr, -+ char *buf) -+{ -+ return show_simple_wmi_attribute(dev, attr, buf, -+ WMI_GUID_LENOVO_GPU_METHOD, 0, -+ WMI_METHOD_ID_GPU_GET_OC_STATUS, false, -+ 1); -+} -+ -+static ssize_t gpu_oc_store(struct device *dev, struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ return store_simple_wmi_attribute(dev, 
attr, buf, count, -+ WMI_GUID_LENOVO_GPU_METHOD, 0, -+ WMI_METHOD_ID_GPU_SET_OC_STATUS, -+ false, 1); -+} -+ -+static DEVICE_ATTR_RW(gpu_oc); -+ -+static ssize_t gpu_ppab_powerlimit_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ return show_simple_wmi_attribute_from_buffer( -+ dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0, -+ WMI_METHOD_ID_GPU_GET_PPAB_POWERLIMIT, 16, 0, 1); -+} -+ -+static ssize_t gpu_ppab_powerlimit_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ return store_simple_wmi_attribute(dev, attr, buf, count, -+ WMI_GUID_LENOVO_GPU_METHOD, 0, -+ WMI_METHOD_ID_GPU_SET_PPAB_POWERLIMIT, -+ false, 1); -+} -+ -+static DEVICE_ATTR_RW(gpu_ppab_powerlimit); -+ -+static ssize_t gpu_ctgp_powerlimit_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ return show_simple_wmi_attribute_from_buffer( -+ dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0, -+ WMI_METHOD_ID_GPU_GET_CTGP_POWERLIMIT, 16, 0, 1); -+} -+ -+static ssize_t gpu_ctgp_powerlimit_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ return store_simple_wmi_attribute(dev, attr, buf, count, -+ WMI_GUID_LENOVO_GPU_METHOD, 0, -+ WMI_METHOD_ID_GPU_SET_CTGP_POWERLIMIT, -+ false, 1); -+} -+ -+static DEVICE_ATTR_RW(gpu_ctgp_powerlimit); -+ -+static ssize_t gpu_ctgp2_powerlimit_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ return show_simple_wmi_attribute_from_buffer( -+ dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0, -+ WMI_METHOD_ID_GPU_GET_CTGP_POWERLIMIT, 16, 0x0C, 1); -+} -+ -+static DEVICE_ATTR_RO(gpu_ctgp2_powerlimit); -+ -+// TOOD: probably remove again because provided by other means; only useful for overclocking -+static ssize_t -+gpu_default_ppab_ctrgp_powerlimit_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ return show_simple_wmi_attribute( -+ dev, attr, buf, 
WMI_GUID_LENOVO_GPU_METHOD, 0, -+ WMI_METHOD_ID_GPU_GET_DEFAULT_PPAB_CTGP_POWERLIMIT, false, 1); -+} -+static DEVICE_ATTR_RO(gpu_default_ppab_ctrgp_powerlimit); -+ -+static ssize_t gpu_temperature_limit_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ return show_simple_wmi_attribute( -+ dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0, -+ WMI_METHOD_ID_GPU_GET_TEMPERATURE_LIMIT, false, 1); -+} -+ -+static ssize_t gpu_temperature_limit_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ return store_simple_wmi_attribute( -+ dev, attr, buf, count, WMI_GUID_LENOVO_GPU_METHOD, 0, -+ WMI_METHOD_ID_GPU_SET_TEMPERATURE_LIMIT, false, 1); -+} -+ -+static DEVICE_ATTR_RW(gpu_temperature_limit); -+ -+// TOOD: probably remove again because provided by other means; only useful for overclocking -+static ssize_t gpu_boost_clock_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ return show_simple_wmi_attribute(dev, attr, buf, -+ WMI_GUID_LENOVO_GPU_METHOD, 0, -+ WMI_METHOD_ID_GPU_GET_BOOST_CLOCK, -+ false, 1); -+} -+static DEVICE_ATTR_RO(gpu_boost_clock); -+ -+static ssize_t fan_fullspeed_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ bool state = false; -+ int err; -+ struct legion_private *priv = dev_get_drvdata(dev); -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = read_fanfullspeed(priv, &state); -+ mutex_unlock(&priv->fancurve_mutex); -+ if (err) -+ return -EINVAL; -+ -+ return sysfs_emit(buf, "%d\n", state); -+} -+ -+static ssize_t fan_fullspeed_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ struct legion_private *priv = dev_get_drvdata(dev); -+ int state; -+ int err; -+ -+ err = kstrtouint(buf, 0, &state); -+ if (err) -+ return err; -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = write_fanfullspeed(priv, state); -+ mutex_unlock(&priv->fancurve_mutex); -+ if (err) -+ return -EINVAL; -+ 
-+ return count; -+} -+ -+static DEVICE_ATTR_RW(fan_fullspeed); -+ -+static ssize_t fan_maxspeed_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ return show_simple_wmi_attribute(dev, attr, buf, -+ WMI_GUID_LENOVO_FAN_METHOD, 0, -+ WMI_METHOD_ID_FAN_GET_MAXSPEED, false, -+ 1); -+} -+ -+static ssize_t fan_maxspeed_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ return store_simple_wmi_attribute(dev, attr, buf, count, -+ WMI_GUID_LENOVO_FAN_METHOD, 0, -+ WMI_METHOD_ID_FAN_SET_MAXSPEED, false, -+ 1); -+} -+ -+static DEVICE_ATTR_RW(fan_maxspeed); -+ -+static ssize_t powermode_show(struct device *dev, struct device_attribute *attr, -+ char *buf) -+{ -+ struct legion_private *priv = dev_get_drvdata(dev); -+ int power_mode; -+ -+ mutex_lock(&priv->fancurve_mutex); -+ read_powermode(priv, &power_mode); -+ mutex_unlock(&priv->fancurve_mutex); -+ return sysfs_emit(buf, "%d\n", power_mode); -+} -+ -+static void legion_platform_profile_notify(void); -+ -+static ssize_t powermode_store(struct device *dev, -+ struct device_attribute *attr, const char *buf, -+ size_t count) -+{ -+ struct legion_private *priv = dev_get_drvdata(dev); -+ int powermode; -+ int err; -+ -+ err = kstrtouint(buf, 0, &powermode); -+ if (err) -+ return err; -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = write_powermode(priv, powermode); -+ mutex_unlock(&priv->fancurve_mutex); -+ if (err) -+ return -EINVAL; -+ -+ // TODO: better? 
-+ // we have to wait a bit before change is done in hardware and -+ // readback done after notifying returns correct value, otherwise -+ // the notified reader will read old value -+ msleep(500); -+ legion_platform_profile_notify(); -+ -+ return count; -+} -+ -+static DEVICE_ATTR_RW(powermode); -+ -+static struct attribute *legion_sysfs_attributes[] = { -+ &dev_attr_powermode.attr, -+ &dev_attr_lockfancontroller.attr, -+ &dev_attr_rapidcharge.attr, -+ &dev_attr_winkey.attr, -+ &dev_attr_touchpad.attr, -+ &dev_attr_gsync.attr, -+ &dev_attr_powerchargemode.attr, -+ &dev_attr_overdrive.attr, -+ &dev_attr_cpumaxfrequency.attr, -+ &dev_attr_isacfitforoc.attr, -+ &dev_attr_cpu_oc.attr, -+ &dev_attr_cpu_shortterm_powerlimit.attr, -+ &dev_attr_cpu_longterm_powerlimit.attr, -+ &dev_attr_cpu_apu_sppt_powerlimit.attr, -+ &dev_attr_cpu_default_powerlimit.attr, -+ &dev_attr_cpu_peak_powerlimit.attr, -+ &dev_attr_cpu_cross_loading_powerlimit.attr, -+ &dev_attr_gpu_oc.attr, -+ &dev_attr_gpu_ppab_powerlimit.attr, -+ &dev_attr_gpu_ctgp_powerlimit.attr, -+ &dev_attr_gpu_ctgp2_powerlimit.attr, -+ &dev_attr_gpu_default_ppab_ctrgp_powerlimit.attr, -+ &dev_attr_gpu_temperature_limit.attr, -+ &dev_attr_gpu_boost_clock.attr, -+ &dev_attr_fan_fullspeed.attr, -+ &dev_attr_fan_maxspeed.attr, -+ &dev_attr_thermalmode.attr, -+ &dev_attr_issupportcpuoc.attr, -+ &dev_attr_issupportgpuoc.attr, -+ &dev_attr_aslcodeversion.attr, -+ &dev_attr_igpumode.attr, -+ NULL -+}; -+ -+static const struct attribute_group legion_attribute_group = { -+ .attrs = legion_sysfs_attributes -+}; -+ -+static int legion_sysfs_init(struct legion_private *priv) -+{ -+ return device_add_group(&priv->platform_device->dev, -+ &legion_attribute_group); -+} -+ -+static void legion_sysfs_exit(struct legion_private *priv) -+{ -+ pr_info("Unloading legion sysfs\n"); -+ device_remove_group(&priv->platform_device->dev, -+ &legion_attribute_group); -+ pr_info("Unloading legion sysfs done\n"); -+} -+ -+/* 
============================= */ -+/* WMI + ACPI */ -+/* ============================ */ -+// heavily based on ideapad_laptop.c -+ -+// TODO: proper names if meaning of all events is clear -+enum LEGION_WMI_EVENT { -+ LEGION_WMI_EVENT_GAMEZONE = 1, -+ LEGION_EVENT_A, -+ LEGION_EVENT_B, -+ LEGION_EVENT_C, -+ LEGION_EVENT_D, -+ LEGION_EVENT_E, -+ LEGION_EVENT_F, -+ LEGION_EVENT_G -+}; -+ -+struct legion_wmi_private { -+ enum LEGION_WMI_EVENT event; -+}; -+ -+//static void legion_wmi_notify2(u32 value, void *context) -+// { -+// pr_info("WMI notify\n" ); -+// } -+ -+static void legion_wmi_notify(struct wmi_device *wdev, union acpi_object *data) -+{ -+ struct legion_wmi_private *wpriv; -+ struct legion_private *priv; -+ -+ mutex_lock(&legion_shared_mutex); -+ priv = legion_shared; -+ if ((!priv) && (priv->loaded)) { -+ pr_info("Received WMI event while not initialized!\n"); -+ goto unlock; -+ } -+ -+ wpriv = dev_get_drvdata(&wdev->dev); -+ switch (wpriv->event) { -+ case LEGION_EVENT_A: -+ pr_info("Fan event: legion type: %d; acpi type: %d (%d=integer)", -+ wpriv->event, data->type, ACPI_TYPE_INTEGER); -+ // TODO: here it is too early (first unlock mutext, then wait a bit) -+ //legion_platform_profile_notify(); -+ break; -+ default: -+ pr_info("Event: legion type: %d; acpi type: %d (%d=integer)", -+ wpriv->event, data->type, ACPI_TYPE_INTEGER); -+ break; -+ } -+ -+unlock: -+ mutex_unlock(&legion_shared_mutex); -+ // todo; fix that! 
-+ // problem: we get an event just before the powermode change (from the key?), -+ // so if we notify too early, it will read the old power mode/platform profile -+ msleep(500); -+ legion_platform_profile_notify(); -+} -+ -+static int legion_wmi_probe(struct wmi_device *wdev, const void *context) -+{ -+ struct legion_wmi_private *wpriv; -+ -+ wpriv = devm_kzalloc(&wdev->dev, sizeof(*wpriv), GFP_KERNEL); -+ if (!wpriv) -+ return -ENOMEM; -+ -+ *wpriv = *(const struct legion_wmi_private *)context; -+ -+ dev_set_drvdata(&wdev->dev, wpriv); -+ dev_info(&wdev->dev, "Register after probing for WMI.\n"); -+ return 0; -+} -+ -+static const struct legion_wmi_private legion_wmi_context_gamezone = { -+ .event = LEGION_WMI_EVENT_GAMEZONE -+}; -+static const struct legion_wmi_private legion_wmi_context_a = { -+ .event = LEGION_EVENT_A -+}; -+static const struct legion_wmi_private legion_wmi_context_b = { -+ .event = LEGION_EVENT_B -+}; -+static const struct legion_wmi_private legion_wmi_context_c = { -+ .event = LEGION_EVENT_C -+}; -+static const struct legion_wmi_private legion_wmi_context_d = { -+ .event = LEGION_EVENT_D -+}; -+static const struct legion_wmi_private legion_wmi_context_e = { -+ .event = LEGION_EVENT_E -+}; -+static const struct legion_wmi_private legion_wmi_context_f = { -+ .event = LEGION_EVENT_F -+}; -+ -+#define LEGION_WMI_GUID_FAN_EVENT "D320289E-8FEA-41E0-86F9-611D83151B5F" -+#define LEGION_WMI_GUID_FAN2_EVENT "bc72a435-e8c1-4275-b3e2-d8b8074aba59" -+#define LEGION_WMI_GUID_GAMEZONE_KEY_EVENT \ -+ "10afc6d9-ea8b-4590-a2e7-1cd3c84bb4b1" -+#define LEGION_WMI_GUID_GAMEZONE_GPU_EVENT \ -+ "bfd42481-aee3-4502-a107-afb68425c5f8" -+#define LEGION_WMI_GUID_GAMEZONE_OC_EVENT "d062906b-12d4-4510-999d-4831ee80e985" -+#define LEGION_WMI_GUID_GAMEZONE_TEMP_EVENT \ -+ "bfd42481-aee3-4501-a107-afb68425c5f8" -+//#define LEGION_WMI_GUID_GAMEZONE_DATA_EVENT "887b54e3-dddc-4b2c-8b88-68a26a8835d0" -+ -+static const struct wmi_device_id legion_wmi_ids[] = { -+ { 
LEGION_WMI_GAMEZONE_GUID, &legion_wmi_context_gamezone }, -+ { LEGION_WMI_GUID_FAN_EVENT, &legion_wmi_context_a }, -+ { LEGION_WMI_GUID_FAN2_EVENT, &legion_wmi_context_b }, -+ { LEGION_WMI_GUID_GAMEZONE_KEY_EVENT, &legion_wmi_context_c }, -+ { LEGION_WMI_GUID_GAMEZONE_GPU_EVENT, &legion_wmi_context_d }, -+ { LEGION_WMI_GUID_GAMEZONE_OC_EVENT, &legion_wmi_context_e }, -+ { LEGION_WMI_GUID_GAMEZONE_TEMP_EVENT, &legion_wmi_context_f }, -+ { "8FC0DE0C-B4E4-43FD-B0F3-8871711C1294", -+ &legion_wmi_context_gamezone }, /* Legion 5 */ -+ {}, -+}; -+MODULE_DEVICE_TABLE(wmi, legion_wmi_ids); -+ -+static struct wmi_driver legion_wmi_driver = { -+ .driver = { -+ .name = "legion_wmi", -+ }, -+ .id_table = legion_wmi_ids, -+ .probe = legion_wmi_probe, -+ .notify = legion_wmi_notify, -+}; -+ -+//acpi_status status = wmi_install_notify_handler(LEGION_WMI_GAMEZONE_GUID, -+// legion_wmi_notify2, NULL); -+//if (ACPI_FAILURE(status)) { -+// return -ENODEV; -+//} -+//return 0; -+ -+static int legion_wmi_init(void) -+{ -+ return wmi_driver_register(&legion_wmi_driver); -+} -+ -+static void legion_wmi_exit(void) -+{ -+ // TODO: remove this -+ pr_info("Unloading legion WMI\n"); -+ -+ //wmi_remove_notify_handler(LEGION_WMI_GAMEZONE_GUID); -+ wmi_driver_unregister(&legion_wmi_driver); -+ pr_info("Unloading legion WMI done\n"); -+} -+ -+/* ============================= */ -+/* Platform profile */ -+/* ============================ */ -+ -+static void legion_platform_profile_notify(void) -+{ -+ if (!enable_platformprofile) -+ pr_info("Skipping platform_profile_notify because enable_platformprofile is false\n"); -+ -+ platform_profile_notify(); -+} -+ -+static int legion_platform_profile_get(struct platform_profile_handler *pprof, -+ enum platform_profile_option *profile) -+{ -+ int powermode; -+ struct legion_private *priv; -+ -+ priv = container_of(pprof, struct legion_private, -+ platform_profile_handler); -+ read_powermode(priv, &powermode); -+ -+ switch (powermode) { -+ case 
LEGION_WMI_POWERMODE_BALANCED: -+ *profile = PLATFORM_PROFILE_BALANCED; -+ break; -+ case LEGION_WMI_POWERMODE_PERFORMANCE: -+ *profile = PLATFORM_PROFILE_PERFORMANCE; -+ break; -+ case LEGION_WMI_POWERMODE_QUIET: -+ *profile = PLATFORM_PROFILE_QUIET; -+ break; -+ case LEGION_WMI_POWERMODE_CUSTOM: -+ *profile = PLATFORM_PROFILE_BALANCED_PERFORMANCE; -+ break; -+ default: -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+static int legion_platform_profile_set(struct platform_profile_handler *pprof, -+ enum platform_profile_option profile) -+{ -+ int powermode; -+ struct legion_private *priv; -+ -+ priv = container_of(pprof, struct legion_private, -+ platform_profile_handler); -+ -+ switch (profile) { -+ case PLATFORM_PROFILE_BALANCED: -+ powermode = LEGION_WMI_POWERMODE_BALANCED; -+ break; -+ case PLATFORM_PROFILE_PERFORMANCE: -+ powermode = LEGION_WMI_POWERMODE_PERFORMANCE; -+ break; -+ case PLATFORM_PROFILE_QUIET: -+ powermode = LEGION_WMI_POWERMODE_QUIET; -+ break; -+ case PLATFORM_PROFILE_BALANCED_PERFORMANCE: -+ powermode = LEGION_WMI_POWERMODE_CUSTOM; -+ break; -+ default: -+ return -EOPNOTSUPP; -+ } -+ -+ return write_powermode(priv, powermode); -+} -+ -+static int legion_platform_profile_init(struct legion_private *priv) -+{ -+ int err; -+ -+ if (!enable_platformprofile) { -+ pr_info("Skipping creating platform profile support because enable_platformprofile is false\n"); -+ return 0; -+ } -+ -+ priv->platform_profile_handler.profile_get = -+ legion_platform_profile_get; -+ priv->platform_profile_handler.profile_set = -+ legion_platform_profile_set; -+ -+ set_bit(PLATFORM_PROFILE_QUIET, priv->platform_profile_handler.choices); -+ set_bit(PLATFORM_PROFILE_BALANCED, -+ priv->platform_profile_handler.choices); -+ set_bit(PLATFORM_PROFILE_PERFORMANCE, -+ priv->platform_profile_handler.choices); -+ if (priv->conf->has_custom_powermode && -+ priv->conf->access_method_powermode == ACCESS_METHOD_WMI) { -+ set_bit(PLATFORM_PROFILE_BALANCED_PERFORMANCE, -+ 
priv->platform_profile_handler.choices); -+ } -+ -+ err = platform_profile_register(&priv->platform_profile_handler); -+ if (err) -+ return err; -+ -+ return 0; -+} -+ -+static void legion_platform_profile_exit(struct legion_private *priv) -+{ -+ if (!enable_platformprofile) { -+ pr_info("Skipping unloading platform profile support because enable_platformprofile is false\n"); -+ return; -+ } -+ pr_info("Unloading legion platform profile\n"); -+ platform_profile_remove(); -+ pr_info("Unloading legion platform profile done\n"); -+} -+ -+/* ============================= */ -+/* hwom interface */ -+/* ============================ */ -+ -+// hw-mon interface -+ -+// todo: register_group or register_info? -+ -+// TODO: use one common function (like here) or one function per attribute? -+static ssize_t sensor_label_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ int sensor_id = (to_sensor_dev_attr(attr))->index; -+ const char *label; -+ -+ switch (sensor_id) { -+ case SENSOR_CPU_TEMP_ID: -+ label = "CPU Temperature\n"; -+ break; -+ case SENSOR_GPU_TEMP_ID: -+ label = "GPU Temperature\n"; -+ break; -+ case SENSOR_IC_TEMP_ID: -+ label = "IC Temperature\n"; -+ break; -+ case SENSOR_FAN1_RPM_ID: -+ label = "Fan 1\n"; -+ break; -+ case SENSOR_FAN2_RPM_ID: -+ label = "Fan 2\n"; -+ break; -+ case SENSOR_FAN1_TARGET_RPM_ID: -+ label = "Fan 1 Target\n"; -+ break; -+ case SENSOR_FAN2_TARGET_RPM_ID: -+ label = "Fan 2 Target\n"; -+ break; -+ default: -+ return -EOPNOTSUPP; -+ } -+ -+ return sprintf(buf, label); -+} -+ -+// TODO: use one common function (like here) or one function per attribute? 
-+static ssize_t sensor_show(struct device *dev, struct device_attribute *devattr, -+ char *buf) -+{ -+ struct legion_private *priv = dev_get_drvdata(dev); -+ int sensor_id = (to_sensor_dev_attr(devattr))->index; -+ struct sensor_values values; -+ int outval; -+ int err = -EIO; -+ -+ switch (sensor_id) { -+ case SENSOR_CPU_TEMP_ID: -+ err = read_temperature(priv, 0, &outval); -+ outval *= 1000; -+ break; -+ case SENSOR_GPU_TEMP_ID: -+ err = read_temperature(priv, 1, &outval); -+ outval *= 1000; -+ break; -+ case SENSOR_IC_TEMP_ID: -+ ec_read_sensor_values(&priv->ecram, priv->conf, &values); -+ outval = 1000 * values.ic_temp_celsius; -+ err = 0; -+ break; -+ case SENSOR_FAN1_RPM_ID: -+ err = read_fanspeed(priv, 0, &outval); -+ break; -+ case SENSOR_FAN2_RPM_ID: -+ err = read_fanspeed(priv, 1, &outval); -+ break; -+ case SENSOR_FAN1_TARGET_RPM_ID: -+ ec_read_sensor_values(&priv->ecram, priv->conf, &values); -+ outval = values.fan1_target_rpm; -+ err = 0; -+ break; -+ case SENSOR_FAN2_TARGET_RPM_ID: -+ ec_read_sensor_values(&priv->ecram, priv->conf, &values); -+ outval = values.fan2_target_rpm; -+ err = 0; -+ break; -+ default: -+ pr_info("Error reading sensor value with id %d\n", sensor_id); -+ return -EOPNOTSUPP; -+ } -+ if (err) -+ return err; -+ -+ return sprintf(buf, "%d\n", outval); -+} -+ -+static SENSOR_DEVICE_ATTR_RO(temp1_input, sensor, SENSOR_CPU_TEMP_ID); -+static SENSOR_DEVICE_ATTR_RO(temp1_label, sensor_label, SENSOR_CPU_TEMP_ID); -+static SENSOR_DEVICE_ATTR_RO(temp2_input, sensor, SENSOR_GPU_TEMP_ID); -+static SENSOR_DEVICE_ATTR_RO(temp2_label, sensor_label, SENSOR_GPU_TEMP_ID); -+static SENSOR_DEVICE_ATTR_RO(temp3_input, sensor, SENSOR_IC_TEMP_ID); -+static SENSOR_DEVICE_ATTR_RO(temp3_label, sensor_label, SENSOR_IC_TEMP_ID); -+static SENSOR_DEVICE_ATTR_RO(fan1_input, sensor, SENSOR_FAN1_RPM_ID); -+static SENSOR_DEVICE_ATTR_RO(fan1_label, sensor_label, SENSOR_FAN1_RPM_ID); -+static SENSOR_DEVICE_ATTR_RO(fan2_input, sensor, SENSOR_FAN2_RPM_ID); -+static 
SENSOR_DEVICE_ATTR_RO(fan2_label, sensor_label, SENSOR_FAN2_RPM_ID); -+static SENSOR_DEVICE_ATTR_RO(fan1_target, sensor, SENSOR_FAN1_TARGET_RPM_ID); -+static SENSOR_DEVICE_ATTR_RO(fan2_target, sensor, SENSOR_FAN2_TARGET_RPM_ID); -+ -+static struct attribute *sensor_hwmon_attributes[] = { -+ &sensor_dev_attr_temp1_input.dev_attr.attr, -+ &sensor_dev_attr_temp1_label.dev_attr.attr, -+ &sensor_dev_attr_temp2_input.dev_attr.attr, -+ &sensor_dev_attr_temp2_label.dev_attr.attr, -+ &sensor_dev_attr_temp3_input.dev_attr.attr, -+ &sensor_dev_attr_temp3_label.dev_attr.attr, -+ &sensor_dev_attr_fan1_input.dev_attr.attr, -+ &sensor_dev_attr_fan1_label.dev_attr.attr, -+ &sensor_dev_attr_fan2_input.dev_attr.attr, -+ &sensor_dev_attr_fan2_label.dev_attr.attr, -+ &sensor_dev_attr_fan1_target.dev_attr.attr, -+ &sensor_dev_attr_fan2_target.dev_attr.attr, -+ NULL -+}; -+ -+static ssize_t autopoint_show(struct device *dev, -+ struct device_attribute *devattr, char *buf) -+{ -+ struct fancurve fancurve; -+ int err; -+ int value; -+ struct legion_private *priv = dev_get_drvdata(dev); -+ int fancurve_attr_id = to_sensor_dev_attr_2(devattr)->nr; -+ int point_id = to_sensor_dev_attr_2(devattr)->index; -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = read_fancurve(priv, &fancurve); -+ mutex_unlock(&priv->fancurve_mutex); -+ -+ if (err) { -+ pr_info("Failed to read fancurve\n"); -+ return -EOPNOTSUPP; -+ } -+ if (!(point_id >= 0 && point_id < MAXFANCURVESIZE)) { -+ pr_info("Failed to read fancurve due to wrong point id: %d\n", -+ point_id); -+ return -EOPNOTSUPP; -+ } -+ -+ switch (fancurve_attr_id) { -+ case FANCURVE_ATTR_PWM1: -+ value = fancurve.points[point_id].rpm1_raw * 100; -+ break; -+ case FANCURVE_ATTR_PWM2: -+ value = fancurve.points[point_id].rpm2_raw * 100; -+ break; -+ case FANCURVE_ATTR_CPU_TEMP: -+ value = fancurve.points[point_id].cpu_max_temp_celsius; -+ break; -+ case FANCURVE_ATTR_CPU_HYST: -+ value = fancurve.points[point_id].cpu_min_temp_celsius; -+ break; -+ case 
FANCURVE_ATTR_GPU_TEMP: -+ value = fancurve.points[point_id].gpu_max_temp_celsius; -+ break; -+ case FANCURVE_ATTR_GPU_HYST: -+ value = fancurve.points[point_id].gpu_min_temp_celsius; -+ break; -+ case FANCURVE_ATTR_IC_TEMP: -+ value = fancurve.points[point_id].ic_max_temp_celsius; -+ break; -+ case FANCURVE_ATTR_IC_HYST: -+ value = fancurve.points[point_id].ic_min_temp_celsius; -+ break; -+ case FANCURVE_ATTR_ACCEL: -+ value = fancurve.points[point_id].accel; -+ break; -+ case FANCURVE_ATTR_DECEL: -+ value = fancurve.points[point_id].decel; -+ break; -+ case FANCURVE_SIZE: -+ value = fancurve.size; -+ break; -+ default: -+ pr_info("Failed to read fancurve due to wrong attribute id: %d\n", -+ fancurve_attr_id); -+ return -EOPNOTSUPP; -+ } -+ -+ return sprintf(buf, "%d\n", value); -+} -+ -+static ssize_t autopoint_store(struct device *dev, -+ struct device_attribute *devattr, -+ const char *buf, size_t count) -+{ -+ struct fancurve fancurve; -+ int err; -+ int value; -+ bool valid; -+ struct legion_private *priv = dev_get_drvdata(dev); -+ int fancurve_attr_id = to_sensor_dev_attr_2(devattr)->nr; -+ int point_id = to_sensor_dev_attr_2(devattr)->index; -+ bool write_fancurve_size = false; -+ -+ if (!(point_id >= 0 && point_id < MAXFANCURVESIZE)) { -+ pr_info("Failed to read fancurve due to wrong point id: %d\n", -+ point_id); -+ err = -EOPNOTSUPP; -+ goto error; -+ } -+ -+ err = kstrtoint(buf, 0, &value); -+ if (err) { -+ pr_info("Parsing hwmon store failed: error: %d; point_id: %d; fancurve_attr_id: %d\\n", -+ err, point_id, fancurve_attr_id); -+ goto error; -+ } -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = read_fancurve(priv, &fancurve); -+ -+ if (err) { -+ pr_info("Failed to read fancurve\n"); -+ err = -EOPNOTSUPP; -+ goto error_mutex; -+ } -+ -+ switch (fancurve_attr_id) { -+ case FANCURVE_ATTR_PWM1: -+ valid = fancurve_set_rpm1(&fancurve, point_id, value); -+ break; -+ case FANCURVE_ATTR_PWM2: -+ valid = fancurve_set_rpm2(&fancurve, point_id, value); -+ 
break; -+ case FANCURVE_ATTR_CPU_TEMP: -+ valid = fancurve_set_cpu_temp_max(&fancurve, point_id, value); -+ break; -+ case FANCURVE_ATTR_CPU_HYST: -+ valid = fancurve_set_cpu_temp_min(&fancurve, point_id, value); -+ break; -+ case FANCURVE_ATTR_GPU_TEMP: -+ valid = fancurve_set_gpu_temp_max(&fancurve, point_id, value); -+ break; -+ case FANCURVE_ATTR_GPU_HYST: -+ valid = fancurve_set_gpu_temp_min(&fancurve, point_id, value); -+ break; -+ case FANCURVE_ATTR_IC_TEMP: -+ valid = fancurve_set_ic_temp_max(&fancurve, point_id, value); -+ break; -+ case FANCURVE_ATTR_IC_HYST: -+ valid = fancurve_set_ic_temp_min(&fancurve, point_id, value); -+ break; -+ case FANCURVE_ATTR_ACCEL: -+ valid = fancurve_set_accel(&fancurve, point_id, value); -+ break; -+ case FANCURVE_ATTR_DECEL: -+ valid = fancurve_set_decel(&fancurve, point_id, value); -+ break; -+ case FANCURVE_SIZE: -+ valid = fancurve_set_size(&fancurve, value, true); -+ write_fancurve_size = true; -+ break; -+ default: -+ pr_info("Failed to write fancurve due to wrong attribute id: %d\n", -+ fancurve_attr_id); -+ err = -EOPNOTSUPP; -+ goto error_mutex; -+ } -+ -+ if (!valid) { -+ pr_info("Ignoring invalid fancurve value %d for attribute %d at point %d\n", -+ value, fancurve_attr_id, point_id); -+ err = -EOPNOTSUPP; -+ goto error_mutex; -+ } -+ -+ err = write_fancurve(priv, &fancurve, write_fancurve_size); -+ if (err) { -+ pr_info("Failed to write fancurve for accessing hwmon at point_id: %d\n", -+ point_id); -+ err = -EOPNOTSUPP; -+ goto error_mutex; -+ } -+ -+ mutex_unlock(&priv->fancurve_mutex); -+ return count; -+ -+error_mutex: -+ mutex_unlock(&priv->fancurve_mutex); -+error: -+ return count; -+} -+ -+// rpm1 -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 2); -+static 
SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 4); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_pwm, autopoint, -+ FANCURVE_ATTR_PWM1, 9); -+// rpm2 -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point1_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point2_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point3_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 2); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point4_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point5_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 4); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point6_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point7_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point8_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point9_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point10_pwm, autopoint, -+ FANCURVE_ATTR_PWM2, 9); -+// CPU temp -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 2); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 3); -+static 
SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 4); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_temp, autopoint, -+ FANCURVE_ATTR_CPU_TEMP, 9); -+// CPU temp hyst -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 2); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 4); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_temp_hyst, autopoint, -+ FANCURVE_ATTR_CPU_HYST, 9); -+// GPU temp -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point1_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point2_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point3_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 2); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point4_temp, 
autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point5_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 4); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point6_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point7_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point8_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point9_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point10_temp, autopoint, -+ FANCURVE_ATTR_GPU_TEMP, 9); -+// GPU temp hyst -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point1_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point2_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point3_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 2); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point4_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point5_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 4); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point6_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point7_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point8_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point9_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point10_temp_hyst, autopoint, -+ FANCURVE_ATTR_GPU_HYST, 9); -+// IC temp -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point1_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point2_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point3_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 2); -+static 
SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point4_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point5_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 4); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point6_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point7_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point8_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point9_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point10_temp, autopoint, -+ FANCURVE_ATTR_IC_TEMP, 9); -+// IC temp hyst -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point1_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point2_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point3_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 2); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point4_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point5_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 4); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point6_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point7_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point8_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point9_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point10_temp_hyst, autopoint, -+ FANCURVE_ATTR_IC_HYST, 9); -+// accel -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_accel, autopoint, -+ 
FANCURVE_ATTR_ACCEL, 2); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 4); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_accel, autopoint, -+ FANCURVE_ATTR_ACCEL, 9); -+// decel -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 0); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 1); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 2); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 3); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 4); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 5); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 6); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 7); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 8); -+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_decel, autopoint, -+ FANCURVE_ATTR_DECEL, 9); -+//size -+static SENSOR_DEVICE_ATTR_2_RW(auto_points_size, autopoint, FANCURVE_SIZE, 0); -+ -+static ssize_t minifancurve_show(struct device *dev, -+ struct device_attribute *devattr, char *buf) -+{ -+ bool value; -+ int err; -+ struct legion_private *priv = dev_get_drvdata(dev); -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err 
= ec_read_minifancurve(&priv->ecram, priv->conf, &value); -+ if (err) { -+ err = -1; -+ pr_info("Failed to read minifancurve\n"); -+ goto error_unlock; -+ } -+ mutex_unlock(&priv->fancurve_mutex); -+ return sprintf(buf, "%d\n", value); -+ -+error_unlock: -+ mutex_unlock(&priv->fancurve_mutex); -+ return -1; -+} -+ -+static ssize_t minifancurve_store(struct device *dev, -+ struct device_attribute *devattr, -+ const char *buf, size_t count) -+{ -+ int value; -+ int err; -+ struct legion_private *priv = dev_get_drvdata(dev); -+ -+ err = kstrtoint(buf, 0, &value); -+ if (err) { -+ err = -1; -+ pr_info("Parsing hwmon store failed: error:%d\n", -+ err); -+ goto error; -+ } -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = ec_write_minifancurve(&priv->ecram, priv->conf, value); -+ if (err) { -+ err = -1; -+ pr_info("Failed to write minifancurve\n"); -+ goto error_unlock; -+ } -+ mutex_unlock(&priv->fancurve_mutex); -+ return count; -+ -+error_unlock: -+ mutex_unlock(&priv->fancurve_mutex); -+error: -+ return err; -+} -+ -+static SENSOR_DEVICE_ATTR_RW(minifancurve, minifancurve, 0); -+ -+static ssize_t pwm1_mode_show(struct device *dev, -+ struct device_attribute *devattr, char *buf) -+{ -+ bool value; -+ int err; -+ struct legion_private *priv = dev_get_drvdata(dev); -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = ec_read_fanfullspeed(&priv->ecram, priv->conf, &value); -+ if (err) { -+ err = -1; -+ pr_info("Failed to pwm1_mode/maximumfanspeed\n"); -+ goto error_unlock; -+ } -+ mutex_unlock(&priv->fancurve_mutex); -+ return sprintf(buf, "%d\n", value ? 0 : 2); -+ -+error_unlock: -+ mutex_unlock(&priv->fancurve_mutex); -+ return -1; -+} -+ -+// TODO: remove? or use WMI method? 
-+static ssize_t pwm1_mode_store(struct device *dev, -+ struct device_attribute *devattr, -+ const char *buf, size_t count) -+{ -+ int value; -+ int is_maximumfanspeed; -+ int err; -+ struct legion_private *priv = dev_get_drvdata(dev); -+ -+ err = kstrtoint(buf, 0, &value); -+ if (err) { -+ err = -1; -+ pr_info("Parsing hwmon store failed: error:%d\n", -+ err); -+ goto error; -+ } -+ is_maximumfanspeed = value == 0; -+ -+ mutex_lock(&priv->fancurve_mutex); -+ err = ec_write_fanfullspeed(&priv->ecram, priv->conf, -+ is_maximumfanspeed); -+ if (err) { -+ err = -1; -+ pr_info("Failed to write pwm1_mode/maximumfanspeed\n"); -+ goto error_unlock; -+ } -+ mutex_unlock(&priv->fancurve_mutex); -+ return count; -+ -+error_unlock: -+ mutex_unlock(&priv->fancurve_mutex); -+error: -+ return err; -+} -+ -+static SENSOR_DEVICE_ATTR_RW(pwm1_mode, pwm1_mode, 0); -+ -+static struct attribute *fancurve_hwmon_attributes[] = { -+ &sensor_dev_attr_pwm1_auto_point1_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point2_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point3_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point4_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point5_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point6_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point7_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point8_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point9_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point10_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point1_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point2_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point3_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point4_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point5_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point6_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point7_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point8_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point9_pwm.dev_attr.attr, -+ 
&sensor_dev_attr_pwm2_auto_point10_pwm.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point1_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point2_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point3_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point4_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point5_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point6_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point7_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point8_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point9_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point10_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point1_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point2_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point3_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point4_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point5_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point6_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point7_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point8_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point9_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point10_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point1_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point2_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point3_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point4_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point5_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point6_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point7_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point8_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point9_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point10_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point1_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point2_temp_hyst.dev_attr.attr, -+ 
&sensor_dev_attr_pwm2_auto_point3_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point4_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point5_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point6_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point7_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point8_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point9_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm2_auto_point10_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point1_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point2_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point3_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point4_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point5_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point6_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point7_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point8_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point9_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point10_temp.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point1_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point2_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point3_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point4_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point5_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point6_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point7_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point8_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point9_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm3_auto_point10_temp_hyst.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point1_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point2_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point3_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point4_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point5_accel.dev_attr.attr, -+ 
&sensor_dev_attr_pwm1_auto_point6_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point7_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point8_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point9_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point10_accel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point1_decel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point2_decel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point3_decel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point4_decel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point5_decel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point6_decel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point7_decel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point8_decel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point9_decel.dev_attr.attr, -+ &sensor_dev_attr_pwm1_auto_point10_decel.dev_attr.attr, -+ // -+ &sensor_dev_attr_auto_points_size.dev_attr.attr, -+ &sensor_dev_attr_minifancurve.dev_attr.attr, -+ &sensor_dev_attr_pwm1_mode.dev_attr.attr, NULL -+}; -+ -+static umode_t legion_hwmon_is_visible(struct kobject *kobj, -+ struct attribute *attr, int idx) -+{ -+ bool supported = true; -+ struct device *dev = kobj_to_dev(kobj); -+ struct legion_private *priv = dev_get_drvdata(dev); -+ -+ if (attr == &sensor_dev_attr_minifancurve.dev_attr.attr) -+ supported = priv->conf->has_minifancurve; -+ -+ supported = supported && (priv->conf->access_method_fancurve != -+ ACCESS_METHOD_NO_ACCESS); -+ -+ return supported ? 
attr->mode : 0; -+} -+ -+static const struct attribute_group legion_hwmon_sensor_group = { -+ .attrs = sensor_hwmon_attributes, -+ .is_visible = NULL -+}; -+ -+static const struct attribute_group legion_hwmon_fancurve_group = { -+ .attrs = fancurve_hwmon_attributes, -+ .is_visible = legion_hwmon_is_visible, -+}; -+ -+static const struct attribute_group *legion_hwmon_groups[] = { -+ &legion_hwmon_sensor_group, &legion_hwmon_fancurve_group, NULL -+}; -+ -+static ssize_t legion_hwmon_init(struct legion_private *priv) -+{ -+ //TODO: use hwmon_device_register_with_groups or -+ // hwmon_device_register_with_info (latter means all hwmon functions have to be -+ // changed) -+ // some laptop driver do it in one way, some in the other -+ // TODO: Use devm_hwmon_device_register_with_groups ? -+ // some laptop drivers use this, some -+ struct device *hwmon_dev = hwmon_device_register_with_groups( -+ &priv->platform_device->dev, "legion_hwmon", priv, -+ legion_hwmon_groups); -+ if (IS_ERR_OR_NULL(hwmon_dev)) { -+ pr_err("hwmon_device_register failed!\n"); -+ return PTR_ERR(hwmon_dev); -+ } -+ dev_set_drvdata(hwmon_dev, priv); -+ priv->hwmon_dev = hwmon_dev; -+ return 0; -+} -+ -+static void legion_hwmon_exit(struct legion_private *priv) -+{ -+ pr_info("Unloading legion hwon\n"); -+ if (priv->hwmon_dev) { -+ hwmon_device_unregister(priv->hwmon_dev); -+ priv->hwmon_dev = NULL; -+ } -+ pr_info("Unloading legion hwon done\n"); -+} -+ -+/* ACPI*/ -+ -+static int acpi_init(struct legion_private *priv, struct acpi_device *adev) -+{ -+ int err; -+ unsigned long cfg; -+ bool skip_acpi_sta_check; -+ struct device *dev = &priv->platform_device->dev; -+ -+ priv->adev = adev; -+ if (!priv->adev) { -+ dev_info(dev, "Could not get ACPI handle\n"); -+ goto err_acpi_init; -+ } -+ -+ skip_acpi_sta_check = force || (!priv->conf->acpi_check_dev); -+ if (!skip_acpi_sta_check) { -+ err = eval_int(priv->adev->handle, "_STA", &cfg); -+ if (err) { -+ dev_info(dev, "Could not evaluate ACPI _STA\n"); -+ 
goto err_acpi_init; -+ } -+ -+ err = eval_int(priv->adev->handle, "VPC0._CFG", &cfg); -+ if (err) { -+ dev_info(dev, "Could not evaluate ACPI _CFG\n"); -+ goto err_acpi_init; -+ } -+ dev_info(dev, "ACPI CFG: %lu\n", cfg); -+ } else { -+ dev_info(dev, "Skipping ACPI _STA check"); -+ } -+ -+ return 0; -+ -+err_acpi_init: -+ return err; -+} -+ -+/* ============================= */ -+/* White Keyboard Backlight */ -+/* ============================ */ -+// In style of ideapad-driver and with code modified from ideapad-driver. -+ -+static enum led_brightness -+legion_kbd_bl_led_cdev_brightness_get(struct led_classdev *led_cdev) -+{ -+ struct legion_private *priv = -+ container_of(led_cdev, struct legion_private, kbd_bl.led); -+ -+ return legion_kbd_bl_brightness_get(priv); -+} -+ -+static int legion_kbd_bl_led_cdev_brightness_set(struct led_classdev *led_cdev, -+ enum led_brightness brightness) -+{ -+ struct legion_private *priv = -+ container_of(led_cdev, struct legion_private, kbd_bl.led); -+ -+ return legion_kbd_bl_brightness_set(priv, brightness); -+} -+ -+static int legion_kbd_bl_init(struct legion_private *priv) -+{ -+ int brightness, err; -+ -+ if (WARN_ON(priv->kbd_bl.initialized)) { -+ pr_info("Keyboard backlight already initialized\n"); -+ return -EEXIST; -+ } -+ -+ if (priv->conf->access_method_keyboard == ACCESS_METHOD_NO_ACCESS) { -+ pr_info("Keyboard backlight handling disabled by this driver\n"); -+ return -ENODEV; -+ } -+ -+ brightness = legion_kbd_bl_brightness_get(priv); -+ if (brightness < 0) { -+ pr_info("Error reading keyboard brightness\n"); -+ return brightness; -+ } -+ -+ priv->kbd_bl.last_brightness = brightness; -+ -+ // will be renamed to "platform::kbd_backlight_1" if it exists already -+ priv->kbd_bl.led.name = "platform::" LED_FUNCTION_KBD_BACKLIGHT; -+ priv->kbd_bl.led.max_brightness = 2; -+ priv->kbd_bl.led.brightness_get = legion_kbd_bl_led_cdev_brightness_get; -+ priv->kbd_bl.led.brightness_set_blocking = -+ 
legion_kbd_bl_led_cdev_brightness_set; -+ priv->kbd_bl.led.flags = LED_BRIGHT_HW_CHANGED; -+ -+ err = led_classdev_register(&priv->platform_device->dev, -+ &priv->kbd_bl.led); -+ if (err) -+ return err; -+ -+ priv->kbd_bl.initialized = true; -+ -+ return 0; -+} -+ -+/** -+ * Deinit keyboard backlight. -+ * -+ * Can also be called if init was not successful. -+ * -+ */ -+static void legion_kbd_bl_exit(struct legion_private *priv) -+{ -+ if (!priv->kbd_bl.initialized) -+ return; -+ -+ priv->kbd_bl.initialized = false; -+ -+ led_classdev_unregister(&priv->kbd_bl.led); -+} -+ -+/* ============================= */ -+/* Additional light driver */ -+/* ============================ */ -+ -+static enum led_brightness -+legion_wmi_cdev_brightness_get(struct led_classdev *led_cdev) -+{ -+ struct legion_private *priv = -+ container_of(led_cdev, struct legion_private, kbd_bl.led); -+ struct light *light_ins = container_of(led_cdev, struct light, led); -+ -+ return legion_wmi_light_get(priv, light_ins->light_id, -+ light_ins->lower_limit, -+ light_ins->upper_limit); -+} -+ -+static int legion_wmi_cdev_brightness_set(struct led_classdev *led_cdev, -+ enum led_brightness brightness) -+{ -+ struct legion_private *priv = -+ container_of(led_cdev, struct legion_private, kbd_bl.led); -+ struct light *light_ins = container_of(led_cdev, struct light, led); -+ -+ return legion_wmi_light_set(priv, light_ins->light_id, -+ light_ins->lower_limit, -+ light_ins->upper_limit, brightness); -+} -+ -+static int legion_light_init(struct legion_private *priv, -+ struct light *light_ins, u8 light_id, -+ u8 lower_limit, u8 upper_limit, const char *name) -+{ -+ int brightness, err; -+ -+ if (WARN_ON(light_ins->initialized)) { -+ pr_info("Light already initialized for light: %u\n", -+ light_ins->light_id); -+ return -EEXIST; -+ } -+ -+ light_ins->light_id = light_id; -+ light_ins->lower_limit = lower_limit; -+ light_ins->upper_limit = upper_limit; -+ -+ brightness = legion_wmi_light_get(priv, 
light_ins->light_id, -+ light_ins->lower_limit, -+ light_ins->upper_limit); -+ if (brightness < 0) { -+ pr_info("Error reading brightness for light: %u\n", -+ light_ins->light_id); -+ return brightness; -+ } -+ -+ light_ins->led.name = name; -+ light_ins->led.max_brightness = -+ light_ins->upper_limit - light_ins->lower_limit; -+ light_ins->led.brightness_get = legion_wmi_cdev_brightness_get; -+ light_ins->led.brightness_set_blocking = legion_wmi_cdev_brightness_set; -+ light_ins->led.flags = LED_BRIGHT_HW_CHANGED; -+ -+ err = led_classdev_register(&priv->platform_device->dev, -+ &light_ins->led); -+ if (err) -+ return err; -+ -+ light_ins->initialized = true; -+ -+ return 0; -+} -+ -+/** -+ * Deinit light. -+ * -+ * Can also be called if init was not successful. -+ * -+ */ -+static void legion_light_exit(struct legion_private *priv, -+ struct light *light_ins) -+{ -+ if (!light_ins->initialized) -+ return; -+ -+ light_ins->initialized = false; -+ -+ led_classdev_unregister(&light_ins->led); -+} -+ -+/* ============================= */ -+/* Platform driver */ -+/* ============================ */ -+ -+static int legion_add(struct platform_device *pdev) -+{ -+ struct legion_private *priv; -+ const struct dmi_system_id *dmi_sys; -+ int err; -+ u16 ec_read_id; -+ bool skip_ec_id_check; -+ bool is_ec_id_valid; -+ bool is_denied = true; -+ bool is_allowed = false; -+ bool do_load_by_list = false; -+ bool do_load = false; -+ //struct legion_private *priv = dev_get_drvdata(&pdev->dev); -+ dev_info(&pdev->dev, "legion_laptop platform driver probing\n"); -+ -+ dev_info( -+ &pdev->dev, -+ "Read identifying information: DMI_SYS_VENDOR: %s; DMI_PRODUCT_NAME: %s; DMI_BIOS_VERSION:%s\n", -+ dmi_get_system_info(DMI_SYS_VENDOR), -+ dmi_get_system_info(DMI_PRODUCT_NAME), -+ dmi_get_system_info(DMI_BIOS_VERSION)); -+ -+ // TODO: allocate? 
-+ priv = &_priv; -+ priv->platform_device = pdev; -+ err = legion_shared_init(priv); -+ if (err) { -+ dev_info(&pdev->dev, "legion_laptop is forced to load.\n"); -+ goto err_legion_shared_init; -+ } -+ dev_set_drvdata(&pdev->dev, priv); -+ -+ // TODO: remove -+ pr_info("Read identifying information: DMI_SYS_VENDOR: %s; DMI_PRODUCT_NAME: %s; DMI_BIOS_VERSION:%s\n", -+ dmi_get_system_info(DMI_SYS_VENDOR), -+ dmi_get_system_info(DMI_PRODUCT_NAME), -+ dmi_get_system_info(DMI_BIOS_VERSION)); -+ -+ dmi_sys = dmi_first_match(optimistic_allowlist); -+ is_allowed = dmi_sys != NULL; -+ is_denied = dmi_check_system(denylist); -+ do_load_by_list = is_allowed && !is_denied; -+ do_load = do_load_by_list || force; -+ -+ dev_info( -+ &pdev->dev, -+ "is_denied: %d; is_allowed: %d; do_load_by_list: %d; do_load: %d\n", -+ is_denied, is_allowed, do_load_by_list, do_load); -+ -+ if (!(do_load)) { -+ dev_info( -+ &pdev->dev, -+ "Module not usable for this laptop because it is not in allowlist. Notify the maintainer if you want to add your device or force load with param force.\n"); -+ err = -ENOMEM; -+ goto err_model_mismtach; -+ } -+ -+ if (force) -+ dev_info(&pdev->dev, "legion_laptop is forced to load.\n"); -+ -+ if (!do_load_by_list && do_load) { -+ dev_info( -+ &pdev->dev, -+ "legion_laptop is forced to load and would otherwise not be loaded\n"); -+ } -+ -+ // if forced and no module found, use config for first model -+ if (dmi_sys == NULL) -+ dmi_sys = &optimistic_allowlist[0]; -+ dev_info(&pdev->dev, "Using configuration for system: %s\n", -+ dmi_sys->ident); -+ -+ priv->conf = dmi_sys->driver_data; -+ -+ err = acpi_init(priv, ACPI_COMPANION(&pdev->dev)); -+ if (err) { -+ dev_info(&pdev->dev, "Could not init ACPI access: %d\n", err); -+ goto err_acpi_init; -+ } -+ -+ // TODO: remove; only used for reverse engineering -+ pr_info("Creating RAM access to embedded controller\n"); -+ err = ecram_memoryio_init(&priv->ec_memoryio, -+ priv->conf->ramio_physical_start, 0, -+ 
priv->conf->ramio_size); -+ if (err) { -+ dev_info( -+ &pdev->dev, -+ "Could not init RAM access to embedded controller: %d\n", -+ err); -+ goto err_ecram_memoryio_init; -+ } -+ -+ err = ecram_init(&priv->ecram, priv->conf->memoryio_physical_ec_start, -+ priv->conf->memoryio_size); -+ if (err) { -+ dev_info(&pdev->dev, -+ "Could not init access to embedded controller: %d\n", -+ err); -+ goto err_ecram_init; -+ } -+ -+ ec_read_id = read_ec_id(&priv->ecram, priv->conf); -+ dev_info(&pdev->dev, "Read embedded controller ID 0x%x\n", ec_read_id); -+ skip_ec_id_check = force || (!priv->conf->check_embedded_controller_id); -+ is_ec_id_valid = skip_ec_id_check || -+ (ec_read_id == priv->conf->embedded_controller_id); -+ if (!is_ec_id_valid) { -+ err = -ENOMEM; -+ dev_info(&pdev->dev, "Expected EC chip id 0x%x but read 0x%x\n", -+ priv->conf->embedded_controller_id, ec_read_id); -+ goto err_ecram_id; -+ } -+ if (skip_ec_id_check) { -+ dev_info(&pdev->dev, -+ "Skipped checking embedded controller id\n"); -+ } -+ -+ dev_info(&pdev->dev, "Creating debugfs interface\n"); -+ legion_debugfs_init(priv); -+ -+ pr_info("Creating sysfs interface\n"); -+ err = legion_sysfs_init(priv); -+ if (err) { -+ dev_info(&pdev->dev, "Failed to create sysfs interface: %d\n", -+ err); -+ goto err_sysfs_init; -+ } -+ -+ pr_info("Creating hwmon interface"); -+ err = legion_hwmon_init(priv); -+ if (err) { -+ dev_info(&pdev->dev, "Failed to create hwmon interface: %d\n", -+ err); -+ goto err_hwmon_init; -+ } -+ -+ pr_info("Creating platform profile support\n"); -+ err = legion_platform_profile_init(priv); -+ if (err) { -+ dev_info(&pdev->dev, "Failed to create platform profile: %d\n", -+ err); -+ goto err_platform_profile; -+ } -+ -+ pr_info("Init WMI driver support\n"); -+ err = legion_wmi_init(); -+ if (err) { -+ dev_info(&pdev->dev, "Failed to init WMI driver: %d\n", err); -+ goto err_wmi; -+ } -+ -+ pr_info("Init keyboard backlight LED driver\n"); -+ err = legion_kbd_bl_init(priv); -+ if (err) { 
-+ dev_info( -+ &pdev->dev, -+ "Failed to init keyboard backlight LED driver. Skipping ...\n"); -+ } -+ -+ pr_info("Init Y-Logo LED driver\n"); -+ err = legion_light_init(priv, &priv->ylogo_light, LIGHT_ID_YLOGO, 0, 1, -+ "platform::ylogo"); -+ if (err) { -+ dev_info(&pdev->dev, -+ "Failed to init Y-Logo LED driver. Skipping ...\n"); -+ } -+ -+ pr_info("Init IO-Port LED driver\n"); -+ err = legion_light_init(priv, &priv->iport_light, LIGHT_ID_IOPORT, 1, 2, -+ "platform::ioport"); -+ if (err) { -+ dev_info(&pdev->dev, -+ "Failed to init IO-Port LED driver. Skipping ...\n"); -+ } -+ -+ dev_info(&pdev->dev, "legion_laptop loaded for this device\n"); -+ return 0; -+ -+ // TODO: remove eventually -+ legion_light_exit(priv, &priv->iport_light); -+ legion_light_exit(priv, &priv->ylogo_light); -+ legion_kbd_bl_exit(priv); -+ legion_wmi_exit(); -+err_wmi: -+ legion_platform_profile_exit(priv); -+err_platform_profile: -+ legion_hwmon_exit(priv); -+err_hwmon_init: -+ legion_sysfs_exit(priv); -+err_sysfs_init: -+ legion_debugfs_exit(priv); -+err_ecram_id: -+ ecram_exit(&priv->ecram); -+err_ecram_init: -+ ecram_memoryio_exit(&priv->ec_memoryio); -+err_ecram_memoryio_init: -+err_acpi_init: -+ legion_shared_exit(priv); -+err_legion_shared_init: -+err_model_mismtach: -+ dev_info(&pdev->dev, "legion_laptop not loaded for this device\n"); -+ return err; -+} -+ -+static int legion_remove(struct platform_device *pdev) -+{ -+ struct legion_private *priv = dev_get_drvdata(&pdev->dev); -+ -+ mutex_lock(&legion_shared_mutex); -+ priv->loaded = false; -+ mutex_unlock(&legion_shared_mutex); -+ -+ legion_light_exit(priv, &priv->iport_light); -+ legion_light_exit(priv, &priv->ylogo_light); -+ legion_kbd_bl_exit(priv); -+ // first unregister wmi, so toggling powermode does not -+ // generate events anymore that even might be delayed -+ legion_wmi_exit(); -+ legion_platform_profile_exit(priv); -+ -+ // toggle power mode to load default setting from embedded controller -+ // again -+ 
toggle_powermode(priv); -+ -+ legion_hwmon_exit(priv); -+ legion_sysfs_exit(priv); -+ legion_debugfs_exit(priv); -+ ecram_exit(&priv->ecram); -+ ecram_memoryio_exit(&priv->ec_memoryio); -+ legion_shared_exit(priv); -+ -+ pr_info("Legion platform unloaded\n"); -+ return 0; -+} -+ -+static int legion_resume(struct platform_device *pdev) -+{ -+ //struct legion_private *priv = dev_get_drvdata(&pdev->dev); -+ dev_info(&pdev->dev, "Resumed in legion-laptop\n"); -+ -+ return 0; -+} -+ -+#ifdef CONFIG_PM_SLEEP -+static int legion_pm_resume(struct device *dev) -+{ -+ //struct legion_private *priv = dev_get_drvdata(dev); -+ dev_info(dev, "Resumed PM in legion-laptop\n"); -+ -+ return 0; -+} -+#endif -+static SIMPLE_DEV_PM_OPS(legion_pm, NULL, legion_pm_resume); -+ -+// same as ideapad -+static const struct acpi_device_id legion_device_ids[] = { -+ // todo: change to "VPC2004", and also ACPI paths -+ { "PNP0C09", 0 }, -+ { "", 0 }, -+}; -+MODULE_DEVICE_TABLE(acpi, legion_device_ids); -+ -+static struct platform_driver legion_driver = { -+ .probe = legion_add, -+ .remove = legion_remove, -+ .resume = legion_resume, -+ .driver = { -+ .name = "legion", -+ .pm = &legion_pm, -+ .acpi_match_table = ACPI_PTR(legion_device_ids), -+ }, -+}; -+ -+static int __init legion_init(void) -+{ -+ int err; -+ -+ pr_info("Loading legion_laptop\n"); -+ err = platform_driver_register(&legion_driver); -+ if (err) { -+ pr_info("legion_laptop: platform_driver_register failed\n"); -+ return err; -+ } -+ -+ return 0; -+} -+ -+module_init(legion_init); -+ -+static void __exit legion_exit(void) -+{ -+ platform_driver_unregister(&legion_driver); -+ pr_info("legion_laptop exit\n"); -+} -+ -+module_exit(legion_exit); --- -2.43.2 diff --git a/patches/nobara/linux-surface.patch b/patches/nobara/linux-surface.patch deleted file mode 100644 index 3378feb..0000000 --- a/patches/nobara/linux-surface.patch +++ /dev/null @@ -1,9117 +0,0 @@ -From da55b6ffe4a98a4af6ced4074317ba9d026f84dd Mon Sep 17 00:00:00 2001 
-From: Tsuchiya Yuto -Date: Sun, 18 Oct 2020 16:42:44 +0900 -Subject: [PATCH] (surface3-oemb) add DMI matches for Surface 3 with broken DMI - table - -On some Surface 3, the DMI table gets corrupted for unknown reasons -and breaks existing DMI matching used for device-specific quirks. - -This commit adds the (broken) DMI data into dmi_system_id tables used -for quirks so that each driver can enable quirks even on the affected -systems. - -On affected systems, DMI data will look like this: - $ grep . /sys/devices/virtual/dmi/id/{bios_vendor,board_name,board_vendor,\ - chassis_vendor,product_name,sys_vendor} - /sys/devices/virtual/dmi/id/bios_vendor:American Megatrends Inc. - /sys/devices/virtual/dmi/id/board_name:OEMB - /sys/devices/virtual/dmi/id/board_vendor:OEMB - /sys/devices/virtual/dmi/id/chassis_vendor:OEMB - /sys/devices/virtual/dmi/id/product_name:OEMB - /sys/devices/virtual/dmi/id/sys_vendor:OEMB - -Expected: - $ grep . /sys/devices/virtual/dmi/id/{bios_vendor,board_name,board_vendor,\ - chassis_vendor,product_name,sys_vendor} - /sys/devices/virtual/dmi/id/bios_vendor:American Megatrends Inc. 
- /sys/devices/virtual/dmi/id/board_name:Surface 3 - /sys/devices/virtual/dmi/id/board_vendor:Microsoft Corporation - /sys/devices/virtual/dmi/id/chassis_vendor:Microsoft Corporation - /sys/devices/virtual/dmi/id/product_name:Surface 3 - /sys/devices/virtual/dmi/id/sys_vendor:Microsoft Corporation - -Signed-off-by: Tsuchiya Yuto -Patchset: surface3-oemb ---- - drivers/platform/surface/surface3-wmi.c | 7 +++++++ - sound/soc/codecs/rt5645.c | 9 +++++++++ - sound/soc/intel/common/soc-acpi-intel-cht-match.c | 8 ++++++++ - 3 files changed, 24 insertions(+) - -diff --git a/drivers/platform/surface/surface3-wmi.c b/drivers/platform/surface/surface3-wmi.c -index ca4602bcc7dea..490b9731068ae 100644 ---- a/drivers/platform/surface/surface3-wmi.c -+++ b/drivers/platform/surface/surface3-wmi.c -@@ -37,6 +37,13 @@ static const struct dmi_system_id surface3_dmi_table[] = { - DMI_MATCH(DMI_PRODUCT_NAME, "Surface 3"), - }, - }, -+ { -+ .matches = { -+ DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), -+ DMI_MATCH(DMI_SYS_VENDOR, "OEMB"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "OEMB"), -+ }, -+ }, - #endif - { } - }; -diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c -index 7938b52d741d8..2d5f83b0cdb0b 100644 ---- a/sound/soc/codecs/rt5645.c -+++ b/sound/soc/codecs/rt5645.c -@@ -3746,6 +3746,15 @@ static const struct dmi_system_id dmi_platform_data[] = { - }, - .driver_data = (void *)&intel_braswell_platform_data, - }, -+ { -+ .ident = "Microsoft Surface 3", -+ .matches = { -+ DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), -+ DMI_MATCH(DMI_SYS_VENDOR, "OEMB"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "OEMB"), -+ }, -+ .driver_data = (void *)&intel_braswell_platform_data, -+ }, - { - /* - * Match for the GPDwin which unfortunately uses somewhat -diff --git a/sound/soc/intel/common/soc-acpi-intel-cht-match.c b/sound/soc/intel/common/soc-acpi-intel-cht-match.c -index cdcbf04b8832f..958305779b125 100644 ---- a/sound/soc/intel/common/soc-acpi-intel-cht-match.c -+++ 
b/sound/soc/intel/common/soc-acpi-intel-cht-match.c -@@ -27,6 +27,14 @@ static const struct dmi_system_id cht_table[] = { - DMI_MATCH(DMI_PRODUCT_NAME, "Surface 3"), - }, - }, -+ { -+ .callback = cht_surface_quirk_cb, -+ .matches = { -+ DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), -+ DMI_MATCH(DMI_SYS_VENDOR, "OEMB"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "OEMB"), -+ }, -+ }, - { } - }; - --- -2.42.0 - -From 35b3c5195c9fc191de6b5a6e4361762aa37edad2 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= -Date: Tue, 3 Nov 2020 13:28:04 +0100 -Subject: [PATCH] mwifiex: Add quirk resetting the PCI bridge on MS Surface - devices - -The most recent firmware of the 88W8897 card reports a hardcoded LTR -value to the system during initialization, probably as an (unsuccessful) -attempt of the developers to fix firmware crashes. This LTR value -prevents most of the Microsoft Surface devices from entering deep -powersaving states (either platform C-State 10 or S0ix state), because -the exit latency of that state would be higher than what the card can -tolerate. - -Turns out the card works just the same (including the firmware crashes) -no matter if that hardcoded LTR value is reported or not, so it's kind -of useless and only prevents us from saving power. - -To get rid of those hardcoded LTR reports, it's possible to reset the -PCI bridge device after initializing the cards firmware. I'm not exactly -sure why that works, maybe the power management subsystem of the PCH -resets its stored LTR values when doing a function level reset of the -bridge device. Doing the reset once after starting the wifi firmware -works very well, probably because the firmware only reports that LTR -value a single time during firmware startup. 
- -Patchset: mwifiex ---- - drivers/net/wireless/marvell/mwifiex/pcie.c | 12 +++++++++ - .../wireless/marvell/mwifiex/pcie_quirks.c | 26 +++++++++++++------ - .../wireless/marvell/mwifiex/pcie_quirks.h | 1 + - 3 files changed, 31 insertions(+), 8 deletions(-) - -diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c -index 6697132ecc977..f06b4ebc5bd8e 100644 ---- a/drivers/net/wireless/marvell/mwifiex/pcie.c -+++ b/drivers/net/wireless/marvell/mwifiex/pcie.c -@@ -1771,9 +1771,21 @@ mwifiex_pcie_send_boot_cmd(struct mwifiex_adapter *adapter, struct sk_buff *skb) - static int mwifiex_pcie_init_fw_port(struct mwifiex_adapter *adapter) - { - struct pcie_service_card *card = adapter->card; -+ struct pci_dev *pdev = card->dev; -+ struct pci_dev *parent_pdev = pci_upstream_bridge(pdev); - const struct mwifiex_pcie_card_reg *reg = card->pcie.reg; - int tx_wrap = card->txbd_wrptr & reg->tx_wrap_mask; - -+ /* Trigger a function level reset of the PCI bridge device, this makes -+ * the firmware of PCIe 88W8897 cards stop reporting a fixed LTR value -+ * that prevents the system from entering package C10 and S0ix powersaving -+ * states. -+ * We need to do it here because it must happen after firmware -+ * initialization and this function is called after that is done. 
-+ */ -+ if (card->quirks & QUIRK_DO_FLR_ON_BRIDGE) -+ pci_reset_function(parent_pdev); -+ - /* Write the RX ring read pointer in to reg->rx_rdptr */ - if (mwifiex_write_reg(adapter, reg->rx_rdptr, card->rxbd_rdptr | - tx_wrap)) { -diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c -index dd6d21f1dbfd7..f46b06f8d6435 100644 ---- a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c -+++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c -@@ -13,7 +13,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 4"), - }, -- .driver_data = (void *)QUIRK_FW_RST_D3COLD, -+ .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -+ QUIRK_DO_FLR_ON_BRIDGE), - }, - { - .ident = "Surface Pro 5", -@@ -22,7 +23,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "Surface_Pro_1796"), - }, -- .driver_data = (void *)QUIRK_FW_RST_D3COLD, -+ .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -+ QUIRK_DO_FLR_ON_BRIDGE), - }, - { - .ident = "Surface Pro 5 (LTE)", -@@ -31,7 +33,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "Surface_Pro_1807"), - }, -- .driver_data = (void *)QUIRK_FW_RST_D3COLD, -+ .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -+ QUIRK_DO_FLR_ON_BRIDGE), - }, - { - .ident = "Surface Pro 6", -@@ -39,7 +42,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 6"), - }, -- .driver_data = (void *)QUIRK_FW_RST_D3COLD, -+ .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -+ QUIRK_DO_FLR_ON_BRIDGE), - }, - { - .ident = "Surface Book 1", -@@ -47,7 +51,8 @@ static const struct 
dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Book"), - }, -- .driver_data = (void *)QUIRK_FW_RST_D3COLD, -+ .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -+ QUIRK_DO_FLR_ON_BRIDGE), - }, - { - .ident = "Surface Book 2", -@@ -55,7 +60,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Book 2"), - }, -- .driver_data = (void *)QUIRK_FW_RST_D3COLD, -+ .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -+ QUIRK_DO_FLR_ON_BRIDGE), - }, - { - .ident = "Surface Laptop 1", -@@ -63,7 +69,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Laptop"), - }, -- .driver_data = (void *)QUIRK_FW_RST_D3COLD, -+ .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -+ QUIRK_DO_FLR_ON_BRIDGE), - }, - { - .ident = "Surface Laptop 2", -@@ -71,7 +78,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Laptop 2"), - }, -- .driver_data = (void *)QUIRK_FW_RST_D3COLD, -+ .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -+ QUIRK_DO_FLR_ON_BRIDGE), - }, - {} - }; -@@ -89,6 +97,8 @@ void mwifiex_initialize_quirks(struct pcie_service_card *card) - dev_info(&pdev->dev, "no quirks enabled\n"); - if (card->quirks & QUIRK_FW_RST_D3COLD) - dev_info(&pdev->dev, "quirk reset_d3cold enabled\n"); -+ if (card->quirks & QUIRK_DO_FLR_ON_BRIDGE) -+ dev_info(&pdev->dev, "quirk do_flr_on_bridge enabled\n"); - } - - static void mwifiex_pcie_set_power_d3cold(struct pci_dev *pdev) -diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h -index d6ff964aec5bf..5d30ae39d65ec 100644 ---- 
a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h -+++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h -@@ -4,6 +4,7 @@ - #include "pcie.h" - - #define QUIRK_FW_RST_D3COLD BIT(0) -+#define QUIRK_DO_FLR_ON_BRIDGE BIT(1) - - void mwifiex_initialize_quirks(struct pcie_service_card *card); - int mwifiex_pcie_reset_d3cold_quirk(struct pci_dev *pdev); --- -2.42.0 - -From 241da24644ea2f5b8119019448b638aa8df6ab26 Mon Sep 17 00:00:00 2001 -From: Tsuchiya Yuto -Date: Sun, 4 Oct 2020 00:11:49 +0900 -Subject: [PATCH] mwifiex: pcie: disable bridge_d3 for Surface gen4+ - -Currently, mwifiex fw will crash after suspend on recent kernel series. -On Windows, it seems that the root port of wifi will never enter D3 state -(stay on D0 state). And on Linux, disabling the D3 state for the -bridge fixes fw crashing after suspend. - -This commit disables the D3 state of root port on driver initialization -and fixes fw crashing after suspend. - -Signed-off-by: Tsuchiya Yuto -Patchset: mwifiex ---- - drivers/net/wireless/marvell/mwifiex/pcie.c | 7 +++++ - .../wireless/marvell/mwifiex/pcie_quirks.c | 27 +++++++++++++------ - .../wireless/marvell/mwifiex/pcie_quirks.h | 1 + - 3 files changed, 27 insertions(+), 8 deletions(-) - -diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c -index f06b4ebc5bd8e..07f13b52ddb92 100644 ---- a/drivers/net/wireless/marvell/mwifiex/pcie.c -+++ b/drivers/net/wireless/marvell/mwifiex/pcie.c -@@ -370,6 +370,7 @@ static int mwifiex_pcie_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) - { - struct pcie_service_card *card; -+ struct pci_dev *parent_pdev = pci_upstream_bridge(pdev); - int ret; - - pr_debug("info: vendor=0x%4.04X device=0x%4.04X rev=%d\n", -@@ -411,6 +412,12 @@ static int mwifiex_pcie_probe(struct pci_dev *pdev, - return -1; - } - -+ /* disable bridge_d3 for Surface gen4+ devices to fix fw crashing -+ * after suspend -+ */ -+ if (card->quirks & QUIRK_NO_BRIDGE_D3) -+ 
parent_pdev->bridge_d3 = false; -+ - return 0; - } - -diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c -index f46b06f8d6435..99b024ecbadea 100644 ---- a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c -+++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c -@@ -14,7 +14,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 4"), - }, - .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -- QUIRK_DO_FLR_ON_BRIDGE), -+ QUIRK_DO_FLR_ON_BRIDGE | -+ QUIRK_NO_BRIDGE_D3), - }, - { - .ident = "Surface Pro 5", -@@ -24,7 +25,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "Surface_Pro_1796"), - }, - .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -- QUIRK_DO_FLR_ON_BRIDGE), -+ QUIRK_DO_FLR_ON_BRIDGE | -+ QUIRK_NO_BRIDGE_D3), - }, - { - .ident = "Surface Pro 5 (LTE)", -@@ -34,7 +36,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "Surface_Pro_1807"), - }, - .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -- QUIRK_DO_FLR_ON_BRIDGE), -+ QUIRK_DO_FLR_ON_BRIDGE | -+ QUIRK_NO_BRIDGE_D3), - }, - { - .ident = "Surface Pro 6", -@@ -43,7 +46,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 6"), - }, - .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -- QUIRK_DO_FLR_ON_BRIDGE), -+ QUIRK_DO_FLR_ON_BRIDGE | -+ QUIRK_NO_BRIDGE_D3), - }, - { - .ident = "Surface Book 1", -@@ -52,7 +56,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Book"), - }, - .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -- QUIRK_DO_FLR_ON_BRIDGE), -+ QUIRK_DO_FLR_ON_BRIDGE | -+ QUIRK_NO_BRIDGE_D3), - }, - { - .ident = "Surface Book 2", -@@ -61,7 +66,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Book 2"), - }, - 
.driver_data = (void *)(QUIRK_FW_RST_D3COLD | -- QUIRK_DO_FLR_ON_BRIDGE), -+ QUIRK_DO_FLR_ON_BRIDGE | -+ QUIRK_NO_BRIDGE_D3), - }, - { - .ident = "Surface Laptop 1", -@@ -70,7 +76,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Laptop"), - }, - .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -- QUIRK_DO_FLR_ON_BRIDGE), -+ QUIRK_DO_FLR_ON_BRIDGE | -+ QUIRK_NO_BRIDGE_D3), - }, - { - .ident = "Surface Laptop 2", -@@ -79,7 +86,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = { - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Laptop 2"), - }, - .driver_data = (void *)(QUIRK_FW_RST_D3COLD | -- QUIRK_DO_FLR_ON_BRIDGE), -+ QUIRK_DO_FLR_ON_BRIDGE | -+ QUIRK_NO_BRIDGE_D3), - }, - {} - }; -@@ -99,6 +107,9 @@ void mwifiex_initialize_quirks(struct pcie_service_card *card) - dev_info(&pdev->dev, "quirk reset_d3cold enabled\n"); - if (card->quirks & QUIRK_DO_FLR_ON_BRIDGE) - dev_info(&pdev->dev, "quirk do_flr_on_bridge enabled\n"); -+ if (card->quirks & QUIRK_NO_BRIDGE_D3) -+ dev_info(&pdev->dev, -+ "quirk no_brigde_d3 enabled\n"); - } - - static void mwifiex_pcie_set_power_d3cold(struct pci_dev *pdev) -diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h -index 5d30ae39d65ec..c14eb56eb9118 100644 ---- a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h -+++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h -@@ -5,6 +5,7 @@ - - #define QUIRK_FW_RST_D3COLD BIT(0) - #define QUIRK_DO_FLR_ON_BRIDGE BIT(1) -+#define QUIRK_NO_BRIDGE_D3 BIT(2) - - void mwifiex_initialize_quirks(struct pcie_service_card *card); - int mwifiex_pcie_reset_d3cold_quirk(struct pci_dev *pdev); --- -2.42.0 - -From d20b58f9e2ccec57c66864e79c291c2618ab2dbe Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= -Date: Thu, 25 Mar 2021 11:33:02 +0100 -Subject: [PATCH] Bluetooth: btusb: Lower passive lescan interval on Marvell - 88W8897 - -The Marvell 88W8897 combined 
wifi and bluetooth card (pcie+usb version) -is used in a lot of Microsoft Surface devices, and all those devices -suffer from very low 2.4GHz wifi connection speeds while bluetooth is -enabled. The reason for that is that the default passive scanning -interval for Bluetooth Low Energy devices is quite high in Linux -(interval of 60 msec and scan window of 30 msec, see hci_core.c), and -the Marvell chip is known for its bad bt+wifi coexisting performance. - -So decrease that passive scan interval and make the scan window shorter -on this particular device to allow for spending more time transmitting -wifi signals: The new scan interval is 250 msec (0x190 * 0.625 msec) and -the new scan window is 6.25 msec (0xa * 0,625 msec). - -This change has a very large impact on the 2.4GHz wifi speeds and gets -it up to performance comparable with the Windows driver, which seems to -apply a similar quirk. - -The interval and window length were tested and found to work very well -with a lot of Bluetooth Low Energy devices, including the Surface Pen, a -Bluetooth Speaker and two modern Bluetooth headphones. All devices were -discovered immediately after turning them on. Even lower values were -also tested, but they introduced longer delays until devices get -discovered. 
- -Patchset: mwifiex ---- - drivers/bluetooth/btusb.c | 15 +++++++++++++++ - 1 file changed, 15 insertions(+) - -diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c -index 499f4809fcdf3..2d442e080ca28 100644 ---- a/drivers/bluetooth/btusb.c -+++ b/drivers/bluetooth/btusb.c -@@ -65,6 +65,7 @@ static struct usb_driver btusb_driver; - #define BTUSB_INTEL_BROKEN_INITIAL_NCMD BIT(25) - #define BTUSB_INTEL_NO_WBS_SUPPORT BIT(26) - #define BTUSB_ACTIONS_SEMI BIT(27) -+#define BTUSB_LOWER_LESCAN_INTERVAL BIT(28) - - static const struct usb_device_id btusb_table[] = { - /* Generic Bluetooth USB device */ -@@ -468,6 +469,7 @@ static const struct usb_device_id quirks_table[] = { - { USB_DEVICE(0x1286, 0x2044), .driver_info = BTUSB_MARVELL }, - { USB_DEVICE(0x1286, 0x2046), .driver_info = BTUSB_MARVELL }, - { USB_DEVICE(0x1286, 0x204e), .driver_info = BTUSB_MARVELL }, -+ { USB_DEVICE(0x1286, 0x204c), .driver_info = BTUSB_LOWER_LESCAN_INTERVAL }, - - /* Intel Bluetooth devices */ - { USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_COMBINED }, -@@ -4388,6 +4390,19 @@ static int btusb_probe(struct usb_interface *intf, - if (id->driver_info & BTUSB_MARVELL) - hdev->set_bdaddr = btusb_set_bdaddr_marvell; - -+ /* The Marvell 88W8897 combined wifi and bluetooth card is known for -+ * very bad bt+wifi coexisting performance. -+ * -+ * Decrease the passive BT Low Energy scan interval a bit -+ * (0x0190 * 0.625 msec = 250 msec) and make the scan window shorter -+ * (0x000a * 0,625 msec = 6.25 msec). This allows for significantly -+ * higher wifi throughput while passively scanning for BT LE devices. 
-+ */ -+ if (id->driver_info & BTUSB_LOWER_LESCAN_INTERVAL) { -+ hdev->le_scan_interval = 0x0190; -+ hdev->le_scan_window = 0x000a; -+ } -+ - if (IS_ENABLED(CONFIG_BT_HCIBTUSB_MTK) && - (id->driver_info & BTUSB_MEDIATEK)) { - hdev->setup = btusb_mtk_setup; --- -2.42.0 - -From c6f0985fae241ed43ea1245c9e5861e2c728e21e Mon Sep 17 00:00:00 2001 -From: Maximilian Luz -Date: Sat, 27 Feb 2021 00:45:52 +0100 -Subject: [PATCH] ath10k: Add module parameters to override board files - -Some Surface devices, specifically the Surface Go and AMD version of the -Surface Laptop 3 (wich both come with QCA6174 WiFi chips), work better -with a different board file, as it seems that the firmeware included -upstream is buggy. - -As it is generally not a good idea to randomly overwrite files, let -alone doing so via packages, we add module parameters to override those -file names in the driver. This allows us to package/deploy the override -via a modprobe.d config. - -Signed-off-by: Maximilian Luz -Patchset: ath10k ---- - drivers/net/wireless/ath/ath10k/core.c | 58 ++++++++++++++++++++++++++ - 1 file changed, 58 insertions(+) - -diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c -index 6cdb225b7eacc..19c036751fb16 100644 ---- a/drivers/net/wireless/ath/ath10k/core.c -+++ b/drivers/net/wireless/ath/ath10k/core.c -@@ -38,6 +38,9 @@ static bool fw_diag_log; - /* frame mode values are mapped as per enum ath10k_hw_txrx_mode */ - unsigned int ath10k_frame_mode = ATH10K_HW_TXRX_NATIVE_WIFI; - -+static char *override_board = ""; -+static char *override_board2 = ""; -+ - unsigned long ath10k_coredump_mask = BIT(ATH10K_FW_CRASH_DUMP_REGISTERS) | - BIT(ATH10K_FW_CRASH_DUMP_CE_DATA); - -@@ -50,6 +53,9 @@ module_param(fw_diag_log, bool, 0644); - module_param_named(frame_mode, ath10k_frame_mode, uint, 0644); - module_param_named(coredump_mask, ath10k_coredump_mask, ulong, 0444); - -+module_param(override_board, charp, 0644); -+module_param(override_board2, 
charp, 0644); -+ - MODULE_PARM_DESC(debug_mask, "Debugging mask"); - MODULE_PARM_DESC(uart_print, "Uart target debugging"); - MODULE_PARM_DESC(skip_otp, "Skip otp failure for calibration in testmode"); -@@ -59,6 +65,9 @@ MODULE_PARM_DESC(frame_mode, - MODULE_PARM_DESC(coredump_mask, "Bitfield of what to include in firmware crash file"); - MODULE_PARM_DESC(fw_diag_log, "Diag based fw log debugging"); - -+MODULE_PARM_DESC(override_board, "Override for board.bin file"); -+MODULE_PARM_DESC(override_board2, "Override for board-2.bin file"); -+ - static const struct ath10k_hw_params ath10k_hw_params_list[] = { - { - .id = QCA988X_HW_2_0_VERSION, -@@ -911,6 +920,42 @@ static int ath10k_init_configure_target(struct ath10k *ar) - return 0; - } - -+static const char *ath10k_override_board_fw_file(struct ath10k *ar, -+ const char *file) -+{ -+ if (strcmp(file, "board.bin") == 0) { -+ if (strcmp(override_board, "") == 0) -+ return file; -+ -+ if (strcmp(override_board, "none") == 0) { -+ dev_info(ar->dev, "firmware override: pretending 'board.bin' does not exist\n"); -+ return NULL; -+ } -+ -+ dev_info(ar->dev, "firmware override: replacing 'board.bin' with '%s'\n", -+ override_board); -+ -+ return override_board; -+ } -+ -+ if (strcmp(file, "board-2.bin") == 0) { -+ if (strcmp(override_board2, "") == 0) -+ return file; -+ -+ if (strcmp(override_board2, "none") == 0) { -+ dev_info(ar->dev, "firmware override: pretending 'board-2.bin' does not exist\n"); -+ return NULL; -+ } -+ -+ dev_info(ar->dev, "firmware override: replacing 'board-2.bin' with '%s'\n", -+ override_board2); -+ -+ return override_board2; -+ } -+ -+ return file; -+} -+ - static const struct firmware *ath10k_fetch_fw_file(struct ath10k *ar, - const char *dir, - const char *file) -@@ -925,6 +970,19 @@ static const struct firmware *ath10k_fetch_fw_file(struct ath10k *ar, - if (dir == NULL) - dir = "."; - -+ /* HACK: Override board.bin and board-2.bin files if specified. 
-+ * -+ * Some Surface devices perform better with a different board -+ * configuration. To this end, one would need to replace the board.bin -+ * file with the modified config and remove the board-2.bin file. -+ * Unfortunately, that's not a solution that we can easily package. So -+ * we add module options to perform these overrides here. -+ */ -+ -+ file = ath10k_override_board_fw_file(ar, file); -+ if (!file) -+ return ERR_PTR(-ENOENT); -+ - snprintf(filename, sizeof(filename), "%s/%s", dir, file); - ret = firmware_request_nowarn(&fw, filename, ar->dev); - ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot fw request '%s': %d\n", --- -2.42.0 - -From 986fe56f682f93925b2964f59fe78c7043758e47 Mon Sep 17 00:00:00 2001 -From: Dorian Stoll -Date: Thu, 30 Jul 2020 13:21:53 +0200 -Subject: [PATCH] misc: mei: Add missing IPTS device IDs - -Patchset: ipts ---- - drivers/misc/mei/hw-me-regs.h | 1 + - drivers/misc/mei/pci-me.c | 1 + - 2 files changed, 2 insertions(+) - -diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h -index bdc65d50b945f..08723c01d7275 100644 ---- a/drivers/misc/mei/hw-me-regs.h -+++ b/drivers/misc/mei/hw-me-regs.h -@@ -92,6 +92,7 @@ - #define MEI_DEV_ID_CDF 0x18D3 /* Cedar Fork */ - - #define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */ -+#define MEI_DEV_ID_ICP_LP_3 0x34E4 /* Ice Lake Point LP 3 (iTouch) */ - #define MEI_DEV_ID_ICP_N 0x38E0 /* Ice Lake Point N */ - - #define MEI_DEV_ID_JSP_N 0x4DE0 /* Jasper Lake Point N */ -diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c -index 676d566f38ddf..6b37dd1f8b2a3 100644 ---- a/drivers/misc/mei/pci-me.c -+++ b/drivers/misc/mei/pci-me.c -@@ -97,6 +97,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { - {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H_3, MEI_ME_PCH8_ITOUCH_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)}, -+ {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP_3, MEI_ME_PCH12_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_N, MEI_ME_PCH12_CFG)}, - - 
{MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH15_CFG)}, --- -2.42.0 - -From 72ee1cbf26ccc575dbfbaee5e7305ab13e1aeb1e Mon Sep 17 00:00:00 2001 -From: Liban Hannan -Date: Tue, 12 Apr 2022 23:31:12 +0100 -Subject: [PATCH] iommu: ipts: use IOMMU passthrough mode for IPTS - -Adds a quirk so that IOMMU uses passthrough mode for the IPTS device. -Otherwise, when IOMMU is enabled, IPTS produces DMAR errors like: - -DMAR: [DMA Read NO_PASID] Request device [00:16.4] fault addr -0x104ea3000 [fault reason 0x06] PTE Read access is not set - -This is very similar to the bug described at: -https://bugs.launchpad.net/bugs/1958004 - -Fixed with the following patch which this patch basically copies: -https://launchpadlibrarian.net/586396847/43255ca.diff -Patchset: ipts ---- - drivers/iommu/intel/iommu.c | 24 ++++++++++++++++++++++++ - 1 file changed, 24 insertions(+) - -diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c -index 3685ba90ec88e..5a627e081797c 100644 ---- a/drivers/iommu/intel/iommu.c -+++ b/drivers/iommu/intel/iommu.c -@@ -38,6 +38,8 @@ - #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) - #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB) - #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) -+#define IS_IPTS(pdev) ((pdev)->vendor == PCI_VENDOR_ID_INTEL && \ -+ ((pdev)->device == 0x9d3e)) - #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) - - #define IOAPIC_RANGE_START (0xfee00000) -@@ -292,12 +294,14 @@ int intel_iommu_enabled = 0; - EXPORT_SYMBOL_GPL(intel_iommu_enabled); - - static int dmar_map_gfx = 1; -+static int dmar_map_ipts = 1; - static int intel_iommu_superpage = 1; - static int iommu_identity_mapping; - static int iommu_skip_te_disable; - - #define IDENTMAP_GFX 2 - #define IDENTMAP_AZALIA 4 -+#define IDENTMAP_IPTS 16 - - const struct iommu_ops intel_iommu_ops; - -@@ -2542,6 +2546,9 @@ static int device_def_domain_type(struct device *dev) - 
- if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) - return IOMMU_DOMAIN_IDENTITY; -+ -+ if ((iommu_identity_mapping & IDENTMAP_IPTS) && IS_IPTS(pdev)) -+ return IOMMU_DOMAIN_IDENTITY; - } - - return 0; -@@ -2849,6 +2856,9 @@ static int __init init_dmars(void) - if (!dmar_map_gfx) - iommu_identity_mapping |= IDENTMAP_GFX; - -+ if (!dmar_map_ipts) -+ iommu_identity_mapping |= IDENTMAP_IPTS; -+ - check_tylersburg_isoch(); - - ret = si_domain_init(hw_pass_through); -@@ -4828,6 +4838,17 @@ static void quirk_iommu_igfx(struct pci_dev *dev) - dmar_map_gfx = 0; - } - -+static void quirk_iommu_ipts(struct pci_dev *dev) -+{ -+ if (!IS_IPTS(dev)) -+ return; -+ -+ if (risky_device(dev)) -+ return; -+ -+ pci_info(dev, "Passthrough IOMMU for IPTS\n"); -+ dmar_map_ipts = 0; -+} - /* G4x/GM45 integrated gfx dmar support is totally busted. */ - DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx); - DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx); -@@ -4863,6 +4884,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx); - DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx); - DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx); - -+/* disable IPTS dmar support */ -+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9D3E, quirk_iommu_ipts); -+ - static void quirk_iommu_rwbf(struct pci_dev *dev) - { - if (risky_device(dev)) --- -2.42.0 - -From 8330f9f39ce8c9796259a8aeffe919fa950e18f5 Mon Sep 17 00:00:00 2001 -From: Dorian Stoll -Date: Sun, 11 Dec 2022 12:00:59 +0100 -Subject: [PATCH] hid: Add support for Intel Precise Touch and Stylus - -Based on linux-surface/intel-precise-touch@8abe268 - -Signed-off-by: Dorian Stoll -Patchset: ipts ---- - drivers/hid/Kconfig | 2 + - drivers/hid/Makefile | 2 + - drivers/hid/ipts/Kconfig | 14 + - drivers/hid/ipts/Makefile | 16 ++ - drivers/hid/ipts/cmd.c | 61 +++++ - drivers/hid/ipts/cmd.h | 60 ++++ - drivers/hid/ipts/context.h | 
52 ++++ - drivers/hid/ipts/control.c | 486 +++++++++++++++++++++++++++++++++ - drivers/hid/ipts/control.h | 126 +++++++++ - drivers/hid/ipts/desc.h | 80 ++++++ - drivers/hid/ipts/eds1.c | 103 +++++++ - drivers/hid/ipts/eds1.h | 35 +++ - drivers/hid/ipts/eds2.c | 144 ++++++++++ - drivers/hid/ipts/eds2.h | 35 +++ - drivers/hid/ipts/hid.c | 225 +++++++++++++++ - drivers/hid/ipts/hid.h | 24 ++ - drivers/hid/ipts/main.c | 126 +++++++++ - drivers/hid/ipts/mei.c | 188 +++++++++++++ - drivers/hid/ipts/mei.h | 66 +++++ - drivers/hid/ipts/receiver.c | 250 +++++++++++++++++ - drivers/hid/ipts/receiver.h | 16 ++ - drivers/hid/ipts/resources.c | 131 +++++++++ - drivers/hid/ipts/resources.h | 41 +++ - drivers/hid/ipts/spec-data.h | 100 +++++++ - drivers/hid/ipts/spec-device.h | 290 ++++++++++++++++++++ - drivers/hid/ipts/spec-hid.h | 34 +++ - drivers/hid/ipts/thread.c | 84 ++++++ - drivers/hid/ipts/thread.h | 59 ++++ - 28 files changed, 2850 insertions(+) - create mode 100644 drivers/hid/ipts/Kconfig - create mode 100644 drivers/hid/ipts/Makefile - create mode 100644 drivers/hid/ipts/cmd.c - create mode 100644 drivers/hid/ipts/cmd.h - create mode 100644 drivers/hid/ipts/context.h - create mode 100644 drivers/hid/ipts/control.c - create mode 100644 drivers/hid/ipts/control.h - create mode 100644 drivers/hid/ipts/desc.h - create mode 100644 drivers/hid/ipts/eds1.c - create mode 100644 drivers/hid/ipts/eds1.h - create mode 100644 drivers/hid/ipts/eds2.c - create mode 100644 drivers/hid/ipts/eds2.h - create mode 100644 drivers/hid/ipts/hid.c - create mode 100644 drivers/hid/ipts/hid.h - create mode 100644 drivers/hid/ipts/main.c - create mode 100644 drivers/hid/ipts/mei.c - create mode 100644 drivers/hid/ipts/mei.h - create mode 100644 drivers/hid/ipts/receiver.c - create mode 100644 drivers/hid/ipts/receiver.h - create mode 100644 drivers/hid/ipts/resources.c - create mode 100644 drivers/hid/ipts/resources.h - create mode 100644 drivers/hid/ipts/spec-data.h - create mode 100644 
drivers/hid/ipts/spec-device.h - create mode 100644 drivers/hid/ipts/spec-hid.h - create mode 100644 drivers/hid/ipts/thread.c - create mode 100644 drivers/hid/ipts/thread.h - -diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig -index 790aa908e2a78..0b9d245d10e54 100644 ---- a/drivers/hid/Kconfig -+++ b/drivers/hid/Kconfig -@@ -1345,4 +1345,6 @@ source "drivers/hid/amd-sfh-hid/Kconfig" - - source "drivers/hid/surface-hid/Kconfig" - -+source "drivers/hid/ipts/Kconfig" -+ - endif # HID_SUPPORT -diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile -index 8a06d0f840bcb..2ef21b257d0b5 100644 ---- a/drivers/hid/Makefile -+++ b/drivers/hid/Makefile -@@ -169,3 +169,5 @@ obj-$(INTEL_ISH_FIRMWARE_DOWNLOADER) += intel-ish-hid/ - obj-$(CONFIG_AMD_SFH_HID) += amd-sfh-hid/ - - obj-$(CONFIG_SURFACE_HID_CORE) += surface-hid/ -+ -+obj-$(CONFIG_HID_IPTS) += ipts/ -diff --git a/drivers/hid/ipts/Kconfig b/drivers/hid/ipts/Kconfig -new file mode 100644 -index 0000000000000..297401bd388dd ---- /dev/null -+++ b/drivers/hid/ipts/Kconfig -@@ -0,0 +1,14 @@ -+# SPDX-License-Identifier: GPL-2.0-or-later -+ -+config HID_IPTS -+ tristate "Intel Precise Touch & Stylus" -+ depends on INTEL_MEI -+ depends on HID -+ help -+ Say Y here if your system has a touchscreen using Intels -+ Precise Touch & Stylus (IPTS) technology. -+ -+ If unsure say N. -+ -+ To compile this driver as a module, choose M here: the -+ module will be called ipts. 
-diff --git a/drivers/hid/ipts/Makefile b/drivers/hid/ipts/Makefile -new file mode 100644 -index 0000000000000..883896f68e6ad ---- /dev/null -+++ b/drivers/hid/ipts/Makefile -@@ -0,0 +1,16 @@ -+# SPDX-License-Identifier: GPL-2.0-or-later -+# -+# Makefile for the IPTS touchscreen driver -+# -+ -+obj-$(CONFIG_HID_IPTS) += ipts.o -+ipts-objs := cmd.o -+ipts-objs += control.o -+ipts-objs += eds1.o -+ipts-objs += eds2.o -+ipts-objs += hid.o -+ipts-objs += main.o -+ipts-objs += mei.o -+ipts-objs += receiver.o -+ipts-objs += resources.o -+ipts-objs += thread.o -diff --git a/drivers/hid/ipts/cmd.c b/drivers/hid/ipts/cmd.c -new file mode 100644 -index 0000000000000..63a4934bbc5fa ---- /dev/null -+++ b/drivers/hid/ipts/cmd.c -@@ -0,0 +1,61 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#include -+#include -+ -+#include "cmd.h" -+#include "context.h" -+#include "mei.h" -+#include "spec-device.h" -+ -+int ipts_cmd_recv_timeout(struct ipts_context *ipts, enum ipts_command_code code, -+ struct ipts_response *rsp, u64 timeout) -+{ -+ int ret = 0; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!rsp) -+ return -EFAULT; -+ -+ /* -+ * In a response, the command code will have the most significant bit flipped to 1. -+ * If code is passed to ipts_mei_recv as is, no messages will be received. -+ */ -+ ret = ipts_mei_recv(&ipts->mei, code | IPTS_RSP_BIT, rsp, timeout); -+ if (ret < 0) -+ return ret; -+ -+ dev_dbg(ipts->dev, "Received 0x%02X with status 0x%02X\n", code, rsp->status); -+ -+ /* -+ * Some devices will always return this error. -+ * It is allowed to ignore it and to try continuing. 
-+ */ -+ if (rsp->status == IPTS_STATUS_COMPAT_CHECK_FAIL) -+ rsp->status = IPTS_STATUS_SUCCESS; -+ -+ return 0; -+} -+ -+int ipts_cmd_send(struct ipts_context *ipts, enum ipts_command_code code, void *data, size_t size) -+{ -+ struct ipts_command cmd = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ cmd.cmd = code; -+ -+ if (data && size > 0) -+ memcpy(cmd.payload, data, size); -+ -+ dev_dbg(ipts->dev, "Sending 0x%02X with %ld bytes payload\n", code, size); -+ return ipts_mei_send(&ipts->mei, &cmd, sizeof(cmd.cmd) + size); -+} -diff --git a/drivers/hid/ipts/cmd.h b/drivers/hid/ipts/cmd.h -new file mode 100644 -index 0000000000000..2b4079075b642 ---- /dev/null -+++ b/drivers/hid/ipts/cmd.h -@@ -0,0 +1,60 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_CMD_H -+#define IPTS_CMD_H -+ -+#include -+ -+#include "context.h" -+#include "spec-device.h" -+ -+/* -+ * The default timeout for receiving responses -+ */ -+#define IPTS_CMD_DEFAULT_TIMEOUT 1000 -+ -+/** -+ * ipts_cmd_recv_timeout() - Receives a response to a command. -+ * @ipts: The IPTS driver context. -+ * @code: The type of the command / response. -+ * @rsp: The address that the received response will be copied to. -+ * @timeout: How many milliseconds the function will wait at most. -+ * -+ * A negative timeout means to wait forever. -+ * -+ * Returns: 0 on success, <0 on error, -EAGAIN if no response has been received. -+ */ -+int ipts_cmd_recv_timeout(struct ipts_context *ipts, enum ipts_command_code code, -+ struct ipts_response *rsp, u64 timeout); -+ -+/** -+ * ipts_cmd_recv() - Receives a response to a command. -+ * @ipts: The IPTS driver context. -+ * @code: The type of the command / response. -+ * @rsp: The address that the received response will be copied to. -+ * -+ * Returns: 0 on success, <0 on error, -EAGAIN if no response has been received. 
-+ */ -+static inline int ipts_cmd_recv(struct ipts_context *ipts, enum ipts_command_code code, -+ struct ipts_response *rsp) -+{ -+ return ipts_cmd_recv_timeout(ipts, code, rsp, IPTS_CMD_DEFAULT_TIMEOUT); -+} -+ -+/** -+ * ipts_cmd_send() - Executes a command on the device. -+ * @ipts: The IPTS driver context. -+ * @code: The type of the command to execute. -+ * @data: The payload containing parameters for the command. -+ * @size: The size of the payload. -+ * -+ * Returns: 0 on success, <0 on error. -+ */ -+int ipts_cmd_send(struct ipts_context *ipts, enum ipts_command_code code, void *data, size_t size); -+ -+#endif /* IPTS_CMD_H */ -diff --git a/drivers/hid/ipts/context.h b/drivers/hid/ipts/context.h -new file mode 100644 -index 0000000000000..ba33259f1f7c5 ---- /dev/null -+++ b/drivers/hid/ipts/context.h -@@ -0,0 +1,52 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_CONTEXT_H -+#define IPTS_CONTEXT_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "mei.h" -+#include "resources.h" -+#include "spec-device.h" -+#include "thread.h" -+ -+struct ipts_context { -+ struct device *dev; -+ struct ipts_mei mei; -+ -+ enum ipts_mode mode; -+ -+ /* -+ * Prevents concurrent GET_FEATURE reports. -+ */ -+ struct mutex feature_lock; -+ struct completion feature_event; -+ -+ /* -+ * These are not inside of struct ipts_resources -+ * because they don't own the memory they point to. 
-+ */ -+ struct ipts_buffer feature_report; -+ struct ipts_buffer descriptor; -+ -+ bool hid_active; -+ struct hid_device *hid; -+ -+ struct ipts_device_info info; -+ struct ipts_resources resources; -+ -+ struct ipts_thread receiver_loop; -+}; -+ -+#endif /* IPTS_CONTEXT_H */ -diff --git a/drivers/hid/ipts/control.c b/drivers/hid/ipts/control.c -new file mode 100644 -index 0000000000000..5360842d260ba ---- /dev/null -+++ b/drivers/hid/ipts/control.c -@@ -0,0 +1,486 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "cmd.h" -+#include "context.h" -+#include "control.h" -+#include "desc.h" -+#include "hid.h" -+#include "receiver.h" -+#include "resources.h" -+#include "spec-data.h" -+#include "spec-device.h" -+ -+static int ipts_control_get_device_info(struct ipts_context *ipts, struct ipts_device_info *info) -+{ -+ int ret = 0; -+ struct ipts_response rsp = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!info) -+ return -EFAULT; -+ -+ ret = ipts_cmd_send(ipts, IPTS_CMD_GET_DEVICE_INFO, NULL, 0); -+ if (ret) { -+ dev_err(ipts->dev, "GET_DEVICE_INFO: send failed: %d\n", ret); -+ return ret; -+ } -+ -+ ret = ipts_cmd_recv(ipts, IPTS_CMD_GET_DEVICE_INFO, &rsp); -+ if (ret) { -+ dev_err(ipts->dev, "GET_DEVICE_INFO: recv failed: %d\n", ret); -+ return ret; -+ } -+ -+ if (rsp.status != IPTS_STATUS_SUCCESS) { -+ dev_err(ipts->dev, "GET_DEVICE_INFO: cmd failed: %d\n", rsp.status); -+ return -EBADR; -+ } -+ -+ memcpy(info, rsp.payload, sizeof(*info)); -+ return 0; -+} -+ -+static int ipts_control_set_mode(struct ipts_context *ipts, enum ipts_mode mode) -+{ -+ int ret = 0; -+ struct ipts_set_mode cmd = { 0 }; -+ struct ipts_response rsp = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ cmd.mode = mode; -+ -+ ret = ipts_cmd_send(ipts, IPTS_CMD_SET_MODE, &cmd, sizeof(cmd)); -+ if 
(ret) { -+ dev_err(ipts->dev, "SET_MODE: send failed: %d\n", ret); -+ return ret; -+ } -+ -+ ret = ipts_cmd_recv(ipts, IPTS_CMD_SET_MODE, &rsp); -+ if (ret) { -+ dev_err(ipts->dev, "SET_MODE: recv failed: %d\n", ret); -+ return ret; -+ } -+ -+ if (rsp.status != IPTS_STATUS_SUCCESS) { -+ dev_err(ipts->dev, "SET_MODE: cmd failed: %d\n", rsp.status); -+ return -EBADR; -+ } -+ -+ return 0; -+} -+ -+static int ipts_control_set_mem_window(struct ipts_context *ipts, struct ipts_resources *res) -+{ -+ int i = 0; -+ int ret = 0; -+ struct ipts_mem_window cmd = { 0 }; -+ struct ipts_response rsp = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!res) -+ return -EFAULT; -+ -+ for (i = 0; i < IPTS_BUFFERS; i++) { -+ cmd.data_addr_lower[i] = lower_32_bits(res->data[i].dma_address); -+ cmd.data_addr_upper[i] = upper_32_bits(res->data[i].dma_address); -+ cmd.feedback_addr_lower[i] = lower_32_bits(res->feedback[i].dma_address); -+ cmd.feedback_addr_upper[i] = upper_32_bits(res->feedback[i].dma_address); -+ } -+ -+ cmd.workqueue_addr_lower = lower_32_bits(res->workqueue.dma_address); -+ cmd.workqueue_addr_upper = upper_32_bits(res->workqueue.dma_address); -+ -+ cmd.doorbell_addr_lower = lower_32_bits(res->doorbell.dma_address); -+ cmd.doorbell_addr_upper = upper_32_bits(res->doorbell.dma_address); -+ -+ cmd.hid2me_addr_lower = lower_32_bits(res->hid2me.dma_address); -+ cmd.hid2me_addr_upper = upper_32_bits(res->hid2me.dma_address); -+ -+ cmd.workqueue_size = IPTS_WORKQUEUE_SIZE; -+ cmd.workqueue_item_size = IPTS_WORKQUEUE_ITEM_SIZE; -+ -+ ret = ipts_cmd_send(ipts, IPTS_CMD_SET_MEM_WINDOW, &cmd, sizeof(cmd)); -+ if (ret) { -+ dev_err(ipts->dev, "SET_MEM_WINDOW: send failed: %d\n", ret); -+ return ret; -+ } -+ -+ ret = ipts_cmd_recv(ipts, IPTS_CMD_SET_MEM_WINDOW, &rsp); -+ if (ret) { -+ dev_err(ipts->dev, "SET_MEM_WINDOW: recv failed: %d\n", ret); -+ return ret; -+ } -+ -+ if (rsp.status != IPTS_STATUS_SUCCESS) { -+ dev_err(ipts->dev, "SET_MEM_WINDOW: cmd failed: %d\n", 
rsp.status); -+ return -EBADR; -+ } -+ -+ return 0; -+} -+ -+static int ipts_control_get_descriptor(struct ipts_context *ipts) -+{ -+ int ret = 0; -+ struct ipts_data_header *header = NULL; -+ struct ipts_get_descriptor cmd = { 0 }; -+ struct ipts_response rsp = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!ipts->resources.descriptor.address) -+ return -EFAULT; -+ -+ memset(ipts->resources.descriptor.address, 0, ipts->resources.descriptor.size); -+ -+ cmd.addr_lower = lower_32_bits(ipts->resources.descriptor.dma_address); -+ cmd.addr_upper = upper_32_bits(ipts->resources.descriptor.dma_address); -+ cmd.magic = 8; -+ -+ ret = ipts_cmd_send(ipts, IPTS_CMD_GET_DESCRIPTOR, &cmd, sizeof(cmd)); -+ if (ret) { -+ dev_err(ipts->dev, "GET_DESCRIPTOR: send failed: %d\n", ret); -+ return ret; -+ } -+ -+ ret = ipts_cmd_recv(ipts, IPTS_CMD_GET_DESCRIPTOR, &rsp); -+ if (ret) { -+ dev_err(ipts->dev, "GET_DESCRIPTOR: recv failed: %d\n", ret); -+ return ret; -+ } -+ -+ if (rsp.status != IPTS_STATUS_SUCCESS) { -+ dev_err(ipts->dev, "GET_DESCRIPTOR: cmd failed: %d\n", rsp.status); -+ return -EBADR; -+ } -+ -+ header = (struct ipts_data_header *)ipts->resources.descriptor.address; -+ -+ if (header->type == IPTS_DATA_TYPE_DESCRIPTOR) { -+ ipts->descriptor.address = &header->data[8]; -+ ipts->descriptor.size = header->size - 8; -+ -+ return 0; -+ } -+ -+ return -ENODATA; -+} -+ -+int ipts_control_request_flush(struct ipts_context *ipts) -+{ -+ int ret = 0; -+ struct ipts_quiesce_io cmd = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ ret = ipts_cmd_send(ipts, IPTS_CMD_QUIESCE_IO, &cmd, sizeof(cmd)); -+ if (ret) -+ dev_err(ipts->dev, "QUIESCE_IO: send failed: %d\n", ret); -+ -+ return ret; -+} -+ -+int ipts_control_wait_flush(struct ipts_context *ipts) -+{ -+ int ret = 0; -+ struct ipts_response rsp = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ ret = ipts_cmd_recv(ipts, IPTS_CMD_QUIESCE_IO, &rsp); -+ if (ret) { -+ dev_err(ipts->dev, "QUIESCE_IO: recv failed: %d\n", ret); -+ 
return ret; -+ } -+ -+ if (rsp.status == IPTS_STATUS_TIMEOUT) -+ return -EAGAIN; -+ -+ if (rsp.status != IPTS_STATUS_SUCCESS) { -+ dev_err(ipts->dev, "QUIESCE_IO: cmd failed: %d\n", rsp.status); -+ return -EBADR; -+ } -+ -+ return 0; -+} -+ -+int ipts_control_request_data(struct ipts_context *ipts) -+{ -+ int ret = 0; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ ret = ipts_cmd_send(ipts, IPTS_CMD_READY_FOR_DATA, NULL, 0); -+ if (ret) -+ dev_err(ipts->dev, "READY_FOR_DATA: send failed: %d\n", ret); -+ -+ return ret; -+} -+ -+int ipts_control_wait_data(struct ipts_context *ipts, bool shutdown) -+{ -+ int ret = 0; -+ struct ipts_response rsp = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!shutdown) -+ ret = ipts_cmd_recv_timeout(ipts, IPTS_CMD_READY_FOR_DATA, &rsp, 0); -+ else -+ ret = ipts_cmd_recv(ipts, IPTS_CMD_READY_FOR_DATA, &rsp); -+ -+ if (ret) { -+ if (ret != -EAGAIN) -+ dev_err(ipts->dev, "READY_FOR_DATA: recv failed: %d\n", ret); -+ -+ return ret; -+ } -+ -+ /* -+ * During shutdown, it is possible that the sensor has already been disabled. -+ */ -+ if (rsp.status == IPTS_STATUS_SENSOR_DISABLED) -+ return 0; -+ -+ if (rsp.status == IPTS_STATUS_TIMEOUT) -+ return -EAGAIN; -+ -+ if (rsp.status != IPTS_STATUS_SUCCESS) { -+ dev_err(ipts->dev, "READY_FOR_DATA: cmd failed: %d\n", rsp.status); -+ return -EBADR; -+ } -+ -+ return 0; -+} -+ -+int ipts_control_send_feedback(struct ipts_context *ipts, u32 buffer) -+{ -+ int ret = 0; -+ struct ipts_feedback cmd = { 0 }; -+ struct ipts_response rsp = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ cmd.buffer = buffer; -+ -+ ret = ipts_cmd_send(ipts, IPTS_CMD_FEEDBACK, &cmd, sizeof(cmd)); -+ if (ret) { -+ dev_err(ipts->dev, "FEEDBACK: send failed: %d\n", ret); -+ return ret; -+ } -+ -+ ret = ipts_cmd_recv(ipts, IPTS_CMD_FEEDBACK, &rsp); -+ if (ret) { -+ dev_err(ipts->dev, "FEEDBACK: recv failed: %d\n", ret); -+ return ret; -+ } -+ -+ /* -+ * We don't know what feedback data looks like so we are sending zeros. 
-+ * See also ipts_control_refill_buffer. -+ */ -+ if (rsp.status == IPTS_STATUS_INVALID_PARAMS) -+ return 0; -+ -+ if (rsp.status != IPTS_STATUS_SUCCESS) { -+ dev_err(ipts->dev, "FEEDBACK: cmd failed: %d\n", rsp.status); -+ return -EBADR; -+ } -+ -+ return 0; -+} -+ -+int ipts_control_hid2me_feedback(struct ipts_context *ipts, enum ipts_feedback_cmd_type cmd, -+ enum ipts_feedback_data_type type, void *data, size_t size) -+{ -+ struct ipts_feedback_header *header = NULL; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!ipts->resources.hid2me.address) -+ return -EFAULT; -+ -+ memset(ipts->resources.hid2me.address, 0, ipts->resources.hid2me.size); -+ header = (struct ipts_feedback_header *)ipts->resources.hid2me.address; -+ -+ header->cmd_type = cmd; -+ header->data_type = type; -+ header->size = size; -+ header->buffer = IPTS_HID2ME_BUFFER; -+ -+ if (size + sizeof(*header) > ipts->resources.hid2me.size) -+ return -EINVAL; -+ -+ if (data && size > 0) -+ memcpy(header->payload, data, size); -+ -+ return ipts_control_send_feedback(ipts, IPTS_HID2ME_BUFFER); -+} -+ -+int ipts_control_start(struct ipts_context *ipts) -+{ -+ int ret = 0; -+ struct ipts_device_info info = { 0 }; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ dev_info(ipts->dev, "Starting IPTS\n"); -+ -+ ret = ipts_control_get_device_info(ipts, &info); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to get device info: %d\n", ret); -+ return ret; -+ } -+ -+ ipts->info = info; -+ -+ ret = ipts_resources_init(&ipts->resources, ipts->dev, info.data_size, info.feedback_size); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to allocate buffers: %d", ret); -+ return ret; -+ } -+ -+ dev_info(ipts->dev, "IPTS EDS Version: %d\n", info.intf_eds); -+ -+ /* -+ * Handle newer devices -+ */ -+ if (info.intf_eds > 1) { -+ /* -+ * Fetching the descriptor will only work on newer devices. -+ * For older devices, a fallback descriptor will be used. 
-+ */ -+ ret = ipts_control_get_descriptor(ipts); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to fetch HID descriptor: %d\n", ret); -+ return ret; -+ } -+ -+ /* -+ * Newer devices can be directly initialized in polling mode. -+ */ -+ ipts->mode = IPTS_MODE_POLL; -+ } -+ -+ ret = ipts_control_set_mode(ipts, ipts->mode); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to set mode: %d\n", ret); -+ return ret; -+ } -+ -+ ret = ipts_control_set_mem_window(ipts, &ipts->resources); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to set memory window: %d\n", ret); -+ return ret; -+ } -+ -+ ret = ipts_receiver_start(ipts); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to start receiver: %d\n", ret); -+ return ret; -+ } -+ -+ ret = ipts_control_request_data(ipts); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to request data: %d\n", ret); -+ return ret; -+ } -+ -+ ipts_hid_enable(ipts); -+ -+ ret = ipts_hid_init(ipts, info); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to initialize HID device: %d\n", ret); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static int _ipts_control_stop(struct ipts_context *ipts) -+{ -+ int ret = 0; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ ipts_hid_disable(ipts); -+ dev_info(ipts->dev, "Stopping IPTS\n"); -+ -+ ret = ipts_receiver_stop(ipts); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to stop receiver: %d\n", ret); -+ return ret; -+ } -+ -+ ret = ipts_resources_free(&ipts->resources); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to free resources: %d\n", ret); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+int ipts_control_stop(struct ipts_context *ipts) -+{ -+ int ret = 0; -+ -+ ret = _ipts_control_stop(ipts); -+ if (ret) -+ return ret; -+ -+ ret = ipts_hid_free(ipts); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to free HID device: %d\n", ret); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+int ipts_control_restart(struct ipts_context *ipts) -+{ -+ int ret = 0; -+ -+ ret = _ipts_control_stop(ipts); -+ if (ret) -+ return ret; -+ -+ /* -+ * Wait a second to give 
the sensor time to fully shut down. -+ */ -+ msleep(1000); -+ -+ ret = ipts_control_start(ipts); -+ if (ret) -+ return ret; -+ -+ return 0; -+} -diff --git a/drivers/hid/ipts/control.h b/drivers/hid/ipts/control.h -new file mode 100644 -index 0000000000000..26629c5144edb ---- /dev/null -+++ b/drivers/hid/ipts/control.h -@@ -0,0 +1,126 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_CONTROL_H -+#define IPTS_CONTROL_H -+ -+#include -+ -+#include "context.h" -+#include "spec-data.h" -+#include "spec-device.h" -+ -+/** -+ * ipts_control_request_flush() - Stop the data flow. -+ * @ipts: The IPTS driver context. -+ * -+ * Runs the command to stop the data flow on the device. -+ * All outstanding data needs to be acknowledged using feedback before the command will return. -+ * -+ * Returns: 0 on success, <0 on error. -+ */ -+int ipts_control_request_flush(struct ipts_context *ipts); -+ -+/** -+ * ipts_control_wait_flush() - Wait until data flow has been stopped. -+ * @ipts: The IPTS driver context. -+ * -+ * Returns: 0 on success, <0 on error. -+ */ -+int ipts_control_wait_flush(struct ipts_context *ipts); -+ -+/** -+ * ipts_control_wait_flush() - Notify the device that the driver can receive new data. -+ * @ipts: The IPTS driver context. -+ * -+ * Returns: 0 on success, <0 on error. -+ */ -+int ipts_control_request_data(struct ipts_context *ipts); -+ -+/** -+ * ipts_control_wait_data() - Wait until new data is available. -+ * @ipts: The IPTS driver context. -+ * @block: Whether to block execution until data is available. -+ * -+ * In poll mode, this function will never return while the data flow is active. Instead, -+ * the poll will be incremented when new data is available. -+ * -+ * Returns: 0 on success, <0 on error, -EAGAIN if no data is available. 
-+ */ -+int ipts_control_wait_data(struct ipts_context *ipts, bool block); -+ -+/** -+ * ipts_control_send_feedback() - Submits a feedback buffer to the device. -+ * @ipts: The IPTS driver context. -+ * @buffer: The ID of the buffer containing feedback data. -+ * -+ * Returns: 0 on success, <0 on error. -+ */ -+int ipts_control_send_feedback(struct ipts_context *ipts, u32 buffer); -+ -+/** -+ * ipts_control_hid2me_feedback() - Sends HID2ME feedback, a special type of feedback. -+ * @ipts: The IPTS driver context. -+ * @cmd: The command that will be run on the device. -+ * @type: The type of the payload that is sent to the device. -+ * @data: The payload of the feedback command. -+ * @size: The size of the payload. -+ * -+ * HID2ME feedback is a special type of feedback, because it allows interfacing with -+ * the HID API of the device at any moment, without requiring a buffer that has to -+ * be acknowledged. -+ * -+ * Returns: 0 on success, <0 on error. -+ */ -+int ipts_control_hid2me_feedback(struct ipts_context *ipts, enum ipts_feedback_cmd_type cmd, -+ enum ipts_feedback_data_type type, void *data, size_t size); -+ -+/** -+ * ipts_control_refill_buffer() - Acknowledges that data in a buffer has been processed. -+ * @ipts: The IPTS driver context. -+ * @buffer: The buffer that has been processed and can be refilled. -+ * -+ * Returns: 0 on success, <0 on error. -+ */ -+static inline int ipts_control_refill_buffer(struct ipts_context *ipts, u32 buffer) -+{ -+ /* -+ * IPTS expects structured data in the feedback buffer matching the buffer that will be -+ * refilled. We don't know what that data looks like, so we just keep the buffer empty. -+ * This results in an INVALID_PARAMS error, but the buffer gets refilled without an issue. -+ * Sending a minimal structure with the buffer ID fixes the error, but breaks refilling -+ * the buffers on some devices. 
-+ */ -+ -+ return ipts_control_send_feedback(ipts, buffer); -+} -+ -+/** -+ * ipts_control_start() - Initialized the device and starts the data flow. -+ * @ipts: The IPTS driver context. -+ * -+ * Returns: 0 on success, <0 on error. -+ */ -+int ipts_control_start(struct ipts_context *ipts); -+ -+/** -+ * ipts_control_stop() - Stops the data flow and resets the device. -+ * @ipts: The IPTS driver context. -+ * -+ * Returns: 0 on success, <0 on error. -+ */ -+int ipts_control_stop(struct ipts_context *ipts); -+ -+/** -+ * ipts_control_restart() - Stops the device and starts it again. -+ * @ipts: The IPTS driver context. -+ * -+ * Returns: 0 on success, <0 on error. -+ */ -+int ipts_control_restart(struct ipts_context *ipts); -+ -+#endif /* IPTS_CONTROL_H */ -diff --git a/drivers/hid/ipts/desc.h b/drivers/hid/ipts/desc.h -new file mode 100644 -index 0000000000000..307438c7c80cd ---- /dev/null -+++ b/drivers/hid/ipts/desc.h -@@ -0,0 +1,80 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2022-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_DESC_H -+#define IPTS_DESC_H -+ -+#include -+ -+#define IPTS_HID_REPORT_SINGLETOUCH 64 -+#define IPTS_HID_REPORT_DATA 65 -+#define IPTS_HID_REPORT_SET_MODE 66 -+ -+#define IPTS_HID_REPORT_DATA_SIZE 7485 -+ -+/* -+ * HID descriptor for singletouch data. -+ * This descriptor should be present on all IPTS devices. 
-+ */ -+static const u8 ipts_singletouch_descriptor[] = { -+ 0x05, 0x0D, /* Usage Page (Digitizer), */ -+ 0x09, 0x04, /* Usage (Touchscreen), */ -+ 0xA1, 0x01, /* Collection (Application), */ -+ 0x85, 0x40, /* Report ID (64), */ -+ 0x09, 0x42, /* Usage (Tip Switch), */ -+ 0x15, 0x00, /* Logical Minimum (0), */ -+ 0x25, 0x01, /* Logical Maximum (1), */ -+ 0x75, 0x01, /* Report Size (1), */ -+ 0x95, 0x01, /* Report Count (1), */ -+ 0x81, 0x02, /* Input (Variable), */ -+ 0x95, 0x07, /* Report Count (7), */ -+ 0x81, 0x03, /* Input (Constant, Variable), */ -+ 0x05, 0x01, /* Usage Page (Desktop), */ -+ 0x09, 0x30, /* Usage (X), */ -+ 0x75, 0x10, /* Report Size (16), */ -+ 0x95, 0x01, /* Report Count (1), */ -+ 0xA4, /* Push, */ -+ 0x55, 0x0E, /* Unit Exponent (14), */ -+ 0x65, 0x11, /* Unit (Centimeter), */ -+ 0x46, 0x76, 0x0B, /* Physical Maximum (2934), */ -+ 0x26, 0xFF, 0x7F, /* Logical Maximum (32767), */ -+ 0x81, 0x02, /* Input (Variable), */ -+ 0x09, 0x31, /* Usage (Y), */ -+ 0x46, 0x74, 0x06, /* Physical Maximum (1652), */ -+ 0x26, 0xFF, 0x7F, /* Logical Maximum (32767), */ -+ 0x81, 0x02, /* Input (Variable), */ -+ 0xB4, /* Pop, */ -+ 0xC0, /* End Collection */ -+}; -+ -+/* -+ * Fallback HID descriptor for older devices that do not have -+ * the ability to query their HID descriptor. 
-+ */ -+static const u8 ipts_fallback_descriptor[] = { -+ 0x05, 0x0D, /* Usage Page (Digitizer), */ -+ 0x09, 0x0F, /* Usage (Capacitive Hm Digitizer), */ -+ 0xA1, 0x01, /* Collection (Application), */ -+ 0x85, 0x41, /* Report ID (65), */ -+ 0x09, 0x56, /* Usage (Scan Time), */ -+ 0x95, 0x01, /* Report Count (1), */ -+ 0x75, 0x10, /* Report Size (16), */ -+ 0x81, 0x02, /* Input (Variable), */ -+ 0x09, 0x61, /* Usage (Gesture Char Quality), */ -+ 0x75, 0x08, /* Report Size (8), */ -+ 0x96, 0x3D, 0x1D, /* Report Count (7485), */ -+ 0x81, 0x03, /* Input (Constant, Variable), */ -+ 0x85, 0x42, /* Report ID (66), */ -+ 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ -+ 0x09, 0xC8, /* Usage (C8h), */ -+ 0x75, 0x08, /* Report Size (8), */ -+ 0x95, 0x01, /* Report Count (1), */ -+ 0xB1, 0x02, /* Feature (Variable), */ -+ 0xC0, /* End Collection, */ -+}; -+ -+#endif /* IPTS_DESC_H */ -diff --git a/drivers/hid/ipts/eds1.c b/drivers/hid/ipts/eds1.c -new file mode 100644 -index 0000000000000..ecbb3a8bdaf60 ---- /dev/null -+++ b/drivers/hid/ipts/eds1.c -@@ -0,0 +1,103 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "context.h" -+#include "control.h" -+#include "desc.h" -+#include "spec-device.h" -+ -+int ipts_eds1_get_descriptor(struct ipts_context *ipts, u8 **desc_buffer, size_t *desc_size) -+{ -+ size_t size = 0; -+ u8 *buffer = NULL; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!desc_buffer) -+ return -EFAULT; -+ -+ if (!desc_size) -+ return -EFAULT; -+ -+ size = sizeof(ipts_singletouch_descriptor) + sizeof(ipts_fallback_descriptor); -+ -+ buffer = kzalloc(size, GFP_KERNEL); -+ if (!buffer) -+ return -ENOMEM; -+ -+ memcpy(buffer, ipts_singletouch_descriptor, sizeof(ipts_singletouch_descriptor)); -+ memcpy(&buffer[sizeof(ipts_singletouch_descriptor)], ipts_fallback_descriptor, -+ 
sizeof(ipts_fallback_descriptor)); -+ -+ *desc_size = size; -+ *desc_buffer = buffer; -+ -+ return 0; -+} -+ -+static int ipts_eds1_switch_mode(struct ipts_context *ipts, enum ipts_mode mode) -+{ -+ int ret = 0; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (ipts->mode == mode) -+ return 0; -+ -+ ipts->mode = mode; -+ -+ ret = ipts_control_restart(ipts); -+ if (ret) -+ dev_err(ipts->dev, "Failed to switch modes: %d\n", ret); -+ -+ return ret; -+} -+ -+int ipts_eds1_raw_request(struct ipts_context *ipts, u8 *buffer, size_t size, u8 report_id, -+ enum hid_report_type report_type, enum hid_class_request request_type) -+{ -+ int ret = 0; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!buffer) -+ return -EFAULT; -+ -+ if (report_id != IPTS_HID_REPORT_SET_MODE) -+ return -EIO; -+ -+ if (report_type != HID_FEATURE_REPORT) -+ return -EIO; -+ -+ if (size != 2) -+ return -EINVAL; -+ -+ /* -+ * Implement mode switching report for older devices without native HID support. -+ */ -+ -+ if (request_type == HID_REQ_GET_REPORT) { -+ memset(buffer, 0, size); -+ buffer[0] = report_id; -+ buffer[1] = ipts->mode; -+ } else if (request_type == HID_REQ_SET_REPORT) { -+ return ipts_eds1_switch_mode(ipts, buffer[1]); -+ } else { -+ return -EIO; -+ } -+ -+ return ret; -+} -diff --git a/drivers/hid/ipts/eds1.h b/drivers/hid/ipts/eds1.h -new file mode 100644 -index 0000000000000..eeeb6575e3e89 ---- /dev/null -+++ b/drivers/hid/ipts/eds1.h -@@ -0,0 +1,35 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#include -+#include -+ -+#include "context.h" -+ -+/** -+ * ipts_eds1_get_descriptor() - Assembles the HID descriptor of the device. -+ * @ipts: The IPTS driver context. -+ * @desc_buffer: A pointer to the location where the address of the allocated buffer is stored. -+ * @desc_size: A pointer to the location where the size of the allocated buffer is stored. 
-+ * -+ * Returns: 0 on success, <0 on error. -+ */ -+int ipts_eds1_get_descriptor(struct ipts_context *ipts, u8 **desc_buffer, size_t *desc_size); -+ -+/** -+ * ipts_eds1_raw_request() - Executes an output or feature report on the device. -+ * @ipts: The IPTS driver context. -+ * @buffer: The buffer containing the report. -+ * @size: The size of the buffer. -+ * @report_id: The HID report ID. -+ * @report_type: Whether this report is an output or a feature report. -+ * @request_type: Whether this report requests or sends data. -+ * -+ * Returns: 0 on success, <0 on error. -+ */ -+int ipts_eds1_raw_request(struct ipts_context *ipts, u8 *buffer, size_t size, u8 report_id, -+ enum hid_report_type report_type, enum hid_class_request request_type); -diff --git a/drivers/hid/ipts/eds2.c b/drivers/hid/ipts/eds2.c -new file mode 100644 -index 0000000000000..198dc65d78876 ---- /dev/null -+++ b/drivers/hid/ipts/eds2.c -@@ -0,0 +1,144 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "context.h" -+#include "control.h" -+#include "desc.h" -+#include "spec-data.h" -+ -+int ipts_eds2_get_descriptor(struct ipts_context *ipts, u8 **desc_buffer, size_t *desc_size) -+{ -+ size_t size = 0; -+ u8 *buffer = NULL; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!desc_buffer) -+ return -EFAULT; -+ -+ if (!desc_size) -+ return -EFAULT; -+ -+ size = sizeof(ipts_singletouch_descriptor) + ipts->descriptor.size; -+ -+ buffer = kzalloc(size, GFP_KERNEL); -+ if (!buffer) -+ return -ENOMEM; -+ -+ memcpy(buffer, ipts_singletouch_descriptor, sizeof(ipts_singletouch_descriptor)); -+ memcpy(&buffer[sizeof(ipts_singletouch_descriptor)], ipts->descriptor.address, -+ ipts->descriptor.size); -+ -+ *desc_size = size; -+ *desc_buffer = buffer; -+ -+ return 0; -+} -+ -+static int ipts_eds2_get_feature(struct ipts_context 
*ipts, u8 *buffer, size_t size, u8 report_id, -+ enum ipts_feedback_data_type type) -+{ -+ int ret = 0; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!buffer) -+ return -EFAULT; -+ -+ mutex_lock(&ipts->feature_lock); -+ -+ memset(buffer, 0, size); -+ buffer[0] = report_id; -+ -+ memset(&ipts->feature_report, 0, sizeof(ipts->feature_report)); -+ reinit_completion(&ipts->feature_event); -+ -+ ret = ipts_control_hid2me_feedback(ipts, IPTS_FEEDBACK_CMD_TYPE_NONE, type, buffer, size); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to send hid2me feedback: %d\n", ret); -+ goto out; -+ } -+ -+ ret = wait_for_completion_timeout(&ipts->feature_event, msecs_to_jiffies(5000)); -+ if (ret == 0) { -+ dev_warn(ipts->dev, "GET_FEATURES timed out!\n"); -+ ret = -EIO; -+ goto out; -+ } -+ -+ if (!ipts->feature_report.address) { -+ ret = -EFAULT; -+ goto out; -+ } -+ -+ if (ipts->feature_report.size > size) { -+ ret = -ETOOSMALL; -+ goto out; -+ } -+ -+ ret = ipts->feature_report.size; -+ memcpy(buffer, ipts->feature_report.address, ipts->feature_report.size); -+ -+out: -+ mutex_unlock(&ipts->feature_lock); -+ return ret; -+} -+ -+static int ipts_eds2_set_feature(struct ipts_context *ipts, u8 *buffer, size_t size, u8 report_id, -+ enum ipts_feedback_data_type type) -+{ -+ int ret = 0; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!buffer) -+ return -EFAULT; -+ -+ buffer[0] = report_id; -+ -+ ret = ipts_control_hid2me_feedback(ipts, IPTS_FEEDBACK_CMD_TYPE_NONE, type, buffer, size); -+ if (ret) -+ dev_err(ipts->dev, "Failed to send hid2me feedback: %d\n", ret); -+ -+ return ret; -+} -+ -+int ipts_eds2_raw_request(struct ipts_context *ipts, u8 *buffer, size_t size, u8 report_id, -+ enum hid_report_type report_type, enum hid_class_request request_type) -+{ -+ enum ipts_feedback_data_type feedback_type = IPTS_FEEDBACK_DATA_TYPE_VENDOR; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!buffer) -+ return -EFAULT; -+ -+ if (report_type == HID_OUTPUT_REPORT && request_type == HID_REQ_SET_REPORT) 
-+ feedback_type = IPTS_FEEDBACK_DATA_TYPE_OUTPUT_REPORT; -+ else if (report_type == HID_FEATURE_REPORT && request_type == HID_REQ_GET_REPORT) -+ feedback_type = IPTS_FEEDBACK_DATA_TYPE_GET_FEATURES; -+ else if (report_type == HID_FEATURE_REPORT && request_type == HID_REQ_SET_REPORT) -+ feedback_type = IPTS_FEEDBACK_DATA_TYPE_SET_FEATURES; -+ else -+ return -EIO; -+ -+ if (request_type == HID_REQ_GET_REPORT) -+ return ipts_eds2_get_feature(ipts, buffer, size, report_id, feedback_type); -+ else -+ return ipts_eds2_set_feature(ipts, buffer, size, report_id, feedback_type); -+} -diff --git a/drivers/hid/ipts/eds2.h b/drivers/hid/ipts/eds2.h -new file mode 100644 -index 0000000000000..064e3716907ab ---- /dev/null -+++ b/drivers/hid/ipts/eds2.h -@@ -0,0 +1,35 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#include -+#include -+ -+#include "context.h" -+ -+/** -+ * ipts_eds2_get_descriptor() - Assembles the HID descriptor of the device. -+ * @ipts: The IPTS driver context. -+ * @desc_buffer: A pointer to the location where the address of the allocated buffer is stored. -+ * @desc_size: A pointer to the location where the size of the allocated buffer is stored. -+ * -+ * Returns: 0 on success, <0 on error. -+ */ -+int ipts_eds2_get_descriptor(struct ipts_context *ipts, u8 **desc_buffer, size_t *desc_size); -+ -+/** -+ * ipts_eds2_raw_request() - Executes an output or feature report on the device. -+ * @ipts: The IPTS driver context. -+ * @buffer: The buffer containing the report. -+ * @size: The size of the buffer. -+ * @report_id: The HID report ID. -+ * @report_type: Whether this report is an output or a feature report. -+ * @request_type: Whether this report requests or sends data. -+ * -+ * Returns: 0 on success, <0 on error. 
-+ */ -+int ipts_eds2_raw_request(struct ipts_context *ipts, u8 *buffer, size_t size, u8 report_id, -+ enum hid_report_type report_type, enum hid_class_request request_type); -diff --git a/drivers/hid/ipts/hid.c b/drivers/hid/ipts/hid.c -new file mode 100644 -index 0000000000000..e34a1a4f9fa77 ---- /dev/null -+++ b/drivers/hid/ipts/hid.c -@@ -0,0 +1,225 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2022-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "context.h" -+#include "desc.h" -+#include "eds1.h" -+#include "eds2.h" -+#include "hid.h" -+#include "spec-data.h" -+#include "spec-hid.h" -+ -+void ipts_hid_enable(struct ipts_context *ipts) -+{ -+ WRITE_ONCE(ipts->hid_active, true); -+} -+ -+void ipts_hid_disable(struct ipts_context *ipts) -+{ -+ WRITE_ONCE(ipts->hid_active, false); -+} -+ -+static int ipts_hid_start(struct hid_device *hid) -+{ -+ return 0; -+} -+ -+static void ipts_hid_stop(struct hid_device *hid) -+{ -+} -+ -+static int ipts_hid_parse(struct hid_device *hid) -+{ -+ int ret = 0; -+ struct ipts_context *ipts = NULL; -+ -+ u8 *buffer = NULL; -+ size_t size = 0; -+ -+ if (!hid) -+ return -ENODEV; -+ -+ ipts = hid->driver_data; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!READ_ONCE(ipts->hid_active)) -+ return -ENODEV; -+ -+ if (ipts->info.intf_eds == 1) -+ ret = ipts_eds1_get_descriptor(ipts, &buffer, &size); -+ else -+ ret = ipts_eds2_get_descriptor(ipts, &buffer, &size); -+ -+ if (ret) { -+ dev_err(ipts->dev, "Failed to allocate HID descriptor: %d\n", ret); -+ return ret; -+ } -+ -+ ret = hid_parse_report(hid, buffer, size); -+ kfree(buffer); -+ -+ if (ret) { -+ dev_err(ipts->dev, "Failed to parse HID descriptor: %d\n", ret); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static int ipts_hid_raw_request(struct hid_device *hid, unsigned char report_id, __u8 *buffer, -+ size_t size, unsigned char 
report_type, int request_type) -+{ -+ struct ipts_context *ipts = NULL; -+ -+ if (!hid) -+ return -ENODEV; -+ -+ ipts = hid->driver_data; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!READ_ONCE(ipts->hid_active)) -+ return -ENODEV; -+ -+ if (ipts->info.intf_eds == 1) { -+ return ipts_eds1_raw_request(ipts, buffer, size, report_id, report_type, -+ request_type); -+ } else { -+ return ipts_eds2_raw_request(ipts, buffer, size, report_id, report_type, -+ request_type); -+ } -+} -+ -+static struct hid_ll_driver ipts_hid_driver = { -+ .start = ipts_hid_start, -+ .stop = ipts_hid_stop, -+ .open = ipts_hid_start, -+ .close = ipts_hid_stop, -+ .parse = ipts_hid_parse, -+ .raw_request = ipts_hid_raw_request, -+}; -+ -+int ipts_hid_input_data(struct ipts_context *ipts, u32 buffer) -+{ -+ u8 *temp = NULL; -+ struct ipts_hid_header *frame = NULL; -+ struct ipts_data_header *header = NULL; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!ipts->hid) -+ return -ENODEV; -+ -+ if (!READ_ONCE(ipts->hid_active)) -+ return -ENODEV; -+ -+ header = (struct ipts_data_header *)ipts->resources.data[buffer].address; -+ -+ temp = ipts->resources.report.address; -+ memset(temp, 0, ipts->resources.report.size); -+ -+ if (!header) -+ return -EFAULT; -+ -+ if (header->size == 0) -+ return 0; -+ -+ if (header->type == IPTS_DATA_TYPE_HID) -+ return hid_input_report(ipts->hid, HID_INPUT_REPORT, header->data, header->size, 1); -+ -+ if (header->type == IPTS_DATA_TYPE_GET_FEATURES) { -+ ipts->feature_report.address = header->data; -+ ipts->feature_report.size = header->size; -+ -+ complete_all(&ipts->feature_event); -+ return 0; -+ } -+ -+ if (header->type != IPTS_DATA_TYPE_FRAME) -+ return 0; -+ -+ if (header->size + 3 + sizeof(struct ipts_hid_header) > IPTS_HID_REPORT_DATA_SIZE) -+ return -ERANGE; -+ -+ /* -+ * Synthesize a HID report matching the devices that natively send HID reports -+ */ -+ temp[0] = IPTS_HID_REPORT_DATA; -+ -+ frame = (struct ipts_hid_header *)&temp[3]; -+ frame->type = 
IPTS_HID_FRAME_TYPE_RAW; -+ frame->size = header->size + sizeof(*frame); -+ -+ memcpy(frame->data, header->data, header->size); -+ -+ return hid_input_report(ipts->hid, HID_INPUT_REPORT, temp, IPTS_HID_REPORT_DATA_SIZE, 1); -+} -+ -+int ipts_hid_init(struct ipts_context *ipts, struct ipts_device_info info) -+{ -+ int ret = 0; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (ipts->hid) -+ return 0; -+ -+ ipts->hid = hid_allocate_device(); -+ if (IS_ERR(ipts->hid)) { -+ int err = PTR_ERR(ipts->hid); -+ -+ dev_err(ipts->dev, "Failed to allocate HID device: %d\n", err); -+ return err; -+ } -+ -+ ipts->hid->driver_data = ipts; -+ ipts->hid->dev.parent = ipts->dev; -+ ipts->hid->ll_driver = &ipts_hid_driver; -+ -+ ipts->hid->vendor = info.vendor; -+ ipts->hid->product = info.product; -+ ipts->hid->group = HID_GROUP_GENERIC; -+ -+ snprintf(ipts->hid->name, sizeof(ipts->hid->name), "IPTS %04X:%04X", info.vendor, -+ info.product); -+ -+ ret = hid_add_device(ipts->hid); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to add HID device: %d\n", ret); -+ ipts_hid_free(ipts); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+int ipts_hid_free(struct ipts_context *ipts) -+{ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (!ipts->hid) -+ return 0; -+ -+ hid_destroy_device(ipts->hid); -+ ipts->hid = NULL; -+ -+ return 0; -+} -diff --git a/drivers/hid/ipts/hid.h b/drivers/hid/ipts/hid.h -new file mode 100644 -index 0000000000000..1ebe77447903a ---- /dev/null -+++ b/drivers/hid/ipts/hid.h -@@ -0,0 +1,24 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2022-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_HID_H -+#define IPTS_HID_H -+ -+#include -+ -+#include "context.h" -+#include "spec-device.h" -+ -+void ipts_hid_enable(struct ipts_context *ipts); -+void ipts_hid_disable(struct ipts_context *ipts); -+ -+int ipts_hid_input_data(struct ipts_context *ipts, u32 buffer); -+ -+int ipts_hid_init(struct ipts_context *ipts, struct 
ipts_device_info info); -+int ipts_hid_free(struct ipts_context *ipts); -+ -+#endif /* IPTS_HID_H */ -diff --git a/drivers/hid/ipts/main.c b/drivers/hid/ipts/main.c -new file mode 100644 -index 0000000000000..fb5b5c13ee3ea ---- /dev/null -+++ b/drivers/hid/ipts/main.c -@@ -0,0 +1,126 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "context.h" -+#include "control.h" -+#include "mei.h" -+#include "receiver.h" -+#include "spec-device.h" -+ -+/* -+ * The MEI client ID for IPTS functionality. -+ */ -+#define IPTS_ID UUID_LE(0x3e8d0870, 0x271a, 0x4208, 0x8e, 0xb5, 0x9a, 0xcb, 0x94, 0x02, 0xae, 0x04) -+ -+static int ipts_set_dma_mask(struct mei_cl_device *cldev) -+{ -+ if (!cldev) -+ return -EFAULT; -+ -+ if (!dma_coerce_mask_and_coherent(&cldev->dev, DMA_BIT_MASK(64))) -+ return 0; -+ -+ return dma_coerce_mask_and_coherent(&cldev->dev, DMA_BIT_MASK(32)); -+} -+ -+static int ipts_probe(struct mei_cl_device *cldev, const struct mei_cl_device_id *id) -+{ -+ int ret = 0; -+ struct ipts_context *ipts = NULL; -+ -+ if (!cldev) -+ return -EFAULT; -+ -+ ret = ipts_set_dma_mask(cldev); -+ if (ret) { -+ dev_err(&cldev->dev, "Failed to set DMA mask for IPTS: %d\n", ret); -+ return ret; -+ } -+ -+ ret = mei_cldev_enable(cldev); -+ if (ret) { -+ dev_err(&cldev->dev, "Failed to enable MEI device: %d\n", ret); -+ return ret; -+ } -+ -+ ipts = devm_kzalloc(&cldev->dev, sizeof(*ipts), GFP_KERNEL); -+ if (!ipts) { -+ mei_cldev_disable(cldev); -+ return -ENOMEM; -+ } -+ -+ ret = ipts_mei_init(&ipts->mei, cldev); -+ if (ret) { -+ dev_err(&cldev->dev, "Failed to init MEI bus logic: %d\n", ret); -+ return ret; -+ } -+ -+ ipts->dev = &cldev->dev; -+ ipts->mode = IPTS_MODE_EVENT; -+ -+ mutex_init(&ipts->feature_lock); -+ 
init_completion(&ipts->feature_event); -+ -+ mei_cldev_set_drvdata(cldev, ipts); -+ -+ ret = ipts_control_start(ipts); -+ if (ret) { -+ dev_err(&cldev->dev, "Failed to start IPTS: %d\n", ret); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static void ipts_remove(struct mei_cl_device *cldev) -+{ -+ int ret = 0; -+ struct ipts_context *ipts = NULL; -+ -+ if (!cldev) { -+ pr_err("MEI device is NULL!"); -+ return; -+ } -+ -+ ipts = mei_cldev_get_drvdata(cldev); -+ -+ ret = ipts_control_stop(ipts); -+ if (ret) -+ dev_err(&cldev->dev, "Failed to stop IPTS: %d\n", ret); -+ -+ mei_cldev_disable(cldev); -+} -+ -+static struct mei_cl_device_id ipts_device_id_table[] = { -+ { .uuid = IPTS_ID, .version = MEI_CL_VERSION_ANY }, -+ {}, -+}; -+MODULE_DEVICE_TABLE(mei, ipts_device_id_table); -+ -+static struct mei_cl_driver ipts_driver = { -+ .id_table = ipts_device_id_table, -+ .name = "ipts", -+ .probe = ipts_probe, -+ .remove = ipts_remove, -+}; -+module_mei_cl_driver(ipts_driver); -+ -+MODULE_DESCRIPTION("IPTS touchscreen driver"); -+MODULE_AUTHOR("Dorian Stoll "); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/hid/ipts/mei.c b/drivers/hid/ipts/mei.c -new file mode 100644 -index 0000000000000..1e0395ceae4a4 ---- /dev/null -+++ b/drivers/hid/ipts/mei.c -@@ -0,0 +1,188 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "context.h" -+#include "mei.h" -+ -+static void locked_list_add(struct list_head *new, struct list_head *head, -+ struct rw_semaphore *lock) -+{ -+ down_write(lock); -+ list_add(new, head); -+ up_write(lock); -+} -+ -+static void locked_list_del(struct list_head *entry, struct rw_semaphore *lock) -+{ -+ down_write(lock); -+ list_del(entry); -+ up_write(lock); -+} -+ -+static void ipts_mei_incoming(struct mei_cl_device *cldev) -+{ -+ ssize_t ret = 0; 
-+ struct ipts_mei_message *entry = NULL; -+ struct ipts_context *ipts = NULL; -+ -+ if (!cldev) { -+ pr_err("MEI device is NULL!"); -+ return; -+ } -+ -+ ipts = mei_cldev_get_drvdata(cldev); -+ if (!ipts) { -+ pr_err("IPTS driver context is NULL!"); -+ return; -+ } -+ -+ entry = devm_kzalloc(ipts->dev, sizeof(*entry), GFP_KERNEL); -+ if (!entry) -+ return; -+ -+ INIT_LIST_HEAD(&entry->list); -+ -+ do { -+ ret = mei_cldev_recv(cldev, (u8 *)&entry->rsp, sizeof(entry->rsp)); -+ } while (ret == -EINTR); -+ -+ if (ret < 0) { -+ dev_err(ipts->dev, "Error while reading response: %ld\n", ret); -+ return; -+ } -+ -+ if (ret == 0) { -+ dev_err(ipts->dev, "Received empty response\n"); -+ return; -+ } -+ -+ locked_list_add(&entry->list, &ipts->mei.messages, &ipts->mei.message_lock); -+ wake_up_all(&ipts->mei.message_queue); -+} -+ -+static int ipts_mei_search(struct ipts_mei *mei, enum ipts_command_code code, -+ struct ipts_response *rsp) -+{ -+ struct ipts_mei_message *entry = NULL; -+ -+ if (!mei) -+ return -EFAULT; -+ -+ if (!rsp) -+ return -EFAULT; -+ -+ down_read(&mei->message_lock); -+ -+ /* -+ * Iterate over the list of received messages, and check if there is one -+ * matching the requested command code. -+ */ -+ list_for_each_entry(entry, &mei->messages, list) { -+ if (entry->rsp.cmd == code) -+ break; -+ } -+ -+ up_read(&mei->message_lock); -+ -+ /* -+ * If entry is not the list head, this means that the loop above has been stopped early, -+ * and that we found a matching element. We drop the message from the list and return it. 
-+ */ -+ if (!list_entry_is_head(entry, &mei->messages, list)) { -+ locked_list_del(&entry->list, &mei->message_lock); -+ -+ *rsp = entry->rsp; -+ devm_kfree(&mei->cldev->dev, entry); -+ -+ return 0; -+ } -+ -+ return -EAGAIN; -+} -+ -+int ipts_mei_recv(struct ipts_mei *mei, enum ipts_command_code code, struct ipts_response *rsp, -+ u64 timeout) -+{ -+ int ret = 0; -+ -+ if (!mei) -+ return -EFAULT; -+ -+ /* -+ * A timeout of 0 means check and return immideately. -+ */ -+ if (timeout == 0) -+ return ipts_mei_search(mei, code, rsp); -+ -+ /* -+ * A timeout of less than 0 means to wait forever. -+ */ -+ if (timeout < 0) { -+ wait_event(mei->message_queue, ipts_mei_search(mei, code, rsp) == 0); -+ return 0; -+ } -+ -+ ret = wait_event_timeout(mei->message_queue, ipts_mei_search(mei, code, rsp) == 0, -+ msecs_to_jiffies(timeout)); -+ -+ if (ret > 0) -+ return 0; -+ -+ return -EAGAIN; -+} -+ -+int ipts_mei_send(struct ipts_mei *mei, void *data, size_t length) -+{ -+ int ret = 0; -+ -+ if (!mei) -+ return -EFAULT; -+ -+ if (!mei->cldev) -+ return -EFAULT; -+ -+ if (!data) -+ return -EFAULT; -+ -+ do { -+ ret = mei_cldev_send(mei->cldev, (u8 *)data, length); -+ } while (ret == -EINTR); -+ -+ if (ret < 0) -+ return ret; -+ -+ return 0; -+} -+ -+int ipts_mei_init(struct ipts_mei *mei, struct mei_cl_device *cldev) -+{ -+ if (!mei) -+ return -EFAULT; -+ -+ if (!cldev) -+ return -EFAULT; -+ -+ mei->cldev = cldev; -+ -+ INIT_LIST_HEAD(&mei->messages); -+ init_waitqueue_head(&mei->message_queue); -+ init_rwsem(&mei->message_lock); -+ -+ mei_cldev_register_rx_cb(cldev, ipts_mei_incoming); -+ -+ return 0; -+} -diff --git a/drivers/hid/ipts/mei.h b/drivers/hid/ipts/mei.h -new file mode 100644 -index 0000000000000..973bade6b0fdd ---- /dev/null -+++ b/drivers/hid/ipts/mei.h -@@ -0,0 +1,66 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_MEI_H -+#define 
IPTS_MEI_H -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "spec-device.h" -+ -+struct ipts_mei_message { -+ struct list_head list; -+ struct ipts_response rsp; -+}; -+ -+struct ipts_mei { -+ struct mei_cl_device *cldev; -+ -+ struct list_head messages; -+ -+ wait_queue_head_t message_queue; -+ struct rw_semaphore message_lock; -+}; -+ -+/** -+ * ipts_mei_recv() - Receive data from a MEI device. -+ * @mei: The IPTS MEI device context. -+ * @code: The IPTS command code to look for. -+ * @rsp: The address that the received data will be copied to. -+ * @timeout: How many milliseconds the function will wait at most. -+ * -+ * A negative timeout means to wait forever. -+ * -+ * Returns: 0 on success, <0 on error, -EAGAIN if no response has been received. -+ */ -+int ipts_mei_recv(struct ipts_mei *mei, enum ipts_command_code code, struct ipts_response *rsp, -+ u64 timeout); -+ -+/** -+ * ipts_mei_send() - Send data to a MEI device. -+ * @ipts: The IPTS MEI device context. -+ * @data: The data to send. -+ * @size: The size of the data. -+ * -+ * Returns: 0 on success, <0 on error. -+ */ -+int ipts_mei_send(struct ipts_mei *mei, void *data, size_t length); -+ -+/** -+ * ipts_mei_init() - Initialize the MEI device context. -+ * @mei: The MEI device context to initialize. -+ * @cldev: The MEI device the context will be bound to. -+ * -+ * Returns: 0 on success, <0 on error. 
-+ */ -+int ipts_mei_init(struct ipts_mei *mei, struct mei_cl_device *cldev); -+ -+#endif /* IPTS_MEI_H */ -diff --git a/drivers/hid/ipts/receiver.c b/drivers/hid/ipts/receiver.c -new file mode 100644 -index 0000000000000..ef66c3c9db807 ---- /dev/null -+++ b/drivers/hid/ipts/receiver.c -@@ -0,0 +1,250 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "cmd.h" -+#include "context.h" -+#include "control.h" -+#include "hid.h" -+#include "resources.h" -+#include "spec-device.h" -+#include "thread.h" -+ -+static void ipts_receiver_next_doorbell(struct ipts_context *ipts) -+{ -+ u32 *doorbell = (u32 *)ipts->resources.doorbell.address; -+ *doorbell = *doorbell + 1; -+} -+ -+static u32 ipts_receiver_current_doorbell(struct ipts_context *ipts) -+{ -+ u32 *doorbell = (u32 *)ipts->resources.doorbell.address; -+ return *doorbell; -+} -+ -+static void ipts_receiver_backoff(time64_t last, u32 n) -+{ -+ /* -+ * If the last change was less than n seconds ago, -+ * sleep for a shorter period so that new data can be -+ * processed quickly. If there was no change for more than -+ * n seconds, sleep longer to avoid wasting CPU cycles. 
-+ */ -+ if (last + n > ktime_get_seconds()) -+ usleep_range(1 * USEC_PER_MSEC, 5 * USEC_PER_MSEC); -+ else -+ msleep(200); -+} -+ -+static int ipts_receiver_event_loop(struct ipts_thread *thread) -+{ -+ int ret = 0; -+ u32 buffer = 0; -+ -+ struct ipts_context *ipts = NULL; -+ time64_t last = ktime_get_seconds(); -+ -+ if (!thread) -+ return -EFAULT; -+ -+ ipts = thread->data; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ dev_info(ipts->dev, "IPTS running in event mode\n"); -+ -+ while (!ipts_thread_should_stop(thread)) { -+ int i = 0; -+ -+ for (i = 0; i < IPTS_BUFFERS; i++) { -+ ret = ipts_control_wait_data(ipts, false); -+ if (ret == -EAGAIN) -+ break; -+ -+ if (ret) { -+ dev_err(ipts->dev, "Failed to wait for data: %d\n", ret); -+ continue; -+ } -+ -+ buffer = ipts_receiver_current_doorbell(ipts) % IPTS_BUFFERS; -+ ipts_receiver_next_doorbell(ipts); -+ -+ ret = ipts_hid_input_data(ipts, buffer); -+ if (ret) -+ dev_err(ipts->dev, "Failed to process buffer: %d\n", ret); -+ -+ ret = ipts_control_refill_buffer(ipts, buffer); -+ if (ret) -+ dev_err(ipts->dev, "Failed to send feedback: %d\n", ret); -+ -+ ret = ipts_control_request_data(ipts); -+ if (ret) -+ dev_err(ipts->dev, "Failed to request data: %d\n", ret); -+ -+ last = ktime_get_seconds(); -+ } -+ -+ ipts_receiver_backoff(last, 5); -+ } -+ -+ ret = ipts_control_request_flush(ipts); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to request flush: %d\n", ret); -+ return ret; -+ } -+ -+ ret = ipts_control_wait_data(ipts, true); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to wait for data: %d\n", ret); -+ -+ if (ret != -EAGAIN) -+ return ret; -+ else -+ return 0; -+ } -+ -+ ret = ipts_control_wait_flush(ipts); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to wait for flush: %d\n", ret); -+ -+ if (ret != -EAGAIN) -+ return ret; -+ else -+ return 0; -+ } -+ -+ return 0; -+} -+ -+static int ipts_receiver_poll_loop(struct ipts_thread *thread) -+{ -+ int ret = 0; -+ u32 buffer = 0; -+ -+ u32 doorbell = 0; -+ u32 lastdb = 0; -+ 
-+ struct ipts_context *ipts = NULL; -+ time64_t last = ktime_get_seconds(); -+ -+ if (!thread) -+ return -EFAULT; -+ -+ ipts = thread->data; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ dev_info(ipts->dev, "IPTS running in poll mode\n"); -+ -+ while (true) { -+ if (ipts_thread_should_stop(thread)) { -+ ret = ipts_control_request_flush(ipts); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to request flush: %d\n", ret); -+ return ret; -+ } -+ } -+ -+ doorbell = ipts_receiver_current_doorbell(ipts); -+ -+ /* -+ * After filling up one of the data buffers, IPTS will increment -+ * the doorbell. The value of the doorbell stands for the *next* -+ * buffer that IPTS is going to fill. -+ */ -+ while (lastdb != doorbell) { -+ buffer = lastdb % IPTS_BUFFERS; -+ -+ ret = ipts_hid_input_data(ipts, buffer); -+ if (ret) -+ dev_err(ipts->dev, "Failed to process buffer: %d\n", ret); -+ -+ ret = ipts_control_refill_buffer(ipts, buffer); -+ if (ret) -+ dev_err(ipts->dev, "Failed to send feedback: %d\n", ret); -+ -+ last = ktime_get_seconds(); -+ lastdb++; -+ } -+ -+ if (ipts_thread_should_stop(thread)) -+ break; -+ -+ ipts_receiver_backoff(last, 5); -+ } -+ -+ ret = ipts_control_wait_data(ipts, true); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to wait for data: %d\n", ret); -+ -+ if (ret != -EAGAIN) -+ return ret; -+ else -+ return 0; -+ } -+ -+ ret = ipts_control_wait_flush(ipts); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to wait for flush: %d\n", ret); -+ -+ if (ret != -EAGAIN) -+ return ret; -+ else -+ return 0; -+ } -+ -+ return 0; -+} -+ -+int ipts_receiver_start(struct ipts_context *ipts) -+{ -+ int ret = 0; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ if (ipts->mode == IPTS_MODE_EVENT) { -+ ret = ipts_thread_start(&ipts->receiver_loop, ipts_receiver_event_loop, ipts, -+ "ipts_event"); -+ } else if (ipts->mode == IPTS_MODE_POLL) { -+ ret = ipts_thread_start(&ipts->receiver_loop, ipts_receiver_poll_loop, ipts, -+ "ipts_poll"); -+ } else { -+ ret = -EINVAL; -+ } -+ -+ if (ret) { -+ 
dev_err(ipts->dev, "Failed to start receiver loop: %d\n", ret); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+int ipts_receiver_stop(struct ipts_context *ipts) -+{ -+ int ret = 0; -+ -+ if (!ipts) -+ return -EFAULT; -+ -+ ret = ipts_thread_stop(&ipts->receiver_loop); -+ if (ret) { -+ dev_err(ipts->dev, "Failed to stop receiver loop: %d\n", ret); -+ return ret; -+ } -+ -+ return 0; -+} -diff --git a/drivers/hid/ipts/receiver.h b/drivers/hid/ipts/receiver.h -new file mode 100644 -index 0000000000000..3de7da62d40c1 ---- /dev/null -+++ b/drivers/hid/ipts/receiver.h -@@ -0,0 +1,16 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_RECEIVER_H -+#define IPTS_RECEIVER_H -+ -+#include "context.h" -+ -+int ipts_receiver_start(struct ipts_context *ipts); -+int ipts_receiver_stop(struct ipts_context *ipts); -+ -+#endif /* IPTS_RECEIVER_H */ -diff --git a/drivers/hid/ipts/resources.c b/drivers/hid/ipts/resources.c -new file mode 100644 -index 0000000000000..cc14653b2a9f5 ---- /dev/null -+++ b/drivers/hid/ipts/resources.c -@@ -0,0 +1,131 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#include -+#include -+#include -+ -+#include "desc.h" -+#include "resources.h" -+#include "spec-device.h" -+ -+static int ipts_resources_alloc_buffer(struct ipts_buffer *buffer, struct device *dev, size_t size) -+{ -+ if (!buffer) -+ return -EFAULT; -+ -+ if (buffer->address) -+ return 0; -+ -+ buffer->address = dma_alloc_coherent(dev, size, &buffer->dma_address, GFP_KERNEL); -+ -+ if (!buffer->address) -+ return -ENOMEM; -+ -+ buffer->size = size; -+ buffer->device = dev; -+ -+ return 0; -+} -+ -+static void ipts_resources_free_buffer(struct ipts_buffer *buffer) -+{ -+ if (!buffer->address) -+ return; -+ -+ dma_free_coherent(buffer->device, buffer->size, 
buffer->address, buffer->dma_address); -+ -+ buffer->address = NULL; -+ buffer->size = 0; -+ -+ buffer->dma_address = 0; -+ buffer->device = NULL; -+} -+ -+int ipts_resources_init(struct ipts_resources *res, struct device *dev, size_t ds, size_t fs) -+{ -+ int ret = 0; -+ -+ /* -+ * Some compilers (AOSP clang) complain about a redefined -+ * variable when this is declared inside of the for loop. -+ */ -+ int i = 0; -+ -+ if (!res) -+ return -EFAULT; -+ -+ for (i = 0; i < IPTS_BUFFERS; i++) { -+ ret = ipts_resources_alloc_buffer(&res->data[i], dev, ds); -+ if (ret) -+ goto err; -+ } -+ -+ for (i = 0; i < IPTS_BUFFERS; i++) { -+ ret = ipts_resources_alloc_buffer(&res->feedback[i], dev, fs); -+ if (ret) -+ goto err; -+ } -+ -+ ret = ipts_resources_alloc_buffer(&res->doorbell, dev, sizeof(u32)); -+ if (ret) -+ goto err; -+ -+ ret = ipts_resources_alloc_buffer(&res->workqueue, dev, sizeof(u32)); -+ if (ret) -+ goto err; -+ -+ ret = ipts_resources_alloc_buffer(&res->hid2me, dev, fs); -+ if (ret) -+ goto err; -+ -+ ret = ipts_resources_alloc_buffer(&res->descriptor, dev, ds + 8); -+ if (ret) -+ goto err; -+ -+ if (!res->report.address) { -+ res->report.size = IPTS_HID_REPORT_DATA_SIZE; -+ res->report.address = kzalloc(res->report.size, GFP_KERNEL); -+ -+ if (!res->report.address) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ } -+ -+ return 0; -+ -+err: -+ -+ ipts_resources_free(res); -+ return ret; -+} -+ -+int ipts_resources_free(struct ipts_resources *res) -+{ -+ int i = 0; -+ -+ if (!res) -+ return -EFAULT; -+ -+ for (i = 0; i < IPTS_BUFFERS; i++) -+ ipts_resources_free_buffer(&res->data[i]); -+ -+ for (i = 0; i < IPTS_BUFFERS; i++) -+ ipts_resources_free_buffer(&res->feedback[i]); -+ -+ ipts_resources_free_buffer(&res->doorbell); -+ ipts_resources_free_buffer(&res->workqueue); -+ ipts_resources_free_buffer(&res->hid2me); -+ ipts_resources_free_buffer(&res->descriptor); -+ -+ kfree(res->report.address); -+ res->report.address = NULL; -+ res->report.size = 0; -+ -+ return 0; 
-+} -diff --git a/drivers/hid/ipts/resources.h b/drivers/hid/ipts/resources.h -new file mode 100644 -index 0000000000000..2068e13285f0e ---- /dev/null -+++ b/drivers/hid/ipts/resources.h -@@ -0,0 +1,41 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_RESOURCES_H -+#define IPTS_RESOURCES_H -+ -+#include -+#include -+ -+#include "spec-device.h" -+ -+struct ipts_buffer { -+ u8 *address; -+ size_t size; -+ -+ dma_addr_t dma_address; -+ struct device *device; -+}; -+ -+struct ipts_resources { -+ struct ipts_buffer data[IPTS_BUFFERS]; -+ struct ipts_buffer feedback[IPTS_BUFFERS]; -+ -+ struct ipts_buffer doorbell; -+ struct ipts_buffer workqueue; -+ struct ipts_buffer hid2me; -+ -+ struct ipts_buffer descriptor; -+ -+ // Buffer for synthesizing HID reports -+ struct ipts_buffer report; -+}; -+ -+int ipts_resources_init(struct ipts_resources *res, struct device *dev, size_t ds, size_t fs); -+int ipts_resources_free(struct ipts_resources *res); -+ -+#endif /* IPTS_RESOURCES_H */ -diff --git a/drivers/hid/ipts/spec-data.h b/drivers/hid/ipts/spec-data.h -new file mode 100644 -index 0000000000000..e8dd98895a7ee ---- /dev/null -+++ b/drivers/hid/ipts/spec-data.h -@@ -0,0 +1,100 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2016 Intel Corporation -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_SPEC_DATA_H -+#define IPTS_SPEC_DATA_H -+ -+#include -+#include -+ -+/** -+ * enum ipts_feedback_cmd_type - Commands that can be executed on the sensor through feedback. 
-+ */ -+enum ipts_feedback_cmd_type { -+ IPTS_FEEDBACK_CMD_TYPE_NONE = 0, -+ IPTS_FEEDBACK_CMD_TYPE_SOFT_RESET = 1, -+ IPTS_FEEDBACK_CMD_TYPE_GOTO_ARMED = 2, -+ IPTS_FEEDBACK_CMD_TYPE_GOTO_SENSING = 3, -+ IPTS_FEEDBACK_CMD_TYPE_GOTO_SLEEP = 4, -+ IPTS_FEEDBACK_CMD_TYPE_GOTO_DOZE = 5, -+ IPTS_FEEDBACK_CMD_TYPE_HARD_RESET = 6, -+}; -+ -+/** -+ * enum ipts_feedback_data_type - Defines what data a feedback buffer contains. -+ * @IPTS_FEEDBACK_DATA_TYPE_VENDOR: The buffer contains vendor specific feedback. -+ * @IPTS_FEEDBACK_DATA_TYPE_SET_FEATURES: The buffer contains a HID set features report. -+ * @IPTS_FEEDBACK_DATA_TYPE_GET_FEATURES: The buffer contains a HID get features report. -+ * @IPTS_FEEDBACK_DATA_TYPE_OUTPUT_REPORT: The buffer contains a HID output report. -+ * @IPTS_FEEDBACK_DATA_TYPE_STORE_DATA: The buffer contains calibration data for the sensor. -+ */ -+enum ipts_feedback_data_type { -+ IPTS_FEEDBACK_DATA_TYPE_VENDOR = 0, -+ IPTS_FEEDBACK_DATA_TYPE_SET_FEATURES = 1, -+ IPTS_FEEDBACK_DATA_TYPE_GET_FEATURES = 2, -+ IPTS_FEEDBACK_DATA_TYPE_OUTPUT_REPORT = 3, -+ IPTS_FEEDBACK_DATA_TYPE_STORE_DATA = 4, -+}; -+ -+/** -+ * struct ipts_feedback_header - Header that is prefixed to the data in a feedback buffer. -+ * @cmd_type: A command that should be executed on the sensor. -+ * @size: The size of the payload to be written. -+ * @buffer: The ID of the buffer that contains this feedback data. -+ * @protocol: The protocol version of the EDS. -+ * @data_type: The type of data that the buffer contains. -+ * @spi_offset: The offset at which to write the payload data to the sensor. -+ * @payload: Payload for the feedback command, or 0 if no payload is sent. 
-+ */ -+struct ipts_feedback_header { -+ enum ipts_feedback_cmd_type cmd_type; -+ u32 size; -+ u32 buffer; -+ u32 protocol; -+ enum ipts_feedback_data_type data_type; -+ u32 spi_offset; -+ u8 reserved[40]; -+ u8 payload[]; -+} __packed; -+ -+static_assert(sizeof(struct ipts_feedback_header) == 64); -+ -+/** -+ * enum ipts_data_type - Defines what type of data a buffer contains. -+ * @IPTS_DATA_TYPE_FRAME: Raw data frame. -+ * @IPTS_DATA_TYPE_ERROR: Error data. -+ * @IPTS_DATA_TYPE_VENDOR: Vendor specific data. -+ * @IPTS_DATA_TYPE_HID: A HID report. -+ * @IPTS_DATA_TYPE_GET_FEATURES: The response to a GET_FEATURES HID2ME command. -+ */ -+enum ipts_data_type { -+ IPTS_DATA_TYPE_FRAME = 0x00, -+ IPTS_DATA_TYPE_ERROR = 0x01, -+ IPTS_DATA_TYPE_VENDOR = 0x02, -+ IPTS_DATA_TYPE_HID = 0x03, -+ IPTS_DATA_TYPE_GET_FEATURES = 0x04, -+ IPTS_DATA_TYPE_DESCRIPTOR = 0x05, -+}; -+ -+/** -+ * struct ipts_data_header - Header that is prefixed to the data in a data buffer. -+ * @type: What data the buffer contains. -+ * @size: How much data the buffer contains. -+ * @buffer: Which buffer the data is in. -+ */ -+struct ipts_data_header { -+ enum ipts_data_type type; -+ u32 size; -+ u32 buffer; -+ u8 reserved[52]; -+ u8 data[]; -+} __packed; -+ -+static_assert(sizeof(struct ipts_data_header) == 64); -+ -+#endif /* IPTS_SPEC_DATA_H */ -diff --git a/drivers/hid/ipts/spec-device.h b/drivers/hid/ipts/spec-device.h -new file mode 100644 -index 0000000000000..41845f9d90257 ---- /dev/null -+++ b/drivers/hid/ipts/spec-device.h -@@ -0,0 +1,290 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2016 Intel Corporation -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_SPEC_DEVICE_H -+#define IPTS_SPEC_DEVICE_H -+ -+#include -+#include -+ -+/* -+ * The amount of buffers that IPTS can use for data transfer. 
-+ */ -+#define IPTS_BUFFERS 16 -+ -+/* -+ * The buffer ID that is used for HID2ME feedback -+ */ -+#define IPTS_HID2ME_BUFFER IPTS_BUFFERS -+ -+/** -+ * enum ipts_command - Commands that can be sent to the IPTS hardware. -+ * @IPTS_CMD_GET_DEVICE_INFO: Retrieves vendor information from the device. -+ * @IPTS_CMD_SET_MODE: Changes the mode that the device will operate in. -+ * @IPTS_CMD_SET_MEM_WINDOW: Configures memory buffers for passing data between device and driver. -+ * @IPTS_CMD_QUIESCE_IO: Stops the data flow from the device to the driver. -+ * @IPTS_CMD_READY_FOR_DATA: Informs the device that the driver is ready to receive data. -+ * @IPTS_CMD_FEEDBACK: Informs the device that a buffer was processed and can be refilled. -+ * @IPTS_CMD_CLEAR_MEM_WINDOW: Stops the data flow and clears the buffer addresses on the device. -+ * @IPTS_CMD_RESET_SENSOR: Resets the sensor to its default state. -+ * @IPTS_CMD_GET_DESCRIPTOR: Retrieves the HID descriptor of the device. -+ */ -+enum ipts_command_code { -+ IPTS_CMD_GET_DEVICE_INFO = 0x01, -+ IPTS_CMD_SET_MODE = 0x02, -+ IPTS_CMD_SET_MEM_WINDOW = 0x03, -+ IPTS_CMD_QUIESCE_IO = 0x04, -+ IPTS_CMD_READY_FOR_DATA = 0x05, -+ IPTS_CMD_FEEDBACK = 0x06, -+ IPTS_CMD_CLEAR_MEM_WINDOW = 0x07, -+ IPTS_CMD_RESET_SENSOR = 0x0B, -+ IPTS_CMD_GET_DESCRIPTOR = 0x0F, -+}; -+ -+/** -+ * enum ipts_status - Possible status codes returned by the IPTS device. -+ * @IPTS_STATUS_SUCCESS: Operation completed successfully. -+ * @IPTS_STATUS_INVALID_PARAMS: Command contained an invalid payload. -+ * @IPTS_STATUS_ACCESS_DENIED: ME could not validate a buffer address. -+ * @IPTS_STATUS_CMD_SIZE_ERROR: Command contains an invalid payload. -+ * @IPTS_STATUS_NOT_READY: Buffer addresses have not been set. -+ * @IPTS_STATUS_REQUEST_OUTSTANDING: There is an outstanding command of the same type. -+ * @IPTS_STATUS_NO_SENSOR_FOUND: No sensor could be found. -+ * @IPTS_STATUS_OUT_OF_MEMORY: Not enough free memory for requested operation. 
-+ * @IPTS_STATUS_INTERNAL_ERROR: An unexpected error occurred. -+ * @IPTS_STATUS_SENSOR_DISABLED: The sensor has been disabled and must be reinitialized. -+ * @IPTS_STATUS_COMPAT_CHECK_FAIL: Compatibility revision check between sensor and ME failed. -+ * The host can ignore this error and attempt to continue. -+ * @IPTS_STATUS_SENSOR_EXPECTED_RESET: The sensor went through a reset initiated by the driver. -+ * @IPTS_STATUS_SENSOR_UNEXPECTED_RESET: The sensor went through an unexpected reset. -+ * @IPTS_STATUS_RESET_FAILED: Requested sensor reset failed to complete. -+ * @IPTS_STATUS_TIMEOUT: The operation timed out. -+ * @IPTS_STATUS_TEST_MODE_FAIL: Test mode pattern did not match expected values. -+ * @IPTS_STATUS_SENSOR_FAIL_FATAL: The sensor reported an error during reset sequence. -+ * Further progress is not possible. -+ * @IPTS_STATUS_SENSOR_FAIL_NONFATAL: The sensor reported an error during reset sequence. -+ * The driver can attempt to continue. -+ * @IPTS_STATUS_INVALID_DEVICE_CAPS: The device reported invalid capabilities. -+ * @IPTS_STATUS_QUIESCE_IO_IN_PROGRESS: Command cannot be completed until Quiesce IO is done. 
-+ */ -+enum ipts_status { -+ IPTS_STATUS_SUCCESS = 0x00, -+ IPTS_STATUS_INVALID_PARAMS = 0x01, -+ IPTS_STATUS_ACCESS_DENIED = 0x02, -+ IPTS_STATUS_CMD_SIZE_ERROR = 0x03, -+ IPTS_STATUS_NOT_READY = 0x04, -+ IPTS_STATUS_REQUEST_OUTSTANDING = 0x05, -+ IPTS_STATUS_NO_SENSOR_FOUND = 0x06, -+ IPTS_STATUS_OUT_OF_MEMORY = 0x07, -+ IPTS_STATUS_INTERNAL_ERROR = 0x08, -+ IPTS_STATUS_SENSOR_DISABLED = 0x09, -+ IPTS_STATUS_COMPAT_CHECK_FAIL = 0x0A, -+ IPTS_STATUS_SENSOR_EXPECTED_RESET = 0x0B, -+ IPTS_STATUS_SENSOR_UNEXPECTED_RESET = 0x0C, -+ IPTS_STATUS_RESET_FAILED = 0x0D, -+ IPTS_STATUS_TIMEOUT = 0x0E, -+ IPTS_STATUS_TEST_MODE_FAIL = 0x0F, -+ IPTS_STATUS_SENSOR_FAIL_FATAL = 0x10, -+ IPTS_STATUS_SENSOR_FAIL_NONFATAL = 0x11, -+ IPTS_STATUS_INVALID_DEVICE_CAPS = 0x12, -+ IPTS_STATUS_QUIESCE_IO_IN_PROGRESS = 0x13, -+}; -+ -+/** -+ * struct ipts_command - Message that is sent to the device for calling a command. -+ * @cmd: The command that will be called. -+ * @payload: Payload containing parameters for the called command. -+ */ -+struct ipts_command { -+ enum ipts_command_code cmd; -+ u8 payload[320]; -+} __packed; -+ -+static_assert(sizeof(struct ipts_command) == 324); -+ -+/** -+ * enum ipts_mode - Configures what data the device produces and how its sent. -+ * @IPTS_MODE_EVENT: The device will send an event once a buffer was filled. -+ * Older devices will return singletouch data in this mode. -+ * @IPTS_MODE_POLL: The device will notify the driver by incrementing the doorbell value. -+ * Older devices will return multitouch data in this mode. -+ */ -+enum ipts_mode { -+ IPTS_MODE_EVENT = 0x00, -+ IPTS_MODE_POLL = 0x01, -+}; -+ -+/** -+ * struct ipts_set_mode - Payload for the SET_MODE command. -+ * @mode: Changes the mode that IPTS will operate in. 
-+ */ -+struct ipts_set_mode { -+ enum ipts_mode mode; -+ u8 reserved[12]; -+} __packed; -+ -+static_assert(sizeof(struct ipts_set_mode) == 16); -+ -+#define IPTS_WORKQUEUE_SIZE 8192 -+#define IPTS_WORKQUEUE_ITEM_SIZE 16 -+ -+/** -+ * struct ipts_mem_window - Payload for the SET_MEM_WINDOW command. -+ * @data_addr_lower: Lower 32 bits of the data buffer addresses. -+ * @data_addr_upper: Upper 32 bits of the data buffer addresses. -+ * @workqueue_addr_lower: Lower 32 bits of the workqueue buffer address. -+ * @workqueue_addr_upper: Upper 32 bits of the workqueue buffer address. -+ * @doorbell_addr_lower: Lower 32 bits of the doorbell buffer address. -+ * @doorbell_addr_upper: Upper 32 bits of the doorbell buffer address. -+ * @feedbackaddr_lower: Lower 32 bits of the feedback buffer addresses. -+ * @feedbackaddr_upper: Upper 32 bits of the feedback buffer addresses. -+ * @hid2me_addr_lower: Lower 32 bits of the hid2me buffer address. -+ * @hid2me_addr_upper: Upper 32 bits of the hid2me buffer address. -+ * @hid2me_size: Size of the hid2me feedback buffer. -+ * @workqueue_item_size: Magic value. Must be 16. -+ * @workqueue_size: Magic value. Must be 8192. -+ * -+ * The workqueue related items in this struct are required for using -+ * GuC submission with binary processing firmware. Since this driver does -+ * not use GuC submission and instead exports raw data to userspace, these -+ * items are not actually used, but they need to be allocated and passed -+ * to the device, otherwise initialization will fail. 
-+ */ -+struct ipts_mem_window { -+ u32 data_addr_lower[IPTS_BUFFERS]; -+ u32 data_addr_upper[IPTS_BUFFERS]; -+ u32 workqueue_addr_lower; -+ u32 workqueue_addr_upper; -+ u32 doorbell_addr_lower; -+ u32 doorbell_addr_upper; -+ u32 feedback_addr_lower[IPTS_BUFFERS]; -+ u32 feedback_addr_upper[IPTS_BUFFERS]; -+ u32 hid2me_addr_lower; -+ u32 hid2me_addr_upper; -+ u32 hid2me_size; -+ u8 reserved1; -+ u8 workqueue_item_size; -+ u16 workqueue_size; -+ u8 reserved[32]; -+} __packed; -+ -+static_assert(sizeof(struct ipts_mem_window) == 320); -+ -+/** -+ * struct ipts_quiesce_io - Payload for the QUIESCE_IO command. -+ */ -+struct ipts_quiesce_io { -+ u8 reserved[12]; -+} __packed; -+ -+static_assert(sizeof(struct ipts_quiesce_io) == 12); -+ -+/** -+ * struct ipts_feedback - Payload for the FEEDBACK command. -+ * @buffer: The buffer that the device should refill. -+ */ -+struct ipts_feedback { -+ u32 buffer; -+ u8 reserved[12]; -+} __packed; -+ -+static_assert(sizeof(struct ipts_feedback) == 16); -+ -+/** -+ * enum ipts_reset_type - Possible ways of resetting the device. -+ * @IPTS_RESET_TYPE_HARD: Perform hardware reset using GPIO pin. -+ * @IPTS_RESET_TYPE_SOFT: Perform software reset using SPI command. -+ */ -+enum ipts_reset_type { -+ IPTS_RESET_TYPE_HARD = 0x00, -+ IPTS_RESET_TYPE_SOFT = 0x01, -+}; -+ -+/** -+ * struct ipts_reset - Payload for the RESET_SENSOR command. -+ * @type: How the device should get reset. -+ */ -+struct ipts_reset_sensor { -+ enum ipts_reset_type type; -+ u8 reserved[4]; -+} __packed; -+ -+static_assert(sizeof(struct ipts_reset_sensor) == 8); -+ -+/** -+ * struct ipts_get_descriptor - Payload for the GET_DESCRIPTOR command. -+ * @addr_lower: The lower 32 bits of the descriptor buffer address. -+ * @addr_upper: The upper 32 bits of the descriptor buffer address. -+ * @magic: A magic value. Must be 8. 
-+ */ -+struct ipts_get_descriptor { -+ u32 addr_lower; -+ u32 addr_upper; -+ u32 magic; -+ u8 reserved[12]; -+} __packed; -+ -+static_assert(sizeof(struct ipts_get_descriptor) == 24); -+ -+/* -+ * The type of a response is indicated by a -+ * command code, with the most significant bit flipped to 1. -+ */ -+#define IPTS_RSP_BIT BIT(31) -+ -+/** -+ * struct ipts_response - Data returned from the device in response to a command. -+ * @cmd: The command that this response answers (IPTS_RSP_BIT will be 1). -+ * @status: The return code of the command. -+ * @payload: The data that was produced by the command. -+ */ -+struct ipts_response { -+ enum ipts_command_code cmd; -+ enum ipts_status status; -+ u8 payload[80]; -+} __packed; -+ -+static_assert(sizeof(struct ipts_response) == 88); -+ -+/** -+ * struct ipts_device_info - Vendor information of the IPTS device. -+ * @vendor: Vendor ID of this device. -+ * @product: Product ID of this device. -+ * @hw_version: Hardware revision of this device. -+ * @fw_version: Firmware revision of this device. -+ * @data_size: Requested size for a data buffer. -+ * @feedback_size: Requested size for a feedback buffer. -+ * @mode: Mode that the device currently operates in. -+ * @max_contacts: Maximum amount of concurrent touches the sensor can process. -+ * @sensor_min_eds: The minimum EDS version supported by the sensor. -+ * @sensor_max_eds: The maximum EDS version supported by the sensor. -+ * @me_min_eds: The minimum EDS version supported by the ME for communicating with the sensor. -+ * @me_max_eds: The maximum EDS version supported by the ME for communicating with the sensor. -+ * @intf_eds: The EDS version implemented by the interface between ME and host. 
-+ */ -+struct ipts_device_info { -+ u16 vendor; -+ u16 product; -+ u32 hw_version; -+ u32 fw_version; -+ u32 data_size; -+ u32 feedback_size; -+ enum ipts_mode mode; -+ u8 max_contacts; -+ u8 reserved1[3]; -+ u8 sensor_min_eds; -+ u8 sensor_maj_eds; -+ u8 me_min_eds; -+ u8 me_maj_eds; -+ u8 intf_eds; -+ u8 reserved2[11]; -+} __packed; -+ -+static_assert(sizeof(struct ipts_device_info) == 44); -+ -+#endif /* IPTS_SPEC_DEVICE_H */ -diff --git a/drivers/hid/ipts/spec-hid.h b/drivers/hid/ipts/spec-hid.h -new file mode 100644 -index 0000000000000..5a58d4a0a610f ---- /dev/null -+++ b/drivers/hid/ipts/spec-hid.h -@@ -0,0 +1,34 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2020-2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_SPEC_HID_H -+#define IPTS_SPEC_HID_H -+ -+#include -+#include -+ -+/* -+ * Made-up type for passing raw IPTS data in a HID report. -+ */ -+#define IPTS_HID_FRAME_TYPE_RAW 0xEE -+ -+/** -+ * struct ipts_hid_frame - Header that is prefixed to raw IPTS data wrapped in a HID report. -+ * @size: Size of the data inside the report, including this header. -+ * @type: What type of data does this report contain. 
-+ */ -+struct ipts_hid_header { -+ u32 size; -+ u8 reserved1; -+ u8 type; -+ u8 reserved2; -+ u8 data[]; -+} __packed; -+ -+static_assert(sizeof(struct ipts_hid_header) == 7); -+ -+#endif /* IPTS_SPEC_HID_H */ -diff --git a/drivers/hid/ipts/thread.c b/drivers/hid/ipts/thread.c -new file mode 100644 -index 0000000000000..355e92bea26f8 ---- /dev/null -+++ b/drivers/hid/ipts/thread.c -@@ -0,0 +1,84 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#include "thread.h" -+ -+bool ipts_thread_should_stop(struct ipts_thread *thread) -+{ -+ if (!thread) -+ return false; -+ -+ return READ_ONCE(thread->should_stop); -+} -+ -+static int ipts_thread_runner(void *data) -+{ -+ int ret = 0; -+ struct ipts_thread *thread = data; -+ -+ if (!thread) -+ return -EFAULT; -+ -+ if (!thread->threadfn) -+ return -EFAULT; -+ -+ ret = thread->threadfn(thread); -+ complete_all(&thread->done); -+ -+ return ret; -+} -+ -+int ipts_thread_start(struct ipts_thread *thread, int (*threadfn)(struct ipts_thread *thread), -+ void *data, const char *name) -+{ -+ if (!thread) -+ return -EFAULT; -+ -+ if (!threadfn) -+ return -EFAULT; -+ -+ init_completion(&thread->done); -+ -+ thread->data = data; -+ thread->should_stop = false; -+ thread->threadfn = threadfn; -+ -+ thread->thread = kthread_run(ipts_thread_runner, thread, name); -+ return PTR_ERR_OR_ZERO(thread->thread); -+} -+ -+int ipts_thread_stop(struct ipts_thread *thread) -+{ -+ int ret = 0; -+ -+ if (!thread) -+ return -EFAULT; -+ -+ if (!thread->thread) -+ return 0; -+ -+ WRITE_ONCE(thread->should_stop, true); -+ -+ /* -+ * Make sure that the write has gone through before waiting. 
-+ */ -+ wmb(); -+ -+ wait_for_completion(&thread->done); -+ ret = kthread_stop(thread->thread); -+ -+ thread->thread = NULL; -+ thread->data = NULL; -+ thread->threadfn = NULL; -+ -+ return ret; -+} -diff --git a/drivers/hid/ipts/thread.h b/drivers/hid/ipts/thread.h -new file mode 100644 -index 0000000000000..1f966b8b32c45 ---- /dev/null -+++ b/drivers/hid/ipts/thread.h -@@ -0,0 +1,59 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Copyright (c) 2023 Dorian Stoll -+ * -+ * Linux driver for Intel Precise Touch & Stylus -+ */ -+ -+#ifndef IPTS_THREAD_H -+#define IPTS_THREAD_H -+ -+#include -+#include -+#include -+ -+/* -+ * This wrapper over kthread is necessary, because calling kthread_stop makes it impossible -+ * to issue MEI commands from that thread while it shuts itself down. By using a custom -+ * boolean variable and a completion object, we can call kthread_stop only when the thread -+ * already finished all of its work and has returned. -+ */ -+struct ipts_thread { -+ struct task_struct *thread; -+ -+ bool should_stop; -+ struct completion done; -+ -+ void *data; -+ int (*threadfn)(struct ipts_thread *thread); -+}; -+ -+/** -+ * ipts_thread_should_stop() - Returns true if the thread is asked to terminate. -+ * @thread: The current thread. -+ * -+ * Returns: true if the thread should stop, false if not. -+ */ -+bool ipts_thread_should_stop(struct ipts_thread *thread); -+ -+/** -+ * ipts_thread_start() - Starts an IPTS thread. -+ * @thread: The thread to initialize and start. -+ * @threadfn: The function to execute. -+ * @data: An argument that will be passed to threadfn. -+ * @name: The name of the new thread. -+ * -+ * Returns: 0 on success, <0 on error. -+ */ -+int ipts_thread_start(struct ipts_thread *thread, int (*threadfn)(struct ipts_thread *thread), -+ void *data, const char name[]); -+ -+/** -+ * ipts_thread_stop() - Asks the thread to terminate and waits until it has finished. -+ * @thread: The thread that should stop. 
-+ * -+ * Returns: The return value of the thread function. -+ */ -+int ipts_thread_stop(struct ipts_thread *thread); -+ -+#endif /* IPTS_THREAD_H */ --- -2.42.0 - -From 033de13abc9653b2d773f06182465e03d5d6463b Mon Sep 17 00:00:00 2001 -From: Dorian Stoll -Date: Sun, 11 Dec 2022 12:03:38 +0100 -Subject: [PATCH] iommu: intel: Disable source id verification for ITHC - -Signed-off-by: Dorian Stoll -Patchset: ithc ---- - drivers/iommu/intel/irq_remapping.c | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c -index 29b9e55dcf26c..986e91c813ae1 100644 ---- a/drivers/iommu/intel/irq_remapping.c -+++ b/drivers/iommu/intel/irq_remapping.c -@@ -386,6 +386,22 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev) - data.busmatch_count = 0; - pci_for_each_dma_alias(dev, set_msi_sid_cb, &data); - -+ /* -+ * The Intel Touch Host Controller is at 00:10.6, but for some reason -+ * the MSI interrupts have request id 01:05.0. -+ * Disable id verification to work around this. -+ * FIXME Find proper fix or turn this into a quirk. -+ */ -+ if (dev->vendor == PCI_VENDOR_ID_INTEL && (dev->class >> 8) == PCI_CLASS_INPUT_PEN) { -+ switch(dev->device) { -+ case 0x98d0: case 0x98d1: // LKF -+ case 0xa0d0: case 0xa0d1: // TGL LP -+ case 0x43d0: case 0x43d1: // TGL H -+ set_irte_sid(irte, SVT_NO_VERIFY, SQ_ALL_16, 0); -+ return 0; -+ } -+ } -+ - /* - * DMA alias provides us with a PCI device and alias. 
The only case - * where the it will return an alias on a different bus than the --- -2.42.0 - -From 0dd32bcfb70f9e36cfa009d94cd6c86a4839cff3 Mon Sep 17 00:00:00 2001 -From: Dorian Stoll -Date: Sun, 11 Dec 2022 12:10:54 +0100 -Subject: [PATCH] hid: Add support for Intel Touch Host Controller - -Based on quo/ithc-linux@55803a2 - -Signed-off-by: Dorian Stoll -Patchset: ithc ---- - drivers/hid/Kconfig | 2 + - drivers/hid/Makefile | 1 + - drivers/hid/ithc/Kbuild | 6 + - drivers/hid/ithc/Kconfig | 12 + - drivers/hid/ithc/ithc-debug.c | 96 ++++++ - drivers/hid/ithc/ithc-dma.c | 258 ++++++++++++++++ - drivers/hid/ithc/ithc-dma.h | 67 +++++ - drivers/hid/ithc/ithc-main.c | 534 ++++++++++++++++++++++++++++++++++ - drivers/hid/ithc/ithc-regs.c | 64 ++++ - drivers/hid/ithc/ithc-regs.h | 186 ++++++++++++ - drivers/hid/ithc/ithc.h | 60 ++++ - 11 files changed, 1286 insertions(+) - create mode 100644 drivers/hid/ithc/Kbuild - create mode 100644 drivers/hid/ithc/Kconfig - create mode 100644 drivers/hid/ithc/ithc-debug.c - create mode 100644 drivers/hid/ithc/ithc-dma.c - create mode 100644 drivers/hid/ithc/ithc-dma.h - create mode 100644 drivers/hid/ithc/ithc-main.c - create mode 100644 drivers/hid/ithc/ithc-regs.c - create mode 100644 drivers/hid/ithc/ithc-regs.h - create mode 100644 drivers/hid/ithc/ithc.h - -diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig -index 0b9d245d10e54..8ba1c309228be 100644 ---- a/drivers/hid/Kconfig -+++ b/drivers/hid/Kconfig -@@ -1347,4 +1347,6 @@ source "drivers/hid/surface-hid/Kconfig" - - source "drivers/hid/ipts/Kconfig" - -+source "drivers/hid/ithc/Kconfig" -+ - endif # HID_SUPPORT -diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile -index 2ef21b257d0b5..e94b79727b489 100644 ---- a/drivers/hid/Makefile -+++ b/drivers/hid/Makefile -@@ -171,3 +171,4 @@ obj-$(CONFIG_AMD_SFH_HID) += amd-sfh-hid/ - obj-$(CONFIG_SURFACE_HID_CORE) += surface-hid/ - - obj-$(CONFIG_HID_IPTS) += ipts/ -+obj-$(CONFIG_HID_ITHC) += ithc/ -diff --git 
a/drivers/hid/ithc/Kbuild b/drivers/hid/ithc/Kbuild -new file mode 100644 -index 0000000000000..aea83f2ac07b4 ---- /dev/null -+++ b/drivers/hid/ithc/Kbuild -@@ -0,0 +1,6 @@ -+obj-$(CONFIG_HID_ITHC) := ithc.o -+ -+ithc-objs := ithc-main.o ithc-regs.o ithc-dma.o ithc-debug.o -+ -+ccflags-y := -std=gnu11 -Wno-declaration-after-statement -+ -diff --git a/drivers/hid/ithc/Kconfig b/drivers/hid/ithc/Kconfig -new file mode 100644 -index 0000000000000..ede7130236096 ---- /dev/null -+++ b/drivers/hid/ithc/Kconfig -@@ -0,0 +1,12 @@ -+config HID_ITHC -+ tristate "Intel Touch Host Controller" -+ depends on PCI -+ depends on HID -+ help -+ Say Y here if your system has a touchscreen using Intels -+ Touch Host Controller (ITHC / IPTS) technology. -+ -+ If unsure say N. -+ -+ To compile this driver as a module, choose M here: the -+ module will be called ithc. -diff --git a/drivers/hid/ithc/ithc-debug.c b/drivers/hid/ithc/ithc-debug.c -new file mode 100644 -index 0000000000000..57bf125c45bd5 ---- /dev/null -+++ b/drivers/hid/ithc/ithc-debug.c -@@ -0,0 +1,96 @@ -+#include "ithc.h" -+ -+void ithc_log_regs(struct ithc *ithc) { -+ if (!ithc->prev_regs) return; -+ u32 __iomem *cur = (__iomem void*)ithc->regs; -+ u32 *prev = (void*)ithc->prev_regs; -+ for (int i = 1024; i < sizeof *ithc->regs / 4; i++) { -+ u32 x = readl(cur + i); -+ if (x != prev[i]) { -+ pci_info(ithc->pci, "reg %04x: %08x -> %08x\n", i * 4, prev[i], x); -+ prev[i] = x; -+ } -+ } -+} -+ -+static ssize_t ithc_debugfs_cmd_write(struct file *f, const char __user *buf, size_t len, loff_t *offset) { -+ struct ithc *ithc = file_inode(f)->i_private; -+ char cmd[256]; -+ if (!ithc || !ithc->pci) return -ENODEV; -+ if (!len) return -EINVAL; -+ if (len >= sizeof cmd) return -EINVAL; -+ if (copy_from_user(cmd, buf, len)) return -EFAULT; -+ cmd[len] = 0; -+ if (cmd[len-1] == '\n') cmd[len-1] = 0; -+ pci_info(ithc->pci, "debug command: %s\n", cmd); -+ u32 n = 0; -+ const char *s = cmd + 1; -+ u32 a[32]; -+ while (*s && *s != 
'\n') { -+ if (n >= ARRAY_SIZE(a)) return -EINVAL; -+ if (*s++ != ' ') return -EINVAL; -+ char *e; -+ a[n++] = simple_strtoul(s, &e, 0); -+ if (e == s) return -EINVAL; -+ s = e; -+ } -+ ithc_log_regs(ithc); -+ switch(cmd[0]) { -+ case 'x': // reset -+ ithc_reset(ithc); -+ break; -+ case 'w': // write register: offset mask value -+ if (n != 3 || (a[0] & 3)) return -EINVAL; -+ pci_info(ithc->pci, "debug write 0x%04x = 0x%08x (mask 0x%08x)\n", a[0], a[2], a[1]); -+ bitsl(((__iomem u32 *)ithc->regs) + a[0] / 4, a[1], a[2]); -+ break; -+ case 'r': // read register: offset -+ if (n != 1 || (a[0] & 3)) return -EINVAL; -+ pci_info(ithc->pci, "debug read 0x%04x = 0x%08x\n", a[0], readl(((__iomem u32 *)ithc->regs) + a[0] / 4)); -+ break; -+ case 's': // spi command: cmd offset len data... -+ // read config: s 4 0 64 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -+ // set touch cfg: s 6 12 4 XX -+ if (n < 3 || a[2] > (n - 3) * 4) return -EINVAL; -+ pci_info(ithc->pci, "debug spi command %u with %u bytes of data\n", a[0], a[2]); -+ if (!CHECK(ithc_spi_command, ithc, a[0], a[1], a[2], a + 3)) -+ for (u32 i = 0; i < (a[2] + 3) / 4; i++) pci_info(ithc->pci, "resp %u = 0x%08x\n", i, a[3+i]); -+ break; -+ case 'd': // dma command: cmd len data... 
-+ // get report descriptor: d 7 8 0 0 -+ // enable multitouch: d 3 2 0x0105 -+ if (n < 2 || a[1] > (n - 2) * 4) return -EINVAL; -+ pci_info(ithc->pci, "debug dma command %u with %u bytes of data\n", a[0], a[1]); -+ if (ithc_dma_tx(ithc, a[0], a[1], a + 2)) pci_err(ithc->pci, "dma tx failed\n"); -+ break; -+ default: -+ return -EINVAL; -+ } -+ ithc_log_regs(ithc); -+ return len; -+} -+ -+static const struct file_operations ithc_debugfops_cmd = { -+ .owner = THIS_MODULE, -+ .write = ithc_debugfs_cmd_write, -+}; -+ -+static void ithc_debugfs_devres_release(struct device *dev, void *res) { -+ struct dentry **dbgm = res; -+ if (*dbgm) debugfs_remove_recursive(*dbgm); -+} -+ -+int ithc_debug_init(struct ithc *ithc) { -+ struct dentry **dbgm = devres_alloc(ithc_debugfs_devres_release, sizeof *dbgm, GFP_KERNEL); -+ if (!dbgm) return -ENOMEM; -+ devres_add(&ithc->pci->dev, dbgm); -+ struct dentry *dbg = debugfs_create_dir(DEVNAME, NULL); -+ if (IS_ERR(dbg)) return PTR_ERR(dbg); -+ *dbgm = dbg; -+ -+ struct dentry *cmd = debugfs_create_file("cmd", 0220, dbg, ithc, &ithc_debugfops_cmd); -+ if (IS_ERR(cmd)) return PTR_ERR(cmd); -+ -+ return 0; -+} -+ -diff --git a/drivers/hid/ithc/ithc-dma.c b/drivers/hid/ithc/ithc-dma.c -new file mode 100644 -index 0000000000000..7e89b3496918d ---- /dev/null -+++ b/drivers/hid/ithc/ithc-dma.c -@@ -0,0 +1,258 @@ -+#include "ithc.h" -+ -+static int ithc_dma_prd_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *p, unsigned num_buffers, unsigned num_pages, enum dma_data_direction dir) { -+ p->num_pages = num_pages; -+ p->dir = dir; -+ p->size = round_up(num_buffers * num_pages * sizeof(struct ithc_phys_region_desc), PAGE_SIZE); -+ p->addr = dmam_alloc_coherent(&ithc->pci->dev, p->size, &p->dma_addr, GFP_KERNEL); -+ if (!p->addr) return -ENOMEM; -+ if (p->dma_addr & (PAGE_SIZE - 1)) return -EFAULT; -+ return 0; -+} -+ -+struct ithc_sg_table { -+ void *addr; -+ struct sg_table sgt; -+ enum dma_data_direction dir; -+}; -+static void 
ithc_dma_sgtable_free(struct sg_table *sgt) { -+ struct scatterlist *sg; -+ int i; -+ for_each_sgtable_sg(sgt, sg, i) { -+ struct page *p = sg_page(sg); -+ if (p) __free_page(p); -+ } -+ sg_free_table(sgt); -+} -+static void ithc_dma_data_devres_release(struct device *dev, void *res) { -+ struct ithc_sg_table *sgt = res; -+ if (sgt->addr) vunmap(sgt->addr); -+ dma_unmap_sgtable(dev, &sgt->sgt, sgt->dir, 0); -+ ithc_dma_sgtable_free(&sgt->sgt); -+} -+ -+static int ithc_dma_data_alloc(struct ithc* ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b) { -+ // We don't use dma_alloc_coherent for data buffers, because they don't have to be contiguous (we can use one PRD per page) or coherent (they are unidirectional). -+ // Instead we use an sg_table of individually allocated pages (5.13 has dma_alloc_noncontiguous for this, but we'd like to support 5.10 for now). -+ struct page *pages[16]; -+ if (prds->num_pages == 0 || prds->num_pages > ARRAY_SIZE(pages)) return -EINVAL; -+ b->active_idx = -1; -+ struct ithc_sg_table *sgt = devres_alloc(ithc_dma_data_devres_release, sizeof *sgt, GFP_KERNEL); -+ if (!sgt) return -ENOMEM; -+ sgt->dir = prds->dir; -+ if (!sg_alloc_table(&sgt->sgt, prds->num_pages, GFP_KERNEL)) { -+ struct scatterlist *sg; -+ int i; -+ bool ok = true; -+ for_each_sgtable_sg(&sgt->sgt, sg, i) { -+ struct page *p = pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); // don't need __GFP_DMA for PCI DMA -+ if (!p) { ok = false; break; } -+ sg_set_page(sg, p, PAGE_SIZE, 0); -+ } -+ if (ok && !dma_map_sgtable(&ithc->pci->dev, &sgt->sgt, prds->dir, 0)) { -+ devres_add(&ithc->pci->dev, sgt); -+ b->sgt = &sgt->sgt; -+ b->addr = sgt->addr = vmap(pages, prds->num_pages, 0, PAGE_KERNEL); -+ if (!b->addr) return -ENOMEM; -+ return 0; -+ } -+ ithc_dma_sgtable_free(&sgt->sgt); -+ } -+ devres_free(sgt); -+ return -ENOMEM; -+} -+ -+static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b, 
unsigned idx) { -+ struct ithc_phys_region_desc *prd = prds->addr; -+ prd += idx * prds->num_pages; -+ if (b->active_idx >= 0) { pci_err(ithc->pci, "buffer already active\n"); return -EINVAL; } -+ b->active_idx = idx; -+ if (prds->dir == DMA_TO_DEVICE) { -+ if (b->data_size > PAGE_SIZE) return -EINVAL; -+ prd->addr = sg_dma_address(b->sgt->sgl) >> 10; -+ prd->size = b->data_size | PRD_FLAG_END; -+ flush_kernel_vmap_range(b->addr, b->data_size); -+ } else if (prds->dir == DMA_FROM_DEVICE) { -+ struct scatterlist *sg; -+ int i; -+ for_each_sgtable_dma_sg(b->sgt, sg, i) { -+ prd->addr = sg_dma_address(sg) >> 10; -+ prd->size = sg_dma_len(sg); -+ prd++; -+ } -+ prd[-1].size |= PRD_FLAG_END; -+ } -+ dma_wmb(); // for the prds -+ dma_sync_sgtable_for_device(&ithc->pci->dev, b->sgt, prds->dir); -+ return 0; -+} -+ -+static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b, unsigned idx) { -+ struct ithc_phys_region_desc *prd = prds->addr; -+ prd += idx * prds->num_pages; -+ if (b->active_idx != idx) { pci_err(ithc->pci, "wrong buffer index\n"); return -EINVAL; } -+ b->active_idx = -1; -+ if (prds->dir == DMA_FROM_DEVICE) { -+ dma_rmb(); // for the prds -+ b->data_size = 0; -+ struct scatterlist *sg; -+ int i; -+ for_each_sgtable_dma_sg(b->sgt, sg, i) { -+ unsigned size = prd->size; -+ b->data_size += size & PRD_SIZE_MASK; -+ if (size & PRD_FLAG_END) break; -+ if ((size & PRD_SIZE_MASK) != sg_dma_len(sg)) { pci_err(ithc->pci, "truncated prd\n"); break; } -+ prd++; -+ } -+ invalidate_kernel_vmap_range(b->addr, b->data_size); -+ } -+ dma_sync_sgtable_for_cpu(&ithc->pci->dev, b->sgt, prds->dir); -+ return 0; -+} -+ -+int ithc_dma_rx_init(struct ithc *ithc, u8 channel, const char *devname) { -+ struct ithc_dma_rx *rx = &ithc->dma_rx[channel]; -+ mutex_init(&rx->mutex); -+ u32 buf_size = DEVCFG_DMA_RX_SIZE(ithc->config.dma_buf_sizes); -+ unsigned num_pages = (buf_size + PAGE_SIZE - 1) / PAGE_SIZE; -+ 
pci_dbg(ithc->pci, "allocating rx buffers: num = %u, size = %u, pages = %u\n", NUM_RX_BUF, buf_size, num_pages); -+ CHECK_RET(ithc_dma_prd_alloc, ithc, &rx->prds, NUM_RX_BUF, num_pages, DMA_FROM_DEVICE); -+ for (unsigned i = 0; i < NUM_RX_BUF; i++) -+ CHECK_RET(ithc_dma_data_alloc, ithc, &rx->prds, &rx->bufs[i]); -+ writeb(DMA_RX_CONTROL2_RESET, &ithc->regs->dma_rx[channel].control2); -+ lo_hi_writeq(rx->prds.dma_addr, &ithc->regs->dma_rx[channel].addr); -+ writeb(NUM_RX_BUF - 1, &ithc->regs->dma_rx[channel].num_bufs); -+ writeb(num_pages - 1, &ithc->regs->dma_rx[channel].num_prds); -+ u8 head = readb(&ithc->regs->dma_rx[channel].head); -+ if (head) { pci_err(ithc->pci, "head is nonzero (%u)\n", head); return -EIO; } -+ for (unsigned i = 0; i < NUM_RX_BUF; i++) -+ CHECK_RET(ithc_dma_data_buffer_put, ithc, &rx->prds, &rx->bufs[i], i); -+ writeb(head ^ DMA_RX_WRAP_FLAG, &ithc->regs->dma_rx[channel].tail); -+ return 0; -+} -+void ithc_dma_rx_enable(struct ithc *ithc, u8 channel) { -+ bitsb_set(&ithc->regs->dma_rx[channel].control, DMA_RX_CONTROL_ENABLE | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_DATA); -+ CHECK(waitl, ithc, &ithc->regs->dma_rx[1].status, DMA_RX_STATUS_ENABLED, DMA_RX_STATUS_ENABLED); -+} -+ -+int ithc_dma_tx_init(struct ithc *ithc) { -+ struct ithc_dma_tx *tx = &ithc->dma_tx; -+ mutex_init(&tx->mutex); -+ tx->max_size = DEVCFG_DMA_TX_SIZE(ithc->config.dma_buf_sizes); -+ unsigned num_pages = (tx->max_size + PAGE_SIZE - 1) / PAGE_SIZE; -+ pci_dbg(ithc->pci, "allocating tx buffers: size = %u, pages = %u\n", tx->max_size, num_pages); -+ CHECK_RET(ithc_dma_prd_alloc, ithc, &tx->prds, 1, num_pages, DMA_TO_DEVICE); -+ CHECK_RET(ithc_dma_data_alloc, ithc, &tx->prds, &tx->buf); -+ lo_hi_writeq(tx->prds.dma_addr, &ithc->regs->dma_tx.addr); -+ writeb(num_pages - 1, &ithc->regs->dma_tx.num_prds); -+ CHECK_RET(ithc_dma_data_buffer_put, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0); -+ return 0; -+} -+ -+static int ithc_dma_rx_process_buf(struct ithc *ithc, 
struct ithc_dma_data_buffer *data, u8 channel, u8 buf) { -+ if (buf >= NUM_RX_BUF) { -+ pci_err(ithc->pci, "invalid dma ringbuffer index\n"); -+ return -EINVAL; -+ } -+ ithc_set_active(ithc); -+ u32 len = data->data_size; -+ struct ithc_dma_rx_header *hdr = data->addr; -+ u8 *hiddata = (void *)(hdr + 1); -+ if (len >= sizeof *hdr && hdr->code == DMA_RX_CODE_RESET) { -+ CHECK(ithc_reset, ithc); -+ } else if (len < sizeof *hdr || len != sizeof *hdr + hdr->data_size) { -+ if (hdr->code == DMA_RX_CODE_INPUT_REPORT) { -+ // When the CPU enters a low power state during DMA, we can get truncated messages. -+ // Typically this will be a single touch HID report that is only 1 byte, or a multitouch report that is 257 bytes. -+ // See also ithc_set_active(). -+ } else { -+ pci_err(ithc->pci, "invalid dma rx data! channel %u, buffer %u, size %u, code %u, data size %u\n", channel, buf, len, hdr->code, hdr->data_size); -+ print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, hdr, min(len, 0x400u), 0); -+ } -+ } else if (hdr->code == DMA_RX_CODE_REPORT_DESCRIPTOR && hdr->data_size > 8) { -+ CHECK(hid_parse_report, ithc->hid, hiddata + 8, hdr->data_size - 8); -+ WRITE_ONCE(ithc->hid_parse_done, true); -+ wake_up(&ithc->wait_hid_parse); -+ } else if (hdr->code == DMA_RX_CODE_INPUT_REPORT) { -+ CHECK(hid_input_report, ithc->hid, HID_INPUT_REPORT, hiddata, hdr->data_size, 1); -+ } else if (hdr->code == DMA_RX_CODE_FEATURE_REPORT) { -+ bool done = false; -+ mutex_lock(&ithc->hid_get_feature_mutex); -+ if (ithc->hid_get_feature_buf) { -+ if (hdr->data_size < ithc->hid_get_feature_size) ithc->hid_get_feature_size = hdr->data_size; -+ memcpy(ithc->hid_get_feature_buf, hiddata, ithc->hid_get_feature_size); -+ ithc->hid_get_feature_buf = NULL; -+ done = true; -+ } -+ mutex_unlock(&ithc->hid_get_feature_mutex); -+ if (done) wake_up(&ithc->wait_hid_get_feature); -+ else CHECK(hid_input_report, ithc->hid, HID_FEATURE_REPORT, hiddata, hdr->data_size, 1); -+ } else { -+ 
pci_dbg(ithc->pci, "unhandled dma rx data! channel %u, buffer %u, size %u, code %u\n", channel, buf, len, hdr->code); -+ print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, hdr, min(len, 0x400u), 0); -+ } -+ return 0; -+} -+ -+static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) { -+ struct ithc_dma_rx *rx = &ithc->dma_rx[channel]; -+ unsigned n = rx->num_received; -+ u8 head_wrap = readb(&ithc->regs->dma_rx[channel].head); -+ while (1) { -+ u8 tail = n % NUM_RX_BUF; -+ u8 tail_wrap = tail | ((n / NUM_RX_BUF) & 1 ? 0 : DMA_RX_WRAP_FLAG); -+ writeb(tail_wrap, &ithc->regs->dma_rx[channel].tail); -+ // ringbuffer is full if tail_wrap == head_wrap -+ // ringbuffer is empty if tail_wrap == head_wrap ^ WRAP_FLAG -+ if (tail_wrap == (head_wrap ^ DMA_RX_WRAP_FLAG)) return 0; -+ -+ // take the buffer that the device just filled -+ struct ithc_dma_data_buffer *b = &rx->bufs[n % NUM_RX_BUF]; -+ CHECK_RET(ithc_dma_data_buffer_get, ithc, &rx->prds, b, tail); -+ rx->num_received = ++n; -+ -+ // process data -+ CHECK(ithc_dma_rx_process_buf, ithc, b, channel, tail); -+ -+ // give the buffer back to the device -+ CHECK_RET(ithc_dma_data_buffer_put, ithc, &rx->prds, b, tail); -+ } -+} -+int ithc_dma_rx(struct ithc *ithc, u8 channel) { -+ struct ithc_dma_rx *rx = &ithc->dma_rx[channel]; -+ mutex_lock(&rx->mutex); -+ int ret = ithc_dma_rx_unlocked(ithc, channel); -+ mutex_unlock(&rx->mutex); -+ return ret; -+} -+ -+static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) { -+ pci_dbg(ithc->pci, "dma tx command %u, size %u\n", cmdcode, datasize); -+ struct ithc_dma_tx_header *hdr; -+ u8 padding = datasize & 3 ? 
4 - (datasize & 3) : 0; -+ unsigned fullsize = sizeof *hdr + datasize + padding; -+ if (fullsize > ithc->dma_tx.max_size || fullsize > PAGE_SIZE) return -EINVAL; -+ CHECK_RET(ithc_dma_data_buffer_get, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0); -+ -+ ithc->dma_tx.buf.data_size = fullsize; -+ hdr = ithc->dma_tx.buf.addr; -+ hdr->code = cmdcode; -+ hdr->data_size = datasize; -+ u8 *dest = (void *)(hdr + 1); -+ memcpy(dest, data, datasize); -+ dest += datasize; -+ for (u8 p = 0; p < padding; p++) *dest++ = 0; -+ CHECK_RET(ithc_dma_data_buffer_put, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0); -+ -+ bitsb_set(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND); -+ CHECK_RET(waitb, ithc, &ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND, 0); -+ writel(DMA_TX_STATUS_DONE, &ithc->regs->dma_tx.status); -+ return 0; -+} -+int ithc_dma_tx(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) { -+ mutex_lock(&ithc->dma_tx.mutex); -+ int ret = ithc_dma_tx_unlocked(ithc, cmdcode, datasize, data); -+ mutex_unlock(&ithc->dma_tx.mutex); -+ return ret; -+} -+ -diff --git a/drivers/hid/ithc/ithc-dma.h b/drivers/hid/ithc/ithc-dma.h -new file mode 100644 -index 0000000000000..d9f2c19a13f3a ---- /dev/null -+++ b/drivers/hid/ithc/ithc-dma.h -@@ -0,0 +1,67 @@ -+#define PRD_SIZE_MASK 0xffffff -+#define PRD_FLAG_END 0x1000000 -+#define PRD_FLAG_SUCCESS 0x2000000 -+#define PRD_FLAG_ERROR 0x4000000 -+ -+struct ithc_phys_region_desc { -+ u64 addr; // physical addr/1024 -+ u32 size; // num bytes, PRD_FLAG_END marks last prd for data split over multiple prds -+ u32 unused; -+}; -+ -+#define DMA_RX_CODE_INPUT_REPORT 3 -+#define DMA_RX_CODE_FEATURE_REPORT 4 -+#define DMA_RX_CODE_REPORT_DESCRIPTOR 5 -+#define DMA_RX_CODE_RESET 7 -+ -+struct ithc_dma_rx_header { -+ u32 code; -+ u32 data_size; -+ u32 _unknown[14]; -+}; -+ -+#define DMA_TX_CODE_SET_FEATURE 3 -+#define DMA_TX_CODE_GET_FEATURE 4 -+#define DMA_TX_CODE_OUTPUT_REPORT 5 -+#define DMA_TX_CODE_GET_REPORT_DESCRIPTOR 7 -+ -+struct 
ithc_dma_tx_header { -+ u32 code; -+ u32 data_size; -+}; -+ -+struct ithc_dma_prd_buffer { -+ void *addr; -+ dma_addr_t dma_addr; -+ u32 size; -+ u32 num_pages; // per data buffer -+ enum dma_data_direction dir; -+}; -+ -+struct ithc_dma_data_buffer { -+ void *addr; -+ struct sg_table *sgt; -+ int active_idx; -+ u32 data_size; -+}; -+ -+struct ithc_dma_tx { -+ struct mutex mutex; -+ u32 max_size; -+ struct ithc_dma_prd_buffer prds; -+ struct ithc_dma_data_buffer buf; -+}; -+ -+struct ithc_dma_rx { -+ struct mutex mutex; -+ u32 num_received; -+ struct ithc_dma_prd_buffer prds; -+ struct ithc_dma_data_buffer bufs[NUM_RX_BUF]; -+}; -+ -+int ithc_dma_rx_init(struct ithc *ithc, u8 channel, const char *devname); -+void ithc_dma_rx_enable(struct ithc *ithc, u8 channel); -+int ithc_dma_tx_init(struct ithc *ithc); -+int ithc_dma_rx(struct ithc *ithc, u8 channel); -+int ithc_dma_tx(struct ithc *ithc, u32 cmdcode, u32 datasize, void *cmddata); -+ -diff --git a/drivers/hid/ithc/ithc-main.c b/drivers/hid/ithc/ithc-main.c -new file mode 100644 -index 0000000000000..09512b9cb4d31 ---- /dev/null -+++ b/drivers/hid/ithc/ithc-main.c -@@ -0,0 +1,534 @@ -+#include "ithc.h" -+ -+MODULE_DESCRIPTION("Intel Touch Host Controller driver"); -+MODULE_LICENSE("Dual BSD/GPL"); -+ -+// Lakefield -+#define PCI_DEVICE_ID_INTEL_THC_LKF_PORT1 0x98d0 -+#define PCI_DEVICE_ID_INTEL_THC_LKF_PORT2 0x98d1 -+// Tiger Lake -+#define PCI_DEVICE_ID_INTEL_THC_TGL_LP_PORT1 0xa0d0 -+#define PCI_DEVICE_ID_INTEL_THC_TGL_LP_PORT2 0xa0d1 -+#define PCI_DEVICE_ID_INTEL_THC_TGL_H_PORT1 0x43d0 -+#define PCI_DEVICE_ID_INTEL_THC_TGL_H_PORT2 0x43d1 -+// Alder Lake -+#define PCI_DEVICE_ID_INTEL_THC_ADL_S_PORT1 0x7ad8 -+#define PCI_DEVICE_ID_INTEL_THC_ADL_S_PORT2 0x7ad9 -+#define PCI_DEVICE_ID_INTEL_THC_ADL_P_PORT1 0x51d0 -+#define PCI_DEVICE_ID_INTEL_THC_ADL_P_PORT2 0x51d1 -+#define PCI_DEVICE_ID_INTEL_THC_ADL_M_PORT1 0x54d0 -+#define PCI_DEVICE_ID_INTEL_THC_ADL_M_PORT2 0x54d1 -+// Raptor Lake -+#define 
PCI_DEVICE_ID_INTEL_THC_RPL_S_PORT1 0x7a58 -+#define PCI_DEVICE_ID_INTEL_THC_RPL_S_PORT2 0x7a59 -+// Meteor Lake -+#define PCI_DEVICE_ID_INTEL_THC_MTL_PORT1 0x7e48 -+#define PCI_DEVICE_ID_INTEL_THC_MTL_PORT2 0x7e4a -+ -+static const struct pci_device_id ithc_pci_tbl[] = { -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_LKF_PORT1) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_LKF_PORT2) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_TGL_LP_PORT1) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_TGL_LP_PORT2) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_TGL_H_PORT1) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_TGL_H_PORT2) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_ADL_S_PORT1) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_ADL_S_PORT2) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_ADL_P_PORT1) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_ADL_P_PORT2) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_ADL_M_PORT1) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_ADL_M_PORT2) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_RPL_S_PORT1) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_RPL_S_PORT2) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_MTL_PORT1) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_MTL_PORT2) }, -+ {} -+}; -+MODULE_DEVICE_TABLE(pci, ithc_pci_tbl); -+ -+// Module parameters -+ -+static bool ithc_use_polling = false; -+module_param_named(poll, ithc_use_polling, bool, 0); -+MODULE_PARM_DESC(poll, "Use polling instead of interrupts"); -+ -+static bool ithc_use_rx0 = false; -+module_param_named(rx0, ithc_use_rx0, bool, 0); -+MODULE_PARM_DESC(rx0, "Use DMA RX channel 0"); -+ -+static bool ithc_use_rx1 = true; -+module_param_named(rx1, ithc_use_rx1, bool, 0); -+MODULE_PARM_DESC(rx1, "Use DMA 
RX channel 1"); -+ -+static bool ithc_log_regs_enabled = false; -+module_param_named(logregs, ithc_log_regs_enabled, bool, 0); -+MODULE_PARM_DESC(logregs, "Log changes in register values (for debugging)"); -+ -+// Sysfs attributes -+ -+static bool ithc_is_config_valid(struct ithc *ithc) { -+ return ithc->config.device_id == DEVCFG_DEVICE_ID_TIC; -+} -+ -+static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, char *buf) { -+ struct ithc *ithc = dev_get_drvdata(dev); -+ if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; -+ return sprintf(buf, "0x%04x", ithc->config.vendor_id); -+} -+static DEVICE_ATTR_RO(vendor); -+static ssize_t product_show(struct device *dev, struct device_attribute *attr, char *buf) { -+ struct ithc *ithc = dev_get_drvdata(dev); -+ if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; -+ return sprintf(buf, "0x%04x", ithc->config.product_id); -+} -+static DEVICE_ATTR_RO(product); -+static ssize_t revision_show(struct device *dev, struct device_attribute *attr, char *buf) { -+ struct ithc *ithc = dev_get_drvdata(dev); -+ if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; -+ return sprintf(buf, "%u", ithc->config.revision); -+} -+static DEVICE_ATTR_RO(revision); -+static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf) { -+ struct ithc *ithc = dev_get_drvdata(dev); -+ if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; -+ u32 v = ithc->config.fw_version; -+ return sprintf(buf, "%i.%i.%i.%i", v >> 24, v >> 16 & 0xff, v >> 8 & 0xff, v & 0xff); -+} -+static DEVICE_ATTR_RO(fw_version); -+ -+static const struct attribute_group *ithc_attribute_groups[] = { -+ &(const struct attribute_group){ -+ .name = DEVNAME, -+ .attrs = (struct attribute *[]){ -+ &dev_attr_vendor.attr, -+ &dev_attr_product.attr, -+ &dev_attr_revision.attr, -+ &dev_attr_fw_version.attr, -+ NULL -+ }, -+ }, -+ NULL -+}; -+ -+// HID setup -+ -+static int ithc_hid_start(struct hid_device *hdev) { return 0; 
} -+static void ithc_hid_stop(struct hid_device *hdev) { } -+static int ithc_hid_open(struct hid_device *hdev) { return 0; } -+static void ithc_hid_close(struct hid_device *hdev) { } -+ -+static int ithc_hid_parse(struct hid_device *hdev) { -+ struct ithc *ithc = hdev->driver_data; -+ u64 val = 0; -+ WRITE_ONCE(ithc->hid_parse_done, false); -+ CHECK_RET(ithc_dma_tx, ithc, DMA_TX_CODE_GET_REPORT_DESCRIPTOR, sizeof val, &val); -+ if (!wait_event_timeout(ithc->wait_hid_parse, READ_ONCE(ithc->hid_parse_done), msecs_to_jiffies(1000))) return -ETIMEDOUT; -+ return 0; -+} -+ -+static int ithc_hid_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, size_t len, unsigned char rtype, int reqtype) { -+ struct ithc *ithc = hdev->driver_data; -+ if (!buf || !len) return -EINVAL; -+ u32 code; -+ if (rtype == HID_OUTPUT_REPORT && reqtype == HID_REQ_SET_REPORT) code = DMA_TX_CODE_OUTPUT_REPORT; -+ else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_SET_REPORT) code = DMA_TX_CODE_SET_FEATURE; -+ else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_GET_REPORT) code = DMA_TX_CODE_GET_FEATURE; -+ else { -+ pci_err(ithc->pci, "unhandled hid request %i %i for report id %i\n", rtype, reqtype, reportnum); -+ return -EINVAL; -+ } -+ buf[0] = reportnum; -+ if (reqtype == HID_REQ_GET_REPORT) { -+ mutex_lock(&ithc->hid_get_feature_mutex); -+ ithc->hid_get_feature_buf = buf; -+ ithc->hid_get_feature_size = len; -+ mutex_unlock(&ithc->hid_get_feature_mutex); -+ int r = CHECK(ithc_dma_tx, ithc, code, 1, buf); -+ if (!r) { -+ r = wait_event_interruptible_timeout(ithc->wait_hid_get_feature, !ithc->hid_get_feature_buf, msecs_to_jiffies(1000)); -+ if (!r) r = -ETIMEDOUT; -+ else if (r < 0) r = -EINTR; -+ else r = 0; -+ } -+ mutex_lock(&ithc->hid_get_feature_mutex); -+ ithc->hid_get_feature_buf = NULL; -+ if (!r) r = ithc->hid_get_feature_size; -+ mutex_unlock(&ithc->hid_get_feature_mutex); -+ return r; -+ } -+ CHECK_RET(ithc_dma_tx, ithc, code, len, buf); -+ return 
0; -+} -+ -+static struct hid_ll_driver ithc_ll_driver = { -+ .start = ithc_hid_start, -+ .stop = ithc_hid_stop, -+ .open = ithc_hid_open, -+ .close = ithc_hid_close, -+ .parse = ithc_hid_parse, -+ .raw_request = ithc_hid_raw_request, -+}; -+ -+static void ithc_hid_devres_release(struct device *dev, void *res) { -+ struct hid_device **hidm = res; -+ if (*hidm) hid_destroy_device(*hidm); -+} -+ -+static int ithc_hid_init(struct ithc *ithc) { -+ struct hid_device **hidm = devres_alloc(ithc_hid_devres_release, sizeof *hidm, GFP_KERNEL); -+ if (!hidm) return -ENOMEM; -+ devres_add(&ithc->pci->dev, hidm); -+ struct hid_device *hid = hid_allocate_device(); -+ if (IS_ERR(hid)) return PTR_ERR(hid); -+ *hidm = hid; -+ -+ strscpy(hid->name, DEVFULLNAME, sizeof(hid->name)); -+ strscpy(hid->phys, ithc->phys, sizeof(hid->phys)); -+ hid->ll_driver = &ithc_ll_driver; -+ hid->bus = BUS_PCI; -+ hid->vendor = ithc->config.vendor_id; -+ hid->product = ithc->config.product_id; -+ hid->version = 0x100; -+ hid->dev.parent = &ithc->pci->dev; -+ hid->driver_data = ithc; -+ -+ ithc->hid = hid; -+ return 0; -+} -+ -+// Interrupts/polling -+ -+static void ithc_activity_timer_callback(struct timer_list *t) { -+ struct ithc *ithc = container_of(t, struct ithc, activity_timer); -+ cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE); -+} -+ -+void ithc_set_active(struct ithc *ithc) { -+ // When CPU usage is very low, the CPU can enter various low power states (C2-C10). -+ // This disrupts DMA, causing truncated DMA messages. ERROR_FLAG_DMA_UNKNOWN_12 will be set when this happens. -+ // The amount of truncated messages can become very high, resulting in user-visible effects (laggy/stuttering cursor). -+ // To avoid this, we use a CPU latency QoS request to prevent the CPU from entering low power states during touch interactions. 
-+ cpu_latency_qos_update_request(&ithc->activity_qos, 0); -+ mod_timer(&ithc->activity_timer, jiffies + msecs_to_jiffies(1000)); -+} -+ -+static int ithc_set_device_enabled(struct ithc *ithc, bool enable) { -+ u32 x = ithc->config.touch_cfg = (ithc->config.touch_cfg & ~(u32)DEVCFG_TOUCH_MASK) | DEVCFG_TOUCH_UNKNOWN_2 -+ | (enable ? DEVCFG_TOUCH_ENABLE | DEVCFG_TOUCH_UNKNOWN_3 | DEVCFG_TOUCH_UNKNOWN_4 : 0); -+ return ithc_spi_command(ithc, SPI_CMD_CODE_WRITE, offsetof(struct ithc_device_config, touch_cfg), sizeof x, &x); -+} -+ -+static void ithc_disable_interrupts(struct ithc *ithc) { -+ writel(0, &ithc->regs->error_control); -+ bitsb(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_IRQ, 0); -+ bitsb(&ithc->regs->dma_rx[0].control, DMA_RX_CONTROL_IRQ_UNKNOWN_1 | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_UNKNOWN_4 | DMA_RX_CONTROL_IRQ_DATA, 0); -+ bitsb(&ithc->regs->dma_rx[1].control, DMA_RX_CONTROL_IRQ_UNKNOWN_1 | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_UNKNOWN_4 | DMA_RX_CONTROL_IRQ_DATA, 0); -+ bitsb(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_IRQ, 0); -+} -+ -+static void ithc_clear_dma_rx_interrupts(struct ithc *ithc, unsigned channel) { -+ writel(DMA_RX_STATUS_ERROR | DMA_RX_STATUS_UNKNOWN_4 | DMA_RX_STATUS_HAVE_DATA, &ithc->regs->dma_rx[channel].status); -+} -+ -+static void ithc_clear_interrupts(struct ithc *ithc) { -+ writel(0xffffffff, &ithc->regs->error_flags); -+ writel(ERROR_STATUS_DMA | ERROR_STATUS_SPI, &ithc->regs->error_status); -+ writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status); -+ ithc_clear_dma_rx_interrupts(ithc, 0); -+ ithc_clear_dma_rx_interrupts(ithc, 1); -+ writel(DMA_TX_STATUS_DONE | DMA_TX_STATUS_ERROR | DMA_TX_STATUS_UNKNOWN_2, &ithc->regs->dma_tx.status); -+} -+ -+static void ithc_process(struct ithc *ithc) { -+ ithc_log_regs(ithc); -+ -+ // read and clear error bits -+ u32 err = readl(&ithc->regs->error_flags); -+ if (err) { -+ if (err & ~ERROR_FLAG_DMA_UNKNOWN_12) pci_err(ithc->pci, "error flags: 
0x%08x\n", err); -+ writel(err, &ithc->regs->error_flags); -+ } -+ -+ // process DMA rx -+ if (ithc_use_rx0) { -+ ithc_clear_dma_rx_interrupts(ithc, 0); -+ ithc_dma_rx(ithc, 0); -+ } -+ if (ithc_use_rx1) { -+ ithc_clear_dma_rx_interrupts(ithc, 1); -+ ithc_dma_rx(ithc, 1); -+ } -+ -+ ithc_log_regs(ithc); -+} -+ -+static irqreturn_t ithc_interrupt_thread(int irq, void *arg) { -+ struct ithc *ithc = arg; -+ pci_dbg(ithc->pci, "IRQ! err=%08x/%08x/%08x, cmd=%02x/%08x, rx0=%02x/%08x, rx1=%02x/%08x, tx=%02x/%08x\n", -+ readl(&ithc->regs->error_control), readl(&ithc->regs->error_status), readl(&ithc->regs->error_flags), -+ readb(&ithc->regs->spi_cmd.control), readl(&ithc->regs->spi_cmd.status), -+ readb(&ithc->regs->dma_rx[0].control), readl(&ithc->regs->dma_rx[0].status), -+ readb(&ithc->regs->dma_rx[1].control), readl(&ithc->regs->dma_rx[1].status), -+ readb(&ithc->regs->dma_tx.control), readl(&ithc->regs->dma_tx.status)); -+ ithc_process(ithc); -+ return IRQ_HANDLED; -+} -+ -+static int ithc_poll_thread(void *arg) { -+ struct ithc *ithc = arg; -+ unsigned sleep = 100; -+ while (!kthread_should_stop()) { -+ u32 n = ithc->dma_rx[1].num_received; -+ ithc_process(ithc); -+ if (n != ithc->dma_rx[1].num_received) sleep = 20; -+ else sleep = min(200u, sleep + (sleep >> 4) + 1); -+ msleep_interruptible(sleep); -+ } -+ return 0; -+} -+ -+// Device initialization and shutdown -+ -+static void ithc_disable(struct ithc *ithc) { -+ bitsl_set(&ithc->regs->control_bits, CONTROL_QUIESCE); -+ CHECK(waitl, ithc, &ithc->regs->control_bits, CONTROL_IS_QUIESCED, CONTROL_IS_QUIESCED); -+ bitsl(&ithc->regs->control_bits, CONTROL_NRESET, 0); -+ bitsb(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_SEND, 0); -+ bitsb(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND, 0); -+ bitsb(&ithc->regs->dma_rx[0].control, DMA_RX_CONTROL_ENABLE, 0); -+ bitsb(&ithc->regs->dma_rx[1].control, DMA_RX_CONTROL_ENABLE, 0); -+ ithc_disable_interrupts(ithc); -+ ithc_clear_interrupts(ithc); -+} -+ -+static int 
ithc_init_device(struct ithc *ithc) { -+ ithc_log_regs(ithc); -+ bool was_enabled = (readl(&ithc->regs->control_bits) & CONTROL_NRESET) != 0; -+ ithc_disable(ithc); -+ CHECK_RET(waitl, ithc, &ithc->regs->control_bits, CONTROL_READY, CONTROL_READY); -+ ithc_set_spi_config(ithc, 10, 0); -+ bitsl_set(&ithc->regs->dma_rx[0].unknown_init_bits, 0x80000000); // seems to help with reading config -+ -+ if (was_enabled) if (msleep_interruptible(100)) return -EINTR; -+ bitsl(&ithc->regs->control_bits, CONTROL_QUIESCE, 0); -+ CHECK_RET(waitl, ithc, &ithc->regs->control_bits, CONTROL_IS_QUIESCED, 0); -+ for (int retries = 0; ; retries++) { -+ ithc_log_regs(ithc); -+ bitsl_set(&ithc->regs->control_bits, CONTROL_NRESET); -+ if (!waitl(ithc, &ithc->regs->state, 0xf, 2)) break; -+ if (retries > 5) { -+ pci_err(ithc->pci, "too many retries, failed to reset device\n"); -+ return -ETIMEDOUT; -+ } -+ pci_err(ithc->pci, "invalid state, retrying reset\n"); -+ bitsl(&ithc->regs->control_bits, CONTROL_NRESET, 0); -+ if (msleep_interruptible(1000)) return -EINTR; -+ } -+ ithc_log_regs(ithc); -+ -+ CHECK(waitl, ithc, &ithc->regs->dma_rx[0].status, DMA_RX_STATUS_UNKNOWN_4, DMA_RX_STATUS_UNKNOWN_4); -+ -+ // read config -+ for (int retries = 0; ; retries++) { -+ ithc_log_regs(ithc); -+ memset(&ithc->config, 0, sizeof ithc->config); -+ CHECK_RET(ithc_spi_command, ithc, SPI_CMD_CODE_READ, 0, sizeof ithc->config, &ithc->config); -+ u32 *p = (void *)&ithc->config; -+ pci_info(ithc->pci, "config: %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", -+ p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); -+ if (ithc_is_config_valid(ithc)) break; -+ if (retries > 10) { -+ pci_err(ithc->pci, "failed to read config, unknown device ID 0x%08x\n", ithc->config.device_id); -+ return -EIO; -+ } -+ pci_err(ithc->pci, "failed to read config, retrying\n"); -+ if (msleep_interruptible(100)) return -EINTR; -+ } -+ 
ithc_log_regs(ithc); -+ -+ CHECK_RET(ithc_set_spi_config, ithc, DEVCFG_SPI_MAX_FREQ(ithc->config.spi_config), DEVCFG_SPI_MODE(ithc->config.spi_config)); -+ CHECK_RET(ithc_set_device_enabled, ithc, true); -+ ithc_log_regs(ithc); -+ return 0; -+} -+ -+int ithc_reset(struct ithc *ithc) { -+ // FIXME This should probably do devres_release_group()+ithc_start(). But because this is called during DMA -+ // processing, that would have to be done asynchronously (schedule_work()?). And with extra locking? -+ pci_err(ithc->pci, "reset\n"); -+ CHECK(ithc_init_device, ithc); -+ if (ithc_use_rx0) ithc_dma_rx_enable(ithc, 0); -+ if (ithc_use_rx1) ithc_dma_rx_enable(ithc, 1); -+ ithc_log_regs(ithc); -+ pci_dbg(ithc->pci, "reset completed\n"); -+ return 0; -+} -+ -+static void ithc_stop(void *res) { -+ struct ithc *ithc = res; -+ pci_dbg(ithc->pci, "stopping\n"); -+ ithc_log_regs(ithc); -+ if (ithc->poll_thread) CHECK(kthread_stop, ithc->poll_thread); -+ if (ithc->irq >= 0) disable_irq(ithc->irq); -+ CHECK(ithc_set_device_enabled, ithc, false); -+ ithc_disable(ithc); -+ del_timer_sync(&ithc->activity_timer); -+ cpu_latency_qos_remove_request(&ithc->activity_qos); -+ // clear dma config -+ for(unsigned i = 0; i < 2; i++) { -+ CHECK(waitl, ithc, &ithc->regs->dma_rx[i].status, DMA_RX_STATUS_ENABLED, 0); -+ lo_hi_writeq(0, &ithc->regs->dma_rx[i].addr); -+ writeb(0, &ithc->regs->dma_rx[i].num_bufs); -+ writeb(0, &ithc->regs->dma_rx[i].num_prds); -+ } -+ lo_hi_writeq(0, &ithc->regs->dma_tx.addr); -+ writeb(0, &ithc->regs->dma_tx.num_prds); -+ ithc_log_regs(ithc); -+ pci_dbg(ithc->pci, "stopped\n"); -+} -+ -+static void ithc_clear_drvdata(void *res) { -+ struct pci_dev *pci = res; -+ pci_set_drvdata(pci, NULL); -+} -+ -+static int ithc_start(struct pci_dev *pci) { -+ pci_dbg(pci, "starting\n"); -+ if (pci_get_drvdata(pci)) { -+ pci_err(pci, "device already initialized\n"); -+ return -EINVAL; -+ } -+ if (!devres_open_group(&pci->dev, ithc_start, GFP_KERNEL)) return -ENOMEM; -+ -+ struct 
ithc *ithc = devm_kzalloc(&pci->dev, sizeof *ithc, GFP_KERNEL); -+ if (!ithc) return -ENOMEM; -+ ithc->irq = -1; -+ ithc->pci = pci; -+ snprintf(ithc->phys, sizeof ithc->phys, "pci-%s/" DEVNAME, pci_name(pci)); -+ init_waitqueue_head(&ithc->wait_hid_parse); -+ init_waitqueue_head(&ithc->wait_hid_get_feature); -+ mutex_init(&ithc->hid_get_feature_mutex); -+ pci_set_drvdata(pci, ithc); -+ CHECK_RET(devm_add_action_or_reset, &pci->dev, ithc_clear_drvdata, pci); -+ if (ithc_log_regs_enabled) ithc->prev_regs = devm_kzalloc(&pci->dev, sizeof *ithc->prev_regs, GFP_KERNEL); -+ -+ CHECK_RET(pcim_enable_device, pci); -+ pci_set_master(pci); -+ CHECK_RET(pcim_iomap_regions, pci, BIT(0), DEVNAME " regs"); -+ CHECK_RET(dma_set_mask_and_coherent, &pci->dev, DMA_BIT_MASK(64)); -+ CHECK_RET(pci_set_power_state, pci, PCI_D0); -+ ithc->regs = pcim_iomap_table(pci)[0]; -+ -+ if (!ithc_use_polling) { -+ CHECK_RET(pci_alloc_irq_vectors, pci, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX); -+ ithc->irq = CHECK(pci_irq_vector, pci, 0); -+ if (ithc->irq < 0) return ithc->irq; -+ } -+ -+ CHECK_RET(ithc_init_device, ithc); -+ CHECK(devm_device_add_groups, &pci->dev, ithc_attribute_groups); -+ if (ithc_use_rx0) CHECK_RET(ithc_dma_rx_init, ithc, 0, ithc_use_rx1 ? DEVNAME "0" : DEVNAME); -+ if (ithc_use_rx1) CHECK_RET(ithc_dma_rx_init, ithc, 1, ithc_use_rx0 ? 
DEVNAME "1" : DEVNAME); -+ CHECK_RET(ithc_dma_tx_init, ithc); -+ -+ CHECK_RET(ithc_hid_init, ithc); -+ -+ cpu_latency_qos_add_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE); -+ timer_setup(&ithc->activity_timer, ithc_activity_timer_callback, 0); -+ -+ // add ithc_stop callback AFTER setting up DMA buffers, so that polling/irqs/DMA are disabled BEFORE the buffers are freed -+ CHECK_RET(devm_add_action_or_reset, &pci->dev, ithc_stop, ithc); -+ -+ if (ithc_use_polling) { -+ pci_info(pci, "using polling instead of irq\n"); -+ // use a thread instead of simple timer because we want to be able to sleep -+ ithc->poll_thread = kthread_run(ithc_poll_thread, ithc, DEVNAME "poll"); -+ if (IS_ERR(ithc->poll_thread)) { -+ int err = PTR_ERR(ithc->poll_thread); -+ ithc->poll_thread = NULL; -+ return err; -+ } -+ } else { -+ CHECK_RET(devm_request_threaded_irq, &pci->dev, ithc->irq, NULL, ithc_interrupt_thread, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, DEVNAME, ithc); -+ } -+ -+ if (ithc_use_rx0) ithc_dma_rx_enable(ithc, 0); -+ if (ithc_use_rx1) ithc_dma_rx_enable(ithc, 1); -+ -+ // hid_add_device can only be called after irq/polling is started and DMA is enabled, because it calls ithc_hid_parse which reads the report descriptor via DMA -+ CHECK_RET(hid_add_device, ithc->hid); -+ -+ CHECK(ithc_debug_init, ithc); -+ -+ pci_dbg(pci, "started\n"); -+ return 0; -+} -+ -+static int ithc_probe(struct pci_dev *pci, const struct pci_device_id *id) { -+ pci_dbg(pci, "device probe\n"); -+ return ithc_start(pci); -+} -+ -+static void ithc_remove(struct pci_dev *pci) { -+ pci_dbg(pci, "device remove\n"); -+ // all cleanup is handled by devres -+} -+ -+static int ithc_suspend(struct device *dev) { -+ struct pci_dev *pci = to_pci_dev(dev); -+ pci_dbg(pci, "pm suspend\n"); -+ devres_release_group(dev, ithc_start); -+ return 0; -+} -+ -+static int ithc_resume(struct device *dev) { -+ struct pci_dev *pci = to_pci_dev(dev); -+ pci_dbg(pci, "pm resume\n"); -+ return ithc_start(pci); -+} -+ -+static int 
ithc_freeze(struct device *dev) { -+ struct pci_dev *pci = to_pci_dev(dev); -+ pci_dbg(pci, "pm freeze\n"); -+ devres_release_group(dev, ithc_start); -+ return 0; -+} -+ -+static int ithc_thaw(struct device *dev) { -+ struct pci_dev *pci = to_pci_dev(dev); -+ pci_dbg(pci, "pm thaw\n"); -+ return ithc_start(pci); -+} -+ -+static int ithc_restore(struct device *dev) { -+ struct pci_dev *pci = to_pci_dev(dev); -+ pci_dbg(pci, "pm restore\n"); -+ return ithc_start(pci); -+} -+ -+static struct pci_driver ithc_driver = { -+ .name = DEVNAME, -+ .id_table = ithc_pci_tbl, -+ .probe = ithc_probe, -+ .remove = ithc_remove, -+ .driver.pm = &(const struct dev_pm_ops) { -+ .suspend = ithc_suspend, -+ .resume = ithc_resume, -+ .freeze = ithc_freeze, -+ .thaw = ithc_thaw, -+ .restore = ithc_restore, -+ }, -+ //.dev_groups = ithc_attribute_groups, // could use this (since 5.14), however the attributes won't have valid values until config has been read anyway -+}; -+ -+static int __init ithc_init(void) { -+ return pci_register_driver(&ithc_driver); -+} -+ -+static void __exit ithc_exit(void) { -+ pci_unregister_driver(&ithc_driver); -+} -+ -+module_init(ithc_init); -+module_exit(ithc_exit); -+ -diff --git a/drivers/hid/ithc/ithc-regs.c b/drivers/hid/ithc/ithc-regs.c -new file mode 100644 -index 0000000000000..85d567b05761f ---- /dev/null -+++ b/drivers/hid/ithc/ithc-regs.c -@@ -0,0 +1,64 @@ -+#include "ithc.h" -+ -+#define reg_num(r) (0x1fff & (u16)(__force u64)(r)) -+ -+void bitsl(__iomem u32 *reg, u32 mask, u32 val) { -+ if (val & ~mask) pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", reg_num(reg), val, mask); -+ writel((readl(reg) & ~mask) | (val & mask), reg); -+} -+ -+void bitsb(__iomem u8 *reg, u8 mask, u8 val) { -+ if (val & ~mask) pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", reg_num(reg), val, mask); -+ writeb((readb(reg) & ~mask) | (val & mask), reg); -+} -+ -+int waitl(struct ithc *ithc, __iomem u32 *reg, u32 mask, u32 val) { -+ 
pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", reg_num(reg), mask, val); -+ u32 x; -+ if (readl_poll_timeout(reg, x, (x & mask) == val, 200, 1000*1000)) { -+ pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", reg_num(reg), mask, val); -+ return -ETIMEDOUT; -+ } -+ pci_dbg(ithc->pci, "done waiting\n"); -+ return 0; -+} -+ -+int waitb(struct ithc *ithc, __iomem u8 *reg, u8 mask, u8 val) { -+ pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", reg_num(reg), mask, val); -+ u8 x; -+ if (readb_poll_timeout(reg, x, (x & mask) == val, 200, 1000*1000)) { -+ pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", reg_num(reg), mask, val); -+ return -ETIMEDOUT; -+ } -+ pci_dbg(ithc->pci, "done waiting\n"); -+ return 0; -+} -+ -+int ithc_set_spi_config(struct ithc *ithc, u8 speed, u8 mode) { -+ pci_dbg(ithc->pci, "setting SPI speed to %i, mode %i\n", speed, mode); -+ if (mode == 3) mode = 2; -+ bitsl(&ithc->regs->spi_config, -+ SPI_CONFIG_MODE(0xff) | SPI_CONFIG_SPEED(0xff) | SPI_CONFIG_UNKNOWN_18(0xff) | SPI_CONFIG_SPEED2(0xff), -+ SPI_CONFIG_MODE(mode) | SPI_CONFIG_SPEED(speed) | SPI_CONFIG_UNKNOWN_18(0) | SPI_CONFIG_SPEED2(speed)); -+ return 0; -+} -+ -+int ithc_spi_command(struct ithc *ithc, u8 command, u32 offset, u32 size, void *data) { -+ pci_dbg(ithc->pci, "SPI command %u, size %u, offset %u\n", command, size, offset); -+ if (size > sizeof ithc->regs->spi_cmd.data) return -EINVAL; -+ CHECK_RET(waitl, ithc, &ithc->regs->spi_cmd.status, SPI_CMD_STATUS_BUSY, 0); -+ writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status); -+ writeb(command, &ithc->regs->spi_cmd.code); -+ writew(size, &ithc->regs->spi_cmd.size); -+ writel(offset, &ithc->regs->spi_cmd.offset); -+ u32 *p = data, n = (size + 3) / 4; -+ for (u32 i = 0; i < n; i++) writel(p[i], &ithc->regs->spi_cmd.data[i]); -+ bitsb_set(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_SEND); -+ CHECK_RET(waitl, 
ithc, &ithc->regs->spi_cmd.status, SPI_CMD_STATUS_BUSY, 0); -+ if ((readl(&ithc->regs->spi_cmd.status) & (SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR)) != SPI_CMD_STATUS_DONE) return -EIO; -+ if (readw(&ithc->regs->spi_cmd.size) != size) return -EMSGSIZE; -+ for (u32 i = 0; i < n; i++) p[i] = readl(&ithc->regs->spi_cmd.data[i]); -+ writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status); -+ return 0; -+} -+ -diff --git a/drivers/hid/ithc/ithc-regs.h b/drivers/hid/ithc/ithc-regs.h -new file mode 100644 -index 0000000000000..1a96092ed7eed ---- /dev/null -+++ b/drivers/hid/ithc/ithc-regs.h -@@ -0,0 +1,186 @@ -+#define CONTROL_QUIESCE BIT(1) -+#define CONTROL_IS_QUIESCED BIT(2) -+#define CONTROL_NRESET BIT(3) -+#define CONTROL_READY BIT(29) -+ -+#define SPI_CONFIG_MODE(x) (((x) & 3) << 2) -+#define SPI_CONFIG_SPEED(x) (((x) & 7) << 4) -+#define SPI_CONFIG_UNKNOWN_18(x) (((x) & 3) << 18) -+#define SPI_CONFIG_SPEED2(x) (((x) & 0xf) << 20) // high bit = high speed mode? -+ -+#define ERROR_CONTROL_UNKNOWN_0 BIT(0) -+#define ERROR_CONTROL_DISABLE_DMA BIT(1) // clears DMA_RX_CONTROL_ENABLE when a DMA error occurs -+#define ERROR_CONTROL_UNKNOWN_2 BIT(2) -+#define ERROR_CONTROL_UNKNOWN_3 BIT(3) -+#define ERROR_CONTROL_IRQ_DMA_UNKNOWN_9 BIT(9) -+#define ERROR_CONTROL_IRQ_DMA_UNKNOWN_10 BIT(10) -+#define ERROR_CONTROL_IRQ_DMA_UNKNOWN_12 BIT(12) -+#define ERROR_CONTROL_IRQ_DMA_UNKNOWN_13 BIT(13) -+#define ERROR_CONTROL_UNKNOWN_16(x) (((x) & 0xff) << 16) // spi error code irq? 
-+#define ERROR_CONTROL_SET_DMA_STATUS BIT(29) // sets DMA_RX_STATUS_ERROR when a DMA error occurs -+ -+#define ERROR_STATUS_DMA BIT(28) -+#define ERROR_STATUS_SPI BIT(30) -+ -+#define ERROR_FLAG_DMA_UNKNOWN_9 BIT(9) -+#define ERROR_FLAG_DMA_UNKNOWN_10 BIT(10) -+#define ERROR_FLAG_DMA_UNKNOWN_12 BIT(12) // set when we receive a truncated DMA message -+#define ERROR_FLAG_DMA_UNKNOWN_13 BIT(13) -+#define ERROR_FLAG_SPI_BUS_TURNAROUND BIT(16) -+#define ERROR_FLAG_SPI_RESPONSE_TIMEOUT BIT(17) -+#define ERROR_FLAG_SPI_INTRA_PACKET_TIMEOUT BIT(18) -+#define ERROR_FLAG_SPI_INVALID_RESPONSE BIT(19) -+#define ERROR_FLAG_SPI_HS_RX_TIMEOUT BIT(20) -+#define ERROR_FLAG_SPI_TOUCH_IC_INIT BIT(21) -+ -+#define SPI_CMD_CONTROL_SEND BIT(0) // cleared by device when sending is complete -+#define SPI_CMD_CONTROL_IRQ BIT(1) -+ -+#define SPI_CMD_CODE_READ 4 -+#define SPI_CMD_CODE_WRITE 6 -+ -+#define SPI_CMD_STATUS_DONE BIT(0) -+#define SPI_CMD_STATUS_ERROR BIT(1) -+#define SPI_CMD_STATUS_BUSY BIT(3) -+ -+#define DMA_TX_CONTROL_SEND BIT(0) // cleared by device when sending is complete -+#define DMA_TX_CONTROL_IRQ BIT(3) -+ -+#define DMA_TX_STATUS_DONE BIT(0) -+#define DMA_TX_STATUS_ERROR BIT(1) -+#define DMA_TX_STATUS_UNKNOWN_2 BIT(2) -+#define DMA_TX_STATUS_UNKNOWN_3 BIT(3) // busy? -+ -+#define DMA_RX_CONTROL_ENABLE BIT(0) -+#define DMA_RX_CONTROL_IRQ_UNKNOWN_1 BIT(1) // rx1 only? -+#define DMA_RX_CONTROL_IRQ_ERROR BIT(3) // rx1 only? -+#define DMA_RX_CONTROL_IRQ_UNKNOWN_4 BIT(4) // rx0 only? -+#define DMA_RX_CONTROL_IRQ_DATA BIT(5) -+ -+#define DMA_RX_CONTROL2_UNKNOWN_5 BIT(5) // rx0 only? 
-+#define DMA_RX_CONTROL2_RESET BIT(7) // resets ringbuffer indices -+ -+#define DMA_RX_WRAP_FLAG BIT(7) -+ -+#define DMA_RX_STATUS_ERROR BIT(3) -+#define DMA_RX_STATUS_UNKNOWN_4 BIT(4) // set in rx0 after using CONTROL_NRESET when it becomes possible to read config (can take >100ms) -+#define DMA_RX_STATUS_HAVE_DATA BIT(5) -+#define DMA_RX_STATUS_ENABLED BIT(8) -+ -+#define COUNTER_RESET BIT(31) -+ -+struct ithc_registers { -+ /* 0000 */ u32 _unknown_0000[1024]; -+ /* 1000 */ u32 _unknown_1000; -+ /* 1004 */ u32 _unknown_1004; -+ /* 1008 */ u32 control_bits; -+ /* 100c */ u32 _unknown_100c; -+ /* 1010 */ u32 spi_config; -+ /* 1014 */ u32 _unknown_1014[3]; -+ /* 1020 */ u32 error_control; -+ /* 1024 */ u32 error_status; // write to clear -+ /* 1028 */ u32 error_flags; // write to clear -+ /* 102c */ u32 _unknown_102c[5]; -+ struct { -+ /* 1040 */ u8 control; -+ /* 1041 */ u8 code; -+ /* 1042 */ u16 size; -+ /* 1044 */ u32 status; // write to clear -+ /* 1048 */ u32 offset; -+ /* 104c */ u32 data[16]; -+ /* 108c */ u32 _unknown_108c; -+ } spi_cmd; -+ struct { -+ /* 1090 */ u64 addr; // cannot be written with writeq(), must use lo_hi_writeq() -+ /* 1098 */ u8 control; -+ /* 1099 */ u8 _unknown_1099; -+ /* 109a */ u8 _unknown_109a; -+ /* 109b */ u8 num_prds; -+ /* 109c */ u32 status; // write to clear -+ } dma_tx; -+ /* 10a0 */ u32 _unknown_10a0[7]; -+ /* 10bc */ u32 state; // is 0xe0000402 (dev config val 0) after CONTROL_NRESET, 0xe0000461 after first touch, 0xe0000401 after DMA_RX_CODE_RESET -+ /* 10c0 */ u32 _unknown_10c0[8]; -+ /* 10e0 */ u32 _unknown_10e0_counters[3]; -+ /* 10ec */ u32 _unknown_10ec[5]; -+ struct { -+ /* 1100/1200 */ u64 addr; // cannot be written with writeq(), must use lo_hi_writeq() -+ /* 1108/1208 */ u8 num_bufs; -+ /* 1109/1209 */ u8 num_prds; -+ /* 110a/120a */ u16 _unknown_110a; -+ /* 110c/120c */ u8 control; -+ /* 110d/120d */ u8 head; -+ /* 110e/120e */ u8 tail; -+ /* 110f/120f */ u8 control2; -+ /* 1110/1210 */ u32 status; // write to 
clear -+ /* 1114/1214 */ u32 _unknown_1114; -+ /* 1118/1218 */ u64 _unknown_1118_guc_addr; -+ /* 1120/1220 */ u32 _unknown_1120_guc; -+ /* 1124/1224 */ u32 _unknown_1124_guc; -+ /* 1128/1228 */ u32 unknown_init_bits; // bit 2 = guc related, bit 3 = rx1 related, bit 4 = guc related -+ /* 112c/122c */ u32 _unknown_112c; -+ /* 1130/1230 */ u64 _unknown_1130_guc_addr; -+ /* 1138/1238 */ u32 _unknown_1138_guc; -+ /* 113c/123c */ u32 _unknown_113c; -+ /* 1140/1240 */ u32 _unknown_1140_guc; -+ /* 1144/1244 */ u32 _unknown_1144[23]; -+ /* 11a0/12a0 */ u32 _unknown_11a0_counters[6]; -+ /* 11b8/12b8 */ u32 _unknown_11b8[18]; -+ } dma_rx[2]; -+}; -+static_assert(sizeof(struct ithc_registers) == 0x1300); -+ -+#define DEVCFG_DMA_RX_SIZE(x) ((((x) & 0x3fff) + 1) << 6) -+#define DEVCFG_DMA_TX_SIZE(x) (((((x) >> 14) & 0x3ff) + 1) << 6) -+ -+#define DEVCFG_TOUCH_MASK 0x3f -+#define DEVCFG_TOUCH_ENABLE BIT(0) -+#define DEVCFG_TOUCH_UNKNOWN_1 BIT(1) -+#define DEVCFG_TOUCH_UNKNOWN_2 BIT(2) -+#define DEVCFG_TOUCH_UNKNOWN_3 BIT(3) -+#define DEVCFG_TOUCH_UNKNOWN_4 BIT(4) -+#define DEVCFG_TOUCH_UNKNOWN_5 BIT(5) -+#define DEVCFG_TOUCH_UNKNOWN_6 BIT(6) -+ -+#define DEVCFG_DEVICE_ID_TIC 0x43495424 // "$TIC" -+ -+#define DEVCFG_SPI_MAX_FREQ(x) (((x) >> 1) & 0xf) // high bit = use high speed mode? 
-+#define DEVCFG_SPI_MODE(x) (((x) >> 6) & 3) -+#define DEVCFG_SPI_UNKNOWN_8(x) (((x) >> 8) & 0x3f) -+#define DEVCFG_SPI_NEEDS_HEARTBEAT BIT(20) -+#define DEVCFG_SPI_HEARTBEAT_INTERVAL (((x) >> 21) & 7) -+#define DEVCFG_SPI_UNKNOWN_25 BIT(25) -+#define DEVCFG_SPI_UNKNOWN_26 BIT(26) -+#define DEVCFG_SPI_UNKNOWN_27 BIT(27) -+#define DEVCFG_SPI_DELAY (((x) >> 28) & 7) -+#define DEVCFG_SPI_USE_EXT_READ_CFG BIT(31) -+ -+struct ithc_device_config { -+ u32 _unknown_00; // 00 = 0xe0000402 (0xe0000401 after DMA_RX_CODE_RESET) -+ u32 _unknown_04; // 04 = 0x00000000 -+ u32 dma_buf_sizes; // 08 = 0x000a00ff -+ u32 touch_cfg; // 0c = 0x0000001c -+ u32 _unknown_10; // 10 = 0x0000001c -+ u32 device_id; // 14 = 0x43495424 = "$TIC" -+ u32 spi_config; // 18 = 0xfda00a2e -+ u16 vendor_id; // 1c = 0x045e = Microsoft Corp. -+ u16 product_id; // 1e = 0x0c1a -+ u32 revision; // 20 = 0x00000001 -+ u32 fw_version; // 24 = 0x05008a8b = 5.0.138.139 -+ u32 _unknown_28; // 28 = 0x00000000 -+ u32 fw_mode; // 2c = 0x00000000 -+ u32 _unknown_30; // 30 = 0x00000000 -+ u32 _unknown_34; // 34 = 0x0404035e (u8,u8,u8,u8 = version?) 
-+ u32 _unknown_38; // 38 = 0x000001c0 (0x000001c1 after DMA_RX_CODE_RESET) -+ u32 _unknown_3c; // 3c = 0x00000002 -+}; -+ -+void bitsl(__iomem u32 *reg, u32 mask, u32 val); -+void bitsb(__iomem u8 *reg, u8 mask, u8 val); -+#define bitsl_set(reg, x) bitsl(reg, x, x) -+#define bitsb_set(reg, x) bitsb(reg, x, x) -+int waitl(struct ithc *ithc, __iomem u32 *reg, u32 mask, u32 val); -+int waitb(struct ithc *ithc, __iomem u8 *reg, u8 mask, u8 val); -+int ithc_set_spi_config(struct ithc *ithc, u8 speed, u8 mode); -+int ithc_spi_command(struct ithc *ithc, u8 command, u32 offset, u32 size, void *data); -+ -diff --git a/drivers/hid/ithc/ithc.h b/drivers/hid/ithc/ithc.h -new file mode 100644 -index 0000000000000..6a9b0d480bc15 ---- /dev/null -+++ b/drivers/hid/ithc/ithc.h -@@ -0,0 +1,60 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define DEVNAME "ithc" -+#define DEVFULLNAME "Intel Touch Host Controller" -+ -+#undef pr_fmt -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#define CHECK(fn, ...) ({ int r = fn(__VA_ARGS__); if (r < 0) pci_err(ithc->pci, "%s: %s failed with %i\n", __func__, #fn, r); r; }) -+#define CHECK_RET(...) 
do { int r = CHECK(__VA_ARGS__); if (r < 0) return r; } while(0) -+ -+#define NUM_RX_BUF 16 -+ -+struct ithc; -+ -+#include "ithc-regs.h" -+#include "ithc-dma.h" -+ -+struct ithc { -+ char phys[32]; -+ struct pci_dev *pci; -+ int irq; -+ struct task_struct *poll_thread; -+ struct pm_qos_request activity_qos; -+ struct timer_list activity_timer; -+ -+ struct hid_device *hid; -+ bool hid_parse_done; -+ wait_queue_head_t wait_hid_parse; -+ wait_queue_head_t wait_hid_get_feature; -+ struct mutex hid_get_feature_mutex; -+ void *hid_get_feature_buf; -+ size_t hid_get_feature_size; -+ -+ struct ithc_registers __iomem *regs; -+ struct ithc_registers *prev_regs; // for debugging -+ struct ithc_device_config config; -+ struct ithc_dma_rx dma_rx[2]; -+ struct ithc_dma_tx dma_tx; -+}; -+ -+int ithc_reset(struct ithc *ithc); -+void ithc_set_active(struct ithc *ithc); -+int ithc_debug_init(struct ithc *ithc); -+void ithc_log_regs(struct ithc *ithc); -+ --- -2.42.0 - -From 9f8d2a0f4012644f56ed8dfd322e575b57e1c208 Mon Sep 17 00:00:00 2001 -From: quo -Date: Mon, 23 Oct 2023 10:15:29 +0200 -Subject: [PATCH] Update ITHC from module repo - -Changes: - - Added some comments and fixed a few checkpatch warnings - - Improved CPU latency QoS handling - - Retry reading the report descriptor on error / timeout - -Based on https://github.com/quo/ithc-linux/commit/0b8b45d9775e756d6bd3a699bfaf9f5bd7b9b10b - -Signed-off-by: Dorian Stoll -Patchset: ithc ---- - drivers/hid/ithc/ithc-debug.c | 94 +++++--- - drivers/hid/ithc/ithc-dma.c | 231 +++++++++++++----- - drivers/hid/ithc/ithc-dma.h | 4 +- - drivers/hid/ithc/ithc-main.c | 430 ++++++++++++++++++++++++---------- - drivers/hid/ithc/ithc-regs.c | 68 ++++-- - drivers/hid/ithc/ithc-regs.h | 19 +- - drivers/hid/ithc/ithc.h | 13 +- - 7 files changed, 623 insertions(+), 236 deletions(-) - -diff --git a/drivers/hid/ithc/ithc-debug.c b/drivers/hid/ithc/ithc-debug.c -index 57bf125c45bd5..1f1f1e33f2e5a 100644 ---- a/drivers/hid/ithc/ithc-debug.c -+++ 
b/drivers/hid/ithc/ithc-debug.c -@@ -1,10 +1,14 @@ -+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause -+ - #include "ithc.h" - --void ithc_log_regs(struct ithc *ithc) { -- if (!ithc->prev_regs) return; -- u32 __iomem *cur = (__iomem void*)ithc->regs; -- u32 *prev = (void*)ithc->prev_regs; -- for (int i = 1024; i < sizeof *ithc->regs / 4; i++) { -+void ithc_log_regs(struct ithc *ithc) -+{ -+ if (!ithc->prev_regs) -+ return; -+ u32 __iomem *cur = (__iomem void *)ithc->regs; -+ u32 *prev = (void *)ithc->prev_regs; -+ for (int i = 1024; i < sizeof(*ithc->regs) / 4; i++) { - u32 x = readl(cur + i); - if (x != prev[i]) { - pci_info(ithc->pci, "reg %04x: %08x -> %08x\n", i * 4, prev[i], x); -@@ -13,55 +17,79 @@ void ithc_log_regs(struct ithc *ithc) { - } - } - --static ssize_t ithc_debugfs_cmd_write(struct file *f, const char __user *buf, size_t len, loff_t *offset) { -+static ssize_t ithc_debugfs_cmd_write(struct file *f, const char __user *buf, size_t len, -+ loff_t *offset) -+{ -+ // Debug commands consist of a single letter followed by a list of numbers (decimal or -+ // hexadecimal, space-separated). - struct ithc *ithc = file_inode(f)->i_private; - char cmd[256]; -- if (!ithc || !ithc->pci) return -ENODEV; -- if (!len) return -EINVAL; -- if (len >= sizeof cmd) return -EINVAL; -- if (copy_from_user(cmd, buf, len)) return -EFAULT; -+ if (!ithc || !ithc->pci) -+ return -ENODEV; -+ if (!len) -+ return -EINVAL; -+ if (len >= sizeof(cmd)) -+ return -EINVAL; -+ if (copy_from_user(cmd, buf, len)) -+ return -EFAULT; - cmd[len] = 0; -- if (cmd[len-1] == '\n') cmd[len-1] = 0; -+ if (cmd[len-1] == '\n') -+ cmd[len-1] = 0; - pci_info(ithc->pci, "debug command: %s\n", cmd); -+ -+ // Parse the list of arguments into a u32 array. 
- u32 n = 0; - const char *s = cmd + 1; - u32 a[32]; - while (*s && *s != '\n') { -- if (n >= ARRAY_SIZE(a)) return -EINVAL; -- if (*s++ != ' ') return -EINVAL; -+ if (n >= ARRAY_SIZE(a)) -+ return -EINVAL; -+ if (*s++ != ' ') -+ return -EINVAL; - char *e; - a[n++] = simple_strtoul(s, &e, 0); -- if (e == s) return -EINVAL; -+ if (e == s) -+ return -EINVAL; - s = e; - } - ithc_log_regs(ithc); -- switch(cmd[0]) { -+ -+ // Execute the command. -+ switch (cmd[0]) { - case 'x': // reset - ithc_reset(ithc); - break; - case 'w': // write register: offset mask value -- if (n != 3 || (a[0] & 3)) return -EINVAL; -- pci_info(ithc->pci, "debug write 0x%04x = 0x%08x (mask 0x%08x)\n", a[0], a[2], a[1]); -+ if (n != 3 || (a[0] & 3)) -+ return -EINVAL; -+ pci_info(ithc->pci, "debug write 0x%04x = 0x%08x (mask 0x%08x)\n", -+ a[0], a[2], a[1]); - bitsl(((__iomem u32 *)ithc->regs) + a[0] / 4, a[1], a[2]); - break; - case 'r': // read register: offset -- if (n != 1 || (a[0] & 3)) return -EINVAL; -- pci_info(ithc->pci, "debug read 0x%04x = 0x%08x\n", a[0], readl(((__iomem u32 *)ithc->regs) + a[0] / 4)); -+ if (n != 1 || (a[0] & 3)) -+ return -EINVAL; -+ pci_info(ithc->pci, "debug read 0x%04x = 0x%08x\n", a[0], -+ readl(((__iomem u32 *)ithc->regs) + a[0] / 4)); - break; - case 's': // spi command: cmd offset len data... - // read config: s 4 0 64 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // set touch cfg: s 6 12 4 XX -- if (n < 3 || a[2] > (n - 3) * 4) return -EINVAL; -+ if (n < 3 || a[2] > (n - 3) * 4) -+ return -EINVAL; - pci_info(ithc->pci, "debug spi command %u with %u bytes of data\n", a[0], a[2]); - if (!CHECK(ithc_spi_command, ithc, a[0], a[1], a[2], a + 3)) -- for (u32 i = 0; i < (a[2] + 3) / 4; i++) pci_info(ithc->pci, "resp %u = 0x%08x\n", i, a[3+i]); -+ for (u32 i = 0; i < (a[2] + 3) / 4; i++) -+ pci_info(ithc->pci, "resp %u = 0x%08x\n", i, a[3+i]); - break; - case 'd': // dma command: cmd len data... 
- // get report descriptor: d 7 8 0 0 - // enable multitouch: d 3 2 0x0105 -- if (n < 2 || a[1] > (n - 2) * 4) return -EINVAL; -+ if (n < 2 || a[1] > (n - 2) * 4) -+ return -EINVAL; - pci_info(ithc->pci, "debug dma command %u with %u bytes of data\n", a[0], a[1]); -- if (ithc_dma_tx(ithc, a[0], a[1], a + 2)) pci_err(ithc->pci, "dma tx failed\n"); -+ if (ithc_dma_tx(ithc, a[0], a[1], a + 2)) -+ pci_err(ithc->pci, "dma tx failed\n"); - break; - default: - return -EINVAL; -@@ -75,21 +103,27 @@ static const struct file_operations ithc_debugfops_cmd = { - .write = ithc_debugfs_cmd_write, - }; - --static void ithc_debugfs_devres_release(struct device *dev, void *res) { -+static void ithc_debugfs_devres_release(struct device *dev, void *res) -+{ - struct dentry **dbgm = res; -- if (*dbgm) debugfs_remove_recursive(*dbgm); -+ if (*dbgm) -+ debugfs_remove_recursive(*dbgm); - } - --int ithc_debug_init(struct ithc *ithc) { -- struct dentry **dbgm = devres_alloc(ithc_debugfs_devres_release, sizeof *dbgm, GFP_KERNEL); -- if (!dbgm) return -ENOMEM; -+int ithc_debug_init(struct ithc *ithc) -+{ -+ struct dentry **dbgm = devres_alloc(ithc_debugfs_devres_release, sizeof(*dbgm), GFP_KERNEL); -+ if (!dbgm) -+ return -ENOMEM; - devres_add(&ithc->pci->dev, dbgm); - struct dentry *dbg = debugfs_create_dir(DEVNAME, NULL); -- if (IS_ERR(dbg)) return PTR_ERR(dbg); -+ if (IS_ERR(dbg)) -+ return PTR_ERR(dbg); - *dbgm = dbg; - - struct dentry *cmd = debugfs_create_file("cmd", 0220, dbg, ithc, &ithc_debugfops_cmd); -- if (IS_ERR(cmd)) return PTR_ERR(cmd); -+ if (IS_ERR(cmd)) -+ return PTR_ERR(cmd); - - return 0; - } -diff --git a/drivers/hid/ithc/ithc-dma.c b/drivers/hid/ithc/ithc-dma.c -index 7e89b3496918d..ffb8689b8a780 100644 ---- a/drivers/hid/ithc/ithc-dma.c -+++ b/drivers/hid/ithc/ithc-dma.c -@@ -1,59 +1,91 @@ -+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause -+ - #include "ithc.h" - --static int ithc_dma_prd_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *p, unsigned num_buffers, 
unsigned num_pages, enum dma_data_direction dir) { -+// The THC uses tables of PRDs (physical region descriptors) to describe the TX and RX data buffers. -+// Each PRD contains the DMA address and size of a block of DMA memory, and some status flags. -+// This allows each data buffer to consist of multiple non-contiguous blocks of memory. -+ -+static int ithc_dma_prd_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *p, -+ unsigned int num_buffers, unsigned int num_pages, enum dma_data_direction dir) -+{ - p->num_pages = num_pages; - p->dir = dir; -+ // We allocate enough space to have one PRD per data buffer page, however if the data -+ // buffer pages happen to be contiguous, we can describe the buffer using fewer PRDs, so -+ // some will remain unused (which is fine). - p->size = round_up(num_buffers * num_pages * sizeof(struct ithc_phys_region_desc), PAGE_SIZE); - p->addr = dmam_alloc_coherent(&ithc->pci->dev, p->size, &p->dma_addr, GFP_KERNEL); -- if (!p->addr) return -ENOMEM; -- if (p->dma_addr & (PAGE_SIZE - 1)) return -EFAULT; -+ if (!p->addr) -+ return -ENOMEM; -+ if (p->dma_addr & (PAGE_SIZE - 1)) -+ return -EFAULT; - return 0; - } - -+// Devres managed sg_table wrapper. 
- struct ithc_sg_table { - void *addr; - struct sg_table sgt; - enum dma_data_direction dir; - }; --static void ithc_dma_sgtable_free(struct sg_table *sgt) { -+static void ithc_dma_sgtable_free(struct sg_table *sgt) -+{ - struct scatterlist *sg; - int i; - for_each_sgtable_sg(sgt, sg, i) { - struct page *p = sg_page(sg); -- if (p) __free_page(p); -+ if (p) -+ __free_page(p); - } - sg_free_table(sgt); - } --static void ithc_dma_data_devres_release(struct device *dev, void *res) { -+static void ithc_dma_data_devres_release(struct device *dev, void *res) -+{ - struct ithc_sg_table *sgt = res; -- if (sgt->addr) vunmap(sgt->addr); -+ if (sgt->addr) -+ vunmap(sgt->addr); - dma_unmap_sgtable(dev, &sgt->sgt, sgt->dir, 0); - ithc_dma_sgtable_free(&sgt->sgt); - } - --static int ithc_dma_data_alloc(struct ithc* ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b) { -- // We don't use dma_alloc_coherent for data buffers, because they don't have to be contiguous (we can use one PRD per page) or coherent (they are unidirectional). -- // Instead we use an sg_table of individually allocated pages (5.13 has dma_alloc_noncontiguous for this, but we'd like to support 5.10 for now). -+static int ithc_dma_data_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, -+ struct ithc_dma_data_buffer *b) -+{ -+ // We don't use dma_alloc_coherent() for data buffers, because they don't have to be -+ // coherent (they are unidirectional) or contiguous (we can use one PRD per page). -+ // We could use dma_alloc_noncontiguous(), however this still always allocates a single -+ // DMA mapped segment, which is more restrictive than what we need. -+ // Instead we use an sg_table of individually allocated pages. 
- struct page *pages[16]; -- if (prds->num_pages == 0 || prds->num_pages > ARRAY_SIZE(pages)) return -EINVAL; -+ if (prds->num_pages == 0 || prds->num_pages > ARRAY_SIZE(pages)) -+ return -EINVAL; - b->active_idx = -1; -- struct ithc_sg_table *sgt = devres_alloc(ithc_dma_data_devres_release, sizeof *sgt, GFP_KERNEL); -- if (!sgt) return -ENOMEM; -+ struct ithc_sg_table *sgt = devres_alloc( -+ ithc_dma_data_devres_release, sizeof(*sgt), GFP_KERNEL); -+ if (!sgt) -+ return -ENOMEM; - sgt->dir = prds->dir; -+ - if (!sg_alloc_table(&sgt->sgt, prds->num_pages, GFP_KERNEL)) { - struct scatterlist *sg; - int i; - bool ok = true; - for_each_sgtable_sg(&sgt->sgt, sg, i) { -- struct page *p = pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); // don't need __GFP_DMA for PCI DMA -- if (!p) { ok = false; break; } -+ // NOTE: don't need __GFP_DMA for PCI DMA -+ struct page *p = pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); -+ if (!p) { -+ ok = false; -+ break; -+ } - sg_set_page(sg, p, PAGE_SIZE, 0); - } - if (ok && !dma_map_sgtable(&ithc->pci->dev, &sgt->sgt, prds->dir, 0)) { - devres_add(&ithc->pci->dev, sgt); - b->sgt = &sgt->sgt; - b->addr = sgt->addr = vmap(pages, prds->num_pages, 0, PAGE_KERNEL); -- if (!b->addr) return -ENOMEM; -+ if (!b->addr) -+ return -ENOMEM; - return 0; - } - ithc_dma_sgtable_free(&sgt->sgt); -@@ -62,17 +94,29 @@ static int ithc_dma_data_alloc(struct ithc* ithc, struct ithc_dma_prd_buffer *pr - return -ENOMEM; - } - --static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b, unsigned idx) { -+static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, -+ struct ithc_dma_data_buffer *b, unsigned int idx) -+{ -+ // Give a buffer to the THC. 
- struct ithc_phys_region_desc *prd = prds->addr; - prd += idx * prds->num_pages; -- if (b->active_idx >= 0) { pci_err(ithc->pci, "buffer already active\n"); return -EINVAL; } -+ if (b->active_idx >= 0) { -+ pci_err(ithc->pci, "buffer already active\n"); -+ return -EINVAL; -+ } - b->active_idx = idx; - if (prds->dir == DMA_TO_DEVICE) { -- if (b->data_size > PAGE_SIZE) return -EINVAL; -+ // TX buffer: Caller should have already filled the data buffer, so just fill -+ // the PRD and flush. -+ // (TODO: Support multi-page TX buffers. So far no device seems to use or need -+ // these though.) -+ if (b->data_size > PAGE_SIZE) -+ return -EINVAL; - prd->addr = sg_dma_address(b->sgt->sgl) >> 10; - prd->size = b->data_size | PRD_FLAG_END; - flush_kernel_vmap_range(b->addr, b->data_size); - } else if (prds->dir == DMA_FROM_DEVICE) { -+ // RX buffer: Reset PRDs. - struct scatterlist *sg; - int i; - for_each_sgtable_dma_sg(b->sgt, sg, i) { -@@ -87,21 +131,34 @@ static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffe - return 0; - } - --static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b, unsigned idx) { -+static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, -+ struct ithc_dma_data_buffer *b, unsigned int idx) -+{ -+ // Take a buffer from the THC. - struct ithc_phys_region_desc *prd = prds->addr; - prd += idx * prds->num_pages; -- if (b->active_idx != idx) { pci_err(ithc->pci, "wrong buffer index\n"); return -EINVAL; } -+ // This is purely a sanity check. We don't strictly need the idx parameter for this -+ // function, because it should always be the same as active_idx, unless we have a bug. -+ if (b->active_idx != idx) { -+ pci_err(ithc->pci, "wrong buffer index\n"); -+ return -EINVAL; -+ } - b->active_idx = -1; - if (prds->dir == DMA_FROM_DEVICE) { -+ // RX buffer: Calculate actual received data size from PRDs. 
- dma_rmb(); // for the prds - b->data_size = 0; - struct scatterlist *sg; - int i; - for_each_sgtable_dma_sg(b->sgt, sg, i) { -- unsigned size = prd->size; -+ unsigned int size = prd->size; - b->data_size += size & PRD_SIZE_MASK; -- if (size & PRD_FLAG_END) break; -- if ((size & PRD_SIZE_MASK) != sg_dma_len(sg)) { pci_err(ithc->pci, "truncated prd\n"); break; } -+ if (size & PRD_FLAG_END) -+ break; -+ if ((size & PRD_SIZE_MASK) != sg_dma_len(sg)) { -+ pci_err(ithc->pci, "truncated prd\n"); -+ break; -+ } - prd++; - } - invalidate_kernel_vmap_range(b->addr, b->data_size); -@@ -110,93 +167,139 @@ static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffe - return 0; - } - --int ithc_dma_rx_init(struct ithc *ithc, u8 channel, const char *devname) { -+int ithc_dma_rx_init(struct ithc *ithc, u8 channel) -+{ - struct ithc_dma_rx *rx = &ithc->dma_rx[channel]; - mutex_init(&rx->mutex); -+ -+ // Allocate buffers. - u32 buf_size = DEVCFG_DMA_RX_SIZE(ithc->config.dma_buf_sizes); -- unsigned num_pages = (buf_size + PAGE_SIZE - 1) / PAGE_SIZE; -- pci_dbg(ithc->pci, "allocating rx buffers: num = %u, size = %u, pages = %u\n", NUM_RX_BUF, buf_size, num_pages); -+ unsigned int num_pages = (buf_size + PAGE_SIZE - 1) / PAGE_SIZE; -+ pci_dbg(ithc->pci, "allocating rx buffers: num = %u, size = %u, pages = %u\n", -+ NUM_RX_BUF, buf_size, num_pages); - CHECK_RET(ithc_dma_prd_alloc, ithc, &rx->prds, NUM_RX_BUF, num_pages, DMA_FROM_DEVICE); -- for (unsigned i = 0; i < NUM_RX_BUF; i++) -+ for (unsigned int i = 0; i < NUM_RX_BUF; i++) - CHECK_RET(ithc_dma_data_alloc, ithc, &rx->prds, &rx->bufs[i]); -+ -+ // Init registers. 
- writeb(DMA_RX_CONTROL2_RESET, &ithc->regs->dma_rx[channel].control2); - lo_hi_writeq(rx->prds.dma_addr, &ithc->regs->dma_rx[channel].addr); - writeb(NUM_RX_BUF - 1, &ithc->regs->dma_rx[channel].num_bufs); - writeb(num_pages - 1, &ithc->regs->dma_rx[channel].num_prds); - u8 head = readb(&ithc->regs->dma_rx[channel].head); -- if (head) { pci_err(ithc->pci, "head is nonzero (%u)\n", head); return -EIO; } -- for (unsigned i = 0; i < NUM_RX_BUF; i++) -+ if (head) { -+ pci_err(ithc->pci, "head is nonzero (%u)\n", head); -+ return -EIO; -+ } -+ -+ // Init buffers. -+ for (unsigned int i = 0; i < NUM_RX_BUF; i++) - CHECK_RET(ithc_dma_data_buffer_put, ithc, &rx->prds, &rx->bufs[i], i); -+ - writeb(head ^ DMA_RX_WRAP_FLAG, &ithc->regs->dma_rx[channel].tail); - return 0; - } --void ithc_dma_rx_enable(struct ithc *ithc, u8 channel) { -- bitsb_set(&ithc->regs->dma_rx[channel].control, DMA_RX_CONTROL_ENABLE | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_DATA); -- CHECK(waitl, ithc, &ithc->regs->dma_rx[1].status, DMA_RX_STATUS_ENABLED, DMA_RX_STATUS_ENABLED); -+ -+void ithc_dma_rx_enable(struct ithc *ithc, u8 channel) -+{ -+ bitsb_set(&ithc->regs->dma_rx[channel].control, -+ DMA_RX_CONTROL_ENABLE | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_DATA); -+ CHECK(waitl, ithc, &ithc->regs->dma_rx[channel].status, -+ DMA_RX_STATUS_ENABLED, DMA_RX_STATUS_ENABLED); - } - --int ithc_dma_tx_init(struct ithc *ithc) { -+int ithc_dma_tx_init(struct ithc *ithc) -+{ - struct ithc_dma_tx *tx = &ithc->dma_tx; - mutex_init(&tx->mutex); -+ -+ // Allocate buffers. 
- tx->max_size = DEVCFG_DMA_TX_SIZE(ithc->config.dma_buf_sizes); -- unsigned num_pages = (tx->max_size + PAGE_SIZE - 1) / PAGE_SIZE; -- pci_dbg(ithc->pci, "allocating tx buffers: size = %u, pages = %u\n", tx->max_size, num_pages); -+ unsigned int num_pages = (tx->max_size + PAGE_SIZE - 1) / PAGE_SIZE; -+ pci_dbg(ithc->pci, "allocating tx buffers: size = %u, pages = %u\n", -+ tx->max_size, num_pages); - CHECK_RET(ithc_dma_prd_alloc, ithc, &tx->prds, 1, num_pages, DMA_TO_DEVICE); - CHECK_RET(ithc_dma_data_alloc, ithc, &tx->prds, &tx->buf); -+ -+ // Init registers. - lo_hi_writeq(tx->prds.dma_addr, &ithc->regs->dma_tx.addr); - writeb(num_pages - 1, &ithc->regs->dma_tx.num_prds); -+ -+ // Init buffers. - CHECK_RET(ithc_dma_data_buffer_put, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0); - return 0; - } - --static int ithc_dma_rx_process_buf(struct ithc *ithc, struct ithc_dma_data_buffer *data, u8 channel, u8 buf) { -+static int ithc_dma_rx_process_buf(struct ithc *ithc, struct ithc_dma_data_buffer *data, -+ u8 channel, u8 buf) -+{ - if (buf >= NUM_RX_BUF) { - pci_err(ithc->pci, "invalid dma ringbuffer index\n"); - return -EINVAL; - } -- ithc_set_active(ithc); - u32 len = data->data_size; - struct ithc_dma_rx_header *hdr = data->addr; - u8 *hiddata = (void *)(hdr + 1); -- if (len >= sizeof *hdr && hdr->code == DMA_RX_CODE_RESET) { -+ if (len >= sizeof(*hdr) && hdr->code == DMA_RX_CODE_RESET) { -+ // The THC sends a reset request when we need to reinitialize the device. -+ // This usually only happens if we send an invalid command or put the device -+ // in a bad state. - CHECK(ithc_reset, ithc); -- } else if (len < sizeof *hdr || len != sizeof *hdr + hdr->data_size) { -+ } else if (len < sizeof(*hdr) || len != sizeof(*hdr) + hdr->data_size) { - if (hdr->code == DMA_RX_CODE_INPUT_REPORT) { -- // When the CPU enters a low power state during DMA, we can get truncated messages. 
-- // Typically this will be a single touch HID report that is only 1 byte, or a multitouch report that is 257 bytes. -+ // When the CPU enters a low power state during DMA, we can get truncated -+ // messages. For Surface devices, this will typically be a single touch -+ // report that is only 1 byte, or a multitouch report that is 257 bytes. - // See also ithc_set_active(). - } else { -- pci_err(ithc->pci, "invalid dma rx data! channel %u, buffer %u, size %u, code %u, data size %u\n", channel, buf, len, hdr->code, hdr->data_size); -- print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, hdr, min(len, 0x400u), 0); -+ pci_err(ithc->pci, "invalid dma rx data! channel %u, buffer %u, size %u, code %u, data size %u\n", -+ channel, buf, len, hdr->code, hdr->data_size); -+ print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, -+ hdr, min(len, 0x400u), 0); - } - } else if (hdr->code == DMA_RX_CODE_REPORT_DESCRIPTOR && hdr->data_size > 8) { -+ // Response to a 'get report descriptor' request. -+ // The actual descriptor is preceded by 8 nul bytes. - CHECK(hid_parse_report, ithc->hid, hiddata + 8, hdr->data_size - 8); - WRITE_ONCE(ithc->hid_parse_done, true); - wake_up(&ithc->wait_hid_parse); - } else if (hdr->code == DMA_RX_CODE_INPUT_REPORT) { -+ // Standard HID input report containing touch data. - CHECK(hid_input_report, ithc->hid, HID_INPUT_REPORT, hiddata, hdr->data_size, 1); - } else if (hdr->code == DMA_RX_CODE_FEATURE_REPORT) { -+ // Response to a 'get feature' request. 
- bool done = false; - mutex_lock(&ithc->hid_get_feature_mutex); - if (ithc->hid_get_feature_buf) { -- if (hdr->data_size < ithc->hid_get_feature_size) ithc->hid_get_feature_size = hdr->data_size; -+ if (hdr->data_size < ithc->hid_get_feature_size) -+ ithc->hid_get_feature_size = hdr->data_size; - memcpy(ithc->hid_get_feature_buf, hiddata, ithc->hid_get_feature_size); - ithc->hid_get_feature_buf = NULL; - done = true; - } - mutex_unlock(&ithc->hid_get_feature_mutex); -- if (done) wake_up(&ithc->wait_hid_get_feature); -- else CHECK(hid_input_report, ithc->hid, HID_FEATURE_REPORT, hiddata, hdr->data_size, 1); -+ if (done) { -+ wake_up(&ithc->wait_hid_get_feature); -+ } else { -+ // Received data without a matching request, or the request already -+ // timed out. (XXX What's the correct thing to do here?) -+ CHECK(hid_input_report, ithc->hid, HID_FEATURE_REPORT, -+ hiddata, hdr->data_size, 1); -+ } - } else { -- pci_dbg(ithc->pci, "unhandled dma rx data! channel %u, buffer %u, size %u, code %u\n", channel, buf, len, hdr->code); -- print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, hdr, min(len, 0x400u), 0); -+ pci_dbg(ithc->pci, "unhandled dma rx data! channel %u, buffer %u, size %u, code %u\n", -+ channel, buf, len, hdr->code); -+ print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, -+ hdr, min(len, 0x400u), 0); - } - return 0; - } - --static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) { -+static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) -+{ -+ // Process all filled RX buffers from the ringbuffer. 
- struct ithc_dma_rx *rx = &ithc->dma_rx[channel]; -- unsigned n = rx->num_received; -+ unsigned int n = rx->num_received; - u8 head_wrap = readb(&ithc->regs->dma_rx[channel].head); - while (1) { - u8 tail = n % NUM_RX_BUF; -@@ -204,7 +307,8 @@ static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) { - writeb(tail_wrap, &ithc->regs->dma_rx[channel].tail); - // ringbuffer is full if tail_wrap == head_wrap - // ringbuffer is empty if tail_wrap == head_wrap ^ WRAP_FLAG -- if (tail_wrap == (head_wrap ^ DMA_RX_WRAP_FLAG)) return 0; -+ if (tail_wrap == (head_wrap ^ DMA_RX_WRAP_FLAG)) -+ return 0; - - // take the buffer that the device just filled - struct ithc_dma_data_buffer *b = &rx->bufs[n % NUM_RX_BUF]; -@@ -218,7 +322,8 @@ static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) { - CHECK_RET(ithc_dma_data_buffer_put, ithc, &rx->prds, b, tail); - } - } --int ithc_dma_rx(struct ithc *ithc, u8 channel) { -+int ithc_dma_rx(struct ithc *ithc, u8 channel) -+{ - struct ithc_dma_rx *rx = &ithc->dma_rx[channel]; - mutex_lock(&rx->mutex); - int ret = ithc_dma_rx_unlocked(ithc, channel); -@@ -226,14 +331,21 @@ int ithc_dma_rx(struct ithc *ithc, u8 channel) { - return ret; - } - --static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) { -+static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) -+{ -+ ithc_set_active(ithc, 100 * USEC_PER_MSEC); -+ -+ // Send a single TX buffer to the THC. - pci_dbg(ithc->pci, "dma tx command %u, size %u\n", cmdcode, datasize); - struct ithc_dma_tx_header *hdr; -+ // Data must be padded to next 4-byte boundary. - u8 padding = datasize & 3 ? 
4 - (datasize & 3) : 0; -- unsigned fullsize = sizeof *hdr + datasize + padding; -- if (fullsize > ithc->dma_tx.max_size || fullsize > PAGE_SIZE) return -EINVAL; -+ unsigned int fullsize = sizeof(*hdr) + datasize + padding; -+ if (fullsize > ithc->dma_tx.max_size || fullsize > PAGE_SIZE) -+ return -EINVAL; - CHECK_RET(ithc_dma_data_buffer_get, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0); - -+ // Fill the TX buffer with header and data. - ithc->dma_tx.buf.data_size = fullsize; - hdr = ithc->dma_tx.buf.addr; - hdr->code = cmdcode; -@@ -241,15 +353,18 @@ static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, vo - u8 *dest = (void *)(hdr + 1); - memcpy(dest, data, datasize); - dest += datasize; -- for (u8 p = 0; p < padding; p++) *dest++ = 0; -+ for (u8 p = 0; p < padding; p++) -+ *dest++ = 0; - CHECK_RET(ithc_dma_data_buffer_put, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0); - -+ // Let the THC process the buffer. - bitsb_set(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND); - CHECK_RET(waitb, ithc, &ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND, 0); - writel(DMA_TX_STATUS_DONE, &ithc->regs->dma_tx.status); - return 0; - } --int ithc_dma_tx(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) { -+int ithc_dma_tx(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) -+{ - mutex_lock(&ithc->dma_tx.mutex); - int ret = ithc_dma_tx_unlocked(ithc, cmdcode, datasize, data); - mutex_unlock(&ithc->dma_tx.mutex); -diff --git a/drivers/hid/ithc/ithc-dma.h b/drivers/hid/ithc/ithc-dma.h -index d9f2c19a13f3a..93652e4476bf8 100644 ---- a/drivers/hid/ithc/ithc-dma.h -+++ b/drivers/hid/ithc/ithc-dma.h -@@ -1,3 +1,5 @@ -+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ -+ - #define PRD_SIZE_MASK 0xffffff - #define PRD_FLAG_END 0x1000000 - #define PRD_FLAG_SUCCESS 0x2000000 -@@ -59,7 +61,7 @@ struct ithc_dma_rx { - struct ithc_dma_data_buffer bufs[NUM_RX_BUF]; - }; - --int ithc_dma_rx_init(struct ithc *ithc, u8 channel, const char 
*devname); -+int ithc_dma_rx_init(struct ithc *ithc, u8 channel); - void ithc_dma_rx_enable(struct ithc *ithc, u8 channel); - int ithc_dma_tx_init(struct ithc *ithc); - int ithc_dma_rx(struct ithc *ithc, u8 channel); -diff --git a/drivers/hid/ithc/ithc-main.c b/drivers/hid/ithc/ithc-main.c -index 09512b9cb4d31..87ed4aa70fda0 100644 ---- a/drivers/hid/ithc/ithc-main.c -+++ b/drivers/hid/ithc/ithc-main.c -@@ -1,3 +1,5 @@ -+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause -+ - #include "ithc.h" - - MODULE_DESCRIPTION("Intel Touch Host Controller driver"); -@@ -42,6 +44,9 @@ static const struct pci_device_id ithc_pci_tbl[] = { - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_RPL_S_PORT2) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_MTL_PORT1) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_MTL_PORT2) }, -+ // XXX So far the THC seems to be the only Intel PCI device with PCI_CLASS_INPUT_PEN, -+ // so instead of the device list we could just do: -+ // { .vendor = PCI_VENDOR_ID_INTEL, .device = PCI_ANY_ID, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, .class = PCI_CLASS_INPUT_PEN, .class_mask = ~0, }, - {} - }; - MODULE_DEVICE_TABLE(pci, ithc_pci_tbl); -@@ -52,6 +57,7 @@ static bool ithc_use_polling = false; - module_param_named(poll, ithc_use_polling, bool, 0); - MODULE_PARM_DESC(poll, "Use polling instead of interrupts"); - -+// Since all known devices seem to use only channel 1, by default we disable channel 0. - static bool ithc_use_rx0 = false; - module_param_named(rx0, ithc_use_rx0, bool, 0); - MODULE_PARM_DESC(rx0, "Use DMA RX channel 0"); -@@ -60,37 +66,56 @@ static bool ithc_use_rx1 = true; - module_param_named(rx1, ithc_use_rx1, bool, 0); - MODULE_PARM_DESC(rx1, "Use DMA RX channel 1"); - -+// Values below 250 seem to work well on the SP7+. If this is set too high, you may observe cursor stuttering. 
-+static int ithc_dma_latency_us = 200; -+module_param_named(dma_latency_us, ithc_dma_latency_us, int, 0); -+MODULE_PARM_DESC(dma_latency_us, "Determines the CPU latency QoS value for DMA transfers (in microseconds), -1 to disable latency QoS"); -+ -+// Values above 1700 seem to work well on the SP7+. If this is set too low, you may observe cursor stuttering. -+static unsigned int ithc_dma_early_us = 2000; -+module_param_named(dma_early_us, ithc_dma_early_us, uint, 0); -+MODULE_PARM_DESC(dma_early_us, "Determines how early the CPU latency QoS value is applied before the next expected IRQ (in microseconds)"); -+ - static bool ithc_log_regs_enabled = false; - module_param_named(logregs, ithc_log_regs_enabled, bool, 0); - MODULE_PARM_DESC(logregs, "Log changes in register values (for debugging)"); - - // Sysfs attributes - --static bool ithc_is_config_valid(struct ithc *ithc) { -+static bool ithc_is_config_valid(struct ithc *ithc) -+{ - return ithc->config.device_id == DEVCFG_DEVICE_ID_TIC; - } - --static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, char *buf) { -+static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, char *buf) -+{ - struct ithc *ithc = dev_get_drvdata(dev); -- if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; -+ if (!ithc || !ithc_is_config_valid(ithc)) -+ return -ENODEV; - return sprintf(buf, "0x%04x", ithc->config.vendor_id); - } - static DEVICE_ATTR_RO(vendor); --static ssize_t product_show(struct device *dev, struct device_attribute *attr, char *buf) { -+static ssize_t product_show(struct device *dev, struct device_attribute *attr, char *buf) -+{ - struct ithc *ithc = dev_get_drvdata(dev); -- if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; -+ if (!ithc || !ithc_is_config_valid(ithc)) -+ return -ENODEV; - return sprintf(buf, "0x%04x", ithc->config.product_id); - } - static DEVICE_ATTR_RO(product); --static ssize_t revision_show(struct device *dev, struct device_attribute *attr, 
char *buf) { -+static ssize_t revision_show(struct device *dev, struct device_attribute *attr, char *buf) -+{ - struct ithc *ithc = dev_get_drvdata(dev); -- if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; -+ if (!ithc || !ithc_is_config_valid(ithc)) -+ return -ENODEV; - return sprintf(buf, "%u", ithc->config.revision); - } - static DEVICE_ATTR_RO(revision); --static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf) { -+static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf) -+{ - struct ithc *ithc = dev_get_drvdata(dev); -- if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; -+ if (!ithc || !ithc_is_config_valid(ithc)) -+ return -ENODEV; - u32 v = ithc->config.fw_version; - return sprintf(buf, "%i.%i.%i.%i", v >> 24, v >> 16 & 0xff, v >> 8 & 0xff, v & 0xff); - } -@@ -117,45 +142,75 @@ static void ithc_hid_stop(struct hid_device *hdev) { } - static int ithc_hid_open(struct hid_device *hdev) { return 0; } - static void ithc_hid_close(struct hid_device *hdev) { } - --static int ithc_hid_parse(struct hid_device *hdev) { -+static int ithc_hid_parse(struct hid_device *hdev) -+{ - struct ithc *ithc = hdev->driver_data; - u64 val = 0; - WRITE_ONCE(ithc->hid_parse_done, false); -- CHECK_RET(ithc_dma_tx, ithc, DMA_TX_CODE_GET_REPORT_DESCRIPTOR, sizeof val, &val); -- if (!wait_event_timeout(ithc->wait_hid_parse, READ_ONCE(ithc->hid_parse_done), msecs_to_jiffies(1000))) return -ETIMEDOUT; -- return 0; -+ for (int retries = 0; ; retries++) { -+ CHECK_RET(ithc_dma_tx, ithc, DMA_TX_CODE_GET_REPORT_DESCRIPTOR, sizeof(val), &val); -+ if (wait_event_timeout(ithc->wait_hid_parse, READ_ONCE(ithc->hid_parse_done), -+ msecs_to_jiffies(200))) -+ return 0; -+ if (retries > 5) { -+ pci_err(ithc->pci, "failed to read report descriptor\n"); -+ return -ETIMEDOUT; -+ } -+ pci_warn(ithc->pci, "failed to read report descriptor, retrying\n"); -+ } - } - --static int ithc_hid_raw_request(struct hid_device 
*hdev, unsigned char reportnum, __u8 *buf, size_t len, unsigned char rtype, int reqtype) { -+static int ithc_hid_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, -+ size_t len, unsigned char rtype, int reqtype) -+{ - struct ithc *ithc = hdev->driver_data; -- if (!buf || !len) return -EINVAL; -+ if (!buf || !len) -+ return -EINVAL; - u32 code; -- if (rtype == HID_OUTPUT_REPORT && reqtype == HID_REQ_SET_REPORT) code = DMA_TX_CODE_OUTPUT_REPORT; -- else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_SET_REPORT) code = DMA_TX_CODE_SET_FEATURE; -- else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_GET_REPORT) code = DMA_TX_CODE_GET_FEATURE; -- else { -- pci_err(ithc->pci, "unhandled hid request %i %i for report id %i\n", rtype, reqtype, reportnum); -+ if (rtype == HID_OUTPUT_REPORT && reqtype == HID_REQ_SET_REPORT) { -+ code = DMA_TX_CODE_OUTPUT_REPORT; -+ } else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_SET_REPORT) { -+ code = DMA_TX_CODE_SET_FEATURE; -+ } else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_GET_REPORT) { -+ code = DMA_TX_CODE_GET_FEATURE; -+ } else { -+ pci_err(ithc->pci, "unhandled hid request %i %i for report id %i\n", -+ rtype, reqtype, reportnum); - return -EINVAL; - } - buf[0] = reportnum; -+ - if (reqtype == HID_REQ_GET_REPORT) { -+ // Prepare for response. - mutex_lock(&ithc->hid_get_feature_mutex); - ithc->hid_get_feature_buf = buf; - ithc->hid_get_feature_size = len; - mutex_unlock(&ithc->hid_get_feature_mutex); -+ -+ // Transmit 'get feature' request. 
- int r = CHECK(ithc_dma_tx, ithc, code, 1, buf); - if (!r) { -- r = wait_event_interruptible_timeout(ithc->wait_hid_get_feature, !ithc->hid_get_feature_buf, msecs_to_jiffies(1000)); -- if (!r) r = -ETIMEDOUT; -- else if (r < 0) r = -EINTR; -- else r = 0; -+ r = wait_event_interruptible_timeout(ithc->wait_hid_get_feature, -+ !ithc->hid_get_feature_buf, msecs_to_jiffies(1000)); -+ if (!r) -+ r = -ETIMEDOUT; -+ else if (r < 0) -+ r = -EINTR; -+ else -+ r = 0; - } -+ -+ // If everything went ok, the buffer has been filled with the response data. -+ // Return the response size. - mutex_lock(&ithc->hid_get_feature_mutex); - ithc->hid_get_feature_buf = NULL; -- if (!r) r = ithc->hid_get_feature_size; -+ if (!r) -+ r = ithc->hid_get_feature_size; - mutex_unlock(&ithc->hid_get_feature_mutex); - return r; - } -+ -+ // 'Set feature', or 'output report'. These don't have a response. - CHECK_RET(ithc_dma_tx, ithc, code, len, buf); - return 0; - } -@@ -169,17 +224,22 @@ static struct hid_ll_driver ithc_ll_driver = { - .raw_request = ithc_hid_raw_request, - }; - --static void ithc_hid_devres_release(struct device *dev, void *res) { -+static void ithc_hid_devres_release(struct device *dev, void *res) -+{ - struct hid_device **hidm = res; -- if (*hidm) hid_destroy_device(*hidm); -+ if (*hidm) -+ hid_destroy_device(*hidm); - } - --static int ithc_hid_init(struct ithc *ithc) { -- struct hid_device **hidm = devres_alloc(ithc_hid_devres_release, sizeof *hidm, GFP_KERNEL); -- if (!hidm) return -ENOMEM; -+static int ithc_hid_init(struct ithc *ithc) -+{ -+ struct hid_device **hidm = devres_alloc(ithc_hid_devres_release, sizeof(*hidm), GFP_KERNEL); -+ if (!hidm) -+ return -ENOMEM; - devres_add(&ithc->pci->dev, hidm); - struct hid_device *hid = hid_allocate_device(); -- if (IS_ERR(hid)) return PTR_ERR(hid); -+ if (IS_ERR(hid)) -+ return PTR_ERR(hid); - *hidm = hid; - - strscpy(hid->name, DEVFULLNAME, sizeof(hid->name)); -@@ -198,27 +258,45 @@ static int ithc_hid_init(struct ithc *ithc) { - 
- // Interrupts/polling - --static void ithc_activity_timer_callback(struct timer_list *t) { -- struct ithc *ithc = container_of(t, struct ithc, activity_timer); -- cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE); -+static enum hrtimer_restart ithc_activity_start_timer_callback(struct hrtimer *t) -+{ -+ struct ithc *ithc = container_of(t, struct ithc, activity_start_timer); -+ ithc_set_active(ithc, ithc_dma_early_us * 2 + USEC_PER_MSEC); -+ return HRTIMER_NORESTART; - } - --void ithc_set_active(struct ithc *ithc) { -- // When CPU usage is very low, the CPU can enter various low power states (C2-C10). -- // This disrupts DMA, causing truncated DMA messages. ERROR_FLAG_DMA_UNKNOWN_12 will be set when this happens. -- // The amount of truncated messages can become very high, resulting in user-visible effects (laggy/stuttering cursor). -- // To avoid this, we use a CPU latency QoS request to prevent the CPU from entering low power states during touch interactions. -- cpu_latency_qos_update_request(&ithc->activity_qos, 0); -- mod_timer(&ithc->activity_timer, jiffies + msecs_to_jiffies(1000)); --} -- --static int ithc_set_device_enabled(struct ithc *ithc, bool enable) { -- u32 x = ithc->config.touch_cfg = (ithc->config.touch_cfg & ~(u32)DEVCFG_TOUCH_MASK) | DEVCFG_TOUCH_UNKNOWN_2 -- | (enable ? 
DEVCFG_TOUCH_ENABLE | DEVCFG_TOUCH_UNKNOWN_3 | DEVCFG_TOUCH_UNKNOWN_4 : 0); -- return ithc_spi_command(ithc, SPI_CMD_CODE_WRITE, offsetof(struct ithc_device_config, touch_cfg), sizeof x, &x); -+static enum hrtimer_restart ithc_activity_end_timer_callback(struct hrtimer *t) -+{ -+ struct ithc *ithc = container_of(t, struct ithc, activity_end_timer); -+ cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE); -+ return HRTIMER_NORESTART; - } - --static void ithc_disable_interrupts(struct ithc *ithc) { -+void ithc_set_active(struct ithc *ithc, unsigned int duration_us) -+{ -+ if (ithc_dma_latency_us < 0) -+ return; -+ // When CPU usage is very low, the CPU can enter various low power states (C2-C10). -+ // This disrupts DMA, causing truncated DMA messages. ERROR_FLAG_DMA_RX_TIMEOUT will be -+ // set when this happens. The amount of truncated messages can become very high, resulting -+ // in user-visible effects (laggy/stuttering cursor). To avoid this, we use a CPU latency -+ // QoS request to prevent the CPU from entering low power states during touch interactions. -+ cpu_latency_qos_update_request(&ithc->activity_qos, ithc_dma_latency_us); -+ hrtimer_start_range_ns(&ithc->activity_end_timer, -+ ns_to_ktime(duration_us * NSEC_PER_USEC), duration_us * NSEC_PER_USEC, HRTIMER_MODE_REL); -+} -+ -+static int ithc_set_device_enabled(struct ithc *ithc, bool enable) -+{ -+ u32 x = ithc->config.touch_cfg = -+ (ithc->config.touch_cfg & ~(u32)DEVCFG_TOUCH_MASK) | DEVCFG_TOUCH_UNKNOWN_2 | -+ (enable ? 
DEVCFG_TOUCH_ENABLE | DEVCFG_TOUCH_UNKNOWN_3 | DEVCFG_TOUCH_UNKNOWN_4 : 0); -+ return ithc_spi_command(ithc, SPI_CMD_CODE_WRITE, -+ offsetof(struct ithc_device_config, touch_cfg), sizeof(x), &x); -+} -+ -+static void ithc_disable_interrupts(struct ithc *ithc) -+{ - writel(0, &ithc->regs->error_control); - bitsb(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_IRQ, 0); - bitsb(&ithc->regs->dma_rx[0].control, DMA_RX_CONTROL_IRQ_UNKNOWN_1 | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_UNKNOWN_4 | DMA_RX_CONTROL_IRQ_DATA, 0); -@@ -226,43 +304,85 @@ static void ithc_disable_interrupts(struct ithc *ithc) { - bitsb(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_IRQ, 0); - } - --static void ithc_clear_dma_rx_interrupts(struct ithc *ithc, unsigned channel) { -- writel(DMA_RX_STATUS_ERROR | DMA_RX_STATUS_UNKNOWN_4 | DMA_RX_STATUS_HAVE_DATA, &ithc->regs->dma_rx[channel].status); -+static void ithc_clear_dma_rx_interrupts(struct ithc *ithc, unsigned int channel) -+{ -+ writel(DMA_RX_STATUS_ERROR | DMA_RX_STATUS_UNKNOWN_4 | DMA_RX_STATUS_HAVE_DATA, -+ &ithc->regs->dma_rx[channel].status); - } - --static void ithc_clear_interrupts(struct ithc *ithc) { -+static void ithc_clear_interrupts(struct ithc *ithc) -+{ - writel(0xffffffff, &ithc->regs->error_flags); - writel(ERROR_STATUS_DMA | ERROR_STATUS_SPI, &ithc->regs->error_status); - writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status); - ithc_clear_dma_rx_interrupts(ithc, 0); - ithc_clear_dma_rx_interrupts(ithc, 1); -- writel(DMA_TX_STATUS_DONE | DMA_TX_STATUS_ERROR | DMA_TX_STATUS_UNKNOWN_2, &ithc->regs->dma_tx.status); -+ writel(DMA_TX_STATUS_DONE | DMA_TX_STATUS_ERROR | DMA_TX_STATUS_UNKNOWN_2, -+ &ithc->regs->dma_tx.status); - } - --static void ithc_process(struct ithc *ithc) { -+static void ithc_process(struct ithc *ithc) -+{ - ithc_log_regs(ithc); - -- // read and clear error bits -+ bool rx0 = ithc_use_rx0 && (readl(&ithc->regs->dma_rx[0].status) & (DMA_RX_STATUS_ERROR | DMA_RX_STATUS_HAVE_DATA)) != 0; 
-+ bool rx1 = ithc_use_rx1 && (readl(&ithc->regs->dma_rx[1].status) & (DMA_RX_STATUS_ERROR | DMA_RX_STATUS_HAVE_DATA)) != 0; -+ -+ // Track time between DMA rx transfers, so we can try to predict when we need to enable CPU latency QoS for the next transfer -+ ktime_t t = ktime_get(); -+ ktime_t dt = ktime_sub(t, ithc->last_rx_time); -+ if (rx0 || rx1) { -+ ithc->last_rx_time = t; -+ if (dt > ms_to_ktime(100)) { -+ ithc->cur_rx_seq_count = 0; -+ ithc->cur_rx_seq_errors = 0; -+ } -+ ithc->cur_rx_seq_count++; -+ if (!ithc_use_polling && ithc_dma_latency_us >= 0) { -+ // Disable QoS, since the DMA transfer has completed (we re-enable it after a delay below) -+ cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE); -+ hrtimer_try_to_cancel(&ithc->activity_end_timer); -+ } -+ } -+ -+ // Read and clear error bits - u32 err = readl(&ithc->regs->error_flags); - if (err) { -- if (err & ~ERROR_FLAG_DMA_UNKNOWN_12) pci_err(ithc->pci, "error flags: 0x%08x\n", err); - writel(err, &ithc->regs->error_flags); -+ if (err & ~ERROR_FLAG_DMA_RX_TIMEOUT) -+ pci_err(ithc->pci, "error flags: 0x%08x\n", err); -+ if (err & ERROR_FLAG_DMA_RX_TIMEOUT) { -+ // Only log an error if we see a significant number of these errors. -+ ithc->cur_rx_seq_errors++; -+ if (ithc->cur_rx_seq_errors && ithc->cur_rx_seq_errors % 50 == 0 && ithc->cur_rx_seq_errors > ithc->cur_rx_seq_count / 10) -+ pci_err(ithc->pci, "High number of DMA RX timeouts/errors (%u/%u, dt=%lldus). 
Try adjusting dma_early_us and/or dma_latency_us.\n", -+ ithc->cur_rx_seq_errors, ithc->cur_rx_seq_count, ktime_to_us(dt)); -+ } - } - -- // process DMA rx -+ // Process DMA rx - if (ithc_use_rx0) { - ithc_clear_dma_rx_interrupts(ithc, 0); -- ithc_dma_rx(ithc, 0); -+ if (rx0) -+ ithc_dma_rx(ithc, 0); - } - if (ithc_use_rx1) { - ithc_clear_dma_rx_interrupts(ithc, 1); -- ithc_dma_rx(ithc, 1); -+ if (rx1) -+ ithc_dma_rx(ithc, 1); -+ } -+ -+ // Start timer to re-enable QoS for next rx, but only if we've seen an ERROR_FLAG_DMA_RX_TIMEOUT -+ if ((rx0 || rx1) && !ithc_use_polling && ithc_dma_latency_us >= 0 && ithc->cur_rx_seq_errors > 0) { -+ ktime_t expires = ktime_add(t, ktime_sub_us(dt, ithc_dma_early_us)); -+ hrtimer_start_range_ns(&ithc->activity_start_timer, expires, 10 * NSEC_PER_USEC, HRTIMER_MODE_ABS); - } - - ithc_log_regs(ithc); - } - --static irqreturn_t ithc_interrupt_thread(int irq, void *arg) { -+static irqreturn_t ithc_interrupt_thread(int irq, void *arg) -+{ - struct ithc *ithc = arg; - pci_dbg(ithc->pci, "IRQ! err=%08x/%08x/%08x, cmd=%02x/%08x, rx0=%02x/%08x, rx1=%02x/%08x, tx=%02x/%08x\n", - readl(&ithc->regs->error_control), readl(&ithc->regs->error_status), readl(&ithc->regs->error_flags), -@@ -274,14 +394,21 @@ static irqreturn_t ithc_interrupt_thread(int irq, void *arg) { - return IRQ_HANDLED; - } - --static int ithc_poll_thread(void *arg) { -+static int ithc_poll_thread(void *arg) -+{ - struct ithc *ithc = arg; -- unsigned sleep = 100; -+ unsigned int sleep = 100; - while (!kthread_should_stop()) { - u32 n = ithc->dma_rx[1].num_received; - ithc_process(ithc); -- if (n != ithc->dma_rx[1].num_received) sleep = 20; -- else sleep = min(200u, sleep + (sleep >> 4) + 1); -+ // Decrease polling interval to 20ms if we received data, otherwise slowly -+ // increase it up to 200ms. 
-+ if (n != ithc->dma_rx[1].num_received) { -+ ithc_set_active(ithc, 100 * USEC_PER_MSEC); -+ sleep = 20; -+ } else { -+ sleep = min(200u, sleep + (sleep >> 4) + 1); -+ } - msleep_interruptible(sleep); - } - return 0; -@@ -289,7 +416,8 @@ static int ithc_poll_thread(void *arg) { - - // Device initialization and shutdown - --static void ithc_disable(struct ithc *ithc) { -+static void ithc_disable(struct ithc *ithc) -+{ - bitsl_set(&ithc->regs->control_bits, CONTROL_QUIESCE); - CHECK(waitl, ithc, &ithc->regs->control_bits, CONTROL_IS_QUIESCED, CONTROL_IS_QUIESCED); - bitsl(&ithc->regs->control_bits, CONTROL_NRESET, 0); -@@ -301,81 +429,112 @@ static void ithc_disable(struct ithc *ithc) { - ithc_clear_interrupts(ithc); - } - --static int ithc_init_device(struct ithc *ithc) { -+static int ithc_init_device(struct ithc *ithc) -+{ - ithc_log_regs(ithc); - bool was_enabled = (readl(&ithc->regs->control_bits) & CONTROL_NRESET) != 0; - ithc_disable(ithc); - CHECK_RET(waitl, ithc, &ithc->regs->control_bits, CONTROL_READY, CONTROL_READY); -+ -+ // Since we don't yet know which SPI config the device wants, use default speed and mode -+ // initially for reading config data. - ithc_set_spi_config(ithc, 10, 0); -- bitsl_set(&ithc->regs->dma_rx[0].unknown_init_bits, 0x80000000); // seems to help with reading config - -- if (was_enabled) if (msleep_interruptible(100)) return -EINTR; -+ // Setting the following bit seems to make reading the config more reliable. -+ bitsl_set(&ithc->regs->dma_rx[0].unknown_init_bits, 0x80000000); -+ -+ // If the device was previously enabled, wait a bit to make sure it's fully shut down. -+ if (was_enabled) -+ if (msleep_interruptible(100)) -+ return -EINTR; -+ -+ // Take the touch device out of reset. 
- bitsl(&ithc->regs->control_bits, CONTROL_QUIESCE, 0); - CHECK_RET(waitl, ithc, &ithc->regs->control_bits, CONTROL_IS_QUIESCED, 0); - for (int retries = 0; ; retries++) { - ithc_log_regs(ithc); - bitsl_set(&ithc->regs->control_bits, CONTROL_NRESET); -- if (!waitl(ithc, &ithc->regs->state, 0xf, 2)) break; -+ if (!waitl(ithc, &ithc->regs->state, 0xf, 2)) -+ break; - if (retries > 5) { -- pci_err(ithc->pci, "too many retries, failed to reset device\n"); -+ pci_err(ithc->pci, "failed to reset device, state = 0x%08x\n", readl(&ithc->regs->state)); - return -ETIMEDOUT; - } -- pci_err(ithc->pci, "invalid state, retrying reset\n"); -+ pci_warn(ithc->pci, "invalid state, retrying reset\n"); - bitsl(&ithc->regs->control_bits, CONTROL_NRESET, 0); -- if (msleep_interruptible(1000)) return -EINTR; -+ if (msleep_interruptible(1000)) -+ return -EINTR; - } - ithc_log_regs(ithc); - -+ // Waiting for the following status bit makes reading config much more reliable, -+ // however the official driver does not seem to do this... - CHECK(waitl, ithc, &ithc->regs->dma_rx[0].status, DMA_RX_STATUS_UNKNOWN_4, DMA_RX_STATUS_UNKNOWN_4); - -- // read config -+ // Read configuration data. 
- for (int retries = 0; ; retries++) { - ithc_log_regs(ithc); -- memset(&ithc->config, 0, sizeof ithc->config); -- CHECK_RET(ithc_spi_command, ithc, SPI_CMD_CODE_READ, 0, sizeof ithc->config, &ithc->config); -+ memset(&ithc->config, 0, sizeof(ithc->config)); -+ CHECK_RET(ithc_spi_command, ithc, SPI_CMD_CODE_READ, 0, sizeof(ithc->config), &ithc->config); - u32 *p = (void *)&ithc->config; - pci_info(ithc->pci, "config: %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", - p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); -- if (ithc_is_config_valid(ithc)) break; -+ if (ithc_is_config_valid(ithc)) -+ break; - if (retries > 10) { -- pci_err(ithc->pci, "failed to read config, unknown device ID 0x%08x\n", ithc->config.device_id); -+ pci_err(ithc->pci, "failed to read config, unknown device ID 0x%08x\n", -+ ithc->config.device_id); - return -EIO; - } -- pci_err(ithc->pci, "failed to read config, retrying\n"); -- if (msleep_interruptible(100)) return -EINTR; -+ pci_warn(ithc->pci, "failed to read config, retrying\n"); -+ if (msleep_interruptible(100)) -+ return -EINTR; - } - ithc_log_regs(ithc); - -- CHECK_RET(ithc_set_spi_config, ithc, DEVCFG_SPI_MAX_FREQ(ithc->config.spi_config), DEVCFG_SPI_MODE(ithc->config.spi_config)); -+ // Apply SPI config and enable touch device. -+ CHECK_RET(ithc_set_spi_config, ithc, -+ DEVCFG_SPI_MAX_FREQ(ithc->config.spi_config), -+ DEVCFG_SPI_MODE(ithc->config.spi_config)); - CHECK_RET(ithc_set_device_enabled, ithc, true); - ithc_log_regs(ithc); - return 0; - } - --int ithc_reset(struct ithc *ithc) { -- // FIXME This should probably do devres_release_group()+ithc_start(). But because this is called during DMA -- // processing, that would have to be done asynchronously (schedule_work()?). And with extra locking? -+int ithc_reset(struct ithc *ithc) -+{ -+ // FIXME This should probably do devres_release_group()+ithc_start(). 
-+ // But because this is called during DMA processing, that would have to be done -+ // asynchronously (schedule_work()?). And with extra locking? - pci_err(ithc->pci, "reset\n"); - CHECK(ithc_init_device, ithc); -- if (ithc_use_rx0) ithc_dma_rx_enable(ithc, 0); -- if (ithc_use_rx1) ithc_dma_rx_enable(ithc, 1); -+ if (ithc_use_rx0) -+ ithc_dma_rx_enable(ithc, 0); -+ if (ithc_use_rx1) -+ ithc_dma_rx_enable(ithc, 1); - ithc_log_regs(ithc); - pci_dbg(ithc->pci, "reset completed\n"); - return 0; - } - --static void ithc_stop(void *res) { -+static void ithc_stop(void *res) -+{ - struct ithc *ithc = res; - pci_dbg(ithc->pci, "stopping\n"); - ithc_log_regs(ithc); -- if (ithc->poll_thread) CHECK(kthread_stop, ithc->poll_thread); -- if (ithc->irq >= 0) disable_irq(ithc->irq); -+ -+ if (ithc->poll_thread) -+ CHECK(kthread_stop, ithc->poll_thread); -+ if (ithc->irq >= 0) -+ disable_irq(ithc->irq); - CHECK(ithc_set_device_enabled, ithc, false); - ithc_disable(ithc); -- del_timer_sync(&ithc->activity_timer); -+ hrtimer_cancel(&ithc->activity_start_timer); -+ hrtimer_cancel(&ithc->activity_end_timer); - cpu_latency_qos_remove_request(&ithc->activity_qos); -- // clear dma config -- for(unsigned i = 0; i < 2; i++) { -+ -+ // Clear DMA config. 
-+ for (unsigned int i = 0; i < 2; i++) { - CHECK(waitl, ithc, &ithc->regs->dma_rx[i].status, DMA_RX_STATUS_ENABLED, 0); - lo_hi_writeq(0, &ithc->regs->dma_rx[i].addr); - writeb(0, &ithc->regs->dma_rx[i].num_bufs); -@@ -383,35 +542,43 @@ static void ithc_stop(void *res) { - } - lo_hi_writeq(0, &ithc->regs->dma_tx.addr); - writeb(0, &ithc->regs->dma_tx.num_prds); -+ - ithc_log_regs(ithc); - pci_dbg(ithc->pci, "stopped\n"); - } - --static void ithc_clear_drvdata(void *res) { -+static void ithc_clear_drvdata(void *res) -+{ - struct pci_dev *pci = res; - pci_set_drvdata(pci, NULL); - } - --static int ithc_start(struct pci_dev *pci) { -+static int ithc_start(struct pci_dev *pci) -+{ - pci_dbg(pci, "starting\n"); - if (pci_get_drvdata(pci)) { - pci_err(pci, "device already initialized\n"); - return -EINVAL; - } -- if (!devres_open_group(&pci->dev, ithc_start, GFP_KERNEL)) return -ENOMEM; -+ if (!devres_open_group(&pci->dev, ithc_start, GFP_KERNEL)) -+ return -ENOMEM; - -- struct ithc *ithc = devm_kzalloc(&pci->dev, sizeof *ithc, GFP_KERNEL); -- if (!ithc) return -ENOMEM; -+ // Allocate/init main driver struct. -+ struct ithc *ithc = devm_kzalloc(&pci->dev, sizeof(*ithc), GFP_KERNEL); -+ if (!ithc) -+ return -ENOMEM; - ithc->irq = -1; - ithc->pci = pci; -- snprintf(ithc->phys, sizeof ithc->phys, "pci-%s/" DEVNAME, pci_name(pci)); -+ snprintf(ithc->phys, sizeof(ithc->phys), "pci-%s/" DEVNAME, pci_name(pci)); - init_waitqueue_head(&ithc->wait_hid_parse); - init_waitqueue_head(&ithc->wait_hid_get_feature); - mutex_init(&ithc->hid_get_feature_mutex); - pci_set_drvdata(pci, ithc); - CHECK_RET(devm_add_action_or_reset, &pci->dev, ithc_clear_drvdata, pci); -- if (ithc_log_regs_enabled) ithc->prev_regs = devm_kzalloc(&pci->dev, sizeof *ithc->prev_regs, GFP_KERNEL); -+ if (ithc_log_regs_enabled) -+ ithc->prev_regs = devm_kzalloc(&pci->dev, sizeof(*ithc->prev_regs), GFP_KERNEL); - -+ // PCI initialization. 
- CHECK_RET(pcim_enable_device, pci); - pci_set_master(pci); - CHECK_RET(pcim_iomap_regions, pci, BIT(0), DEVNAME " regs"); -@@ -419,29 +586,39 @@ static int ithc_start(struct pci_dev *pci) { - CHECK_RET(pci_set_power_state, pci, PCI_D0); - ithc->regs = pcim_iomap_table(pci)[0]; - -+ // Allocate IRQ. - if (!ithc_use_polling) { - CHECK_RET(pci_alloc_irq_vectors, pci, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX); - ithc->irq = CHECK(pci_irq_vector, pci, 0); -- if (ithc->irq < 0) return ithc->irq; -+ if (ithc->irq < 0) -+ return ithc->irq; - } - -+ // Initialize THC and touch device. - CHECK_RET(ithc_init_device, ithc); - CHECK(devm_device_add_groups, &pci->dev, ithc_attribute_groups); -- if (ithc_use_rx0) CHECK_RET(ithc_dma_rx_init, ithc, 0, ithc_use_rx1 ? DEVNAME "0" : DEVNAME); -- if (ithc_use_rx1) CHECK_RET(ithc_dma_rx_init, ithc, 1, ithc_use_rx0 ? DEVNAME "1" : DEVNAME); -+ if (ithc_use_rx0) -+ CHECK_RET(ithc_dma_rx_init, ithc, 0); -+ if (ithc_use_rx1) -+ CHECK_RET(ithc_dma_rx_init, ithc, 1); - CHECK_RET(ithc_dma_tx_init, ithc); - -- CHECK_RET(ithc_hid_init, ithc); -- - cpu_latency_qos_add_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE); -- timer_setup(&ithc->activity_timer, ithc_activity_timer_callback, 0); -+ hrtimer_init(&ithc->activity_start_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); -+ ithc->activity_start_timer.function = ithc_activity_start_timer_callback; -+ hrtimer_init(&ithc->activity_end_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ ithc->activity_end_timer.function = ithc_activity_end_timer_callback; - -- // add ithc_stop callback AFTER setting up DMA buffers, so that polling/irqs/DMA are disabled BEFORE the buffers are freed -+ // Add ithc_stop() callback AFTER setting up DMA buffers, so that polling/irqs/DMA are -+ // disabled BEFORE the buffers are freed. - CHECK_RET(devm_add_action_or_reset, &pci->dev, ithc_stop, ithc); - -+ CHECK_RET(ithc_hid_init, ithc); -+ -+ // Start polling/IRQ. 
- if (ithc_use_polling) { - pci_info(pci, "using polling instead of irq\n"); -- // use a thread instead of simple timer because we want to be able to sleep -+ // Use a thread instead of simple timer because we want to be able to sleep. - ithc->poll_thread = kthread_run(ithc_poll_thread, ithc, DEVNAME "poll"); - if (IS_ERR(ithc->poll_thread)) { - int err = PTR_ERR(ithc->poll_thread); -@@ -449,13 +626,17 @@ static int ithc_start(struct pci_dev *pci) { - return err; - } - } else { -- CHECK_RET(devm_request_threaded_irq, &pci->dev, ithc->irq, NULL, ithc_interrupt_thread, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, DEVNAME, ithc); -+ CHECK_RET(devm_request_threaded_irq, &pci->dev, ithc->irq, NULL, -+ ithc_interrupt_thread, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, DEVNAME, ithc); - } - -- if (ithc_use_rx0) ithc_dma_rx_enable(ithc, 0); -- if (ithc_use_rx1) ithc_dma_rx_enable(ithc, 1); -+ if (ithc_use_rx0) -+ ithc_dma_rx_enable(ithc, 0); -+ if (ithc_use_rx1) -+ ithc_dma_rx_enable(ithc, 1); - -- // hid_add_device can only be called after irq/polling is started and DMA is enabled, because it calls ithc_hid_parse which reads the report descriptor via DMA -+ // hid_add_device() can only be called after irq/polling is started and DMA is enabled, -+ // because it calls ithc_hid_parse() which reads the report descriptor via DMA. 
- CHECK_RET(hid_add_device, ithc->hid); - - CHECK(ithc_debug_init, ithc); -@@ -464,43 +645,54 @@ static int ithc_start(struct pci_dev *pci) { - return 0; - } - --static int ithc_probe(struct pci_dev *pci, const struct pci_device_id *id) { -+static int ithc_probe(struct pci_dev *pci, const struct pci_device_id *id) -+{ - pci_dbg(pci, "device probe\n"); - return ithc_start(pci); - } - --static void ithc_remove(struct pci_dev *pci) { -+static void ithc_remove(struct pci_dev *pci) -+{ - pci_dbg(pci, "device remove\n"); - // all cleanup is handled by devres - } - --static int ithc_suspend(struct device *dev) { -+// For suspend/resume, we just deinitialize and reinitialize everything. -+// TODO It might be cleaner to keep the HID device around, however we would then have to signal -+// to userspace that the touch device has lost state and userspace needs to e.g. resend 'set -+// feature' requests. Hidraw does not seem to have a facility to do that. -+static int ithc_suspend(struct device *dev) -+{ - struct pci_dev *pci = to_pci_dev(dev); - pci_dbg(pci, "pm suspend\n"); - devres_release_group(dev, ithc_start); - return 0; - } - --static int ithc_resume(struct device *dev) { -+static int ithc_resume(struct device *dev) -+{ - struct pci_dev *pci = to_pci_dev(dev); - pci_dbg(pci, "pm resume\n"); - return ithc_start(pci); - } - --static int ithc_freeze(struct device *dev) { -+static int ithc_freeze(struct device *dev) -+{ - struct pci_dev *pci = to_pci_dev(dev); - pci_dbg(pci, "pm freeze\n"); - devres_release_group(dev, ithc_start); - return 0; - } - --static int ithc_thaw(struct device *dev) { -+static int ithc_thaw(struct device *dev) -+{ - struct pci_dev *pci = to_pci_dev(dev); - pci_dbg(pci, "pm thaw\n"); - return ithc_start(pci); - } - --static int ithc_restore(struct device *dev) { -+static int ithc_restore(struct device *dev) -+{ - struct pci_dev *pci = to_pci_dev(dev); - pci_dbg(pci, "pm restore\n"); - return ithc_start(pci); -@@ -521,11 +713,13 @@ static struct 
pci_driver ithc_driver = { - //.dev_groups = ithc_attribute_groups, // could use this (since 5.14), however the attributes won't have valid values until config has been read anyway - }; - --static int __init ithc_init(void) { -+static int __init ithc_init(void) -+{ - return pci_register_driver(&ithc_driver); - } - --static void __exit ithc_exit(void) { -+static void __exit ithc_exit(void) -+{ - pci_unregister_driver(&ithc_driver); - } - -diff --git a/drivers/hid/ithc/ithc-regs.c b/drivers/hid/ithc/ithc-regs.c -index 85d567b05761f..e058721886e37 100644 ---- a/drivers/hid/ithc/ithc-regs.c -+++ b/drivers/hid/ithc/ithc-regs.c -@@ -1,63 +1,95 @@ -+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause -+ - #include "ithc.h" - - #define reg_num(r) (0x1fff & (u16)(__force u64)(r)) - --void bitsl(__iomem u32 *reg, u32 mask, u32 val) { -- if (val & ~mask) pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", reg_num(reg), val, mask); -+void bitsl(__iomem u32 *reg, u32 mask, u32 val) -+{ -+ if (val & ~mask) -+ pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", -+ reg_num(reg), val, mask); - writel((readl(reg) & ~mask) | (val & mask), reg); - } - --void bitsb(__iomem u8 *reg, u8 mask, u8 val) { -- if (val & ~mask) pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", reg_num(reg), val, mask); -+void bitsb(__iomem u8 *reg, u8 mask, u8 val) -+{ -+ if (val & ~mask) -+ pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", -+ reg_num(reg), val, mask); - writeb((readb(reg) & ~mask) | (val & mask), reg); - } - --int waitl(struct ithc *ithc, __iomem u32 *reg, u32 mask, u32 val) { -- pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", reg_num(reg), mask, val); -+int waitl(struct ithc *ithc, __iomem u32 *reg, u32 mask, u32 val) -+{ -+ pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", -+ reg_num(reg), mask, val); - u32 x; - if (readl_poll_timeout(reg, x, (x & mask) == val, 200, 1000*1000)) { -- pci_err(ithc->pci, 
"timed out waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", reg_num(reg), mask, val); -+ pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", -+ reg_num(reg), mask, val); - return -ETIMEDOUT; - } - pci_dbg(ithc->pci, "done waiting\n"); - return 0; - } - --int waitb(struct ithc *ithc, __iomem u8 *reg, u8 mask, u8 val) { -- pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", reg_num(reg), mask, val); -+int waitb(struct ithc *ithc, __iomem u8 *reg, u8 mask, u8 val) -+{ -+ pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", -+ reg_num(reg), mask, val); - u8 x; - if (readb_poll_timeout(reg, x, (x & mask) == val, 200, 1000*1000)) { -- pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", reg_num(reg), mask, val); -+ pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", -+ reg_num(reg), mask, val); - return -ETIMEDOUT; - } - pci_dbg(ithc->pci, "done waiting\n"); - return 0; - } - --int ithc_set_spi_config(struct ithc *ithc, u8 speed, u8 mode) { -+int ithc_set_spi_config(struct ithc *ithc, u8 speed, u8 mode) -+{ - pci_dbg(ithc->pci, "setting SPI speed to %i, mode %i\n", speed, mode); -- if (mode == 3) mode = 2; -+ if (mode == 3) -+ mode = 2; - bitsl(&ithc->regs->spi_config, - SPI_CONFIG_MODE(0xff) | SPI_CONFIG_SPEED(0xff) | SPI_CONFIG_UNKNOWN_18(0xff) | SPI_CONFIG_SPEED2(0xff), - SPI_CONFIG_MODE(mode) | SPI_CONFIG_SPEED(speed) | SPI_CONFIG_UNKNOWN_18(0) | SPI_CONFIG_SPEED2(speed)); - return 0; - } - --int ithc_spi_command(struct ithc *ithc, u8 command, u32 offset, u32 size, void *data) { -+int ithc_spi_command(struct ithc *ithc, u8 command, u32 offset, u32 size, void *data) -+{ - pci_dbg(ithc->pci, "SPI command %u, size %u, offset %u\n", command, size, offset); -- if (size > sizeof ithc->regs->spi_cmd.data) return -EINVAL; -+ if (size > sizeof(ithc->regs->spi_cmd.data)) -+ return -EINVAL; -+ -+ // Wait if the device is still busy. 
- CHECK_RET(waitl, ithc, &ithc->regs->spi_cmd.status, SPI_CMD_STATUS_BUSY, 0); -+ // Clear result flags. - writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status); -+ -+ // Init SPI command data. - writeb(command, &ithc->regs->spi_cmd.code); - writew(size, &ithc->regs->spi_cmd.size); - writel(offset, &ithc->regs->spi_cmd.offset); - u32 *p = data, n = (size + 3) / 4; -- for (u32 i = 0; i < n; i++) writel(p[i], &ithc->regs->spi_cmd.data[i]); -+ for (u32 i = 0; i < n; i++) -+ writel(p[i], &ithc->regs->spi_cmd.data[i]); -+ -+ // Start transmission. - bitsb_set(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_SEND); - CHECK_RET(waitl, ithc, &ithc->regs->spi_cmd.status, SPI_CMD_STATUS_BUSY, 0); -- if ((readl(&ithc->regs->spi_cmd.status) & (SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR)) != SPI_CMD_STATUS_DONE) return -EIO; -- if (readw(&ithc->regs->spi_cmd.size) != size) return -EMSGSIZE; -- for (u32 i = 0; i < n; i++) p[i] = readl(&ithc->regs->spi_cmd.data[i]); -+ -+ // Read response. 
-+ if ((readl(&ithc->regs->spi_cmd.status) & (SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR)) != SPI_CMD_STATUS_DONE) -+ return -EIO; -+ if (readw(&ithc->regs->spi_cmd.size) != size) -+ return -EMSGSIZE; -+ for (u32 i = 0; i < n; i++) -+ p[i] = readl(&ithc->regs->spi_cmd.data[i]); -+ - writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status); - return 0; - } -diff --git a/drivers/hid/ithc/ithc-regs.h b/drivers/hid/ithc/ithc-regs.h -index 1a96092ed7eed..d4007d9e2bacc 100644 ---- a/drivers/hid/ithc/ithc-regs.h -+++ b/drivers/hid/ithc/ithc-regs.h -@@ -1,3 +1,5 @@ -+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ -+ - #define CONTROL_QUIESCE BIT(1) - #define CONTROL_IS_QUIESCED BIT(2) - #define CONTROL_NRESET BIT(3) -@@ -24,7 +26,7 @@ - - #define ERROR_FLAG_DMA_UNKNOWN_9 BIT(9) - #define ERROR_FLAG_DMA_UNKNOWN_10 BIT(10) --#define ERROR_FLAG_DMA_UNKNOWN_12 BIT(12) // set when we receive a truncated DMA message -+#define ERROR_FLAG_DMA_RX_TIMEOUT BIT(12) // set when we receive a truncated DMA message - #define ERROR_FLAG_DMA_UNKNOWN_13 BIT(13) - #define ERROR_FLAG_SPI_BUS_TURNAROUND BIT(16) - #define ERROR_FLAG_SPI_RESPONSE_TIMEOUT BIT(17) -@@ -67,6 +69,7 @@ - #define DMA_RX_STATUS_HAVE_DATA BIT(5) - #define DMA_RX_STATUS_ENABLED BIT(8) - -+// COUNTER_RESET can be written to counter registers to reset them to zero. However, in some cases this can mess up the THC. - #define COUNTER_RESET BIT(31) - - struct ithc_registers { -@@ -147,15 +150,15 @@ static_assert(sizeof(struct ithc_registers) == 0x1300); - #define DEVCFG_SPI_MAX_FREQ(x) (((x) >> 1) & 0xf) // high bit = use high speed mode? 
- #define DEVCFG_SPI_MODE(x) (((x) >> 6) & 3) - #define DEVCFG_SPI_UNKNOWN_8(x) (((x) >> 8) & 0x3f) --#define DEVCFG_SPI_NEEDS_HEARTBEAT BIT(20) --#define DEVCFG_SPI_HEARTBEAT_INTERVAL (((x) >> 21) & 7) -+#define DEVCFG_SPI_NEEDS_HEARTBEAT BIT(20) // TODO implement heartbeat -+#define DEVCFG_SPI_HEARTBEAT_INTERVAL(x) (((x) >> 21) & 7) - #define DEVCFG_SPI_UNKNOWN_25 BIT(25) - #define DEVCFG_SPI_UNKNOWN_26 BIT(26) - #define DEVCFG_SPI_UNKNOWN_27 BIT(27) --#define DEVCFG_SPI_DELAY (((x) >> 28) & 7) --#define DEVCFG_SPI_USE_EXT_READ_CFG BIT(31) -+#define DEVCFG_SPI_DELAY(x) (((x) >> 28) & 7) // TODO use this -+#define DEVCFG_SPI_USE_EXT_READ_CFG BIT(31) // TODO use this? - --struct ithc_device_config { -+struct ithc_device_config { // (Example values are from an SP7+.) - u32 _unknown_00; // 00 = 0xe0000402 (0xe0000401 after DMA_RX_CODE_RESET) - u32 _unknown_04; // 04 = 0x00000000 - u32 dma_buf_sizes; // 08 = 0x000a00ff -@@ -166,9 +169,9 @@ struct ithc_device_config { - u16 vendor_id; // 1c = 0x045e = Microsoft Corp. - u16 product_id; // 1e = 0x0c1a - u32 revision; // 20 = 0x00000001 -- u32 fw_version; // 24 = 0x05008a8b = 5.0.138.139 -+ u32 fw_version; // 24 = 0x05008a8b = 5.0.138.139 (this value looks more random on newer devices) - u32 _unknown_28; // 28 = 0x00000000 -- u32 fw_mode; // 2c = 0x00000000 -+ u32 fw_mode; // 2c = 0x00000000 (for fw update?) - u32 _unknown_30; // 30 = 0x00000000 - u32 _unknown_34; // 34 = 0x0404035e (u8,u8,u8,u8 = version?) - u32 _unknown_38; // 38 = 0x000001c0 (0x000001c1 after DMA_RX_CODE_RESET) -diff --git a/drivers/hid/ithc/ithc.h b/drivers/hid/ithc/ithc.h -index 6a9b0d480bc15..028e55a4ec53e 100644 ---- a/drivers/hid/ithc/ithc.h -+++ b/drivers/hid/ithc/ithc.h -@@ -1,3 +1,5 @@ -+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ -+ - #include - #include - #include -@@ -21,7 +23,7 @@ - #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - - #define CHECK(fn, ...) 
({ int r = fn(__VA_ARGS__); if (r < 0) pci_err(ithc->pci, "%s: %s failed with %i\n", __func__, #fn, r); r; }) --#define CHECK_RET(...) do { int r = CHECK(__VA_ARGS__); if (r < 0) return r; } while(0) -+#define CHECK_RET(...) do { int r = CHECK(__VA_ARGS__); if (r < 0) return r; } while (0) - - #define NUM_RX_BUF 16 - -@@ -35,8 +37,13 @@ struct ithc { - struct pci_dev *pci; - int irq; - struct task_struct *poll_thread; -+ - struct pm_qos_request activity_qos; -- struct timer_list activity_timer; -+ struct hrtimer activity_start_timer; -+ struct hrtimer activity_end_timer; -+ ktime_t last_rx_time; -+ unsigned int cur_rx_seq_count; -+ unsigned int cur_rx_seq_errors; - - struct hid_device *hid; - bool hid_parse_done; -@@ -54,7 +61,7 @@ struct ithc { - }; - - int ithc_reset(struct ithc *ithc); --void ithc_set_active(struct ithc *ithc); -+void ithc_set_active(struct ithc *ithc, unsigned int duration_us); - int ithc_debug_init(struct ithc *ithc); - void ithc_log_regs(struct ithc *ithc); - --- -2.42.0 - -From c4cbbcd24ea10e6558753174ae6dabcc9b54e438 Mon Sep 17 00:00:00 2001 -From: Maximilian Luz -Date: Sun, 22 Oct 2023 14:57:11 +0200 -Subject: [PATCH] platform/surface: aggregator_registry: Add support for - Surface Laptop Go 3 - -Add SAM client device nodes for the Surface Laptop Go 3. It seems to use -the same SAM client devices as the Surface Laptop Go 1 and 2, so re-use -their node group. 
- -Signed-off-by: Maximilian Luz -Patchset: surface-sam ---- - drivers/platform/surface/surface_aggregator_registry.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c -index 0fe5be5396525..0d8c8395c5886 100644 ---- a/drivers/platform/surface/surface_aggregator_registry.c -+++ b/drivers/platform/surface/surface_aggregator_registry.c -@@ -367,6 +367,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { - /* Surface Laptop Go 2 */ - { "MSHW0290", (unsigned long)ssam_node_group_slg1 }, - -+ /* Surface Laptop Go 3 */ -+ { "MSHW0440", (unsigned long)ssam_node_group_slg1 }, -+ - /* Surface Laptop Studio */ - { "MSHW0123", (unsigned long)ssam_node_group_sls }, - --- -2.42.0 - -From 0bb0adce3efad7a43fc3811f6cc24148c8c75253 Mon Sep 17 00:00:00 2001 -From: Maximilian Luz -Date: Mon, 20 Nov 2023 19:47:00 +0100 -Subject: [PATCH] platform/surface: aggregator_registry: Add support for - Surface Laptop Studio 2 - -Add SAM client device nodes for the Surface Laptop Studio 2 (SLS2). The -SLS2 is quite similar to the SLS1, but it does not provide the touchpad -as a SAM-HID device. Therefore, add a new node group for the SLS2 and -update the comments accordingly - -Signed-off-by: Maximilian Luz -Patchset: surface-sam ---- - .../surface/surface_aggregator_registry.c | 25 ++++++++++++++++--- - 1 file changed, 21 insertions(+), 4 deletions(-) - -diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c -index 0d8c8395c5886..530db4db71aba 100644 ---- a/drivers/platform/surface/surface_aggregator_registry.c -+++ b/drivers/platform/surface/surface_aggregator_registry.c -@@ -247,8 +247,8 @@ static const struct software_node *ssam_node_group_sl5[] = { - NULL, - }; - --/* Devices for Surface Laptop Studio. 
*/ --static const struct software_node *ssam_node_group_sls[] = { -+/* Devices for Surface Laptop Studio 1. */ -+static const struct software_node *ssam_node_group_sls1[] = { - &ssam_node_root, - &ssam_node_bat_ac, - &ssam_node_bat_main, -@@ -263,6 +263,20 @@ static const struct software_node *ssam_node_group_sls[] = { - NULL, - }; - -+/* Devices for Surface Laptop Studio 2. */ -+static const struct software_node *ssam_node_group_sls2[] = { -+ &ssam_node_root, -+ &ssam_node_bat_ac, -+ &ssam_node_bat_main, -+ &ssam_node_tmp_pprof, -+ &ssam_node_pos_tablet_switch, -+ &ssam_node_hid_sam_keyboard, -+ &ssam_node_hid_sam_penstash, -+ &ssam_node_hid_sam_sensors, -+ &ssam_node_hid_sam_ucm_ucsi, -+ NULL, -+}; -+ - /* Devices for Surface Laptop Go. */ - static const struct software_node *ssam_node_group_slg1[] = { - &ssam_node_root, -@@ -370,8 +384,11 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { - /* Surface Laptop Go 3 */ - { "MSHW0440", (unsigned long)ssam_node_group_slg1 }, - -- /* Surface Laptop Studio */ -- { "MSHW0123", (unsigned long)ssam_node_group_sls }, -+ /* Surface Laptop Studio 1 */ -+ { "MSHW0123", (unsigned long)ssam_node_group_sls1 }, -+ -+ /* Surface Laptop Studio 2 */ -+ { "MSHW0360", (unsigned long)ssam_node_group_sls2 }, - - { }, - }; --- -2.42.0 - -From 3772b511c710c369b737fd0a111fbda63b028f1d Mon Sep 17 00:00:00 2001 -From: Maximilian Luz -Date: Sat, 25 Jul 2020 17:19:53 +0200 -Subject: [PATCH] i2c: acpi: Implement RawBytes read access - -Microsoft Surface Pro 4 and Book 1 devices access the MSHW0030 I2C -device via a generic serial bus operation region and RawBytes read -access. On the Surface Book 1, this access is required to turn on (and -off) the discrete GPU. - -Multiple things are to note here: - -a) The RawBytes access is device/driver dependent. The ACPI - specification states: - - > Raw accesses assume that the writer has knowledge of the bus that - > the access is made over and the device that is being accessed. 
The - > protocol may only ensure that the buffer is transmitted to the - > appropriate driver, but the driver must be able to interpret the - > buffer to communicate to a register. - - Thus this implementation may likely not work on other devices - accessing I2C via the RawBytes accessor type. - -b) The MSHW0030 I2C device is an HID-over-I2C device which seems to - serve multiple functions: - - 1. It is the main access point for the legacy-type Surface Aggregator - Module (also referred to as SAM-over-HID, as opposed to the newer - SAM-over-SSH/UART). It has currently not been determined on how - support for the legacy SAM should be implemented. Likely via a - custom HID driver. - - 2. It seems to serve as the HID device for the Integrated Sensor Hub. - This might complicate matters with regards to implementing a - SAM-over-HID driver required by legacy SAM. - -In light of this, the simplest approach has been chosen for now. -However, it may make more sense regarding breakage and compatibility to -either provide functionality for replacing or enhancing the default -operation region handler via some additional API functions, or even to -completely blacklist MSHW0030 from the I2C core and provide a custom -driver for it. - -Replacing/enhancing the default operation region handler would, however, -either require some sort of secondary driver and access point for it, -from which the new API functions would be called and the new handler -(part) would be installed, or hard-coding them via some sort of -quirk-like interface into the I2C core. 
- -Signed-off-by: Maximilian Luz -Patchset: surface-sam-over-hid ---- - drivers/i2c/i2c-core-acpi.c | 35 +++++++++++++++++++++++++++++++++++ - 1 file changed, 35 insertions(+) - -diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c -index d6037a3286690..a290ebc77aea2 100644 ---- a/drivers/i2c/i2c-core-acpi.c -+++ b/drivers/i2c/i2c-core-acpi.c -@@ -628,6 +628,28 @@ static int acpi_gsb_i2c_write_bytes(struct i2c_client *client, - return (ret == 1) ? 0 : -EIO; - } - -+static int acpi_gsb_i2c_write_raw_bytes(struct i2c_client *client, -+ u8 *data, u8 data_len) -+{ -+ struct i2c_msg msgs[1]; -+ int ret = AE_OK; -+ -+ msgs[0].addr = client->addr; -+ msgs[0].flags = client->flags; -+ msgs[0].len = data_len + 1; -+ msgs[0].buf = data; -+ -+ ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); -+ -+ if (ret < 0) { -+ dev_err(&client->adapter->dev, "i2c write failed: %d\n", ret); -+ return ret; -+ } -+ -+ /* 1 transfer must have completed successfully */ -+ return (ret == 1) ? 
0 : -EIO; -+} -+ - static acpi_status - i2c_acpi_space_handler(u32 function, acpi_physical_address command, - u32 bits, u64 *value64, -@@ -729,6 +751,19 @@ i2c_acpi_space_handler(u32 function, acpi_physical_address command, - } - break; - -+ case ACPI_GSB_ACCESS_ATTRIB_RAW_BYTES: -+ if (action == ACPI_READ) { -+ dev_warn(&adapter->dev, -+ "protocol 0x%02x not supported for client 0x%02x\n", -+ accessor_type, client->addr); -+ ret = AE_BAD_PARAMETER; -+ goto err; -+ } else { -+ status = acpi_gsb_i2c_write_raw_bytes(client, -+ gsb->data, info->access_length); -+ } -+ break; -+ - default: - dev_warn(&adapter->dev, "protocol 0x%02x not supported for client 0x%02x\n", - accessor_type, client->addr); --- -2.42.0 - -From f45a16750118da615fca44e7214204c83631ee7f Mon Sep 17 00:00:00 2001 -From: Maximilian Luz -Date: Sat, 13 Feb 2021 16:41:18 +0100 -Subject: [PATCH] platform/surface: Add driver for Surface Book 1 dGPU switch - -Add driver exposing the discrete GPU power-switch of the Microsoft -Surface Book 1 to user-space. - -On the Surface Book 1, the dGPU power is controlled via the Surface -System Aggregator Module (SAM). The specific SAM-over-HID command for -this is exposed via ACPI. This module provides a simple driver exposing -the ACPI call via a sysfs parameter to user-space, so that users can -easily power-on/-off the dGPU. 
- -Patchset: surface-sam-over-hid ---- - drivers/platform/surface/Kconfig | 7 + - drivers/platform/surface/Makefile | 1 + - .../surface/surfacebook1_dgpu_switch.c | 162 ++++++++++++++++++ - 3 files changed, 170 insertions(+) - create mode 100644 drivers/platform/surface/surfacebook1_dgpu_switch.c - -diff --git a/drivers/platform/surface/Kconfig b/drivers/platform/surface/Kconfig -index b629e82af97c0..68656e8f309ed 100644 ---- a/drivers/platform/surface/Kconfig -+++ b/drivers/platform/surface/Kconfig -@@ -149,6 +149,13 @@ config SURFACE_AGGREGATOR_TABLET_SWITCH - Select M or Y here, if you want to provide tablet-mode switch input - events on the Surface Pro 8, Surface Pro X, and Surface Laptop Studio. - -+config SURFACE_BOOK1_DGPU_SWITCH -+ tristate "Surface Book 1 dGPU Switch Driver" -+ depends on SYSFS -+ help -+ This driver provides a sysfs switch to set the power-state of the -+ discrete GPU found on the Microsoft Surface Book 1. -+ - config SURFACE_DTX - tristate "Surface DTX (Detachment System) Driver" - depends on SURFACE_AGGREGATOR -diff --git a/drivers/platform/surface/Makefile b/drivers/platform/surface/Makefile -index 53344330939bf..7efcd0cdb5329 100644 ---- a/drivers/platform/surface/Makefile -+++ b/drivers/platform/surface/Makefile -@@ -12,6 +12,7 @@ obj-$(CONFIG_SURFACE_AGGREGATOR_CDEV) += surface_aggregator_cdev.o - obj-$(CONFIG_SURFACE_AGGREGATOR_HUB) += surface_aggregator_hub.o - obj-$(CONFIG_SURFACE_AGGREGATOR_REGISTRY) += surface_aggregator_registry.o - obj-$(CONFIG_SURFACE_AGGREGATOR_TABLET_SWITCH) += surface_aggregator_tabletsw.o -+obj-$(CONFIG_SURFACE_BOOK1_DGPU_SWITCH) += surfacebook1_dgpu_switch.o - obj-$(CONFIG_SURFACE_DTX) += surface_dtx.o - obj-$(CONFIG_SURFACE_GPE) += surface_gpe.o - obj-$(CONFIG_SURFACE_HOTPLUG) += surface_hotplug.o -diff --git a/drivers/platform/surface/surfacebook1_dgpu_switch.c b/drivers/platform/surface/surfacebook1_dgpu_switch.c -new file mode 100644 -index 0000000000000..8b816ed8f35c6 ---- /dev/null -+++ 
b/drivers/platform/surface/surfacebook1_dgpu_switch.c -@@ -0,0 +1,162 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+ -+#include -+#include -+#include -+#include -+ -+ -+#ifdef pr_fmt -+#undef pr_fmt -+#endif -+#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ -+ -+ -+static const guid_t dgpu_sw_guid = GUID_INIT(0x6fd05c69, 0xcde3, 0x49f4, -+ 0x95, 0xed, 0xab, 0x16, 0x65, 0x49, 0x80, 0x35); -+ -+#define DGPUSW_ACPI_PATH_DSM "\\_SB_.PCI0.LPCB.EC0_.VGBI" -+#define DGPUSW_ACPI_PATH_HGON "\\_SB_.PCI0.RP05.HGON" -+#define DGPUSW_ACPI_PATH_HGOF "\\_SB_.PCI0.RP05.HGOF" -+ -+ -+static int sb1_dgpu_sw_dsmcall(void) -+{ -+ union acpi_object *ret; -+ acpi_handle handle; -+ acpi_status status; -+ -+ status = acpi_get_handle(NULL, DGPUSW_ACPI_PATH_DSM, &handle); -+ if (status) -+ return -EINVAL; -+ -+ ret = acpi_evaluate_dsm_typed(handle, &dgpu_sw_guid, 1, 1, NULL, ACPI_TYPE_BUFFER); -+ if (!ret) -+ return -EINVAL; -+ -+ ACPI_FREE(ret); -+ return 0; -+} -+ -+static int sb1_dgpu_sw_hgon(void) -+{ -+ struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL}; -+ acpi_status status; -+ -+ status = acpi_evaluate_object(NULL, DGPUSW_ACPI_PATH_HGON, NULL, &buf); -+ if (status) { -+ pr_err("failed to run HGON: %d\n", status); -+ return -EINVAL; -+ } -+ -+ if (buf.pointer) -+ ACPI_FREE(buf.pointer); -+ -+ pr_info("turned-on dGPU via HGON\n"); -+ return 0; -+} -+ -+static int sb1_dgpu_sw_hgof(void) -+{ -+ struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL}; -+ acpi_status status; -+ -+ status = acpi_evaluate_object(NULL, DGPUSW_ACPI_PATH_HGOF, NULL, &buf); -+ if (status) { -+ pr_err("failed to run HGOF: %d\n", status); -+ return -EINVAL; -+ } -+ -+ if (buf.pointer) -+ ACPI_FREE(buf.pointer); -+ -+ pr_info("turned-off dGPU via HGOF\n"); -+ return 0; -+} -+ -+ -+static ssize_t dgpu_dsmcall_store(struct device *dev, struct device_attribute *attr, -+ const char *buf, size_t len) -+{ -+ int status, value; -+ -+ status = kstrtoint(buf, 0, &value); -+ if (status < 0) -+ return 
status; -+ -+ if (value != 1) -+ return -EINVAL; -+ -+ status = sb1_dgpu_sw_dsmcall(); -+ -+ return status < 0 ? status : len; -+} -+ -+static ssize_t dgpu_power_store(struct device *dev, struct device_attribute *attr, -+ const char *buf, size_t len) -+{ -+ bool power; -+ int status; -+ -+ status = kstrtobool(buf, &power); -+ if (status < 0) -+ return status; -+ -+ if (power) -+ status = sb1_dgpu_sw_hgon(); -+ else -+ status = sb1_dgpu_sw_hgof(); -+ -+ return status < 0 ? status : len; -+} -+ -+static DEVICE_ATTR_WO(dgpu_dsmcall); -+static DEVICE_ATTR_WO(dgpu_power); -+ -+static struct attribute *sb1_dgpu_sw_attrs[] = { -+ &dev_attr_dgpu_dsmcall.attr, -+ &dev_attr_dgpu_power.attr, -+ NULL, -+}; -+ -+static const struct attribute_group sb1_dgpu_sw_attr_group = { -+ .attrs = sb1_dgpu_sw_attrs, -+}; -+ -+ -+static int sb1_dgpu_sw_probe(struct platform_device *pdev) -+{ -+ return sysfs_create_group(&pdev->dev.kobj, &sb1_dgpu_sw_attr_group); -+} -+ -+static int sb1_dgpu_sw_remove(struct platform_device *pdev) -+{ -+ sysfs_remove_group(&pdev->dev.kobj, &sb1_dgpu_sw_attr_group); -+ return 0; -+} -+ -+/* -+ * The dGPU power seems to be actually handled by MSHW0040. However, that is -+ * also the power-/volume-button device with a mainline driver. So let's use -+ * MSHW0041 instead for now, which seems to be the LTCH (latch/DTX) device. 
-+ */ -+static const struct acpi_device_id sb1_dgpu_sw_match[] = { -+ { "MSHW0041", }, -+ { }, -+}; -+MODULE_DEVICE_TABLE(acpi, sb1_dgpu_sw_match); -+ -+static struct platform_driver sb1_dgpu_sw = { -+ .probe = sb1_dgpu_sw_probe, -+ .remove = sb1_dgpu_sw_remove, -+ .driver = { -+ .name = "surfacebook1_dgpu_switch", -+ .acpi_match_table = sb1_dgpu_sw_match, -+ .probe_type = PROBE_PREFER_ASYNCHRONOUS, -+ }, -+}; -+module_platform_driver(sb1_dgpu_sw); -+ -+MODULE_AUTHOR("Maximilian Luz "); -+MODULE_DESCRIPTION("Discrete GPU Power-Switch for Surface Book 1"); -+MODULE_LICENSE("GPL"); --- -2.42.0 - -From a5d9cf4762a27e2bf7f38c0d5a223b9df8b4ba8a Mon Sep 17 00:00:00 2001 -From: Sachi King -Date: Tue, 5 Oct 2021 00:05:09 +1100 -Subject: [PATCH] Input: soc_button_array - support AMD variant Surface devices - -The power button on the AMD variant of the Surface Laptop uses the -same MSHW0040 device ID as the 5th and later generation of Surface -devices, however they report 0 for their OEM platform revision. As the -_DSM does not exist on the devices requiring special casing, check for -the existance of the _DSM to determine if soc_button_array should be -loaded. - -Fixes: c394159310d0 ("Input: soc_button_array - add support for newer surface devices") -Co-developed-by: Maximilian Luz - -Signed-off-by: Sachi King -Patchset: surface-button ---- - drivers/input/misc/soc_button_array.c | 33 +++++++-------------------- - 1 file changed, 8 insertions(+), 25 deletions(-) - -diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c -index e79f5497948b8..2bddbe6e9ea4d 100644 ---- a/drivers/input/misc/soc_button_array.c -+++ b/drivers/input/misc/soc_button_array.c -@@ -537,8 +537,8 @@ static const struct soc_device_data soc_device_MSHW0028 = { - * Both, the Surface Pro 4 (surfacepro3_button.c) and the above mentioned - * devices use MSHW0040 for power and volume buttons, however the way they - * have to be addressed differs. 
Make sure that we only load this drivers -- * for the correct devices by checking the OEM Platform Revision provided by -- * the _DSM method. -+ * for the correct devices by checking if the OEM Platform Revision DSM call -+ * exists. - */ - #define MSHW0040_DSM_REVISION 0x01 - #define MSHW0040_DSM_GET_OMPR 0x02 // get OEM Platform Revision -@@ -549,31 +549,14 @@ static const guid_t MSHW0040_DSM_UUID = - static int soc_device_check_MSHW0040(struct device *dev) - { - acpi_handle handle = ACPI_HANDLE(dev); -- union acpi_object *result; -- u64 oem_platform_rev = 0; // valid revisions are nonzero -- -- // get OEM platform revision -- result = acpi_evaluate_dsm_typed(handle, &MSHW0040_DSM_UUID, -- MSHW0040_DSM_REVISION, -- MSHW0040_DSM_GET_OMPR, NULL, -- ACPI_TYPE_INTEGER); -- -- if (result) { -- oem_platform_rev = result->integer.value; -- ACPI_FREE(result); -- } -- -- /* -- * If the revision is zero here, the _DSM evaluation has failed. This -- * indicates that we have a Pro 4 or Book 1 and this driver should not -- * be used. -- */ -- if (oem_platform_rev == 0) -- return -ENODEV; -+ bool exists; - -- dev_dbg(dev, "OEM Platform Revision %llu\n", oem_platform_rev); -+ // check if OEM platform revision DSM call exists -+ exists = acpi_check_dsm(handle, &MSHW0040_DSM_UUID, -+ MSHW0040_DSM_REVISION, -+ BIT(MSHW0040_DSM_GET_OMPR)); - -- return 0; -+ return exists ? 0 : -ENODEV; - } - - /* --- -2.42.0 - -From 66f0a34801ad81ff08cc3ae0e175e0958959c461 Mon Sep 17 00:00:00 2001 -From: Sachi King -Date: Tue, 5 Oct 2021 00:22:57 +1100 -Subject: [PATCH] platform/surface: surfacepro3_button: don't load on amd - variant - -The AMD variant of the Surface Laptop report 0 for their OEM platform -revision. The Surface devices that require the surfacepro3_button -driver do not have the _DSM that gets the OEM platform revision. If the -method does not exist, load surfacepro3_button. 
- -Fixes: 64dd243d7356 ("platform/x86: surfacepro3_button: Fix device check") -Co-developed-by: Maximilian Luz - -Signed-off-by: Sachi King -Patchset: surface-button ---- - drivers/platform/surface/surfacepro3_button.c | 30 ++++--------------- - 1 file changed, 6 insertions(+), 24 deletions(-) - -diff --git a/drivers/platform/surface/surfacepro3_button.c b/drivers/platform/surface/surfacepro3_button.c -index 2755601f979cd..4240c98ca2265 100644 ---- a/drivers/platform/surface/surfacepro3_button.c -+++ b/drivers/platform/surface/surfacepro3_button.c -@@ -149,7 +149,8 @@ static int surface_button_resume(struct device *dev) - /* - * Surface Pro 4 and Surface Book 2 / Surface Pro 2017 use the same device - * ID (MSHW0040) for the power/volume buttons. Make sure this is the right -- * device by checking for the _DSM method and OEM Platform Revision. -+ * device by checking for the _DSM method and OEM Platform Revision DSM -+ * function. - * - * Returns true if the driver should bind to this device, i.e. the device is - * either MSWH0028 (Pro 3) or MSHW0040 on a Pro 4 or Book 1. -@@ -157,30 +158,11 @@ static int surface_button_resume(struct device *dev) - static bool surface_button_check_MSHW0040(struct acpi_device *dev) - { - acpi_handle handle = dev->handle; -- union acpi_object *result; -- u64 oem_platform_rev = 0; // valid revisions are nonzero -- -- // get OEM platform revision -- result = acpi_evaluate_dsm_typed(handle, &MSHW0040_DSM_UUID, -- MSHW0040_DSM_REVISION, -- MSHW0040_DSM_GET_OMPR, -- NULL, ACPI_TYPE_INTEGER); -- -- /* -- * If evaluating the _DSM fails, the method is not present. This means -- * that we have either MSHW0028 or MSHW0040 on Pro 4 or Book 1, so we -- * should use this driver. We use revision 0 indicating it is -- * unavailable. 
-- */ -- -- if (result) { -- oem_platform_rev = result->integer.value; -- ACPI_FREE(result); -- } -- -- dev_dbg(&dev->dev, "OEM Platform Revision %llu\n", oem_platform_rev); - -- return oem_platform_rev == 0; -+ // make sure that OEM platform revision DSM call does not exist -+ return !acpi_check_dsm(handle, &MSHW0040_DSM_UUID, -+ MSHW0040_DSM_REVISION, -+ BIT(MSHW0040_DSM_GET_OMPR)); - } - - --- -2.42.0 - -From a55587ce4f5065bedb604f9031082ad47612a163 Mon Sep 17 00:00:00 2001 -From: Maximilian Luz -Date: Sat, 18 Feb 2023 01:02:49 +0100 -Subject: [PATCH] USB: quirks: Add USB_QUIRK_DELAY_INIT for Surface Go 3 - Type-Cover - -The touchpad on the Type-Cover of the Surface Go 3 is sometimes not -being initialized properly. Apply USB_QUIRK_DELAY_INIT to fix this -issue. - -More specifically, the device in question is a fairly standard modern -touchpad with pointer and touchpad input modes. During setup, the device -needs to be switched from pointer- to touchpad-mode (which is done in -hid-multitouch) to fully utilize it as intended. Unfortunately, however, -this seems to occasionally fail silently, leaving the device in -pointer-mode. Applying USB_QUIRK_DELAY_INIT seems to fix this. 
- -Link: https://github.com/linux-surface/linux-surface/issues/1059 -Signed-off-by: Maximilian Luz -Patchset: surface-typecover ---- - drivers/usb/core/quirks.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c -index 15e9bd180a1d2..0d70461d01e16 100644 ---- a/drivers/usb/core/quirks.c -+++ b/drivers/usb/core/quirks.c -@@ -220,6 +220,9 @@ static const struct usb_device_id usb_quirk_list[] = { - /* Microsoft Surface Dock Ethernet (RTL8153 GigE) */ - { USB_DEVICE(0x045e, 0x07c6), .driver_info = USB_QUIRK_NO_LPM }, - -+ /* Microsoft Surface Go 3 Type-Cover */ -+ { USB_DEVICE(0x045e, 0x09b5), .driver_info = USB_QUIRK_DELAY_INIT }, -+ - /* Cherry Stream G230 2.0 (G85-231) and 3.0 (G85-232) */ - { USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME }, - --- -2.42.0 - -From 678999792d6b1c72e56c6b63fc3909b93db47b32 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= -Date: Thu, 5 Nov 2020 13:09:45 +0100 -Subject: [PATCH] hid/multitouch: Turn off Type Cover keyboard backlight when - suspending - -The Type Cover for Microsoft Surface devices supports a special usb -control request to disable or enable the built-in keyboard backlight. -On Windows, this request happens when putting the device into suspend or -resuming it, without it the backlight of the Type Cover will remain -enabled for some time even though the computer is suspended, which looks -weird to the user. - -So add support for this special usb control request to hid-multitouch, -which is the driver that's handling the Type Cover. - -The reason we have to use a pm_notifier for this instead of the usual -suspend/resume methods is that those won't get called in case the usb -device is already autosuspended. - -Also, if the device is autosuspended, we have to briefly autoresume it -in order to send the request. Doing that should be fine, the usb-core -driver does something similar during suspend inside choose_wakeup(). 
- -To make sure we don't send that request to every device but only to -devices which support it, add a new quirk -MT_CLS_WIN_8_MS_SURFACE_TYPE_COVER to hid-multitouch. For now this quirk -is only enabled for the usb id of the Surface Pro 2017 Type Cover, which -is where I confirmed that it's working. - -Patchset: surface-typecover ---- - drivers/hid/hid-multitouch.c | 100 ++++++++++++++++++++++++++++++++++- - 1 file changed, 98 insertions(+), 2 deletions(-) - -diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c -index 8db4ae05febc8..99a5efef45258 100644 ---- a/drivers/hid/hid-multitouch.c -+++ b/drivers/hid/hid-multitouch.c -@@ -34,7 +34,10 @@ - #include - #include - #include -+#include - #include -+#include -+#include - #include - #include - #include -@@ -47,6 +50,7 @@ MODULE_DESCRIPTION("HID multitouch panels"); - MODULE_LICENSE("GPL"); - - #include "hid-ids.h" -+#include "usbhid/usbhid.h" - - /* quirks to control the device */ - #define MT_QUIRK_NOT_SEEN_MEANS_UP BIT(0) -@@ -72,12 +76,15 @@ MODULE_LICENSE("GPL"); - #define MT_QUIRK_FORCE_MULTI_INPUT BIT(20) - #define MT_QUIRK_DISABLE_WAKEUP BIT(21) - #define MT_QUIRK_ORIENTATION_INVERT BIT(22) -+#define MT_QUIRK_HAS_TYPE_COVER_BACKLIGHT BIT(23) - - #define MT_INPUTMODE_TOUCHSCREEN 0x02 - #define MT_INPUTMODE_TOUCHPAD 0x03 - - #define MT_BUTTONTYPE_CLICKPAD 0 - -+#define MS_TYPE_COVER_FEATURE_REPORT_USAGE 0xff050086 -+ - enum latency_mode { - HID_LATENCY_NORMAL = 0, - HID_LATENCY_HIGH = 1, -@@ -169,6 +176,8 @@ struct mt_device { - - struct list_head applications; - struct list_head reports; -+ -+ struct notifier_block pm_notifier; - }; - - static void mt_post_parse_default_settings(struct mt_device *td, -@@ -213,6 +222,7 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app); - #define MT_CLS_GOOGLE 0x0111 - #define MT_CLS_RAZER_BLADE_STEALTH 0x0112 - #define MT_CLS_SMART_TECH 0x0113 -+#define MT_CLS_WIN_8_MS_SURFACE_TYPE_COVER 0x0114 - - #define MT_DEFAULT_MAXCONTACT 
10 - #define MT_MAX_MAXCONTACT 250 -@@ -397,6 +407,16 @@ static const struct mt_class mt_classes[] = { - MT_QUIRK_CONTACT_CNT_ACCURATE | - MT_QUIRK_SEPARATE_APP_REPORT, - }, -+ { .name = MT_CLS_WIN_8_MS_SURFACE_TYPE_COVER, -+ .quirks = MT_QUIRK_HAS_TYPE_COVER_BACKLIGHT | -+ MT_QUIRK_ALWAYS_VALID | -+ MT_QUIRK_IGNORE_DUPLICATES | -+ MT_QUIRK_HOVERING | -+ MT_QUIRK_CONTACT_CNT_ACCURATE | -+ MT_QUIRK_STICKY_FINGERS | -+ MT_QUIRK_WIN8_PTP_BUTTONS, -+ .export_all_inputs = true -+ }, - { } - }; - -@@ -1721,6 +1741,69 @@ static void mt_expired_timeout(struct timer_list *t) - clear_bit_unlock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags); - } - -+static void get_type_cover_backlight_field(struct hid_device *hdev, -+ struct hid_field **field) -+{ -+ struct hid_report_enum *rep_enum; -+ struct hid_report *rep; -+ struct hid_field *cur_field; -+ int i, j; -+ -+ rep_enum = &hdev->report_enum[HID_FEATURE_REPORT]; -+ list_for_each_entry(rep, &rep_enum->report_list, list) { -+ for (i = 0; i < rep->maxfield; i++) { -+ cur_field = rep->field[i]; -+ -+ for (j = 0; j < cur_field->maxusage; j++) { -+ if (cur_field->usage[j].hid -+ == MS_TYPE_COVER_FEATURE_REPORT_USAGE) { -+ *field = cur_field; -+ return; -+ } -+ } -+ } -+ } -+} -+ -+static void update_keyboard_backlight(struct hid_device *hdev, bool enabled) -+{ -+ struct usb_device *udev = hid_to_usb_dev(hdev); -+ struct hid_field *field = NULL; -+ -+ /* Wake up the device in case it's already suspended */ -+ pm_runtime_get_sync(&udev->dev); -+ -+ get_type_cover_backlight_field(hdev, &field); -+ if (!field) { -+ hid_err(hdev, "couldn't find backlight field\n"); -+ goto out; -+ } -+ -+ field->value[field->index] = enabled ? 
0x01ff00ff : 0x00ff00ff; -+ hid_hw_request(hdev, field->report, HID_REQ_SET_REPORT); -+ -+out: -+ pm_runtime_put_sync(&udev->dev); -+} -+ -+static int mt_pm_notifier(struct notifier_block *notifier, -+ unsigned long pm_event, -+ void *unused) -+{ -+ struct mt_device *td = -+ container_of(notifier, struct mt_device, pm_notifier); -+ struct hid_device *hdev = td->hdev; -+ -+ if (td->mtclass.quirks & MT_QUIRK_HAS_TYPE_COVER_BACKLIGHT) { -+ if (pm_event == PM_SUSPEND_PREPARE) -+ update_keyboard_backlight(hdev, 0); -+ else if (pm_event == PM_POST_SUSPEND) -+ update_keyboard_backlight(hdev, 1); -+ } -+ -+ return NOTIFY_DONE; -+} -+ - static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) - { - int ret, i; -@@ -1744,6 +1827,9 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) - td->inputmode_value = MT_INPUTMODE_TOUCHSCREEN; - hid_set_drvdata(hdev, td); - -+ td->pm_notifier.notifier_call = mt_pm_notifier; -+ register_pm_notifier(&td->pm_notifier); -+ - INIT_LIST_HEAD(&td->applications); - INIT_LIST_HEAD(&td->reports); - -@@ -1782,15 +1868,19 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) - timer_setup(&td->release_timer, mt_expired_timeout, 0); - - ret = hid_parse(hdev); -- if (ret != 0) -+ if (ret != 0) { -+ unregister_pm_notifier(&td->pm_notifier); - return ret; -+ } - - if (mtclass->quirks & MT_QUIRK_FIX_CONST_CONTACT_ID) - mt_fix_const_fields(hdev, HID_DG_CONTACTID); - - ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); -- if (ret) -+ if (ret) { -+ unregister_pm_notifier(&td->pm_notifier); - return ret; -+ } - - ret = sysfs_create_group(&hdev->dev.kobj, &mt_attribute_group); - if (ret) -@@ -1842,6 +1932,7 @@ static void mt_remove(struct hid_device *hdev) - { - struct mt_device *td = hid_get_drvdata(hdev); - -+ unregister_pm_notifier(&td->pm_notifier); - del_timer_sync(&td->release_timer); - - sysfs_remove_group(&hdev->dev.kobj, &mt_attribute_group); -@@ -2223,6 +2314,11 @@ static const 
struct hid_device_id mt_devices[] = { - MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, - USB_DEVICE_ID_XIROKU_CSR2) }, - -+ /* Microsoft Surface type cover */ -+ { .driver_data = MT_CLS_WIN_8_MS_SURFACE_TYPE_COVER, -+ HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, -+ USB_VENDOR_ID_MICROSOFT, 0x09c0) }, -+ - /* Google MT devices */ - { .driver_data = MT_CLS_GOOGLE, - HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE, --- -2.42.0 - -From 12427f01e38ebf653ccf44faefdcb92110c43c20 Mon Sep 17 00:00:00 2001 -From: PJungkamp -Date: Fri, 25 Feb 2022 12:04:25 +0100 -Subject: [PATCH] hid/multitouch: Add support for surface pro type cover tablet - switch - -The Surface Pro Type Cover has several non standard HID usages in it's -hid report descriptor. -I noticed that, upon folding the typecover back, a vendor specific range -of 4 32 bit integer hid usages is transmitted. -Only the first byte of the message seems to convey reliable information -about the keyboard state. - -0x22 => Normal (keys enabled) -0x33 => Folded back (keys disabled) -0x53 => Rotated left/right side up (keys disabled) -0x13 => Cover closed (keys disabled) -0x43 => Folded back and Tablet upside down (keys disabled) -This list may not be exhaustive. - -The tablet mode switch will be disabled for a value of 0x22 and enabled -on any other value. 
- -Patchset: surface-typecover ---- - drivers/hid/hid-multitouch.c | 148 +++++++++++++++++++++++++++++------ - 1 file changed, 122 insertions(+), 26 deletions(-) - -diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c -index 99a5efef45258..6ae43ea90bcd5 100644 ---- a/drivers/hid/hid-multitouch.c -+++ b/drivers/hid/hid-multitouch.c -@@ -77,6 +77,7 @@ MODULE_LICENSE("GPL"); - #define MT_QUIRK_DISABLE_WAKEUP BIT(21) - #define MT_QUIRK_ORIENTATION_INVERT BIT(22) - #define MT_QUIRK_HAS_TYPE_COVER_BACKLIGHT BIT(23) -+#define MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH BIT(24) - - #define MT_INPUTMODE_TOUCHSCREEN 0x02 - #define MT_INPUTMODE_TOUCHPAD 0x03 -@@ -84,6 +85,8 @@ MODULE_LICENSE("GPL"); - #define MT_BUTTONTYPE_CLICKPAD 0 - - #define MS_TYPE_COVER_FEATURE_REPORT_USAGE 0xff050086 -+#define MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE 0xff050072 -+#define MS_TYPE_COVER_APPLICATION 0xff050050 - - enum latency_mode { - HID_LATENCY_NORMAL = 0, -@@ -409,6 +412,7 @@ static const struct mt_class mt_classes[] = { - }, - { .name = MT_CLS_WIN_8_MS_SURFACE_TYPE_COVER, - .quirks = MT_QUIRK_HAS_TYPE_COVER_BACKLIGHT | -+ MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH | - MT_QUIRK_ALWAYS_VALID | - MT_QUIRK_IGNORE_DUPLICATES | - MT_QUIRK_HOVERING | -@@ -1390,6 +1394,9 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, - field->application != HID_CP_CONSUMER_CONTROL && - field->application != HID_GD_WIRELESS_RADIO_CTLS && - field->application != HID_GD_SYSTEM_MULTIAXIS && -+ !(field->application == MS_TYPE_COVER_APPLICATION && -+ application->quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH && -+ usage->hid == MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE) && - !(field->application == HID_VD_ASUS_CUSTOM_MEDIA_KEYS && - application->quirks & MT_QUIRK_ASUS_CUSTOM_UP)) - return -1; -@@ -1417,6 +1424,21 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, - return 1; - } - -+ /* -+ * The Microsoft Surface Pro Typecover has a 
non-standard HID -+ * tablet mode switch on a vendor specific usage page with vendor -+ * specific usage. -+ */ -+ if (field->application == MS_TYPE_COVER_APPLICATION && -+ application->quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH && -+ usage->hid == MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE) { -+ usage->type = EV_SW; -+ usage->code = SW_TABLET_MODE; -+ *max = SW_MAX; -+ *bit = hi->input->swbit; -+ return 1; -+ } -+ - if (rdata->is_mt_collection) - return mt_touch_input_mapping(hdev, hi, field, usage, bit, max, - application); -@@ -1438,6 +1460,7 @@ static int mt_input_mapped(struct hid_device *hdev, struct hid_input *hi, - { - struct mt_device *td = hid_get_drvdata(hdev); - struct mt_report_data *rdata; -+ struct input_dev *input; - - rdata = mt_find_report_data(td, field->report); - if (rdata && rdata->is_mt_collection) { -@@ -1445,6 +1468,19 @@ static int mt_input_mapped(struct hid_device *hdev, struct hid_input *hi, - return -1; - } - -+ /* -+ * We own an input device which acts as a tablet mode switch for -+ * the Surface Pro Typecover. 
-+ */ -+ if (field->application == MS_TYPE_COVER_APPLICATION && -+ rdata->application->quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH && -+ usage->hid == MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE) { -+ input = hi->input; -+ input_set_capability(input, EV_SW, SW_TABLET_MODE); -+ input_report_switch(input, SW_TABLET_MODE, 0); -+ return -1; -+ } -+ - /* let hid-core decide for the others */ - return 0; - } -@@ -1454,11 +1490,21 @@ static int mt_event(struct hid_device *hid, struct hid_field *field, - { - struct mt_device *td = hid_get_drvdata(hid); - struct mt_report_data *rdata; -+ struct input_dev *input; - - rdata = mt_find_report_data(td, field->report); - if (rdata && rdata->is_mt_collection) - return mt_touch_event(hid, field, usage, value); - -+ if (field->application == MS_TYPE_COVER_APPLICATION && -+ rdata->application->quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH && -+ usage->hid == MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE) { -+ input = field->hidinput->input; -+ input_report_switch(input, SW_TABLET_MODE, (value & 0xFF) != 0x22); -+ input_sync(input); -+ return 1; -+ } -+ - return 0; - } - -@@ -1611,6 +1657,42 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app) - app->quirks &= ~MT_QUIRK_CONTACT_CNT_ACCURATE; - } - -+static int get_type_cover_field(struct hid_report_enum *rep_enum, -+ struct hid_field **field, int usage) -+{ -+ struct hid_report *rep; -+ struct hid_field *cur_field; -+ int i, j; -+ -+ list_for_each_entry(rep, &rep_enum->report_list, list) { -+ for (i = 0; i < rep->maxfield; i++) { -+ cur_field = rep->field[i]; -+ if (cur_field->application != MS_TYPE_COVER_APPLICATION) -+ continue; -+ for (j = 0; j < cur_field->maxusage; j++) { -+ if (cur_field->usage[j].hid == usage) { -+ *field = cur_field; -+ return true; -+ } -+ } -+ } -+ } -+ return false; -+} -+ -+static void request_type_cover_tablet_mode_switch(struct hid_device *hdev) -+{ -+ struct hid_field *field; -+ -+ if 
(get_type_cover_field(&hdev->report_enum[HID_INPUT_REPORT], -+ &field, -+ MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE)) { -+ hid_hw_request(hdev, field->report, HID_REQ_GET_REPORT); -+ } else { -+ hid_err(hdev, "couldn't find tablet mode field\n"); -+ } -+} -+ - static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) - { - struct mt_device *td = hid_get_drvdata(hdev); -@@ -1659,6 +1741,13 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) - /* force BTN_STYLUS to allow tablet matching in udev */ - __set_bit(BTN_STYLUS, hi->input->keybit); - break; -+ case MS_TYPE_COVER_APPLICATION: -+ if (td->mtclass.quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH) { -+ suffix = "Tablet Mode Switch"; -+ request_type_cover_tablet_mode_switch(hdev); -+ break; -+ } -+ fallthrough; - default: - suffix = "UNKNOWN"; - break; -@@ -1741,30 +1830,6 @@ static void mt_expired_timeout(struct timer_list *t) - clear_bit_unlock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags); - } - --static void get_type_cover_backlight_field(struct hid_device *hdev, -- struct hid_field **field) --{ -- struct hid_report_enum *rep_enum; -- struct hid_report *rep; -- struct hid_field *cur_field; -- int i, j; -- -- rep_enum = &hdev->report_enum[HID_FEATURE_REPORT]; -- list_for_each_entry(rep, &rep_enum->report_list, list) { -- for (i = 0; i < rep->maxfield; i++) { -- cur_field = rep->field[i]; -- -- for (j = 0; j < cur_field->maxusage; j++) { -- if (cur_field->usage[j].hid -- == MS_TYPE_COVER_FEATURE_REPORT_USAGE) { -- *field = cur_field; -- return; -- } -- } -- } -- } --} -- - static void update_keyboard_backlight(struct hid_device *hdev, bool enabled) - { - struct usb_device *udev = hid_to_usb_dev(hdev); -@@ -1773,8 +1838,9 @@ static void update_keyboard_backlight(struct hid_device *hdev, bool enabled) - /* Wake up the device in case it's already suspended */ - pm_runtime_get_sync(&udev->dev); - -- get_type_cover_backlight_field(hdev, &field); -- if (!field) { -+ if 
(!get_type_cover_field(&hdev->report_enum[HID_FEATURE_REPORT], -+ &field, -+ MS_TYPE_COVER_FEATURE_REPORT_USAGE)) { - hid_err(hdev, "couldn't find backlight field\n"); - goto out; - } -@@ -1909,13 +1975,24 @@ static int mt_suspend(struct hid_device *hdev, pm_message_t state) - - static int mt_reset_resume(struct hid_device *hdev) - { -+ struct mt_device *td = hid_get_drvdata(hdev); -+ - mt_release_contacts(hdev); - mt_set_modes(hdev, HID_LATENCY_NORMAL, true, true); -+ -+ /* Request an update on the typecover folding state on resume -+ * after reset. -+ */ -+ if (td->mtclass.quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH) -+ request_type_cover_tablet_mode_switch(hdev); -+ - return 0; - } - - static int mt_resume(struct hid_device *hdev) - { -+ struct mt_device *td = hid_get_drvdata(hdev); -+ - /* Some Elan legacy devices require SET_IDLE to be set on resume. - * It should be safe to send it to other devices too. - * Tested on 3M, Stantum, Cypress, Zytronic, eGalax, and Elan panels. */ -@@ -1924,6 +2001,10 @@ static int mt_resume(struct hid_device *hdev) - - mt_set_modes(hdev, HID_LATENCY_NORMAL, true, true); - -+ /* Request an update on the typecover folding state on resume. */ -+ if (td->mtclass.quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH) -+ request_type_cover_tablet_mode_switch(hdev); -+ - return 0; - } - #endif -@@ -1931,6 +2012,21 @@ static int mt_resume(struct hid_device *hdev) - static void mt_remove(struct hid_device *hdev) - { - struct mt_device *td = hid_get_drvdata(hdev); -+ struct hid_field *field; -+ struct input_dev *input; -+ -+ /* Reset tablet mode switch on disconnect. 
*/ -+ if (td->mtclass.quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH) { -+ if (get_type_cover_field(&hdev->report_enum[HID_INPUT_REPORT], -+ &field, -+ MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE)) { -+ input = field->hidinput->input; -+ input_report_switch(input, SW_TABLET_MODE, 0); -+ input_sync(input); -+ } else { -+ hid_err(hdev, "couldn't find tablet mode field\n"); -+ } -+ } - - unregister_pm_notifier(&td->pm_notifier); - del_timer_sync(&td->release_timer); --- -2.42.0 - -From 151f9dba2f3d6d066d160128da109a0173a3ff4c Mon Sep 17 00:00:00 2001 -From: Maximilian Luz -Date: Sun, 19 Feb 2023 22:12:24 +0100 -Subject: [PATCH] PCI: Add quirk to prevent calling shutdown mehtod - -Work around buggy EFI firmware: On some Microsoft Surface devices -(Surface Pro 9 and Surface Laptop 5) the EFI ResetSystem call with -EFI_RESET_SHUTDOWN doesn't function properly. Instead of shutting the -system down, it returns and the system stays on. - -It turns out that this only happens after PCI shutdown callbacks ran for -specific devices. Excluding those devices from the shutdown process -makes the ResetSystem call work as expected. - -TODO: Maybe we can find a better way or the root cause of this? 
- -Not-Signed-off-by: Maximilian Luz -Patchset: surface-shutdown ---- - drivers/pci/pci-driver.c | 3 +++ - drivers/pci/quirks.c | 36 ++++++++++++++++++++++++++++++++++++ - include/linux/pci.h | 1 + - 3 files changed, 40 insertions(+) - -diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c -index 51ec9e7e784f0..40554890d7211 100644 ---- a/drivers/pci/pci-driver.c -+++ b/drivers/pci/pci-driver.c -@@ -507,6 +507,9 @@ static void pci_device_shutdown(struct device *dev) - struct pci_dev *pci_dev = to_pci_dev(dev); - struct pci_driver *drv = pci_dev->driver; - -+ if (pci_dev->no_shutdown) -+ return; -+ - pm_runtime_resume(dev); - - if (drv && drv->shutdown) -diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index ae95d0950..7a6d76c41 100644 ---- a/drivers/pci/quirks.c -+++ b/drivers/pci/quirks.c -@@ -6212,6 +6212,42 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5020, of_pci_make_dev_node); - DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5021, of_pci_make_dev_node); - DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_REDHAT, 0x0005, of_pci_make_dev_node); - -+static const struct dmi_system_id no_shutdown_dmi_table[] = { -+ /* -+ * Systems on which some devices should not be touched during shutdown. 
-+ */ -+ { -+ .ident = "Microsoft Surface Pro 9", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "Surface Pro 9"), -+ }, -+ }, -+ { -+ .ident = "Microsoft Surface Laptop 5", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "Surface Laptop 5"), -+ }, -+ }, -+ {} -+}; -+ -+static void quirk_no_shutdown(struct pci_dev *dev) -+{ -+ if (!dmi_check_system(no_shutdown_dmi_table)) -+ return; -+ -+ dev->no_shutdown = 1; -+ pci_info(dev, "disabling shutdown ops for [%04x:%04x]\n", -+ dev->vendor, dev->device); -+} -+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x461e, quirk_no_shutdown); // Thunderbolt 4 USB Controller -+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x461f, quirk_no_shutdown); // Thunderbolt 4 PCI Express Root Port -+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x462f, quirk_no_shutdown); // Thunderbolt 4 PCI Express Root Port -+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x466d, quirk_no_shutdown); // Thunderbolt 4 NHI -+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x46a8, quirk_no_shutdown); // GPU -+ - /* - * Devices known to require a longer delay before first config space access - * after reset recovery or resume from D3cold: - -diff --git a/include/linux/pci.h b/include/linux/pci.h -index 8c7c2c3c6c652..0c223b04dff91 100644 ---- a/include/linux/pci.h -+++ b/include/linux/pci.h -@@ -465,6 +465,7 @@ struct pci_dev { - unsigned int no_command_memory:1; /* No PCI_COMMAND_MEMORY */ - unsigned int rom_bar_overlap:1; /* ROM BAR disable broken */ - unsigned int rom_attr_enabled:1; /* Display of ROM attribute enabled? 
*/ -+ unsigned int no_shutdown:1; /* Do not touch device on shutdown */ - pci_dev_flags_t dev_flags; - atomic_t enable_cnt; /* pci_enable_device has been called */ - --- -2.42.0 - -From 912e956823b3cadd7203d3ce94418d162ff701be Mon Sep 17 00:00:00 2001 -From: Maximilian Luz -Date: Sun, 12 Mar 2023 01:41:57 +0100 -Subject: [PATCH] platform/surface: gpe: Add support for Surface Pro 9 - -Add the lid GPE used by the Surface Pro 9. - -Signed-off-by: Maximilian Luz -Patchset: surface-gpe ---- - drivers/platform/surface/surface_gpe.c | 17 +++++++++++++++++ - 1 file changed, 17 insertions(+) - -diff --git a/drivers/platform/surface/surface_gpe.c b/drivers/platform/surface/surface_gpe.c -index c219b840d491a..69c4352e8406b 100644 ---- a/drivers/platform/surface/surface_gpe.c -+++ b/drivers/platform/surface/surface_gpe.c -@@ -41,6 +41,11 @@ static const struct property_entry lid_device_props_l4F[] = { - {}, - }; - -+static const struct property_entry lid_device_props_l52[] = { -+ PROPERTY_ENTRY_U32("gpe", 0x52), -+ {}, -+}; -+ - static const struct property_entry lid_device_props_l57[] = { - PROPERTY_ENTRY_U32("gpe", 0x57), - {}, -@@ -107,6 +112,18 @@ static const struct dmi_system_id dmi_lid_device_table[] = { - }, - .driver_data = (void *)lid_device_props_l4B, - }, -+ { -+ /* -+ * We match for SKU here due to product name clash with the ARM -+ * version. 
-+ */ -+ .ident = "Surface Pro 9", -+ .matches = { -+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), -+ DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "Surface_Pro_9_2038"), -+ }, -+ .driver_data = (void *)lid_device_props_l52, -+ }, - { - .ident = "Surface Book 1", - .matches = { --- -2.42.0 - -From df083025f8c63824279c19de8ec3339440f819c9 Mon Sep 17 00:00:00 2001 -From: Hans de Goede -Date: Sun, 10 Oct 2021 20:56:57 +0200 -Subject: [PATCH] ACPI: delay enumeration of devices with a _DEP pointing to an - INT3472 device - -The clk and regulator frameworks expect clk/regulator consumer-devices -to have info about the consumed clks/regulators described in the device's -fw_node. - -To work around cases where this info is not present in the firmware tables, -which is often the case on x86/ACPI devices, both frameworks allow the -provider-driver to attach info about consumers to the clks/regulators -when registering these. - -This causes problems with the probe ordering wrt drivers for consumers -of these clks/regulators. Since the lookups are only registered when the -provider-driver binds, trying to get these clks/regulators before then -results in a -ENOENT error for clks and a dummy regulator for regulators. - -One case where we hit this issue is camera sensors such as e.g. the OV8865 -sensor found on the Microsoft Surface Go. The sensor uses clks, regulators -and GPIOs provided by a TPS68470 PMIC which is described in an INT3472 -ACPI device. There is special platform code handling this and setting -platform_data with the necessary consumer info on the MFD cells -instantiated for the PMIC under: drivers/platform/x86/intel/int3472. - -For this to work properly the ov8865 driver must not bind to the I2C-client -for the OV8865 sensor until after the TPS68470 PMIC gpio, regulator and -clk MFD cells have all been fully setup. 
- -The OV8865 on the Microsoft Surface Go is just one example, all X86 -devices using the Intel IPU3 camera block found on recent Intel SoCs -have similar issues where there is an INT3472 HID ACPI-device, which -describes the clks and regulators, and the driver for this INT3472 device -must be fully initialized before the sensor driver (any sensor driver) -binds for things to work properly. - -On these devices the ACPI nodes describing the sensors all have a _DEP -dependency on the matching INT3472 ACPI device (there is one per sensor). - -This allows solving the probe-ordering problem by delaying the enumeration -(instantiation of the I2C-client in the ov8865 example) of ACPI-devices -which have a _DEP dependency on an INT3472 device. - -The new acpi_dev_ready_for_enumeration() helper used for this is also -exported because for devices, which have the enumeration_by_parent flag -set, the parent-driver will do its own scan of child ACPI devices and -it will try to enumerate those during its probe(). Code doing this such -as e.g. the i2c-core-acpi.c code must call this new helper to ensure -that it too delays the enumeration until all the _DEP dependencies are -met on devices which have the new honor_deps flag set. - -Signed-off-by: Hans de Goede -Patchset: cameras ---- - drivers/acpi/scan.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c -index 691d4b7686ee7..9283217689279 100644 ---- a/drivers/acpi/scan.c -+++ b/drivers/acpi/scan.c -@@ -2108,6 +2108,9 @@ static acpi_status acpi_bus_check_add_2(acpi_handle handle, u32 lvl_not_used, - - static void acpi_default_enumeration(struct acpi_device *device) - { -+ if (!acpi_dev_ready_for_enumeration(device)) -+ return; -+ - /* - * Do not enumerate devices with enumeration_by_parent flag set as - * they will be enumerated by their respective parents. 
--- -2.42.0 - -From 87650a001d3068a8b614fd688e21bb87c2d3a3e6 Mon Sep 17 00:00:00 2001 -From: zouxiaoh -Date: Fri, 25 Jun 2021 08:52:59 +0800 -Subject: [PATCH] iommu: intel-ipu: use IOMMU passthrough mode for Intel IPUs - -Intel IPU(Image Processing Unit) has its own (IO)MMU hardware, -The IPU driver allocates its own page table that is not mapped -via the DMA, and thus the Intel IOMMU driver blocks access giving -this error: DMAR: DRHD: handling fault status reg 3 DMAR: -[DMA Read] Request device [00:05.0] PASID ffffffff -fault addr 76406000 [fault reason 06] PTE Read access is not set -As IPU is not an external facing device which is not risky, so use -IOMMU passthrough mode for Intel IPUs. - -Change-Id: I6dcccdadac308cf42e20a18e1b593381391e3e6b -Depends-On: Iacd67578e8c6a9b9ac73285f52b4081b72fb68a6 -Tracked-On: #JIITL8-411 -Signed-off-by: Bingbu Cao -Signed-off-by: zouxiaoh -Signed-off-by: Xu Chongyang -Patchset: cameras ---- - drivers/iommu/intel/iommu.c | 30 ++++++++++++++++++++++++++++++ - 1 file changed, 30 insertions(+) - -diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c -index 5a627e081797c..da866ac6b30ba 100644 ---- a/drivers/iommu/intel/iommu.c -+++ b/drivers/iommu/intel/iommu.c -@@ -38,6 +38,12 @@ - #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) - #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB) - #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) -+#define IS_INTEL_IPU(pdev) ((pdev)->vendor == PCI_VENDOR_ID_INTEL && \ -+ ((pdev)->device == 0x9a19 || \ -+ (pdev)->device == 0x9a39 || \ -+ (pdev)->device == 0x4e19 || \ -+ (pdev)->device == 0x465d || \ -+ (pdev)->device == 0x1919)) - #define IS_IPTS(pdev) ((pdev)->vendor == PCI_VENDOR_ID_INTEL && \ - ((pdev)->device == 0x9d3e)) - #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) -@@ -295,12 +301,14 @@ EXPORT_SYMBOL_GPL(intel_iommu_enabled); - - static int dmar_map_gfx = 1; - static 
int dmar_map_ipts = 1; -+static int dmar_map_ipu = 1; - static int intel_iommu_superpage = 1; - static int iommu_identity_mapping; - static int iommu_skip_te_disable; - - #define IDENTMAP_GFX 2 - #define IDENTMAP_AZALIA 4 -+#define IDENTMAP_IPU 8 - #define IDENTMAP_IPTS 16 - - const struct iommu_ops intel_iommu_ops; -@@ -2547,6 +2555,9 @@ static int device_def_domain_type(struct device *dev) - if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) - return IOMMU_DOMAIN_IDENTITY; - -+ if ((iommu_identity_mapping & IDENTMAP_IPU) && IS_INTEL_IPU(pdev)) -+ return IOMMU_DOMAIN_IDENTITY; -+ - if ((iommu_identity_mapping & IDENTMAP_IPTS) && IS_IPTS(pdev)) - return IOMMU_DOMAIN_IDENTITY; - } -@@ -2856,6 +2867,9 @@ static int __init init_dmars(void) - if (!dmar_map_gfx) - iommu_identity_mapping |= IDENTMAP_GFX; - -+ if (!dmar_map_ipu) -+ iommu_identity_mapping |= IDENTMAP_IPU; -+ - if (!dmar_map_ipts) - iommu_identity_mapping |= IDENTMAP_IPTS; - -@@ -4838,6 +4852,18 @@ static void quirk_iommu_igfx(struct pci_dev *dev) - dmar_map_gfx = 0; - } - -+static void quirk_iommu_ipu(struct pci_dev *dev) -+{ -+ if (!IS_INTEL_IPU(dev)) -+ return; -+ -+ if (risky_device(dev)) -+ return; -+ -+ pci_info(dev, "Passthrough IOMMU for integrated Intel IPU\n"); -+ dmar_map_ipu = 0; -+} -+ - static void quirk_iommu_ipts(struct pci_dev *dev) - { - if (!IS_IPTS(dev)) -@@ -4849,6 +4875,7 @@ static void quirk_iommu_ipts(struct pci_dev *dev) - pci_info(dev, "Passthrough IOMMU for IPTS\n"); - dmar_map_ipts = 0; - } -+ - /* G4x/GM45 integrated gfx dmar support is totally busted. 
*/ - DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx); - DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx); -@@ -4884,6 +4911,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx); - DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx); - DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx); - -+/* disable IPU dmar support */ -+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_iommu_ipu); -+ - /* disable IPTS dmar support */ - DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9D3E, quirk_iommu_ipts); - --- -2.42.0 - -From 76fec27d978bf7708a60862d4aab2e1fe7ec3f27 Mon Sep 17 00:00:00 2001 -From: Daniel Scally -Date: Sun, 10 Oct 2021 20:57:02 +0200 -Subject: [PATCH] platform/x86: int3472: Enable I2c daisy chain - -The TPS68470 PMIC has an I2C passthrough mode through which I2C traffic -can be forwarded to a device connected to the PMIC as though it were -connected directly to the system bus. Enable this mode when the chip -is initialised. 
- -Signed-off-by: Daniel Scally -Patchset: cameras ---- - drivers/platform/x86/intel/int3472/tps68470.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/drivers/platform/x86/intel/int3472/tps68470.c b/drivers/platform/x86/intel/int3472/tps68470.c -index 1e107fd49f828..e3e1696e7f0ee 100644 ---- a/drivers/platform/x86/intel/int3472/tps68470.c -+++ b/drivers/platform/x86/intel/int3472/tps68470.c -@@ -46,6 +46,13 @@ static int tps68470_chip_init(struct device *dev, struct regmap *regmap) - return ret; - } - -+ /* Enable I2C daisy chain */ -+ ret = regmap_write(regmap, TPS68470_REG_S_I2C_CTL, 0x03); -+ if (ret) { -+ dev_err(dev, "Failed to enable i2c daisy chain\n"); -+ return ret; -+ } -+ - dev_info(dev, "TPS68470 REVID: 0x%02x\n", version); - - return 0; --- -2.42.0 - -From 232a0f88ecc21141c6f0d94cc74eb63c7869c217 Mon Sep 17 00:00:00 2001 -From: Daniel Scally -Date: Thu, 2 Mar 2023 12:59:39 +0000 -Subject: [PATCH] platform/x86: int3472: Remap reset GPIO for INT347E - -ACPI _HID INT347E represents the OmniVision 7251 camera sensor. The -driver for this sensor expects a single pin named "enable", but on -some Microsoft Surface platforms the sensor is assigned a single -GPIO who's type flag is INT3472_GPIO_TYPE_RESET. - -Remap the GPIO pin's function from "reset" to "enable". This is done -outside of the existing remap table since it is a more widespread -discrepancy than that method is designed for. Additionally swap the -polarity of the pin to match the driver's expectation. 
- -Signed-off-by: Daniel Scally -Patchset: cameras ---- - drivers/platform/x86/intel/int3472/discrete.c | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c -index e33c2d75975cf..c0c90ae66b705 100644 ---- a/drivers/platform/x86/intel/int3472/discrete.c -+++ b/drivers/platform/x86/intel/int3472/discrete.c -@@ -57,6 +57,9 @@ static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int347 - const char *func, u32 polarity) - { - char *path = agpio->resource_source.string_ptr; -+ const struct acpi_device_id ov7251_ids[] = { -+ { "INT347E" }, -+ }; - struct gpiod_lookup *table_entry; - struct acpi_device *adev; - acpi_handle handle; -@@ -67,6 +70,17 @@ static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int347 - return -EINVAL; - } - -+ /* -+ * In addition to the function remap table we need to bulk remap the -+ * "reset" GPIO for the OmniVision 7251 sensor, as the driver for that -+ * expects its only GPIO pin to be called "enable" (and to have the -+ * opposite polarity). -+ */ -+ if (!strcmp(func, "reset") && !acpi_match_device_ids(int3472->sensor, ov7251_ids)) { -+ func = "enable"; -+ polarity = GPIO_ACTIVE_HIGH; -+ } -+ - status = acpi_get_handle(NULL, path, &handle); - if (ACPI_FAILURE(status)) - return -EINVAL; --- -2.42.0 - -From 0cfd5c05a675388bbb2edfa87423dc5ad931cc97 Mon Sep 17 00:00:00 2001 -From: Daniel Scally -Date: Tue, 21 Mar 2023 13:45:26 +0000 -Subject: [PATCH] media: i2c: Clarify that gain is Analogue gain in OV7251 - -Update the control ID for the gain control in the ov7251 driver to -V4L2_CID_ANALOGUE_GAIN. 
- -Signed-off-by: Daniel Scally -Patchset: cameras ---- - drivers/media/i2c/ov7251.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/drivers/media/i2c/ov7251.c b/drivers/media/i2c/ov7251.c -index 675fb37a6feae..43b30db08c9e4 100644 ---- a/drivers/media/i2c/ov7251.c -+++ b/drivers/media/i2c/ov7251.c -@@ -1051,7 +1051,7 @@ static int ov7251_s_ctrl(struct v4l2_ctrl *ctrl) - case V4L2_CID_EXPOSURE: - ret = ov7251_set_exposure(ov7251, ctrl->val); - break; -- case V4L2_CID_GAIN: -+ case V4L2_CID_ANALOGUE_GAIN: - ret = ov7251_set_gain(ov7251, ctrl->val); - break; - case V4L2_CID_TEST_PATTERN: -@@ -1551,7 +1551,7 @@ static int ov7251_init_ctrls(struct ov7251 *ov7251) - ov7251->exposure = v4l2_ctrl_new_std(&ov7251->ctrls, &ov7251_ctrl_ops, - V4L2_CID_EXPOSURE, 1, 32, 1, 32); - ov7251->gain = v4l2_ctrl_new_std(&ov7251->ctrls, &ov7251_ctrl_ops, -- V4L2_CID_GAIN, 16, 1023, 1, 16); -+ V4L2_CID_ANALOGUE_GAIN, 16, 1023, 1, 16); - v4l2_ctrl_new_std_menu_items(&ov7251->ctrls, &ov7251_ctrl_ops, - V4L2_CID_TEST_PATTERN, - ARRAY_SIZE(ov7251_test_pattern_menu) - 1, --- -2.42.0 - -From 18fa273c21f1dd86160f18242a81947392272443 Mon Sep 17 00:00:00 2001 -From: Daniel Scally -Date: Wed, 22 Mar 2023 11:01:42 +0000 -Subject: [PATCH] media: v4l2-core: Acquire privacy led in - v4l2_async_register_subdev() - -The current call to v4l2_subdev_get_privacy_led() is contained in -v4l2_async_register_subdev_sensor(), but that function isn't used by -all the sensor drivers. Move the acquisition of the privacy led to -v4l2_async_register_subdev() instead. 
- -Signed-off-by: Daniel Scally -Patchset: cameras ---- - drivers/media/v4l2-core/v4l2-async.c | 4 ++++ - drivers/media/v4l2-core/v4l2-fwnode.c | 4 ---- - 2 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/drivers/media/v4l2-core/v4l2-async.c b/drivers/media/v4l2-core/v4l2-async.c -index 091e8cf4114ba..cca10f5355844 100644 ---- a/drivers/media/v4l2-core/v4l2-async.c -+++ b/drivers/media/v4l2-core/v4l2-async.c -@@ -796,6 +796,10 @@ int v4l2_async_register_subdev(struct v4l2_subdev *sd) - - INIT_LIST_HEAD(&sd->asc_list); - -+ ret = v4l2_subdev_get_privacy_led(sd); -+ if (ret < 0) -+ return ret; -+ - /* - * No reference taken. The reference is held by the device (struct - * v4l2_subdev.dev), and async sub-device does not exist independently -diff --git a/drivers/media/v4l2-core/v4l2-fwnode.c b/drivers/media/v4l2-core/v4l2-fwnode.c -index 7f181fbbb1407..1c0347de4e216 100644 ---- a/drivers/media/v4l2-core/v4l2-fwnode.c -+++ b/drivers/media/v4l2-core/v4l2-fwnode.c -@@ -1217,10 +1217,6 @@ int v4l2_async_register_subdev_sensor(struct v4l2_subdev *sd) - - v4l2_async_subdev_nf_init(notifier, sd); - -- ret = v4l2_subdev_get_privacy_led(sd); -- if (ret < 0) -- goto out_cleanup; -- - ret = v4l2_async_nf_parse_fwnode_sensor(sd->dev, notifier); - if (ret < 0) - goto out_cleanup; --- -2.42.0 - -From 07e01113f2641afab78b155d42e9d9d399a9e164 Mon Sep 17 00:00:00 2001 -From: Kate Hsuan -Date: Tue, 21 Mar 2023 23:37:16 +0800 -Subject: [PATCH] platform: x86: int3472: Add MFD cell for tps68470 LED - -Add MFD cell for tps68470-led. 
- -Reviewed-by: Daniel Scally -Signed-off-by: Kate Hsuan -Reviewed-by: Hans de Goede -Patchset: cameras ---- - drivers/platform/x86/intel/int3472/tps68470.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/drivers/platform/x86/intel/int3472/tps68470.c b/drivers/platform/x86/intel/int3472/tps68470.c -index e3e1696e7f0ee..423dc555093f7 100644 ---- a/drivers/platform/x86/intel/int3472/tps68470.c -+++ b/drivers/platform/x86/intel/int3472/tps68470.c -@@ -17,7 +17,7 @@ - #define DESIGNED_FOR_CHROMEOS 1 - #define DESIGNED_FOR_WINDOWS 2 - --#define TPS68470_WIN_MFD_CELL_COUNT 3 -+#define TPS68470_WIN_MFD_CELL_COUNT 4 - - static const struct mfd_cell tps68470_cros[] = { - { .name = "tps68470-gpio" }, -@@ -200,7 +200,8 @@ static int skl_int3472_tps68470_probe(struct i2c_client *client) - cells[1].name = "tps68470-regulator"; - cells[1].platform_data = (void *)board_data->tps68470_regulator_pdata; - cells[1].pdata_size = sizeof(struct tps68470_regulator_platform_data); -- cells[2].name = "tps68470-gpio"; -+ cells[2].name = "tps68470-led"; -+ cells[3].name = "tps68470-gpio"; - - for (i = 0; i < board_data->n_gpiod_lookups; i++) - gpiod_add_lookup_table(board_data->tps68470_gpio_lookup_tables[i]); --- -2.42.0 - -From a704bf822539e09b00015110b48bc997692c92ce Mon Sep 17 00:00:00 2001 -From: Kate Hsuan -Date: Tue, 21 Mar 2023 23:37:17 +0800 -Subject: [PATCH] include: mfd: tps68470: Add masks for LEDA and LEDB - -Add flags for both LEDA(TPS68470_ILEDCTL_ENA), LEDB -(TPS68470_ILEDCTL_ENB), and current control mask for LEDB -(TPS68470_ILEDCTL_CTRLB) - -Reviewed-by: Daniel Scally -Reviewed-by: Hans de Goede -Signed-off-by: Kate Hsuan -Patchset: cameras ---- - include/linux/mfd/tps68470.h | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/include/linux/mfd/tps68470.h b/include/linux/mfd/tps68470.h -index 7807fa329db00..2d2abb25b944f 100644 ---- a/include/linux/mfd/tps68470.h -+++ b/include/linux/mfd/tps68470.h -@@ -34,6 +34,7 @@ - #define 
TPS68470_REG_SGPO 0x22 - #define TPS68470_REG_GPDI 0x26 - #define TPS68470_REG_GPDO 0x27 -+#define TPS68470_REG_ILEDCTL 0x28 - #define TPS68470_REG_VCMVAL 0x3C - #define TPS68470_REG_VAUX1VAL 0x3D - #define TPS68470_REG_VAUX2VAL 0x3E -@@ -94,4 +95,8 @@ - #define TPS68470_GPIO_MODE_OUT_CMOS 2 - #define TPS68470_GPIO_MODE_OUT_ODRAIN 3 - -+#define TPS68470_ILEDCTL_ENA BIT(2) -+#define TPS68470_ILEDCTL_ENB BIT(6) -+#define TPS68470_ILEDCTL_CTRLB GENMASK(5, 4) -+ - #endif /* __LINUX_MFD_TPS68470_H */ --- -2.42.0 - -From c8a6ce96be3a4dca7e9e99613b28494d10b4ade0 Mon Sep 17 00:00:00 2001 -From: Kate Hsuan -Date: Tue, 21 Mar 2023 23:37:18 +0800 -Subject: [PATCH] leds: tps68470: Add LED control for tps68470 - -There are two LED controllers, LEDA indicator LED and LEDB flash LED for -tps68470. LEDA can be enabled by setting TPS68470_ILEDCTL_ENA. Moreover, -tps68470 provides four levels of power status for LEDB. If the -properties called "ti,ledb-current" can be found, the current will be -set according to the property values. These two LEDs can be controlled -through the LED class of sysfs (tps68470-leda and tps68470-ledb). - -Signed-off-by: Kate Hsuan -Reviewed-by: Hans de Goede -Patchset: cameras ---- - drivers/leds/Kconfig | 12 +++ - drivers/leds/Makefile | 1 + - drivers/leds/leds-tps68470.c | 185 +++++++++++++++++++++++++++++++++++ - 3 files changed, 198 insertions(+) - create mode 100644 drivers/leds/leds-tps68470.c - -diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig -index b92208eccdea9..312c0c21cc5ef 100644 ---- a/drivers/leds/Kconfig -+++ b/drivers/leds/Kconfig -@@ -873,6 +873,18 @@ config LEDS_TPS6105X - It is a single boost converter primarily for white LEDs and - audio amplifiers. - -+config LEDS_TPS68470 -+ tristate "LED support for TI TPS68470" -+ depends on LEDS_CLASS -+ depends on INTEL_SKL_INT3472 -+ help -+ This driver supports TPS68470 PMIC with LED chip. 
-+ It provides two LED controllers, with the ability to drive 2 -+ indicator LEDs and 2 flash LEDs. -+ -+ To compile this driver as a module, choose M and it will be -+ called leds-tps68470 -+ - config LEDS_IP30 - tristate "LED support for SGI Octane machines" - depends on LEDS_CLASS -diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile -index d7348e8bc019a..10caea4e7c614 100644 ---- a/drivers/leds/Makefile -+++ b/drivers/leds/Makefile -@@ -84,6 +84,7 @@ obj-$(CONFIG_LEDS_TCA6507) += leds-tca6507.o - obj-$(CONFIG_LEDS_TI_LMU_COMMON) += leds-ti-lmu-common.o - obj-$(CONFIG_LEDS_TLC591XX) += leds-tlc591xx.o - obj-$(CONFIG_LEDS_TPS6105X) += leds-tps6105x.o -+obj-$(CONFIG_LEDS_TPS68470) += leds-tps68470.o - obj-$(CONFIG_LEDS_TURRIS_OMNIA) += leds-turris-omnia.o - obj-$(CONFIG_LEDS_WM831X_STATUS) += leds-wm831x-status.o - obj-$(CONFIG_LEDS_WM8350) += leds-wm8350.o -diff --git a/drivers/leds/leds-tps68470.c b/drivers/leds/leds-tps68470.c -new file mode 100644 -index 0000000000000..35aeb5db89c8f ---- /dev/null -+++ b/drivers/leds/leds-tps68470.c -@@ -0,0 +1,185 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * LED driver for TPS68470 PMIC -+ * -+ * Copyright (C) 2023 Red Hat -+ * -+ * Authors: -+ * Kate Hsuan -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+#define lcdev_to_led(led_cdev) \ -+ container_of(led_cdev, struct tps68470_led, lcdev) -+ -+#define led_to_tps68470(led, index) \ -+ container_of(led, struct tps68470_device, leds[index]) -+ -+enum tps68470_led_ids { -+ TPS68470_ILED_A, -+ TPS68470_ILED_B, -+ TPS68470_NUM_LEDS -+}; -+ -+static const char *tps68470_led_names[] = { -+ [TPS68470_ILED_A] = "tps68470-iled_a", -+ [TPS68470_ILED_B] = "tps68470-iled_b", -+}; -+ -+struct tps68470_led { -+ unsigned int led_id; -+ struct led_classdev lcdev; -+}; -+ -+struct tps68470_device { -+ struct device *dev; -+ struct regmap *regmap; -+ struct tps68470_led leds[TPS68470_NUM_LEDS]; -+}; -+ -+enum ctrlb_current { -+ CTRLB_2MA = 0, -+ 
CTRLB_4MA = 1, -+ CTRLB_8MA = 2, -+ CTRLB_16MA = 3, -+}; -+ -+static int tps68470_brightness_set(struct led_classdev *led_cdev, enum led_brightness brightness) -+{ -+ struct tps68470_led *led = lcdev_to_led(led_cdev); -+ struct tps68470_device *tps68470 = led_to_tps68470(led, led->led_id); -+ struct regmap *regmap = tps68470->regmap; -+ -+ switch (led->led_id) { -+ case TPS68470_ILED_A: -+ return regmap_update_bits(regmap, TPS68470_REG_ILEDCTL, TPS68470_ILEDCTL_ENA, -+ brightness ? TPS68470_ILEDCTL_ENA : 0); -+ case TPS68470_ILED_B: -+ return regmap_update_bits(regmap, TPS68470_REG_ILEDCTL, TPS68470_ILEDCTL_ENB, -+ brightness ? TPS68470_ILEDCTL_ENB : 0); -+ } -+ return -EINVAL; -+} -+ -+static enum led_brightness tps68470_brightness_get(struct led_classdev *led_cdev) -+{ -+ struct tps68470_led *led = lcdev_to_led(led_cdev); -+ struct tps68470_device *tps68470 = led_to_tps68470(led, led->led_id); -+ struct regmap *regmap = tps68470->regmap; -+ int ret = 0; -+ int value = 0; -+ -+ ret = regmap_read(regmap, TPS68470_REG_ILEDCTL, &value); -+ if (ret) -+ return dev_err_probe(led_cdev->dev, -EINVAL, "failed on reading register\n"); -+ -+ switch (led->led_id) { -+ case TPS68470_ILED_A: -+ value = value & TPS68470_ILEDCTL_ENA; -+ break; -+ case TPS68470_ILED_B: -+ value = value & TPS68470_ILEDCTL_ENB; -+ break; -+ } -+ -+ return value ? 
LED_ON : LED_OFF; -+} -+ -+ -+static int tps68470_ledb_current_init(struct platform_device *pdev, -+ struct tps68470_device *tps68470) -+{ -+ int ret = 0; -+ unsigned int curr; -+ -+ /* configure LEDB current if the properties can be got */ -+ if (!device_property_read_u32(&pdev->dev, "ti,ledb-current", &curr)) { -+ if (curr > CTRLB_16MA) { -+ dev_err(&pdev->dev, -+ "Invalid LEDB current value: %d\n", -+ curr); -+ return -EINVAL; -+ } -+ ret = regmap_update_bits(tps68470->regmap, TPS68470_REG_ILEDCTL, -+ TPS68470_ILEDCTL_CTRLB, curr); -+ } -+ return ret; -+} -+ -+static int tps68470_leds_probe(struct platform_device *pdev) -+{ -+ int i = 0; -+ int ret = 0; -+ struct tps68470_device *tps68470; -+ struct tps68470_led *led; -+ struct led_classdev *lcdev; -+ -+ tps68470 = devm_kzalloc(&pdev->dev, sizeof(struct tps68470_device), -+ GFP_KERNEL); -+ if (!tps68470) -+ return -ENOMEM; -+ -+ tps68470->dev = &pdev->dev; -+ tps68470->regmap = dev_get_drvdata(pdev->dev.parent); -+ -+ for (i = 0; i < TPS68470_NUM_LEDS; i++) { -+ led = &tps68470->leds[i]; -+ lcdev = &led->lcdev; -+ -+ led->led_id = i; -+ -+ lcdev->name = devm_kasprintf(tps68470->dev, GFP_KERNEL, "%s::%s", -+ tps68470_led_names[i], LED_FUNCTION_INDICATOR); -+ if (!lcdev->name) -+ return -ENOMEM; -+ -+ lcdev->max_brightness = 1; -+ lcdev->brightness = 0; -+ lcdev->brightness_set_blocking = tps68470_brightness_set; -+ lcdev->brightness_get = tps68470_brightness_get; -+ lcdev->dev = &pdev->dev; -+ -+ ret = devm_led_classdev_register(tps68470->dev, lcdev); -+ if (ret) { -+ dev_err_probe(tps68470->dev, ret, -+ "error registering led\n"); -+ goto err_exit; -+ } -+ -+ if (i == TPS68470_ILED_B) { -+ ret = tps68470_ledb_current_init(pdev, tps68470); -+ if (ret) -+ goto err_exit; -+ } -+ } -+ -+err_exit: -+ if (ret) { -+ for (i = 0; i < TPS68470_NUM_LEDS; i++) { -+ if (tps68470->leds[i].lcdev.name) -+ devm_led_classdev_unregister(&pdev->dev, -+ &tps68470->leds[i].lcdev); -+ } -+ } -+ -+ return ret; -+} -+static struct 
platform_driver tps68470_led_driver = { -+ .driver = { -+ .name = "tps68470-led", -+ }, -+ .probe = tps68470_leds_probe, -+}; -+ -+module_platform_driver(tps68470_led_driver); -+ -+MODULE_ALIAS("platform:tps68470-led"); -+MODULE_DESCRIPTION("LED driver for TPS68470 PMIC"); -+MODULE_LICENSE("GPL v2"); --- -2.42.0 - -From 82252c3764ecee6c09218077759072f15001f9ee Mon Sep 17 00:00:00 2001 -From: Sachi King -Date: Sat, 29 May 2021 17:47:38 +1000 -Subject: [PATCH] ACPI: Add quirk for Surface Laptop 4 AMD missing irq 7 - override - -This patch is the work of Thomas Gleixner and is -copied from: -https://lore.kernel.org/lkml/87lf8ddjqx.ffs@nanos.tec.linutronix.de/ - -This patch adds a quirk to the ACPI setup to patch in the the irq 7 pin -setup that is missing in the laptops ACPI table. - -This patch was used for validation of the issue, and is not a proper -fix, but is probably a better temporary hack than continuing to probe -the Legacy PIC and run with the PIC in an unknown state. - -Patchset: amd-gpio ---- - arch/x86/kernel/acpi/boot.c | 17 +++++++++++++++++ - 1 file changed, 17 insertions(+) - -diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c -index c55c0ef47a187..f29740cf89ff6 100644 ---- a/arch/x86/kernel/acpi/boot.c -+++ b/arch/x86/kernel/acpi/boot.c -@@ -22,6 +22,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -1255,6 +1256,17 @@ static void __init mp_config_acpi_legacy_irqs(void) - } - } - -+static const struct dmi_system_id surface_quirk[] __initconst = { -+ { -+ .ident = "Microsoft Surface Laptop 4 (AMD)", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), -+ DMI_MATCH(DMI_PRODUCT_SKU, "Surface_Laptop_4_1952:1953") -+ }, -+ }, -+ {} -+}; -+ - /* - * Parse IOAPIC related entries in MADT - * returns 0 on success, < 0 on error -@@ -1310,6 +1322,11 @@ static int __init acpi_parse_madt_ioapic_entries(void) - acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0, - acpi_gbl_FADT.sci_interrupt); 
- -+ if (dmi_check_system(surface_quirk)) { -+ pr_warn("Surface hack: Override irq 7\n"); -+ mp_override_legacy_irq(7, 3, 3, 7); -+ } -+ - /* Fill in identity legacy mappings where no override */ - mp_config_acpi_legacy_irqs(); - --- -2.42.0 - -From 52e3f50633128a93bf99ca5c97f98929da66a9ed Mon Sep 17 00:00:00 2001 -From: Maximilian Luz -Date: Thu, 3 Jun 2021 14:04:26 +0200 -Subject: [PATCH] ACPI: Add AMD 13" Surface Laptop 4 model to irq 7 override - quirk - -The 13" version of the Surface Laptop 4 has the same problem as the 15" -version, but uses a different SKU. Add that SKU to the quirk as well. - -Patchset: amd-gpio ---- - arch/x86/kernel/acpi/boot.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c -index f29740cf89ff6..247d2a8bcdf4b 100644 ---- a/arch/x86/kernel/acpi/boot.c -+++ b/arch/x86/kernel/acpi/boot.c -@@ -1258,12 +1258,19 @@ static void __init mp_config_acpi_legacy_irqs(void) - - static const struct dmi_system_id surface_quirk[] __initconst = { - { -- .ident = "Microsoft Surface Laptop 4 (AMD)", -+ .ident = "Microsoft Surface Laptop 4 (AMD 15\")", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), - DMI_MATCH(DMI_PRODUCT_SKU, "Surface_Laptop_4_1952:1953") - }, - }, -+ { -+ .ident = "Microsoft Surface Laptop 4 (AMD 13\")", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), -+ DMI_MATCH(DMI_PRODUCT_SKU, "Surface_Laptop_4_1958:1959") -+ }, -+ }, - {} - }; - --- -2.42.0 - -From 8cd23b1bb3a8b7a3ef7cec2c37e7e46e6397a858 Mon Sep 17 00:00:00 2001 -From: "Bart Groeneveld | GPX Solutions B.V" -Date: Mon, 5 Dec 2022 16:08:46 +0100 -Subject: [PATCH] acpi: allow usage of acpi_tad on HW-reduced platforms - -The specification [1] allows so-called HW-reduced platforms, -which do not implement everything, especially the wakeup related stuff. - -In that case, it is still usable as a RTC. 
This is helpful for [2] -and [3], which is about a device with no other working RTC, -but it does have an HW-reduced TAD, which can be used as a RTC instead. - -[1]: https://uefi.org/specs/ACPI/6.5/09_ACPI_Defined_Devices_and_Device_Specific_Objects.html#time-and-alarm-device -[2]: https://bugzilla.kernel.org/show_bug.cgi?id=212313 -[3]: https://github.com/linux-surface/linux-surface/issues/415 - -Signed-off-by: Bart Groeneveld | GPX Solutions B.V. -Patchset: rtc ---- - drivers/acpi/acpi_tad.c | 36 ++++++++++++++++++++++++------------ - 1 file changed, 24 insertions(+), 12 deletions(-) - -diff --git a/drivers/acpi/acpi_tad.c b/drivers/acpi/acpi_tad.c -index 33c3b16af556b..900445d06623d 100644 ---- a/drivers/acpi/acpi_tad.c -+++ b/drivers/acpi/acpi_tad.c -@@ -432,6 +432,14 @@ static ssize_t caps_show(struct device *dev, struct device_attribute *attr, - - static DEVICE_ATTR_RO(caps); - -+static struct attribute *acpi_tad_attrs[] = { -+ &dev_attr_caps.attr, -+ NULL, -+}; -+static const struct attribute_group acpi_tad_attr_group = { -+ .attrs = acpi_tad_attrs, -+}; -+ - static ssize_t ac_alarm_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) - { -@@ -480,15 +488,14 @@ static ssize_t ac_status_show(struct device *dev, struct device_attribute *attr, - - static DEVICE_ATTR_RW(ac_status); - --static struct attribute *acpi_tad_attrs[] = { -- &dev_attr_caps.attr, -+static struct attribute *acpi_tad_ac_attrs[] = { - &dev_attr_ac_alarm.attr, - &dev_attr_ac_policy.attr, - &dev_attr_ac_status.attr, - NULL, - }; --static const struct attribute_group acpi_tad_attr_group = { -- .attrs = acpi_tad_attrs, -+static const struct attribute_group acpi_tad_ac_attr_group = { -+ .attrs = acpi_tad_ac_attrs, - }; - - static ssize_t dc_alarm_store(struct device *dev, struct device_attribute *attr, -@@ -564,13 +571,18 @@ static int acpi_tad_remove(struct platform_device *pdev) - - pm_runtime_get_sync(dev); - -+ if (dd->capabilities & ACPI_TAD_AC_WAKE) -+ 
sysfs_remove_group(&dev->kobj, &acpi_tad_ac_attr_group); -+ - if (dd->capabilities & ACPI_TAD_DC_WAKE) - sysfs_remove_group(&dev->kobj, &acpi_tad_dc_attr_group); - - sysfs_remove_group(&dev->kobj, &acpi_tad_attr_group); - -- acpi_tad_disable_timer(dev, ACPI_TAD_AC_TIMER); -- acpi_tad_clear_status(dev, ACPI_TAD_AC_TIMER); -+ if (dd->capabilities & ACPI_TAD_AC_WAKE) { -+ acpi_tad_disable_timer(dev, ACPI_TAD_AC_TIMER); -+ acpi_tad_clear_status(dev, ACPI_TAD_AC_TIMER); -+ } - if (dd->capabilities & ACPI_TAD_DC_WAKE) { - acpi_tad_disable_timer(dev, ACPI_TAD_DC_TIMER); - acpi_tad_clear_status(dev, ACPI_TAD_DC_TIMER); -@@ -613,12 +625,6 @@ static int acpi_tad_probe(struct platform_device *pdev) - goto remove_handler; - } - -- if (!acpi_has_method(handle, "_PRW")) { -- dev_info(dev, "Missing _PRW\n"); -- ret = -ENODEV; -- goto remove_handler; -- } -- - dd = devm_kzalloc(dev, sizeof(*dd), GFP_KERNEL); - if (!dd) { - ret = -ENOMEM; -@@ -649,6 +655,12 @@ static int acpi_tad_probe(struct platform_device *pdev) - if (ret) - goto fail; - -+ if (caps & ACPI_TAD_AC_WAKE) { -+ ret = sysfs_create_group(&dev->kobj, &acpi_tad_ac_attr_group); -+ if (ret) -+ goto fail; -+ } -+ - if (caps & ACPI_TAD_DC_WAKE) { - ret = sysfs_create_group(&dev->kobj, &acpi_tad_dc_attr_group); - if (ret) --- -2.42.0 - diff --git a/patches/nobara/mt76:-mt7921:-Disable-powersave-features-by-default.patch b/patches/nobara/mt76:-mt7921:-Disable-powersave-features-by-default.patch deleted file mode 100644 index a397014..0000000 --- a/patches/nobara/mt76:-mt7921:-Disable-powersave-features-by-default.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Jan200101 -Date: Mon, 27 Nov 2023 15:25:48 +0100 -Subject: [PATCH] mt76: mt7921: Disable powersave features by default - -This brings WiFi latency down considerably and makes latency consistent by -disabling runtime PM and typical powersave features by default. 
The actual -power consumption difference is inconsequential on desktops and laptops, -while the performance difference is monumental. Latencies of 20+ ms are no -longer observed after this change, and the connection is much more stable. - -Signed-off-by: Jan200101 ---- - drivers/net/wireless/mediatek/mt76/mt7921/init.c | 8 ++------ - 1 file changed, 2 insertions(+), 6 deletions(-) - -diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c -index ff63f37f67d9..840b4c606c83 100644 ---- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c -+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c -@@ -220,12 +220,6 @@ int mt7921_register_device(struct mt792x_dev *dev) - dev->pm.idle_timeout = MT792x_PM_TIMEOUT; - dev->pm.stats.last_wake_event = jiffies; - dev->pm.stats.last_doze_event = jiffies; -- if (!mt76_is_usb(&dev->mt76)) { -- dev->pm.enable_user = true; -- dev->pm.enable = true; -- dev->pm.ds_enable_user = true; -- dev->pm.ds_enable = true; -- } - - if (!mt76_is_mmio(&dev->mt76)) - hw->extra_tx_headroom += MT_SDIO_TXD_SIZE + MT_SDIO_HDR_SIZE; -@@ -240,6 +234,8 @@ int mt7921_register_device(struct mt792x_dev *dev) - if (ret) - return ret; - -+ hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT; -+ - hw->wiphy->reg_notifier = mt7921_regd_notifier; - dev->mphy.sband_2g.sband.ht_cap.cap |= - IEEE80211_HT_CAP_LDPC_CODING | diff --git a/patches/nobara/set-ps4-bt-poll-rate-1000hz.patch b/patches/nobara/set-ps4-bt-poll-rate-1000hz.patch deleted file mode 100644 index 8431cf7..0000000 --- a/patches/nobara/set-ps4-bt-poll-rate-1000hz.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 0f2c07ab93dca496a1f34399ad2ff8a954690a72 Mon Sep 17 00:00:00 2001 -From: GloriousEggroll -Date: Mon, 29 May 2023 17:15:14 -0600 -Subject: [PATCH] set ds controller bluetooth pollrate to 1 ms - ---- - drivers/hid/hid-playstation.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/drivers/hid/hid-playstation.c 
b/drivers/hid/hid-playstation.c -index 8ac8f7b8e..1130663c3 100644 ---- a/drivers/hid/hid-playstation.c -+++ b/drivers/hid/hid-playstation.c -@@ -330,8 +330,8 @@ struct dualsense_output_report { - * 0x3F - disabled - */ - #define DS4_OUTPUT_HWCTL_BT_POLL_MASK 0x3F --/* Default to 4ms poll interval, which is same as USB (not adjustable). */ --#define DS4_BT_DEFAULT_POLL_INTERVAL_MS 4 -+/* Default to 1ms poll interval (1000Hz, lower latency). */ -+#define DS4_BT_DEFAULT_POLL_INTERVAL_MS 1 - #define DS4_OUTPUT_HWCTL_CRC32 0x40 - #define DS4_OUTPUT_HWCTL_HID 0x80 - --- -2.40.1 - diff --git a/patches/nobara/steam-deck.patch b/patches/nobara/steam-deck.patch deleted file mode 100644 index 9eba750..0000000 --- a/patches/nobara/steam-deck.patch +++ /dev/null @@ -1,2497 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Andrey Smirnov -Date: Sat, 19 Feb 2022 16:08:36 -0800 -Subject: [PATCH] mfd: Add MFD core driver for Steam Deck - -Add MFD core driver for Steam Deck. Doesn't really do much so far -besides instantiating a number of MFD cells that implement all the -interesting functionality. - -(cherry picked from commit 5f534c2d6ebdefccb9c024eb0f013bc1c0c622d9) -Signed-off-by: Cristian Ciocaltea -Signed-off-by: Jan200101 ---- - drivers/mfd/Kconfig | 11 ++++ - drivers/mfd/Makefile | 2 + - drivers/mfd/steamdeck.c | 127 ++++++++++++++++++++++++++++++++++++++++ - 3 files changed, 140 insertions(+) - create mode 100644 drivers/mfd/steamdeck.c - -diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig -index 8b93856de432..af335d9150e9 100644 ---- a/drivers/mfd/Kconfig -+++ b/drivers/mfd/Kconfig -@@ -2260,5 +2260,16 @@ config MFD_RSMU_SPI - Additional drivers must be enabled in order to use the functionality - of the device. 
- -+config MFD_STEAMDECK -+ tristate "Valve Steam Deck" -+ select MFD_CORE -+ depends on ACPI -+ depends on X86_64 || COMPILE_TEST -+ help -+ This driver registers various MFD cells that expose aspects -+ of Steam Deck specific ACPI functionality. -+ -+ Say N here, unless you are running on Steam Deck hardware. -+ - endmenu - endif -diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile -index 7ed3ef4a698c..d01254ef0106 100644 ---- a/drivers/mfd/Makefile -+++ b/drivers/mfd/Makefile -@@ -280,3 +280,5 @@ rsmu-i2c-objs := rsmu_core.o rsmu_i2c.o - rsmu-spi-objs := rsmu_core.o rsmu_spi.o - obj-$(CONFIG_MFD_RSMU_I2C) += rsmu-i2c.o - obj-$(CONFIG_MFD_RSMU_SPI) += rsmu-spi.o -+ -+obj-$(CONFIG_MFD_STEAMDECK) += steamdeck.o -diff --git a/drivers/mfd/steamdeck.c b/drivers/mfd/steamdeck.c -new file mode 100644 -index 000000000000..0e504b3c2796 ---- /dev/null -+++ b/drivers/mfd/steamdeck.c -@@ -0,0 +1,127 @@ -+// SPDX-License-Identifier: GPL-2.0+ -+ -+/* -+ * Steam Deck EC MFD core driver -+ * -+ * Copyright (C) 2021-2022 Valve Corporation -+ * -+ */ -+ -+#include -+#include -+#include -+ -+#define STEAMDECK_STA_OK \ -+ (ACPI_STA_DEVICE_ENABLED | \ -+ ACPI_STA_DEVICE_PRESENT | \ -+ ACPI_STA_DEVICE_FUNCTIONING) -+ -+struct steamdeck { -+ struct acpi_device *adev; -+ struct device *dev; -+}; -+ -+#define STEAMDECK_ATTR_RO(_name, _method) \ -+ static ssize_t _name##_show(struct device *dev, \ -+ struct device_attribute *attr, \ -+ char *buf) \ -+ { \ -+ struct steamdeck *sd = dev_get_drvdata(dev); \ -+ unsigned long long val; \ -+ \ -+ if (ACPI_FAILURE(acpi_evaluate_integer( \ -+ sd->adev->handle, \ -+ _method, NULL, &val))) \ -+ return -EIO; \ -+ \ -+ return sysfs_emit(buf, "%llu\n", val); \ -+ } \ -+ static DEVICE_ATTR_RO(_name) -+ -+STEAMDECK_ATTR_RO(firmware_version, "PDFW"); -+STEAMDECK_ATTR_RO(board_id, "BOID"); -+ -+static struct attribute *steamdeck_attrs[] = { -+ &dev_attr_firmware_version.attr, -+ &dev_attr_board_id.attr, -+ NULL -+}; -+ -+ATTRIBUTE_GROUPS(steamdeck); 
-+ -+static const struct mfd_cell steamdeck_cells[] = { -+ { .name = "steamdeck-hwmon" }, -+ { .name = "steamdeck-leds" }, -+ { .name = "steamdeck-extcon" }, -+}; -+ -+static void steamdeck_remove_sysfs_groups(void *data) -+{ -+ struct steamdeck *sd = data; -+ -+ sysfs_remove_groups(&sd->dev->kobj, steamdeck_groups); -+} -+ -+static int steamdeck_probe(struct platform_device *pdev) -+{ -+ struct device *dev = &pdev->dev; -+ unsigned long long sta; -+ struct steamdeck *sd; -+ acpi_status status; -+ int ret; -+ -+ sd = devm_kzalloc(dev, sizeof(*sd), GFP_KERNEL); -+ if (!sd) -+ return -ENOMEM; -+ sd->adev = ACPI_COMPANION(dev); -+ sd->dev = dev; -+ platform_set_drvdata(pdev, sd); -+ -+ status = acpi_evaluate_integer(sd->adev->handle, "_STA", -+ NULL, &sta); -+ if (ACPI_FAILURE(status)) { -+ dev_err(dev, "Status check failed (0x%x)\n", status); -+ return -EINVAL; -+ } -+ -+ if ((sta & STEAMDECK_STA_OK) != STEAMDECK_STA_OK) { -+ dev_err(dev, "Device is not ready\n"); -+ return -EINVAL; -+ } -+ -+ ret = sysfs_create_groups(&dev->kobj, steamdeck_groups); -+ if (ret) { -+ dev_err(dev, "Failed to create sysfs group\n"); -+ return ret; -+ } -+ -+ ret = devm_add_action_or_reset(dev, steamdeck_remove_sysfs_groups, -+ sd); -+ if (ret) { -+ dev_err(dev, "Failed to register devres action\n"); -+ return ret; -+ } -+ -+ return devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE, -+ steamdeck_cells, ARRAY_SIZE(steamdeck_cells), -+ NULL, 0, NULL); -+} -+ -+static const struct acpi_device_id steamdeck_device_ids[] = { -+ { "VLV0100", 0 }, -+ { "", 0 }, -+}; -+MODULE_DEVICE_TABLE(acpi, steamdeck_device_ids); -+ -+static struct platform_driver steamdeck_driver = { -+ .probe = steamdeck_probe, -+ .driver = { -+ .name = "steamdeck", -+ .acpi_match_table = steamdeck_device_ids, -+ }, -+}; -+module_platform_driver(steamdeck_driver); -+ -+MODULE_AUTHOR("Andrey Smirnov "); -+MODULE_DESCRIPTION("Steam Deck EC MFD core driver"); -+MODULE_LICENSE("GPL"); - -From 
0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Andrey Smirnov -Date: Sat, 19 Feb 2022 16:09:45 -0800 -Subject: [PATCH] hwmon: Add driver for Steam Deck's EC sensors - -Add driver for sensors exposed by EC firmware on Steam Deck hardware. - -(cherry picked from commit 6917aac77bee6185ae3920b936cdbe7876118c0b) -Signed-off-by: Cristian Ciocaltea -Signed-off-by: Jan200101 ---- - drivers/hwmon/Kconfig | 11 ++ - drivers/hwmon/Makefile | 1 + - drivers/hwmon/steamdeck-hwmon.c | 224 ++++++++++++++++++++++++++++++++ - 3 files changed, 236 insertions(+) - create mode 100644 drivers/hwmon/steamdeck-hwmon.c - -diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig -index 7ac3daaf59ce..d784c78417cf 100644 ---- a/drivers/hwmon/Kconfig -+++ b/drivers/hwmon/Kconfig -@@ -1900,6 +1900,17 @@ config SENSORS_SCH5636 - This driver can also be built as a module. If so, the module - will be called sch5636. - -+config SENSORS_STEAMDECK -+ tristate "Steam Deck EC sensors" -+ depends on MFD_STEAMDECK -+ help -+ If you say yes here you get support for the hardware -+ monitoring features exposed by EC firmware on Steam Deck -+ devices -+ -+ This driver can also be built as a module. If so, the module -+ will be called steamdeck-hwmon. 
-+ - config SENSORS_STTS751 - tristate "ST Microelectronics STTS751" - depends on I2C -diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile -index 11d076cad8a2..d03c1e1d339f 100644 ---- a/drivers/hwmon/Makefile -+++ b/drivers/hwmon/Makefile -@@ -191,6 +191,7 @@ obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o - obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47m1.o - obj-$(CONFIG_SENSORS_SMSC47M192)+= smsc47m192.o - obj-$(CONFIG_SENSORS_SPARX5) += sparx5-temp.o -+obj-$(CONFIG_SENSORS_STEAMDECK) += steamdeck-hwmon.o - obj-$(CONFIG_SENSORS_STTS751) += stts751.o - obj-$(CONFIG_SENSORS_SY7636A) += sy7636a-hwmon.o - obj-$(CONFIG_SENSORS_AMC6821) += amc6821.o -diff --git a/drivers/hwmon/steamdeck-hwmon.c b/drivers/hwmon/steamdeck-hwmon.c -new file mode 100644 -index 000000000000..fab9e9460bd4 ---- /dev/null -+++ b/drivers/hwmon/steamdeck-hwmon.c -@@ -0,0 +1,224 @@ -+// SPDX-License-Identifier: GPL-2.0+ -+/* -+ * Steam Deck EC sensors driver -+ * -+ * Copyright (C) 2021-2022 Valve Corporation -+ */ -+ -+#include -+#include -+#include -+ -+#define STEAMDECK_HWMON_NAME "steamdeck-hwmon" -+ -+struct steamdeck_hwmon { -+ struct acpi_device *adev; -+}; -+ -+static long -+steamdeck_hwmon_get(struct steamdeck_hwmon *sd, const char *method) -+{ -+ unsigned long long val; -+ if (ACPI_FAILURE(acpi_evaluate_integer(sd->adev->handle, -+ (char *)method, NULL, &val))) -+ return -EIO; -+ -+ return val; -+} -+ -+static int -+steamdeck_hwmon_read(struct device *dev, enum hwmon_sensor_types type, -+ u32 attr, int channel, long *out) -+{ -+ struct steamdeck_hwmon *sd = dev_get_drvdata(dev); -+ -+ switch (type) { -+ case hwmon_curr: -+ if (attr != hwmon_curr_input) -+ return -EOPNOTSUPP; -+ -+ *out = steamdeck_hwmon_get(sd, "PDAM"); -+ if (*out < 0) -+ return *out; -+ break; -+ case hwmon_in: -+ if (attr != hwmon_in_input) -+ return -EOPNOTSUPP; -+ -+ *out = steamdeck_hwmon_get(sd, "PDVL"); -+ if (*out < 0) -+ return *out; -+ break; -+ case hwmon_temp: -+ if (attr != hwmon_temp_input) -+ return 
-EOPNOTSUPP; -+ -+ *out = steamdeck_hwmon_get(sd, "BATT"); -+ if (*out < 0) -+ return *out; -+ /* -+ * Assuming BATT returns deg C we need to mutiply it -+ * by 1000 to convert to mC -+ */ -+ *out *= 1000; -+ break; -+ case hwmon_fan: -+ switch (attr) { -+ case hwmon_fan_input: -+ *out = steamdeck_hwmon_get(sd, "FANR"); -+ if (*out < 0) -+ return *out; -+ break; -+ case hwmon_fan_target: -+ *out = steamdeck_hwmon_get(sd, "FSSR"); -+ if (*out < 0) -+ return *out; -+ break; -+ case hwmon_fan_fault: -+ *out = steamdeck_hwmon_get(sd, "FANC"); -+ if (*out < 0) -+ return *out; -+ /* -+ * FANC (Fan check): -+ * 0: Abnormal -+ * 1: Normal -+ */ -+ *out = !*out; -+ break; -+ default: -+ return -EOPNOTSUPP; -+ } -+ break; -+ default: -+ return -EOPNOTSUPP; -+ } -+ -+ return 0; -+} -+ -+static int -+steamdeck_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type, -+ u32 attr, int channel, const char **str) -+{ -+ switch (type) { -+ /* -+ * These two aren't, strictly speaking, measured. EC -+ * firmware just reports what PD negotiation resulted -+ * in. 
-+ */ -+ case hwmon_curr: -+ *str = "PD Contract Current"; -+ break; -+ case hwmon_in: -+ *str = "PD Contract Voltage"; -+ break; -+ case hwmon_temp: -+ *str = "Battery Temp"; -+ break; -+ case hwmon_fan: -+ *str = "System Fan"; -+ break; -+ default: -+ return -EOPNOTSUPP; -+ } -+ -+ return 0; -+} -+ -+static int -+steamdeck_hwmon_write(struct device *dev, enum hwmon_sensor_types type, -+ u32 attr, int channel, long val) -+{ -+ struct steamdeck_hwmon *sd = dev_get_drvdata(dev); -+ -+ if (type != hwmon_fan || -+ attr != hwmon_fan_target) -+ return -EOPNOTSUPP; -+ -+ val = clamp_val(val, 0, 7300); -+ -+ if (ACPI_FAILURE(acpi_execute_simple_method(sd->adev->handle, -+ "FANS", val))) -+ return -EIO; -+ -+ return 0; -+} -+ -+static umode_t -+steamdeck_hwmon_is_visible(const void *data, enum hwmon_sensor_types type, -+ u32 attr, int channel) -+{ -+ if (type == hwmon_fan && -+ attr == hwmon_fan_target) -+ return 0644; -+ -+ return 0444; -+} -+ -+static const struct hwmon_channel_info *steamdeck_hwmon_info[] = { -+ HWMON_CHANNEL_INFO(in, -+ HWMON_I_INPUT | HWMON_I_LABEL), -+ HWMON_CHANNEL_INFO(curr, -+ HWMON_C_INPUT | HWMON_C_LABEL), -+ HWMON_CHANNEL_INFO(temp, -+ HWMON_T_INPUT | HWMON_T_LABEL), -+ HWMON_CHANNEL_INFO(fan, -+ HWMON_F_INPUT | HWMON_F_LABEL | -+ HWMON_F_TARGET | HWMON_F_FAULT), -+ NULL -+}; -+ -+static const struct hwmon_ops steamdeck_hwmon_ops = { -+ .is_visible = steamdeck_hwmon_is_visible, -+ .read = steamdeck_hwmon_read, -+ .read_string = steamdeck_hwmon_read_string, -+ .write = steamdeck_hwmon_write, -+}; -+ -+static const struct hwmon_chip_info steamdeck_hwmon_chip_info = { -+ .ops = &steamdeck_hwmon_ops, -+ .info = steamdeck_hwmon_info, -+}; -+ -+static int steamdeck_hwmon_probe(struct platform_device *pdev) -+{ -+ struct device *dev = &pdev->dev; -+ struct steamdeck_hwmon *sd; -+ struct device *hwmon; -+ -+ sd = devm_kzalloc(dev, sizeof(*sd), GFP_KERNEL); -+ if (!sd) -+ return -ENOMEM; -+ -+ sd->adev = ACPI_COMPANION(dev->parent); -+ hwmon = 
devm_hwmon_device_register_with_info(dev, -+ "steamdeck_hwmon", -+ sd, -+ &steamdeck_hwmon_chip_info, -+ NULL); -+ if (IS_ERR(hwmon)) { -+ dev_err(dev, "Failed to register HWMON device"); -+ return PTR_ERR(hwmon); -+ } -+ -+ return 0; -+} -+ -+static const struct platform_device_id steamdeck_hwmon_id_table[] = { -+ { .name = STEAMDECK_HWMON_NAME }, -+ {} -+}; -+MODULE_DEVICE_TABLE(platform, steamdeck_hwmon_id_table); -+ -+static struct platform_driver steamdeck_hwmon_driver = { -+ .probe = steamdeck_hwmon_probe, -+ .driver = { -+ .name = STEAMDECK_HWMON_NAME, -+ }, -+ .id_table = steamdeck_hwmon_id_table, -+}; -+module_platform_driver(steamdeck_hwmon_driver); -+ -+MODULE_AUTHOR("Andrey Smirnov "); -+MODULE_DESCRIPTION("Steam Deck EC sensors driver"); -+MODULE_LICENSE("GPL"); - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Andrey Smirnov -Date: Sun, 27 Feb 2022 12:58:05 -0800 -Subject: [PATCH] leds: steamdeck: Add support for Steam Deck LED - -(cherry picked from commit 85a86d19aa7022ff0555023d53aef78323a42d0c) -Signed-off-by: Cristian Ciocaltea -Signed-off-by: Jan200101 ---- - drivers/leds/Kconfig | 7 ++++ - drivers/leds/Makefile | 1 + - drivers/leds/leds-steamdeck.c | 74 +++++++++++++++++++++++++++++++++++ - 3 files changed, 82 insertions(+) - create mode 100644 drivers/leds/leds-steamdeck.c - -diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig -index 499d0f215a8b..d1d761695cd6 100644 ---- a/drivers/leds/Kconfig -+++ b/drivers/leds/Kconfig -@@ -864,6 +864,13 @@ config LEDS_ACER_A500 - This option enables support for the Power Button LED of - Acer Iconia Tab A500. 
- -+config LEDS_STEAMDECK -+ tristate "LED support for Steam Deck" -+ depends on LEDS_CLASS && MFD_STEAMDECK -+ help -+ This option enabled support for the status LED (next to the -+ power button) on Steam Deck -+ - source "drivers/leds/blink/Kconfig" - - comment "Flash and Torch LED drivers" -diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile -index 4fd2f92cd198..130a1c175dde 100644 ---- a/drivers/leds/Makefile -+++ b/drivers/leds/Makefile -@@ -75,6 +75,7 @@ - obj-$(CONFIG_LEDS_PWM) += leds-pwm.o - obj-$(CONFIG_LEDS_REGULATOR) += leds-regulator.o - obj-$(CONFIG_LEDS_SC27XX_BLTC) += leds-sc27xx-bltc.o -+obj-$(CONFIG_LEDS_STEAMDECK) += leds-steamdeck.o - obj-$(CONFIG_LEDS_SUNFIRE) += leds-sunfire.o - obj-$(CONFIG_LEDS_SYSCON) += leds-syscon.o - obj-$(CONFIG_LEDS_TCA6507) += leds-tca6507.o -diff --git a/drivers/leds/leds-steamdeck.c b/drivers/leds/leds-steamdeck.c -new file mode 100644 -index 000000000000..686500b8de73 ---- /dev/null -+++ b/drivers/leds/leds-steamdeck.c -@@ -0,0 +1,74 @@ -+// SPDX-License-Identifier: GPL-2.0+ -+ -+/* -+ * Steam Deck EC MFD LED cell driver -+ * -+ * Copyright (C) 2021-2022 Valve Corporation -+ * -+ */ -+ -+#include -+#include -+#include -+ -+struct steamdeck_led { -+ struct acpi_device *adev; -+ struct led_classdev cdev; -+}; -+ -+static int steamdeck_leds_brightness_set(struct led_classdev *cdev, -+ enum led_brightness value) -+{ -+ struct steamdeck_led *sd = container_of(cdev, struct steamdeck_led, -+ cdev); -+ -+ if (ACPI_FAILURE(acpi_execute_simple_method(sd->adev->handle, -+ "CHBV", value))) -+ return -EIO; -+ -+ return 0; -+} -+ -+static int steamdeck_leds_probe(struct platform_device *pdev) -+{ -+ struct device *dev = &pdev->dev; -+ struct steamdeck_led *sd; -+ int ret; -+ -+ sd = devm_kzalloc(dev, sizeof(*sd), GFP_KERNEL); -+ if (!sd) -+ return -ENOMEM; -+ -+ sd->adev = ACPI_COMPANION(dev->parent); -+ -+ sd->cdev.name = "status:white"; -+ sd->cdev.brightness_set_blocking = steamdeck_leds_brightness_set; -+ 
sd->cdev.max_brightness = 100; -+ -+ ret = devm_led_classdev_register(dev, &sd->cdev); -+ if (ret) { -+ dev_err(dev, "Failed to register LEDs device: %d\n", ret); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static const struct platform_device_id steamdeck_leds_id_table[] = { -+ { .name = "steamdeck-leds" }, -+ {} -+}; -+MODULE_DEVICE_TABLE(platform, steamdeck_leds_id_table); -+ -+static struct platform_driver steamdeck_leds_driver = { -+ .probe = steamdeck_leds_probe, -+ .driver = { -+ .name = "steamdeck-leds", -+ }, -+ .id_table = steamdeck_leds_id_table, -+}; -+module_platform_driver(steamdeck_leds_driver); -+ -+MODULE_AUTHOR("Andrey Smirnov "); -+MODULE_DESCRIPTION("Steam Deck LEDs driver"); -+MODULE_LICENSE("GPL"); - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Andrey Smirnov -Date: Sun, 27 Feb 2022 14:46:08 -0800 -Subject: [PATCH] extcon: Add driver for Steam Deck - -(cherry picked from commit f9f2eddae582ae39d5f89c1218448fc259b90aa8) -Signed-off-by: Cristian Ciocaltea -Signed-off-by: Jan200101 ---- - drivers/extcon/Kconfig | 7 ++ - drivers/extcon/Makefile | 1 + - drivers/extcon/extcon-steamdeck.c | 180 ++++++++++++++++++++++++++++++ - 3 files changed, 188 insertions(+) - create mode 100644 drivers/extcon/extcon-steamdeck.c - -diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig -index 290186e44e6b..4d444a9e2c1f 100644 ---- a/drivers/extcon/Kconfig -+++ b/drivers/extcon/Kconfig -@@ -189,4 +189,11 @@ config EXTCON_USBC_TUSB320 - Say Y here to enable support for USB Type C cable detection extcon - support using a TUSB320. 
- -+config EXTCON_STEAMDECK -+ tristate "Steam Deck extcon support" -+ depends on MFD_STEAMDECK -+ help -+ Say Y here to enable support of USB Type C cable detection extcon -+ support on Steam Deck devices -+ - endif -diff --git a/drivers/extcon/Makefile b/drivers/extcon/Makefile -index 1b390d934ca9..1c7e217f29e4 100644 ---- a/drivers/extcon/Makefile -+++ b/drivers/extcon/Makefile -@@ -25,3 +25,4 @@ obj-$(CONFIG_EXTCON_SM5502) += extcon-sm5502.o - obj-$(CONFIG_EXTCON_USB_GPIO) += extcon-usb-gpio.o - obj-$(CONFIG_EXTCON_USBC_CROS_EC) += extcon-usbc-cros-ec.o - obj-$(CONFIG_EXTCON_USBC_TUSB320) += extcon-usbc-tusb320.o -+obj-$(CONFIG_EXTCON_STEAMDECK) += extcon-steamdeck.o -diff --git a/drivers/extcon/extcon-steamdeck.c b/drivers/extcon/extcon-steamdeck.c -new file mode 100644 -index 000000000000..74f190adc8ea ---- /dev/null -+++ b/drivers/extcon/extcon-steamdeck.c -@@ -0,0 +1,180 @@ -+ -+#include -+#include -+#include -+ -+#define ACPI_STEAMDECK_NOTIFY_STATUS 0x80 -+ -+/* 0 - port connected, 1 -port disconnected */ -+#define ACPI_STEAMDECK_PORT_CONNECT BIT(0) -+/* 0 - Upstream Facing Port, 1 - Downdstream Facing Port */ -+#define ACPI_STEAMDECK_CUR_DATA_ROLE BIT(3) -+/* -+ * Debouncing delay to allow negotiation process to settle. 2s value -+ * was arrived at via trial and error. 
-+ */ -+#define STEAMDECK_ROLE_SWITCH_DELAY (msecs_to_jiffies(2000)) -+ -+struct steamdeck_extcon { -+ struct acpi_device *adev; -+ struct delayed_work role_work; -+ struct extcon_dev *edev; -+ struct device *dev; -+}; -+ -+static int steamdeck_read_pdcs(struct steamdeck_extcon *sd, unsigned long long *pdcs) -+{ -+ acpi_status status; -+ -+ status = acpi_evaluate_integer(sd->adev->handle, "PDCS", NULL, pdcs); -+ if (ACPI_FAILURE(status)) { -+ dev_err(sd->dev, "PDCS evaluation failed: %s\n", -+ acpi_format_exception(status)); -+ return -EIO; -+ } -+ -+ return 0; -+} -+ -+static void steamdeck_usb_role_work(struct work_struct *work) -+{ -+ struct steamdeck_extcon *sd = -+ container_of(work, struct steamdeck_extcon, role_work.work); -+ unsigned long long pdcs; -+ bool usb_host; -+ -+ if (steamdeck_read_pdcs(sd, &pdcs)) -+ return; -+ -+ /* -+ * We only care about these two -+ */ -+ pdcs &= ACPI_STEAMDECK_PORT_CONNECT | ACPI_STEAMDECK_CUR_DATA_ROLE; -+ -+ /* -+ * For "connect" events our role is determined by a bit in -+ * PDCS, for "disconnect" we switch to being a gadget -+ * unconditionally. The thinking for the latter is we don't -+ * want to start acting as a USB host until we get -+ * confirmation from the firmware that we are a USB host -+ */ -+ usb_host = (pdcs & ACPI_STEAMDECK_PORT_CONNECT) ? -+ pdcs & ACPI_STEAMDECK_CUR_DATA_ROLE : false; -+ -+ dev_dbg(sd->dev, "USB role is %s\n", usb_host ? "host" : "device"); -+ WARN_ON(extcon_set_state_sync(sd->edev, EXTCON_USB_HOST, -+ usb_host)); -+ -+} -+ -+static void steamdeck_notify(acpi_handle handle, u32 event, void *context) -+{ -+ struct device *dev = context; -+ struct steamdeck_extcon *sd = dev_get_drvdata(dev); -+ unsigned long long pdcs; -+ unsigned long delay; -+ -+ switch (event) { -+ case ACPI_STEAMDECK_NOTIFY_STATUS: -+ if (steamdeck_read_pdcs(sd, &pdcs)) -+ return; -+ /* -+ * We process "disconnect" events immediately and -+ * "connect" events with a delay to give the HW time -+ * to settle. 
For example attaching USB hub (at least -+ * for HW used for testing) will generate intermediary -+ * event with "host" bit not set, followed by the one -+ * that does have it set. -+ */ -+ delay = (pdcs & ACPI_STEAMDECK_PORT_CONNECT) ? -+ STEAMDECK_ROLE_SWITCH_DELAY : 0; -+ -+ queue_delayed_work(system_long_wq, &sd->role_work, delay); -+ break; -+ default: -+ dev_warn(dev, "Unsupported event [0x%x]\n", event); -+ } -+} -+ -+static void steamdeck_remove_notify_handler(void *data) -+{ -+ struct steamdeck_extcon *sd = data; -+ -+ acpi_remove_notify_handler(sd->adev->handle, ACPI_DEVICE_NOTIFY, -+ steamdeck_notify); -+ cancel_delayed_work_sync(&sd->role_work); -+} -+ -+static const unsigned int steamdeck_extcon_cable[] = { -+ EXTCON_USB, -+ EXTCON_USB_HOST, -+ EXTCON_CHG_USB_SDP, -+ EXTCON_CHG_USB_CDP, -+ EXTCON_CHG_USB_DCP, -+ EXTCON_CHG_USB_ACA, -+ EXTCON_NONE, -+}; -+ -+static int steamdeck_extcon_probe(struct platform_device *pdev) -+{ -+ struct device *dev = &pdev->dev; -+ struct steamdeck_extcon *sd; -+ acpi_status status; -+ int ret; -+ -+ sd = devm_kzalloc(dev, sizeof(*sd), GFP_KERNEL); -+ if (!sd) -+ return -ENOMEM; -+ -+ INIT_DELAYED_WORK(&sd->role_work, steamdeck_usb_role_work); -+ platform_set_drvdata(pdev, sd); -+ sd->adev = ACPI_COMPANION(dev->parent); -+ sd->dev = dev; -+ sd->edev = devm_extcon_dev_allocate(dev, steamdeck_extcon_cable); -+ if (IS_ERR(sd->edev)) -+ return PTR_ERR(sd->edev); -+ -+ ret = devm_extcon_dev_register(dev, sd->edev); -+ if (ret < 0) { -+ dev_err(dev, "Failed to register extcon device: %d\n", ret); -+ return ret; -+ } -+ -+ /* -+ * Set initial role value -+ */ -+ queue_delayed_work(system_long_wq, &sd->role_work, 0); -+ flush_delayed_work(&sd->role_work); -+ -+ status = acpi_install_notify_handler(sd->adev->handle, -+ ACPI_DEVICE_NOTIFY, -+ steamdeck_notify, -+ dev); -+ if (ACPI_FAILURE(status)) { -+ dev_err(dev, "Error installing ACPI notify handler\n"); -+ return -EIO; -+ } -+ -+ ret = devm_add_action_or_reset(dev, 
steamdeck_remove_notify_handler, -+ sd); -+ return ret; -+} -+ -+static const struct platform_device_id steamdeck_extcon_id_table[] = { -+ { .name = "steamdeck-extcon" }, -+ {} -+}; -+MODULE_DEVICE_TABLE(platform, steamdeck_extcon_id_table); -+ -+static struct platform_driver steamdeck_extcon_driver = { -+ .probe = steamdeck_extcon_probe, -+ .driver = { -+ .name = "steamdeck-extcon", -+ }, -+ .id_table = steamdeck_extcon_id_table, -+}; -+module_platform_driver(steamdeck_extcon_driver); -+ -+MODULE_AUTHOR("Andrey Smirnov "); -+MODULE_DESCRIPTION("Steam Deck extcon driver"); -+MODULE_LICENSE("GPL"); - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Andrey Smirnov -Date: Sat, 15 Jul 2023 12:58:54 -0700 -Subject: [PATCH] hwmon: steamdeck-hwmon: Add support for max battery - level/rate - -Add support for max battery level/charge rate attributes. - -Signed-off-by: Andrey Smirnov -(cherry picked from commit 50af83e8fd75dc52221edd3fb6fd7a7f70c4d8a4) -Signed-off-by: Cristian Ciocaltea -Signed-off-by: Jan200101 ---- - drivers/hwmon/steamdeck-hwmon.c | 72 ++++++++++++++++++++++++++++++++- - 1 file changed, 71 insertions(+), 1 deletion(-) - -diff --git a/drivers/hwmon/steamdeck-hwmon.c b/drivers/hwmon/steamdeck-hwmon.c -index fab9e9460bd4..9d0a5471b181 100644 ---- a/drivers/hwmon/steamdeck-hwmon.c -+++ b/drivers/hwmon/steamdeck-hwmon.c -@@ -180,6 +180,76 @@ static const struct hwmon_chip_info steamdeck_hwmon_chip_info = { - .info = steamdeck_hwmon_info, - }; - -+ -+static ssize_t -+steamdeck_hwmon_simple_store(struct device *dev, const char *buf, size_t count, -+ const char *method, -+ unsigned long upper_limit) -+{ -+ struct steamdeck_hwmon *sd = dev_get_drvdata(dev); -+ unsigned long value; -+ -+ if (kstrtoul(buf, 10, &value) || value >= upper_limit) -+ return -EINVAL; -+ -+ if (ACPI_FAILURE(acpi_execute_simple_method(sd->adev->handle, -+ (char *)method, value))) -+ return -EIO; -+ -+ return count; -+} -+ -+static ssize_t 
-+steamdeck_hwmon_simple_show(struct device *dev, char *buf, -+ const char *method) -+{ -+ struct steamdeck_hwmon *sd = dev_get_drvdata(dev); -+ unsigned long value; -+ -+ value = steamdeck_hwmon_get(sd, method); -+ if (value < 0) -+ return value; -+ -+ return sprintf(buf, "%ld\n", value); -+} -+ -+#define STEAMDECK_HWMON_ATTR_RW(_name, _set_method, _get_method, \ -+ _upper_limit) \ -+ static ssize_t _name##_show(struct device *dev, \ -+ struct device_attribute *attr, \ -+ char *buf) \ -+ { \ -+ return steamdeck_hwmon_simple_show(dev, buf, \ -+ _get_method); \ -+ } \ -+ static ssize_t _name##_store(struct device *dev, \ -+ struct device_attribute *attr, \ -+ const char *buf, size_t count) \ -+ { \ -+ return steamdeck_hwmon_simple_store(dev, buf, count, \ -+ _set_method, \ -+ _upper_limit); \ -+ } \ -+ static DEVICE_ATTR_RW(_name) -+ -+STEAMDECK_HWMON_ATTR_RW(max_battery_charge_level, "FCBL", "SFBL", 101); -+STEAMDECK_HWMON_ATTR_RW(max_battery_charge_rate, "CHGR", "GCHR", 101); -+ -+static struct attribute *steamdeck_hwmon_attributes[] = { -+ &dev_attr_max_battery_charge_level.attr, -+ &dev_attr_max_battery_charge_rate.attr, -+ NULL -+}; -+ -+static const struct attribute_group steamdeck_hwmon_group = { -+ .attrs = steamdeck_hwmon_attributes, -+}; -+ -+static const struct attribute_group *steamdeck_hwmon_groups[] = { -+ &steamdeck_hwmon_group, -+ NULL -+}; -+ - static int steamdeck_hwmon_probe(struct platform_device *pdev) - { - struct device *dev = &pdev->dev; -@@ -195,7 +265,7 @@ static int steamdeck_hwmon_probe(struct platform_device *pdev) - "steamdeck_hwmon", - sd, - &steamdeck_hwmon_chip_info, -- NULL); -+ steamdeck_hwmon_groups); - if (IS_ERR(hwmon)) { - dev_err(dev, "Failed to register HWMON device"); - return PTR_ERR(hwmon); - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Andrey Smirnov -Date: Sun, 24 Sep 2023 15:02:33 -0700 -Subject: [PATCH] mfd: steamdeck: Expose controller board power in sysfs - -As of version 118 Deck's 
BIOS implements "SCBP" method that allows -gating power of the controller board (VBUS). Add a basic WO method to -our root MFD device to allow toggling that. - -Signed-off-by: Andrey Smirnov -(cherry picked from commit f97f32718acc10cbb51fef925842392e80904d74) -Signed-off-by: Cristian Ciocaltea -Signed-off-by: Jan200101 ---- - drivers/mfd/steamdeck.c | 20 ++++++++++++++++++++ - 1 file changed, 20 insertions(+) - -diff --git a/drivers/mfd/steamdeck.c b/drivers/mfd/steamdeck.c -index 0e504b3c2796..a60fa7db9141 100644 ---- a/drivers/mfd/steamdeck.c -+++ b/drivers/mfd/steamdeck.c -@@ -41,9 +41,29 @@ struct steamdeck { - STEAMDECK_ATTR_RO(firmware_version, "PDFW"); - STEAMDECK_ATTR_RO(board_id, "BOID"); - -+static ssize_t controller_board_power_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ struct steamdeck *sd = dev_get_drvdata(dev); -+ bool enabled; -+ ssize_t ret = kstrtobool(buf, &enabled); -+ -+ if (ret) -+ return ret; -+ -+ if (ACPI_FAILURE(acpi_execute_simple_method(sd->adev->handle, -+ "SCBP", enabled))) -+ return -EIO; -+ -+ return count; -+} -+static DEVICE_ATTR_WO(controller_board_power); -+ - static struct attribute *steamdeck_attrs[] = { - &dev_attr_firmware_version.attr, - &dev_attr_board_id.attr, -+ &dev_attr_controller_board_power.attr, - NULL - }; - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Vicki Pfau -Date: Thu, 30 Jun 2022 18:42:10 -0700 -Subject: [PATCH 01/10] USB: gadget: f_hid: Add Get-Feature report - -While the HID gadget implementation has been sufficient for devices that only -use INTERRUPT transfers, the USB HID standard includes provisions for Set- and -Get-Feature report CONTROL transfers that go over endpoint 0. These were -previously impossible with the existing implementation, and would either send -an empty reply, or stall out. 
- -As the feature is a standard part of USB HID, it stands to reason that devices -would use it, and that the HID gadget should support it. This patch adds -support for (polled) device-to-host Get-Feature reports through a new ioctl -interface to the hidg class dev nodes. - -Signed-off-by: Vicki Pfau -(cherry picked from commit 8437fa3861c7198a3e286f393c8637c4fc08d2bc) -Signed-off-by: Cristian Ciocaltea ---- - drivers/usb/gadget/function/f_hid.c | 121 ++++++++++++++++++++++++++-- - include/uapi/linux/usb/g_hid.h | 38 +++++++++ - include/uapi/linux/usb/gadgetfs.h | 2 +- - 3 files changed, 154 insertions(+), 7 deletions(-) - create mode 100644 include/uapi/linux/usb/g_hid.h - -diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c -index ea85e2c701a15..6fec92b5a0bd9 100644 ---- a/drivers/usb/gadget/function/f_hid.c -+++ b/drivers/usb/gadget/function/f_hid.c -@@ -16,6 +16,7 @@ - #include - #include - #include -+#include - - #include "u_f.h" - #include "u_hid.h" -@@ -75,6 +76,13 @@ struct f_hidg { - wait_queue_head_t write_queue; - struct usb_request *req; - -+ /* get report */ -+ struct usb_request *get_req; -+ struct usb_hidg_report get_report; -+ spinlock_t get_spinlock; -+ bool get_pending; -+ wait_queue_head_t get_queue; -+ - struct device dev; - struct cdev cdev; - struct usb_function func; -@@ -523,6 +531,64 @@ static ssize_t f_hidg_write(struct file *file, const char __user *buffer, - return status; - } - -+ -+static int f_hidg_get_report(struct file *file, struct usb_hidg_report __user *buffer) -+{ -+ struct f_hidg *hidg = file->private_data; -+ struct usb_composite_dev *cdev = hidg->func.config->cdev; -+ -+ int status = 0; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&hidg->get_spinlock, flags); -+ -+#define GET_REPORT_COND (!hidg->get_pending) -+ -+ while (!GET_REPORT_COND) { -+ spin_unlock_irqrestore(&hidg->get_spinlock, flags); -+ -+ if (file->f_flags & O_NONBLOCK) -+ return -EAGAIN; -+ -+ if 
(wait_event_interruptible_exclusive(hidg->get_queue, -+ GET_REPORT_COND)) -+ return -ERESTARTSYS; -+ -+ spin_lock_irqsave(&hidg->get_spinlock, flags); -+ if (!hidg->get_pending) { -+ spin_unlock_irqrestore(&hidg->get_spinlock, flags); -+ return -EINVAL; -+ } -+ } -+ -+ hidg->get_pending = true; -+ spin_unlock_irqrestore(&hidg->get_spinlock, flags); -+ -+ status = copy_from_user(&hidg->get_report, buffer, -+ sizeof(struct usb_hidg_report)); -+ if (status != 0) { -+ ERROR(cdev, "copy_from_user error\n"); -+ status = -EINVAL; -+ } -+ -+ spin_lock_irqsave(&hidg->get_spinlock, flags); -+ hidg->get_pending = false; -+ spin_unlock_irqrestore(&hidg->get_spinlock, flags); -+ -+ wake_up(&hidg->get_queue); -+ return status; -+} -+ -+static long f_hidg_ioctl(struct file *file, unsigned int code, unsigned long arg) -+{ -+ switch (code) { -+ case GADGET_HID_WRITE_GET_REPORT: -+ return f_hidg_get_report(file, (struct usb_hidg_report __user *)arg); -+ default: -+ return -ENOTTY; -+ } -+} -+ - static __poll_t f_hidg_poll(struct file *file, poll_table *wait) - { - struct f_hidg *hidg = file->private_data; -@@ -548,6 +614,7 @@ static __poll_t f_hidg_poll(struct file *file, poll_table *wait) - #undef WRITE_COND - #undef READ_COND_SSREPORT - #undef READ_COND_INTOUT -+#undef GET_REPORT_COND - - static int f_hidg_release(struct inode *inode, struct file *fd) - { -@@ -640,6 +707,10 @@ static void hidg_ssreport_complete(struct usb_ep *ep, struct usb_request *req) - wake_up(&hidg->read_queue); - } - -+static void hidg_get_report_complete(struct usb_ep *ep, struct usb_request *req) -+{ -+} -+ - static int hidg_setup(struct usb_function *f, - const struct usb_ctrlrequest *ctrl) - { -@@ -647,6 +718,8 @@ static int hidg_setup(struct usb_function *f, - struct usb_composite_dev *cdev = f->config->cdev; - struct usb_request *req = cdev->req; - int status = 0; -+ unsigned long flags; -+ bool do_wake = false; - __u16 value, length; - - value = __le16_to_cpu(ctrl->wValue); -@@ -659,14 +732,29 @@ 
static int hidg_setup(struct usb_function *f, - switch ((ctrl->bRequestType << 8) | ctrl->bRequest) { - case ((USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8 - | HID_REQ_GET_REPORT): -- VDBG(cdev, "get_report\n"); -+ VDBG(cdev, "get_report | wLength=%d\n", ctrl->wLength); - -- /* send an empty report */ -- length = min_t(unsigned, length, hidg->report_length); -- memset(req->buf, 0x0, length); -+ req = hidg->get_req; -+ req->zero = 0; -+ req->length = min_t(unsigned, length, hidg->report_length); -+ status = usb_ep_queue(cdev->gadget->ep0, req, GFP_ATOMIC); -+ if (status < 0) { -+ ERROR(cdev, "usb_ep_queue error on get_report %d\n", -+ status); - -- goto respond; -- break; -+ spin_lock_irqsave(&hidg->get_spinlock, flags); -+ if (hidg->get_pending) { -+ hidg->get_pending = false; -+ do_wake = true; -+ } -+ spin_unlock_irqrestore(&hidg->get_spinlock, flags); -+ -+ if (do_wake) { -+ wake_up(&hidg->get_queue); -+ } -+ } -+ -+ return status; - - case ((USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8 - | HID_REQ_GET_PROTOCOL): -@@ -800,6 +888,14 @@ static void hidg_disable(struct usb_function *f) - - hidg->req = NULL; - spin_unlock_irqrestore(&hidg->write_spinlock, flags); -+ -+ spin_lock_irqsave(&hidg->get_spinlock, flags); -+ if (!hidg->get_pending) { -+ usb_ep_free_request(f->config->cdev->gadget->ep0, hidg->get_req); -+ hidg->get_pending = true; -+ } -+ hidg->get_req = NULL; -+ spin_unlock_irqrestore(&hidg->get_spinlock, flags); - } - - static int hidg_set_alt(struct usb_function *f, unsigned intf, unsigned alt) -@@ -908,6 +1004,7 @@ static const struct file_operations f_hidg_fops = { - .write = f_hidg_write, - .read = f_hidg_read, - .poll = f_hidg_poll, -+ .unlocked_ioctl = f_hidg_ioctl, - .llseek = noop_llseek, - }; - -@@ -918,6 +1015,14 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) - struct usb_string *us; - int status; - -+ hidg->get_req = usb_ep_alloc_request(c->cdev->gadget->ep0, GFP_ATOMIC); -+ if 
(!hidg->get_req) -+ return -ENOMEM; -+ hidg->get_req->buf = hidg->get_report.data; -+ hidg->get_req->zero = 0; -+ hidg->get_req->complete = hidg_get_report_complete; -+ hidg->get_req->context = hidg; -+ - /* maybe allocate device-global string IDs, and patch descriptors */ - us = usb_gstrings_attach(c->cdev, ct_func_strings, - ARRAY_SIZE(ct_func_string_defs)); -@@ -1003,8 +1108,10 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) - hidg->write_pending = 1; - hidg->req = NULL; - spin_lock_init(&hidg->read_spinlock); -+ spin_lock_init(&hidg->get_spinlock); - init_waitqueue_head(&hidg->write_queue); - init_waitqueue_head(&hidg->read_queue); -+ init_waitqueue_head(&hidg->get_queue); - INIT_LIST_HEAD(&hidg->completed_out_req); - - /* create char device */ -@@ -1021,6 +1128,8 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) - if (hidg->req != NULL) - free_ep_req(hidg->in_ep, hidg->req); - -+ usb_ep_free_request(c->cdev->gadget->ep0, hidg->get_req); -+ - return status; - } - -diff --git a/include/uapi/linux/usb/g_hid.h b/include/uapi/linux/usb/g_hid.h -new file mode 100644 -index 0000000000000..c6068b4863543 ---- /dev/null -+++ b/include/uapi/linux/usb/g_hid.h -@@ -0,0 +1,38 @@ -+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ -+/* -+ * g_hid.h -- Header file for USB HID gadget driver -+ * -+ * Copyright (C) 2022 Valve Software -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ */ -+ -+#ifndef __UAPI_LINUX_USB_G_HID_H -+#define __UAPI_LINUX_USB_G_HID_H -+ -+#include -+ -+struct usb_hidg_report { -+ __u16 length; -+ __u8 data[512]; -+}; -+ -+/* The 'g' code is also used by gadgetfs and hid gadget ioctl requests. -+ * Don't add any colliding codes to either driver, and keep -+ * them in unique ranges (size 0x20 for now). -+ */ -+#define GADGET_HID_WRITE_GET_REPORT _IOW('g', 0x42, struct usb_hidg_report) -+ -+#endif /* __UAPI_LINUX_USB_G_HID_H */ -diff --git a/include/uapi/linux/usb/gadgetfs.h b/include/uapi/linux/usb/gadgetfs.h -index 835473910a498..9754822b2a409 100644 ---- a/include/uapi/linux/usb/gadgetfs.h -+++ b/include/uapi/linux/usb/gadgetfs.h -@@ -62,7 +62,7 @@ struct usb_gadgetfs_event { - }; - - --/* The 'g' code is also used by printer gadget ioctl requests. -+/* The 'g' code is also used by printer and hid gadget ioctl requests. - * Don't add any colliding codes to either driver, and keep - * them in unique ranges (size 0x20 for now). - */ --- -2.41.0 - - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Vicki Pfau -Date: Thu, 30 Jun 2022 18:43:10 -0700 -Subject: [PATCH 02/10] USB: gadget: f_hid: Add Set-Feature report - -While the HID gadget implementation has been sufficient for devices that only -use INTERRUPT transfers, the USB HID standard includes provisions for Set- and -Get-Feature report CONTROL transfers that go over endpoint 0. These were -previously impossible with the existing implementation, and would either send -an empty reply, or stall out. - -As the feature is a standard part of USB HID, it stands to reason that devices -would use it, and that the HID gadget should support it. 
This patch adds -support for host-to-device Set-Feature reports through a new ioctl -interface to the hidg class dev nodes. - -Signed-off-by: Vicki Pfau -(cherry picked from commit 3d82be0ec3aa3b947d9c927d7b06c433de15be8b) -Signed-off-by: Cristian Ciocaltea ---- - drivers/usb/gadget/function/f_hid.c | 110 ++++++++++++++++++++++++++-- - include/uapi/linux/usb/g_hid.h | 24 +----- - 2 files changed, 106 insertions(+), 28 deletions(-) - -diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c -index 6fec92b5a0bd9..172cba91aded1 100644 ---- a/drivers/usb/gadget/function/f_hid.c -+++ b/drivers/usb/gadget/function/f_hid.c -@@ -76,6 +76,11 @@ struct f_hidg { - wait_queue_head_t write_queue; - struct usb_request *req; - -+ /* set report */ -+ struct list_head completed_set_req; -+ spinlock_t set_spinlock; -+ wait_queue_head_t set_queue; -+ - /* get report */ - struct usb_request *get_req; - struct usb_hidg_report get_report; -@@ -531,6 +536,54 @@ static ssize_t f_hidg_write(struct file *file, const char __user *buffer, - return status; - } - -+static int f_hidg_set_report(struct file *file, struct usb_hidg_report __user *buffer) -+{ -+ struct f_hidg *hidg = file->private_data; -+ struct f_hidg_req_list *list; -+ struct usb_request *req; -+ unsigned long flags; -+ unsigned short length; -+ int status; -+ -+ spin_lock_irqsave(&hidg->set_spinlock, flags); -+ -+#define SET_REPORT_COND (!list_empty(&hidg->completed_set_req)) -+ -+ /* wait for at least one buffer to complete */ -+ while (!SET_REPORT_COND) { -+ spin_unlock_irqrestore(&hidg->set_spinlock, flags); -+ if (file->f_flags & O_NONBLOCK) -+ return -EAGAIN; -+ -+ if (wait_event_interruptible(hidg->set_queue, SET_REPORT_COND)) -+ return -ERESTARTSYS; -+ -+ spin_lock_irqsave(&hidg->set_spinlock, flags); -+ } -+ -+ /* pick the first one */ -+ list = list_first_entry(&hidg->completed_set_req, -+ struct f_hidg_req_list, list); -+ -+ /* -+ * Remove this from list to protect it from being free() -+ 
* while host disables our function -+ */ -+ list_del(&list->list); -+ -+ req = list->req; -+ spin_unlock_irqrestore(&hidg->set_spinlock, flags); -+ -+ /* copy to user outside spinlock */ -+ length = min_t(unsigned short, sizeof(buffer->data), req->actual); -+ status = copy_to_user(&buffer->length, &length, sizeof(buffer->length)); -+ if (!status) { -+ status = copy_to_user(&buffer->data, req->buf, length); -+ } -+ kfree(list); -+ free_ep_req(hidg->func.config->cdev->gadget->ep0, req); -+ return status; -+} - - static int f_hidg_get_report(struct file *file, struct usb_hidg_report __user *buffer) - { -@@ -582,6 +635,8 @@ static int f_hidg_get_report(struct file *file, struct usb_hidg_report __user *b - static long f_hidg_ioctl(struct file *file, unsigned int code, unsigned long arg) - { - switch (code) { -+ case GADGET_HID_READ_SET_REPORT: -+ return f_hidg_set_report(file, (struct usb_hidg_report __user *)arg); - case GADGET_HID_WRITE_GET_REPORT: - return f_hidg_get_report(file, (struct usb_hidg_report __user *)arg); - default: -@@ -596,6 +651,7 @@ static __poll_t f_hidg_poll(struct file *file, poll_table *wait) - - poll_wait(file, &hidg->read_queue, wait); - poll_wait(file, &hidg->write_queue, wait); -+ poll_wait(file, &hidg->set_queue, wait); - - if (WRITE_COND) - ret |= EPOLLOUT | EPOLLWRNORM; -@@ -608,12 +664,16 @@ static __poll_t f_hidg_poll(struct file *file, poll_table *wait) - ret |= EPOLLIN | EPOLLRDNORM; - } - -+ if (SET_REPORT_COND) -+ ret |= EPOLLPRI; -+ - return ret; - } - - #undef WRITE_COND - #undef READ_COND_SSREPORT - #undef READ_COND_INTOUT -+#undef SET_REPORT_COND - #undef GET_REPORT_COND - - static int f_hidg_release(struct inode *inode, struct file *fd) -@@ -658,11 +718,19 @@ static void hidg_intout_complete(struct usb_ep *ep, struct usb_request *req) - - req_list->req = req; - -- spin_lock_irqsave(&hidg->read_spinlock, flags); -- list_add_tail(&req_list->list, &hidg->completed_out_req); -- spin_unlock_irqrestore(&hidg->read_spinlock, flags); -+ 
if (ep == cdev->gadget->ep0) { -+ spin_lock_irqsave(&hidg->set_spinlock, flags); -+ list_add_tail(&req_list->list, &hidg->completed_set_req); -+ spin_unlock_irqrestore(&hidg->set_spinlock, flags); - -- wake_up(&hidg->read_queue); -+ wake_up(&hidg->set_queue); -+ } else { -+ spin_lock_irqsave(&hidg->read_spinlock, flags); -+ list_add_tail(&req_list->list, &hidg->completed_out_req); -+ spin_unlock_irqrestore(&hidg->read_spinlock, flags); -+ -+ wake_up(&hidg->read_queue); -+ } - break; - default: - ERROR(cdev, "Set report failed %d\n", req->status); -@@ -775,12 +843,27 @@ static int hidg_setup(struct usb_function *f, - case ((USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8 - | HID_REQ_SET_REPORT): - VDBG(cdev, "set_report | wLength=%d\n", ctrl->wLength); -- if (hidg->use_out_ep) -+ if (!hidg->use_out_ep) { -+ req->complete = hidg_ssreport_complete; -+ req->context = hidg; -+ goto respond; -+ } -+ if (!length) - goto stall; -- req->complete = hidg_ssreport_complete; -+ req = alloc_ep_req(cdev->gadget->ep0, GFP_ATOMIC); -+ if (!req) -+ return -ENOMEM; -+ req->complete = hidg_intout_complete; - req->context = hidg; -- goto respond; -- break; -+ req->zero = 0; -+ req->length = length; -+ status = usb_ep_queue(cdev->gadget->ep0, req, GFP_ATOMIC); -+ if (status < 0) { -+ ERROR(cdev, "usb_ep_queue error on set_report %d\n", status); -+ free_ep_req(cdev->gadget->ep0, req); -+ } -+ -+ return status; - - case ((USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8 - | HID_REQ_SET_PROTOCOL): -@@ -880,6 +963,14 @@ static void hidg_disable(struct usb_function *f) - spin_unlock_irqrestore(&hidg->read_spinlock, flags); - } - -+ spin_lock_irqsave(&hidg->set_spinlock, flags); -+ list_for_each_entry_safe(list, next, &hidg->completed_set_req, list) { -+ free_ep_req(f->config->cdev->gadget->ep0, list->req); -+ list_del(&list->list); -+ kfree(list); -+ } -+ spin_unlock_irqrestore(&hidg->set_spinlock, flags); -+ - spin_lock_irqsave(&hidg->write_spinlock, flags); - if 
(!hidg->write_pending) { - free_ep_req(hidg->in_ep, hidg->req); -@@ -1108,11 +1199,14 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) - hidg->write_pending = 1; - hidg->req = NULL; - spin_lock_init(&hidg->read_spinlock); -+ spin_lock_init(&hidg->set_spinlock); - spin_lock_init(&hidg->get_spinlock); - init_waitqueue_head(&hidg->write_queue); - init_waitqueue_head(&hidg->read_queue); -+ init_waitqueue_head(&hidg->set_queue); - init_waitqueue_head(&hidg->get_queue); - INIT_LIST_HEAD(&hidg->completed_out_req); -+ INIT_LIST_HEAD(&hidg->completed_set_req); - - /* create char device */ - cdev_init(&hidg->cdev, &f_hidg_fops); -diff --git a/include/uapi/linux/usb/g_hid.h b/include/uapi/linux/usb/g_hid.h -index c6068b4863543..54814c2c68d60 100644 ---- a/include/uapi/linux/usb/g_hid.h -+++ b/include/uapi/linux/usb/g_hid.h -@@ -1,38 +1,22 @@ - /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ --/* -- * g_hid.h -- Header file for USB HID gadget driver -- * -- * Copyright (C) 2022 Valve Software -- * -- * This program is free software; you can redistribute it and/or modify -- * it under the terms of the GNU General Public License as published by -- * the Free Software Foundation; either version 2 of the License, or -- * (at your option) any later version. -- * -- * This program is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. 
-- * -- * You should have received a copy of the GNU General Public License -- * along with this program; if not, write to the Free Software -- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -- */ - - #ifndef __UAPI_LINUX_USB_G_HID_H - #define __UAPI_LINUX_USB_G_HID_H - - #include - -+#define HIDG_REPORT_SIZE_MAX 64 -+ - struct usb_hidg_report { - __u16 length; -- __u8 data[512]; -+ __u8 data[HIDG_REPORT_SIZE_MAX]; - }; - - /* The 'g' code is also used by gadgetfs and hid gadget ioctl requests. - * Don't add any colliding codes to either driver, and keep - * them in unique ranges (size 0x20 for now). - */ -+#define GADGET_HID_READ_SET_REPORT _IOR('g', 0x41, struct usb_hidg_report) - #define GADGET_HID_WRITE_GET_REPORT _IOW('g', 0x42, struct usb_hidg_report) - - #endif /* __UAPI_LINUX_USB_G_HID_H */ --- -2.41.0 - - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Vicki Pfau -Date: Tue, 29 Nov 2022 18:32:58 -0800 -Subject: [PATCH 03/10] HID: hid-steam: Update list of identifiers from SDL - -SDL includes a list of settings (registers), reports (cmds), and various other -identifiers that were provided by Valve. This commit imports a significant -chunk of that list as well as updating the guessed names and replacing a -handful of magic constants. It also replaces bitmask definitions that used hex -with the BIT macro. - -Signed-off-by: Vicki Pfau ---- - drivers/hid/hid-steam.c | 156 +++++++++++++++++++++++++++++++--------- - 1 file changed, 121 insertions(+), 35 deletions(-) - -diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c -index b110818fc9458..39a9bf3b7f77d 100644 ---- a/drivers/hid/hid-steam.c -+++ b/drivers/hid/hid-steam.c -@@ -71,7 +71,7 @@ static LIST_HEAD(steam_devices); - - /* - * Commands that can be sent in a feature report. -- * Thanks to Valve for some valuable hints. -+ * Thanks to Valve and SDL for some valuable hints. 
- */ - #define STEAM_CMD_SET_MAPPINGS 0x80 - #define STEAM_CMD_CLEAR_MAPPINGS 0x81 -@@ -80,27 +80,98 @@ static LIST_HEAD(steam_devices); - #define STEAM_CMD_GET_ATTRIB_LABEL 0x84 - #define STEAM_CMD_DEFAULT_MAPPINGS 0x85 - #define STEAM_CMD_FACTORY_RESET 0x86 --#define STEAM_CMD_WRITE_REGISTER 0x87 -+#define STEAM_CMD_SET_REGISTER 0x87 - #define STEAM_CMD_CLEAR_REGISTER 0x88 --#define STEAM_CMD_READ_REGISTER 0x89 -+#define STEAM_CMD_GET_REGISTER 0x89 - #define STEAM_CMD_GET_REGISTER_LABEL 0x8a - #define STEAM_CMD_GET_REGISTER_MAX 0x8b - #define STEAM_CMD_GET_REGISTER_DEFAULT 0x8c - #define STEAM_CMD_SET_MODE 0x8d --#define STEAM_CMD_DEFAULT_MOUSE 0x8e --#define STEAM_CMD_FORCEFEEDBAK 0x8f --#define STEAM_CMD_REQUEST_COMM_STATUS 0xb4 --#define STEAM_CMD_GET_SERIAL 0xae -+#define STEAM_CMD_DEFAULT_REGISTER 0x8e -+#define STEAM_CMD_HAPTIC_PULSE 0x8f -+#define STEAM_CMD_TURN_OFF_CONTROLLER 0x9f -+#define STEAM_CMD_GET_DEVICE_IFNO 0xa1 -+#define STEAM_CMD_CALIBRATE_TRACKPADS 0xa7 -+#define STEAM_CMD_SET_SERIAL 0xa9 -+#define STEAM_CMD_GET_TRACKPAD_CALIB 0xaa -+#define STEAM_CMD_GET_TRACKPAD_FACTORY_CALIB 0xab -+#define STEAM_CMD_GET_TRACKPAD_RAW_DATA 0xac -+#define STEAM_CMD_ENABLE_PAIRING 0xad -+#define STEAM_CMD_GET_STRING_ATTRIB 0xae -+#define STEAM_CMD_RADIO_ERASE_RECORDS 0xaf -+#define STEAM_CMD_RADIO_WRITE_RECORD 0xb0 -+#define STEAM_CMD_SET_DONGLE_SETTING 0xb1 -+#define STEAM_CMD_DONGLE_DISCONNECT_DEV 0xb2 -+#define STEAM_CMD_DONGLE_COMMIT_DEV 0xb3 -+#define STEAM_CMD_DONGLE_GET_STATE 0xb4 -+#define STEAM_CMD_CALIBRATE_GYRO 0xb5 -+#define STEAM_CMD_PLAY_AUDIO 0xb6 -+#define STEAM_CMD_AUDIO_UPDATE_START 0xb7 -+#define STEAM_CMD_AUDIO_UPDATE_DATA 0xb8 -+#define STEAM_CMD_AUDIO_UPDATE_COMPLETE 0xb9 -+#define STEAM_CMD_GET_CHIPID 0xba -+#define STEAM_CMD_CALIBRATE_JOYSTICK 0xbf -+#define STEAM_CMD_CALIBRATE_TRIGGERS 0xc0 -+#define STEAM_CMD_SET_AUDIO_MAPPING 0xc1 -+#define STEAM_CMD_CHECK_GYRO_FW_LOAD 0xc2 -+#define STEAM_CMD_CALIBRATE_ANALOG 0xc3 -+#define 
STEAM_CMD_DONGLE_GET_CONN_SLOTS 0xc4 -+#define STEAM_CMD_HAPTIC_CMD 0xea - #define STEAM_CMD_HAPTIC_RUMBLE 0xeb - - /* Some useful register ids */ --#define STEAM_REG_LPAD_MODE 0x07 --#define STEAM_REG_RPAD_MODE 0x08 --#define STEAM_REG_RPAD_MARGIN 0x18 --#define STEAM_REG_LED 0x2d --#define STEAM_REG_GYRO_MODE 0x30 --#define STEAM_REG_LPAD_CLICK_PRESSURE 0x34 --#define STEAM_REG_RPAD_CLICK_PRESSURE 0x35 -+#define STEAM_REG_MOUSE_SENSITIVITY 0x00 -+#define STEAM_REG_MOUSE_ACCELERATION 0x01 -+#define STEAM_REG_TRACKBALL_ROTATION_ANGLE 0x02 -+#define STEAM_REG_HAPTIC_INTENSITY 0x03 -+#define STEAM_REG_LEFT_GAMEPAD_STICK_ENABLED 0x04 -+#define STEAM_REG_RIGHT_GAMEPAD_STICK_ENABLED 0x05 -+#define STEAM_REG_USB_DEBUG_MODE 0x06 -+#define STEAM_REG_LEFT_TRACKPAD_MODE 0x07 -+#define STEAM_REG_RIGHT_TRACKPAD_MODE 0x08 -+#define STEAM_REG_MOUSE_POINTER_ENABLED 0x09 -+#define STEAM_REG_DPAD_DEADZONE 0x0a -+#define STEAM_REG_MINIMUM_MOMENTUM_VEL 0x0b -+#define STEAM_REG_MOMENTUM_DECAY_AMOUNT 0x0c -+#define STEAM_REG_PAD_REL_MODE_TICKS_PER_PIXEL 0x0d -+#define STEAM_REG_HAPTIC_INCREMENT 0x0e -+#define STEAM_REG_DPAD_ANGLE_SIN 0x0f -+#define STEAM_REG_DPAD_ANGLE_COS 0x10 -+#define STEAM_REG_MOMENTUM_VERTICAL_DIVISOR 0x11 -+#define STEAM_REG_MOMENTUM_MAXIMUM_VELOCITY 0x12 -+#define STEAM_REG_TRACKPAD_Z_ON 0x13 -+#define STEAM_REG_TRACKPAD_Z_OFF 0x14 -+#define STEAM_REG_SENSITIVY_SCALE_AMOUNT 0x15 -+#define STEAM_REG_LEFT_TRACKPAD_SECONDARY_MODE 0x16 -+#define STEAM_REG_RIGHT_TRACKPAD_SECONDARY_MODE 0x17 -+#define STEAM_REG_SMOOTH_ABSOLUTE_MOUSE 0x18 -+#define STEAM_REG_STEAMBUTTON_POWEROFF_TIME 0x19 -+#define STEAM_REG_TRACKPAD_OUTER_RADIUS 0x1b -+#define STEAM_REG_TRACKPAD_Z_ON_LEFT 0x1c -+#define STEAM_REG_TRACKPAD_Z_OFF_LEFT 0x1d -+#define STEAM_REG_TRACKPAD_OUTER_SPIN_VEL 0x1e -+#define STEAM_REG_TRACKPAD_OUTER_SPIN_RADIUS 0x1f -+#define STEAM_REG_TRACKPAD_OUTER_SPIN_HORIZONTAL_ONLY 0x20 -+#define STEAM_REG_TRACKPAD_RELATIVE_MODE_DEADZONE 0x21 -+#define 
STEAM_REG_TRACKPAD_RELATIVE_MODE_MAX_VEL 0x22 -+#define STEAM_REG_TRACKPAD_RELATIVE_MODE_INVERT_Y 0x23 -+#define STEAM_REG_TRACKPAD_DOUBLE_TAP_BEEP_ENABLED 0x24 -+#define STEAM_REG_TRACKPAD_DOUBLE_TAP_BEEP_PERIOD 0x25 -+#define STEAM_REG_TRACKPAD_DOUBLE_TAP_BEEP_COUNT 0x26 -+#define STEAM_REG_TRACKPAD_OUTER_RADIUS_RELEASE_ON_TRANSITION 0x27 -+#define STEAM_REG_RADIAL_MODE_ANGLE 0x28 -+#define STEAM_REG_HAPTIC_INTENSITY_MOUSE_MODE 0x29 -+#define STEAM_REG_LEFT_DPAD_REQUIRES_CLICK 0x2a -+#define STEAM_REG_RIGHT_DPAD_REQUIRES_CLICK 0x2b -+#define STEAM_REG_LED_BASELINE_BRIGHTNESS 0x2c -+#define STEAM_REG_LED_USER_BRIGHTNESS 0x2d -+#define STEAM_REG_ENABLE_RAW_JOYSTICK 0x2e -+#define STEAM_REG_ENABLE_FAST_SCAN 0x2f -+#define STEAM_REG_GYRO_MODE 0x30 -+#define STEAM_REG_WIRELESS_PACKET_VERSION 0x31 -+#define STEAM_REG_SLEEP_INACTIVITY_TIMEOUT 0x32 -+#define STEAM_REG_LEFT_TRACKPAD_CLICK_PRESSURE 0x34 -+#define STEAM_REG_RIGHT_TRACKPAD_CLICK_PRESSURE 0x35 - - /* Raw event identifiers */ - #define STEAM_EV_INPUT_DATA 0x01 -@@ -108,13 +179,28 @@ static LIST_HEAD(steam_devices); - #define STEAM_EV_BATTERY 0x04 - #define STEAM_EV_DECK_INPUT_DATA 0x09 - -+/* String attribute idenitifiers */ -+#define STEAM_ATTRIB_STR_BOARD_SERIAL 0x00 -+#define STEAM_ATTRIB_STR_UNIT_SERIAL 0x01 -+ - /* Values for GYRO_MODE (bitmask) */ --#define STEAM_GYRO_MODE_OFF 0x0000 --#define STEAM_GYRO_MODE_STEERING 0x0001 --#define STEAM_GYRO_MODE_TILT 0x0002 --#define STEAM_GYRO_MODE_SEND_ORIENTATION 0x0004 --#define STEAM_GYRO_MODE_SEND_RAW_ACCEL 0x0008 --#define STEAM_GYRO_MODE_SEND_RAW_GYRO 0x0010 -+#define STEAM_GYRO_MODE_OFF 0 -+#define STEAM_GYRO_MODE_STEERING BIT(0) -+#define STEAM_GYRO_MODE_TILT BIT(1) -+#define STEAM_GYRO_MODE_SEND_ORIENTATION BIT(2) -+#define STEAM_GYRO_MODE_SEND_RAW_ACCEL BIT(3) -+#define STEAM_GYRO_MODE_SEND_RAW_GYRO BIT(4) -+ -+/* Trackpad modes */ -+#define STEAM_TRACKPAD_ABSOLUTE_MOUSE 0x00 -+#define STEAM_TRACKPAD_RELATIVE_MOUSE 0x01 -+#define 
STEAM_TRACKPAD_DPAD_FOUR_WAY_DISCRETE 0x02 -+#define STEAM_TRACKPAD_DPAD_FOUR_WAY_OVERLAP 0x03 -+#define STEAM_TRACKPAD_DPAD_EIGHT_WAY 0x04 -+#define STEAM_TRACKPAD_RADIAL_MODE 0x05 -+#define STEAM_TRACKPAD_ABSOLUTE_DPAD 0x06 -+#define STEAM_TRACKPAD_NONE 0x07 -+#define STEAM_TRACKPAD_GESTURE_KEYBOARD 0x08 - - /* Other random constants */ - #define STEAM_SERIAL_LEN 10 -@@ -232,7 +318,7 @@ static int steam_write_registers(struct steam_device *steam, - /* Send: 0x87 len (reg valLo valHi)* */ - u8 reg; - u16 val; -- u8 cmd[64] = {STEAM_CMD_WRITE_REGISTER, 0x00}; -+ u8 cmd[64] = {STEAM_CMD_SET_REGISTER, 0x00}; - int ret; - va_list args; - -@@ -268,7 +354,7 @@ static int steam_get_serial(struct steam_device *steam) - * Recv: 0xae 0x15 0x01 serialnumber (10 chars) - */ - int ret; -- u8 cmd[] = {STEAM_CMD_GET_SERIAL, 0x15, 0x01}; -+ u8 cmd[] = {STEAM_CMD_GET_STRING_ATTRIB, 0x15, STEAM_ATTRIB_STR_UNIT_SERIAL}; - u8 reply[3 + STEAM_SERIAL_LEN + 1]; - - ret = steam_send_report(steam, cmd, sizeof(cmd)); -@@ -277,7 +363,7 @@ static int steam_get_serial(struct steam_device *steam) - ret = steam_recv_report(steam, reply, sizeof(reply)); - if (ret < 0) - return ret; -- if (reply[0] != 0xae || reply[1] != 0x15 || reply[2] != 0x01) -+ if (reply[0] != 0xae || reply[1] != 0x15 || reply[2] != STEAM_ATTRIB_STR_UNIT_SERIAL) - return -EIO; - reply[3 + STEAM_SERIAL_LEN] = 0; - strscpy(steam->serial_no, reply + 3, sizeof(steam->serial_no)); -@@ -291,7 +377,7 @@ static int steam_get_serial(struct steam_device *steam) - */ - static inline int steam_request_conn_status(struct steam_device *steam) - { -- return steam_send_report_byte(steam, STEAM_CMD_REQUEST_COMM_STATUS); -+ return steam_send_report_byte(steam, STEAM_CMD_DONGLE_GET_STATE); - } - - static inline int steam_haptic_rumble(struct steam_device *steam, -@@ -339,9 +425,9 @@ static void steam_set_lizard_mode(struct steam_device *steam, bool enable) - /* enable esc, enter, cursors */ - steam_send_report_byte(steam, 
STEAM_CMD_DEFAULT_MAPPINGS); - /* enable mouse */ -- steam_send_report_byte(steam, STEAM_CMD_DEFAULT_MOUSE); -+ steam_send_report_byte(steam, STEAM_CMD_DEFAULT_REGISTER); - steam_write_registers(steam, -- STEAM_REG_RPAD_MARGIN, 0x01, /* enable margin */ -+ STEAM_REG_SMOOTH_ABSOLUTE_MOUSE, 0x01, /* enable smooth */ - 0); - - cancel_delayed_work_sync(&steam->heartbeat); -@@ -351,11 +437,11 @@ static void steam_set_lizard_mode(struct steam_device *steam, bool enable) - - if (steam->quirks & STEAM_QUIRK_DECK) { - steam_write_registers(steam, -- STEAM_REG_RPAD_MARGIN, 0x00, /* disable margin */ -- STEAM_REG_LPAD_MODE, 0x07, /* disable mouse */ -- STEAM_REG_RPAD_MODE, 0x07, /* disable mouse */ -- STEAM_REG_LPAD_CLICK_PRESSURE, 0xFFFF, /* disable clicky pad */ -- STEAM_REG_RPAD_CLICK_PRESSURE, 0xFFFF, /* disable clicky pad */ -+ STEAM_REG_SMOOTH_ABSOLUTE_MOUSE, 0x00, /* disable smooth */ -+ STEAM_REG_LEFT_TRACKPAD_MODE, STEAM_TRACKPAD_NONE, /* disable mouse */ -+ STEAM_REG_RIGHT_TRACKPAD_MODE, STEAM_TRACKPAD_NONE, /* disable mouse */ -+ STEAM_REG_LEFT_TRACKPAD_CLICK_PRESSURE, 0xFFFF, /* disable clicky pad */ -+ STEAM_REG_RIGHT_TRACKPAD_CLICK_PRESSURE, 0xFFFF, /* disable clicky pad */ - 0); - /* - * The Steam Deck has a watchdog that automatically enables -@@ -365,9 +451,9 @@ static void steam_set_lizard_mode(struct steam_device *steam, bool enable) - schedule_delayed_work(&steam->heartbeat, 5 * HZ); - } else { - steam_write_registers(steam, -- STEAM_REG_RPAD_MARGIN, 0x00, /* disable margin */ -- STEAM_REG_LPAD_MODE, 0x07, /* disable mouse */ -- STEAM_REG_RPAD_MODE, 0x07, /* disable mouse */ -+ STEAM_REG_SMOOTH_ABSOLUTE_MOUSE, 0x00, /* disable smooth */ -+ STEAM_REG_LEFT_TRACKPAD_MODE, STEAM_TRACKPAD_NONE, /* disable mouse */ -+ STEAM_REG_RIGHT_TRACKPAD_MODE, STEAM_TRACKPAD_NONE, /* disable mouse */ - 0); - } - } -@@ -747,7 +833,7 @@ static void steam_lizard_mode_heartbeat(struct work_struct *work) - if (!steam->client_opened && steam->client_hdev) { - 
steam_send_report_byte(steam, STEAM_CMD_CLEAR_MAPPINGS); - steam_write_registers(steam, -- STEAM_REG_RPAD_MODE, 0x07, /* disable mouse */ -+ STEAM_REG_RIGHT_TRACKPAD_MODE, STEAM_TRACKPAD_NONE, /* disable mouse */ - 0); - schedule_delayed_work(&steam->heartbeat, 5 * HZ); - } --- -2.41.0 - - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Vicki Pfau -Date: Wed, 16 Nov 2022 19:54:26 -0800 -Subject: [PATCH 04/10] HID: hid-steam: Add gamepad-only mode switched to by - holding options - -Signed-off-by: Vicki Pfau ---- - drivers/hid/hid-steam.c | 72 +++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 72 insertions(+) - -diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c -index 39a9bf3b7f77d..0620046b142ef 100644 ---- a/drivers/hid/hid-steam.c -+++ b/drivers/hid/hid-steam.c -@@ -202,6 +202,11 @@ static LIST_HEAD(steam_devices); - #define STEAM_TRACKPAD_NONE 0x07 - #define STEAM_TRACKPAD_GESTURE_KEYBOARD 0x08 - -+/* Pad identifiers for the deck */ -+#define STEAM_PAD_LEFT 0 -+#define STEAM_PAD_RIGHT 1 -+#define STEAM_PAD_BOTH 2 -+ - /* Other random constants */ - #define STEAM_SERIAL_LEN 10 - -@@ -221,6 +226,9 @@ struct steam_device { - u8 battery_charge; - u16 voltage; - struct delayed_work heartbeat; -+ struct delayed_work mode_switch; -+ bool did_mode_switch; -+ bool gamepad_mode; - struct work_struct rumble_work; - u16 rumble_left; - u16 rumble_right; -@@ -380,6 +388,33 @@ static inline int steam_request_conn_status(struct steam_device *steam) - return steam_send_report_byte(steam, STEAM_CMD_DONGLE_GET_STATE); - } - -+/* -+ * Send a haptic pulse to the trackpads -+ * Duration and interval are measured in microseconds, count is the number -+ * of pulses to send for duration time with interval microseconds between them -+ * and gain is measured in decibels, ranging from -24 to +6 -+ */ -+static inline int steam_haptic_pulse(struct steam_device *steam, u8 pad, -+ u16 duration, u16 interval, u16 count, u8 gain) -+{ -+ u8 
report[10] = {STEAM_CMD_HAPTIC_PULSE, 8}; -+ -+ /* Left and right are swapped on this report for legacy reasons */ -+ if (pad < STEAM_PAD_BOTH) -+ pad ^= 1; -+ -+ report[2] = pad; -+ report[3] = duration & 0xFF; -+ report[4] = duration >> 8; -+ report[5] = interval & 0xFF; -+ report[6] = interval >> 8; -+ report[7] = count & 0xFF; -+ report[8] = count >> 8; -+ report[9] = gain; -+ -+ return steam_send_report(steam, report, sizeof(report)); -+} -+ - static inline int steam_haptic_rumble(struct steam_device *steam, - u16 intensity, u16 left_speed, u16 right_speed, - u8 left_gain, u8 right_gain) -@@ -421,6 +456,9 @@ static int steam_play_effect(struct input_dev *dev, void *data, - - static void steam_set_lizard_mode(struct steam_device *steam, bool enable) - { -+ if (steam->gamepad_mode) -+ enable = false; -+ - if (enable) { - /* enable esc, enter, cursors */ - steam_send_report_byte(steam, STEAM_CMD_DEFAULT_MAPPINGS); -@@ -805,6 +843,29 @@ static void steam_work_connect_cb(struct work_struct *work) - } - } - -+static void steam_mode_switch_cb(struct work_struct *work) -+{ -+ struct steam_device *steam = container_of(to_delayed_work(work), -+ struct steam_device, mode_switch); -+ steam->gamepad_mode = !steam->gamepad_mode; -+ if (!lizard_mode) -+ return; -+ -+ mutex_lock(&steam->mutex); -+ if (steam->gamepad_mode) -+ steam_set_lizard_mode(steam, false); -+ else if (!steam->client_opened) -+ steam_set_lizard_mode(steam, lizard_mode); -+ mutex_unlock(&steam->mutex); -+ -+ steam_haptic_pulse(steam, STEAM_PAD_RIGHT, 0x190, 0, 1, 0); -+ if (steam->gamepad_mode) { -+ steam_haptic_pulse(steam, STEAM_PAD_LEFT, 0x14D, 0x14D, 0x2D, 0); -+ } else { -+ steam_haptic_pulse(steam, STEAM_PAD_LEFT, 0x1F4, 0x1F4, 0x1E, 0); -+ } -+} -+ - static bool steam_is_valve_interface(struct hid_device *hdev) - { - struct hid_report_enum *rep_enum; -@@ -977,6 +1038,7 @@ static int steam_probe(struct hid_device *hdev, - mutex_init(&steam->mutex); - steam->quirks = id->driver_data; - 
INIT_WORK(&steam->work_connect, steam_work_connect_cb); -+ INIT_DELAYED_WORK(&steam->mode_switch, steam_mode_switch_cb); - INIT_LIST_HEAD(&steam->list); - INIT_DEFERRABLE_WORK(&steam->heartbeat, steam_lizard_mode_heartbeat); - INIT_WORK(&steam->rumble_work, steam_haptic_rumble_cb); -@@ -1036,6 +1098,7 @@ static int steam_probe(struct hid_device *hdev, - client_hdev_fail: - cancel_work_sync(&steam->work_connect); - cancel_delayed_work_sync(&steam->heartbeat); -+ cancel_delayed_work_sync(&steam->mode_switch); - cancel_work_sync(&steam->rumble_work); - steam_alloc_fail: - hid_err(hdev, "%s: failed with error %d\n", -@@ -1059,6 +1122,7 @@ static void steam_remove(struct hid_device *hdev) - cancel_delayed_work_sync(&steam->heartbeat); - mutex_unlock(&steam->mutex); - cancel_work_sync(&steam->work_connect); -+ cancel_delayed_work_sync(&steam->mode_switch); - if (steam->quirks & STEAM_QUIRK_WIRELESS) { - hid_info(hdev, "Steam wireless receiver disconnected"); - } -@@ -1393,6 +1457,14 @@ static void steam_do_deck_input_event(struct steam_device *steam, - input_event(input, EV_KEY, BTN_BASE, !!(b14 & BIT(2))); - - input_sync(input); -+ -+ if (!(b9 & BIT(6)) && steam->did_mode_switch) { -+ steam->did_mode_switch = false; -+ cancel_delayed_work_sync(&steam->mode_switch); -+ } else if (!steam->client_opened && (b9 & BIT(6)) && !steam->did_mode_switch) { -+ steam->did_mode_switch = true; -+ schedule_delayed_work(&steam->mode_switch, 45 * HZ / 100); -+ } - } - - /* --- -2.41.0 - - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Vicki Pfau -Date: Mon, 8 May 2023 20:24:56 -0700 -Subject: [PATCH 05/10] HID: hid-steam: Clean up locking - -This cleans up the locking logic so that the spinlock is consistently used for -access to a small handful of struct variables, and the mutex is exclusively and -consistently used for ensuring that mutliple threads aren't trying to -send/receive reports at the same time. 
Previously, only some report -transactions were guarded by this mutex, potentially breaking atomicity. The -mutex has been renamed to reflect this usage. - -Signed-off-by: Vicki Pfau ---- - drivers/hid/hid-steam.c | 148 ++++++++++++++++++++++++---------------- - 1 file changed, 90 insertions(+), 58 deletions(-) - -diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c -index 0620046b142ef..845ca71b8bd3a 100644 ---- a/drivers/hid/hid-steam.c -+++ b/drivers/hid/hid-steam.c -@@ -214,7 +214,7 @@ struct steam_device { - struct list_head list; - spinlock_t lock; - struct hid_device *hdev, *client_hdev; -- struct mutex mutex; -+ struct mutex report_mutex; - bool client_opened; - struct input_dev __rcu *input; - unsigned long quirks; -@@ -361,21 +361,26 @@ static int steam_get_serial(struct steam_device *steam) - * Send: 0xae 0x15 0x01 - * Recv: 0xae 0x15 0x01 serialnumber (10 chars) - */ -- int ret; -+ int ret = 0; - u8 cmd[] = {STEAM_CMD_GET_STRING_ATTRIB, 0x15, STEAM_ATTRIB_STR_UNIT_SERIAL}; - u8 reply[3 + STEAM_SERIAL_LEN + 1]; - -+ mutex_lock(&steam->report_mutex); - ret = steam_send_report(steam, cmd, sizeof(cmd)); - if (ret < 0) -- return ret; -+ goto out; - ret = steam_recv_report(steam, reply, sizeof(reply)); - if (ret < 0) -- return ret; -- if (reply[0] != 0xae || reply[1] != 0x15 || reply[2] != STEAM_ATTRIB_STR_UNIT_SERIAL) -- return -EIO; -+ goto out; -+ if (reply[0] != 0xae || reply[1] != 0x15 || reply[2] != STEAM_ATTRIB_STR_UNIT_SERIAL) { -+ ret = -EIO; -+ goto out; -+ } - reply[3 + STEAM_SERIAL_LEN] = 0; - strscpy(steam->serial_no, reply + 3, sizeof(steam->serial_no)); -- return 0; -+out: -+ mutex_unlock(&steam->report_mutex); -+ return ret; - } - - /* -@@ -385,7 +390,11 @@ static int steam_get_serial(struct steam_device *steam) - */ - static inline int steam_request_conn_status(struct steam_device *steam) - { -- return steam_send_report_byte(steam, STEAM_CMD_DONGLE_GET_STATE); -+ int ret; -+ mutex_lock(&steam->report_mutex); -+ ret = 
steam_send_report_byte(steam, STEAM_CMD_DONGLE_GET_STATE); -+ mutex_unlock(&steam->report_mutex); -+ return ret; - } - - /* -@@ -397,6 +406,7 @@ static inline int steam_request_conn_status(struct steam_device *steam) - static inline int steam_haptic_pulse(struct steam_device *steam, u8 pad, - u16 duration, u16 interval, u16 count, u8 gain) - { -+ int ret; - u8 report[10] = {STEAM_CMD_HAPTIC_PULSE, 8}; - - /* Left and right are swapped on this report for legacy reasons */ -@@ -412,13 +422,17 @@ static inline int steam_haptic_pulse(struct steam_device *steam, u8 pad, - report[8] = count >> 8; - report[9] = gain; - -- return steam_send_report(steam, report, sizeof(report)); -+ mutex_lock(&steam->report_mutex); -+ ret = steam_send_report(steam, report, sizeof(report)); -+ mutex_unlock(&steam->report_mutex); -+ return ret; - } - - static inline int steam_haptic_rumble(struct steam_device *steam, - u16 intensity, u16 left_speed, u16 right_speed, - u8 left_gain, u8 right_gain) - { -+ int ret; - u8 report[11] = {STEAM_CMD_HAPTIC_RUMBLE, 9}; - - report[3] = intensity & 0xFF; -@@ -430,7 +444,10 @@ static inline int steam_haptic_rumble(struct steam_device *steam, - report[9] = left_gain; - report[10] = right_gain; - -- return steam_send_report(steam, report, sizeof(report)); -+ mutex_lock(&steam->report_mutex); -+ ret = steam_send_report(steam, report, sizeof(report)); -+ mutex_unlock(&steam->report_mutex); -+ return ret; - } - - static void steam_haptic_rumble_cb(struct work_struct *work) -@@ -460,6 +477,7 @@ static void steam_set_lizard_mode(struct steam_device *steam, bool enable) - enable = false; - - if (enable) { -+ mutex_lock(&steam->report_mutex); - /* enable esc, enter, cursors */ - steam_send_report_byte(steam, STEAM_CMD_DEFAULT_MAPPINGS); - /* enable mouse */ -@@ -467,9 +485,11 @@ static void steam_set_lizard_mode(struct steam_device *steam, bool enable) - steam_write_registers(steam, - STEAM_REG_SMOOTH_ABSOLUTE_MOUSE, 0x01, /* enable smooth */ - 0); -+ 
mutex_unlock(&steam->report_mutex); - - cancel_delayed_work_sync(&steam->heartbeat); - } else { -+ mutex_lock(&steam->report_mutex); - /* disable esc, enter, cursor */ - steam_send_report_byte(steam, STEAM_CMD_CLEAR_MAPPINGS); - -@@ -481,18 +501,19 @@ static void steam_set_lizard_mode(struct steam_device *steam, bool enable) - STEAM_REG_LEFT_TRACKPAD_CLICK_PRESSURE, 0xFFFF, /* disable clicky pad */ - STEAM_REG_RIGHT_TRACKPAD_CLICK_PRESSURE, 0xFFFF, /* disable clicky pad */ - 0); -+ mutex_unlock(&steam->report_mutex); - /* - * The Steam Deck has a watchdog that automatically enables - * lizard mode if it doesn't see any traffic for too long - */ -- if (!work_busy(&steam->heartbeat.work)) -- schedule_delayed_work(&steam->heartbeat, 5 * HZ); -+ schedule_delayed_work(&steam->heartbeat, 5 * HZ); - } else { - steam_write_registers(steam, - STEAM_REG_SMOOTH_ABSOLUTE_MOUSE, 0x00, /* disable smooth */ - STEAM_REG_LEFT_TRACKPAD_MODE, STEAM_TRACKPAD_NONE, /* disable mouse */ - STEAM_REG_RIGHT_TRACKPAD_MODE, STEAM_TRACKPAD_NONE, /* disable mouse */ - 0); -+ mutex_unlock(&steam->report_mutex); - } - } - } -@@ -500,22 +521,29 @@ static void steam_set_lizard_mode(struct steam_device *steam, bool enable) - static int steam_input_open(struct input_dev *dev) - { - struct steam_device *steam = input_get_drvdata(dev); -+ unsigned long flags; -+ bool set_lizard_mode; - -- mutex_lock(&steam->mutex); -- if (!steam->client_opened && lizard_mode) -+ spin_lock_irqsave(&steam->lock, flags); -+ set_lizard_mode = !steam->client_opened && lizard_mode; -+ spin_unlock_irqrestore(&steam->lock, flags); -+ if (set_lizard_mode) - steam_set_lizard_mode(steam, false); -- mutex_unlock(&steam->mutex); -+ - return 0; - } - - static void steam_input_close(struct input_dev *dev) - { - struct steam_device *steam = input_get_drvdata(dev); -+ unsigned long flags; -+ bool set_lizard_mode; - -- mutex_lock(&steam->mutex); -- if (!steam->client_opened && lizard_mode) -+ spin_lock_irqsave(&steam->lock, flags); -+ 
set_lizard_mode = !steam->client_opened && lizard_mode; -+ spin_unlock_irqrestore(&steam->lock, flags); -+ if (set_lizard_mode) - steam_set_lizard_mode(steam, true); -- mutex_unlock(&steam->mutex); - } - - static enum power_supply_property steam_battery_props[] = { -@@ -760,6 +788,7 @@ static int steam_register(struct steam_device *steam) - { - int ret; - bool client_opened; -+ unsigned long flags; - - /* - * This function can be called several times in a row with the -@@ -772,11 +801,9 @@ static int steam_register(struct steam_device *steam) - * Unlikely, but getting the serial could fail, and it is not so - * important, so make up a serial number and go on. - */ -- mutex_lock(&steam->mutex); - if (steam_get_serial(steam) < 0) - strscpy(steam->serial_no, "XXXXXXXXXX", - sizeof(steam->serial_no)); -- mutex_unlock(&steam->mutex); - - hid_info(steam->hdev, "Steam Controller '%s' connected", - steam->serial_no); -@@ -791,11 +818,11 @@ static int steam_register(struct steam_device *steam) - mutex_unlock(&steam_devices_lock); - } - -- mutex_lock(&steam->mutex); -+ spin_lock_irqsave(&steam->lock, flags); - client_opened = steam->client_opened; -+ spin_unlock_irqrestore(&steam->lock, flags); - if (!client_opened) - steam_set_lizard_mode(steam, lizard_mode); -- mutex_unlock(&steam->mutex); - - if (!client_opened) - ret = steam_input_register(steam); -@@ -847,16 +874,21 @@ static void steam_mode_switch_cb(struct work_struct *work) - { - struct steam_device *steam = container_of(to_delayed_work(work), - struct steam_device, mode_switch); -+ unsigned long flags; -+ bool client_opened; - steam->gamepad_mode = !steam->gamepad_mode; - if (!lizard_mode) - return; - -- mutex_lock(&steam->mutex); - if (steam->gamepad_mode) - steam_set_lizard_mode(steam, false); -- else if (!steam->client_opened) -- steam_set_lizard_mode(steam, lizard_mode); -- mutex_unlock(&steam->mutex); -+ else { -+ spin_lock_irqsave(&steam->lock, flags); -+ client_opened = steam->client_opened; -+ 
spin_unlock_irqrestore(&steam->lock, flags); -+ if (!client_opened) -+ steam_set_lizard_mode(steam, lizard_mode); -+ } - - steam_haptic_pulse(steam, STEAM_PAD_RIGHT, 0x190, 0, 1, 0); - if (steam->gamepad_mode) { -@@ -889,16 +921,21 @@ static void steam_lizard_mode_heartbeat(struct work_struct *work) - { - struct steam_device *steam = container_of(work, struct steam_device, - heartbeat.work); -+ bool client_opened; -+ unsigned long flags; - -- mutex_lock(&steam->mutex); -- if (!steam->client_opened && steam->client_hdev) { -+ spin_lock_irqsave(&steam->lock, flags); -+ client_opened = steam->client_opened; -+ spin_unlock_irqrestore(&steam->lock, flags); -+ if (!client_opened) { -+ mutex_lock(&steam->report_mutex); - steam_send_report_byte(steam, STEAM_CMD_CLEAR_MAPPINGS); - steam_write_registers(steam, - STEAM_REG_RIGHT_TRACKPAD_MODE, STEAM_TRACKPAD_NONE, /* disable mouse */ - 0); -+ mutex_unlock(&steam->report_mutex); - schedule_delayed_work(&steam->heartbeat, 5 * HZ); - } -- mutex_unlock(&steam->mutex); - } - - static int steam_client_ll_parse(struct hid_device *hdev) -@@ -921,10 +958,11 @@ static void steam_client_ll_stop(struct hid_device *hdev) - static int steam_client_ll_open(struct hid_device *hdev) - { - struct steam_device *steam = hdev->driver_data; -+ unsigned long flags; - -- mutex_lock(&steam->mutex); -+ spin_lock_irqsave(&steam->lock, flags); - steam->client_opened = true; -- mutex_unlock(&steam->mutex); -+ spin_unlock_irqrestore(&steam->lock, flags); - - steam_input_unregister(steam); - -@@ -939,14 +977,12 @@ static void steam_client_ll_close(struct hid_device *hdev) - bool connected; - - spin_lock_irqsave(&steam->lock, flags); -- connected = steam->connected; -+ steam->client_opened = false; -+ connected = steam->connected && !steam->client_opened; - spin_unlock_irqrestore(&steam->lock, flags); - -- mutex_lock(&steam->mutex); -- steam->client_opened = false; - if (connected) - steam_set_lizard_mode(steam, lizard_mode); -- mutex_unlock(&steam->mutex); 
- - if (connected) - steam_input_register(steam); -@@ -1035,7 +1071,7 @@ static int steam_probe(struct hid_device *hdev, - steam->hdev = hdev; - hid_set_drvdata(hdev, steam); - spin_lock_init(&steam->lock); -- mutex_init(&steam->mutex); -+ mutex_init(&steam->report_mutex); - steam->quirks = id->driver_data; - INIT_WORK(&steam->work_connect, steam_work_connect_cb); - INIT_DELAYED_WORK(&steam->mode_switch, steam_mode_switch_cb); -@@ -1043,13 +1079,6 @@ static int steam_probe(struct hid_device *hdev, - INIT_DEFERRABLE_WORK(&steam->heartbeat, steam_lizard_mode_heartbeat); - INIT_WORK(&steam->rumble_work, steam_haptic_rumble_cb); - -- steam->client_hdev = steam_create_client_hid(hdev); -- if (IS_ERR(steam->client_hdev)) { -- ret = PTR_ERR(steam->client_hdev); -- goto client_hdev_fail; -- } -- steam->client_hdev->driver_data = steam; -- - /* - * With the real steam controller interface, do not connect hidraw. - * Instead, create the client_hid and connect that. -@@ -1058,10 +1087,6 @@ static int steam_probe(struct hid_device *hdev, - if (ret) - goto hid_hw_start_fail; - -- ret = hid_add_device(steam->client_hdev); -- if (ret) -- goto client_hdev_add_fail; -- - ret = hid_hw_open(hdev); - if (ret) { - hid_err(hdev, -@@ -1087,15 +1112,26 @@ static int steam_probe(struct hid_device *hdev, - } - } - -+ steam->client_hdev = steam_create_client_hid(hdev); -+ if (IS_ERR(steam->client_hdev)) { -+ ret = PTR_ERR(steam->client_hdev); -+ goto client_hdev_fail; -+ } -+ steam->client_hdev->driver_data = steam; -+ -+ ret = hid_add_device(steam->client_hdev); -+ if (ret) -+ goto client_hdev_add_fail; -+ - return 0; - --input_register_fail: --hid_hw_open_fail: - client_hdev_add_fail: - hid_hw_stop(hdev); --hid_hw_start_fail: -- hid_destroy_device(steam->client_hdev); - client_hdev_fail: -+ hid_destroy_device(steam->client_hdev); -+input_register_fail: -+hid_hw_open_fail: -+hid_hw_start_fail: - cancel_work_sync(&steam->work_connect); - cancel_delayed_work_sync(&steam->heartbeat); - 
cancel_delayed_work_sync(&steam->mode_switch); -@@ -1115,14 +1151,12 @@ static void steam_remove(struct hid_device *hdev) - return; - } - -+ cancel_delayed_work_sync(&steam->heartbeat); -+ cancel_delayed_work_sync(&steam->mode_switch); -+ cancel_work_sync(&steam->work_connect); - hid_destroy_device(steam->client_hdev); -- mutex_lock(&steam->mutex); - steam->client_hdev = NULL; - steam->client_opened = false; -- cancel_delayed_work_sync(&steam->heartbeat); -- mutex_unlock(&steam->mutex); -- cancel_work_sync(&steam->work_connect); -- cancel_delayed_work_sync(&steam->mode_switch); - if (steam->quirks & STEAM_QUIRK_WIRELESS) { - hid_info(hdev, "Steam wireless receiver disconnected"); - } -@@ -1597,10 +1631,8 @@ static int steam_param_set_lizard_mode(const char *val, - - mutex_lock(&steam_devices_lock); - list_for_each_entry(steam, &steam_devices, list) { -- mutex_lock(&steam->mutex); - if (!steam->client_opened) - steam_set_lizard_mode(steam, lizard_mode); -- mutex_unlock(&steam->mutex); - } - mutex_unlock(&steam_devices_lock); - return 0; --- -2.41.0 - - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Vicki Pfau -Date: Wed, 10 May 2023 17:27:12 -0700 -Subject: [PATCH 06/10] HID: hid-steam: Make client_opened a counter - -The client_opened variable was used to track if the hidraw was opened by any -clients to silence keyboard/mouse events while opened. However, there was no -counting of how many clients were opened, so opening two at the same time and -then closing one would fool the driver into thinking it had no remaining opened -clients. 
- -Signed-off-by: Vicki Pfau ---- - drivers/hid/hid-steam.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c -index 845ca71b8bd3a..0c2fe51b29bc1 100644 ---- a/drivers/hid/hid-steam.c -+++ b/drivers/hid/hid-steam.c -@@ -215,7 +215,7 @@ struct steam_device { - spinlock_t lock; - struct hid_device *hdev, *client_hdev; - struct mutex report_mutex; -- bool client_opened; -+ unsigned long client_opened; - struct input_dev __rcu *input; - unsigned long quirks; - struct work_struct work_connect; -@@ -787,7 +787,7 @@ static void steam_battery_unregister(struct steam_device *steam) - static int steam_register(struct steam_device *steam) - { - int ret; -- bool client_opened; -+ unsigned long client_opened; - unsigned long flags; - - /* -@@ -961,7 +961,7 @@ static int steam_client_ll_open(struct hid_device *hdev) - unsigned long flags; - - spin_lock_irqsave(&steam->lock, flags); -- steam->client_opened = true; -+ steam->client_opened++; - spin_unlock_irqrestore(&steam->lock, flags); - - steam_input_unregister(steam); -@@ -977,7 +977,7 @@ static void steam_client_ll_close(struct hid_device *hdev) - bool connected; - - spin_lock_irqsave(&steam->lock, flags); -- steam->client_opened = false; -+ steam->client_opened--; - connected = steam->connected && !steam->client_opened; - spin_unlock_irqrestore(&steam->lock, flags); - -@@ -1156,7 +1156,7 @@ static void steam_remove(struct hid_device *hdev) - cancel_work_sync(&steam->work_connect); - hid_destroy_device(steam->client_hdev); - steam->client_hdev = NULL; -- steam->client_opened = false; -+ steam->client_opened = 0; - if (steam->quirks & STEAM_QUIRK_WIRELESS) { - hid_info(hdev, "Steam wireless receiver disconnected"); - } --- -2.41.0 - - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Vicki Pfau -Date: Thu, 18 May 2023 18:00:35 -0700 -Subject: [PATCH 07/10] HID: hid-steam: Better handling of serial number length - -The 
second byte of the GET_STRING_ATTRIB report is a length, so we should set -the size of the buffer to be the size we're actually requesting, and only -reject the reply if the length out is nonsensical. - -Signed-off-by: Vicki Pfau ---- - drivers/hid/hid-steam.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c -index 0c2fe51b29bc1..92e3e1052fa42 100644 ---- a/drivers/hid/hid-steam.c -+++ b/drivers/hid/hid-steam.c -@@ -208,7 +208,7 @@ static LIST_HEAD(steam_devices); - #define STEAM_PAD_BOTH 2 - - /* Other random constants */ --#define STEAM_SERIAL_LEN 10 -+#define STEAM_SERIAL_LEN 0x15 - - struct steam_device { - struct list_head list; -@@ -359,10 +359,10 @@ static int steam_get_serial(struct steam_device *steam) - { - /* - * Send: 0xae 0x15 0x01 -- * Recv: 0xae 0x15 0x01 serialnumber (10 chars) -+ * Recv: 0xae 0x15 0x01 serialnumber - */ - int ret = 0; -- u8 cmd[] = {STEAM_CMD_GET_STRING_ATTRIB, 0x15, STEAM_ATTRIB_STR_UNIT_SERIAL}; -+ u8 cmd[] = {STEAM_CMD_GET_STRING_ATTRIB, sizeof(steam->serial_no), STEAM_ATTRIB_STR_UNIT_SERIAL}; - u8 reply[3 + STEAM_SERIAL_LEN + 1]; - - mutex_lock(&steam->report_mutex); -@@ -372,12 +372,12 @@ static int steam_get_serial(struct steam_device *steam) - ret = steam_recv_report(steam, reply, sizeof(reply)); - if (ret < 0) - goto out; -- if (reply[0] != 0xae || reply[1] != 0x15 || reply[2] != STEAM_ATTRIB_STR_UNIT_SERIAL) { -+ if (reply[0] != 0xae || reply[1] < 1 || reply[1] > sizeof(steam->serial_no) || reply[2] != STEAM_ATTRIB_STR_UNIT_SERIAL) { - ret = -EIO; - goto out; - } - reply[3 + STEAM_SERIAL_LEN] = 0; -- strscpy(steam->serial_no, reply + 3, sizeof(steam->serial_no)); -+ strscpy(steam->serial_no, reply + 3, reply[1]); - out: - mutex_unlock(&steam->report_mutex); - return ret; --- -2.41.0 diff --git a/patches/nobara/uinput.patch b/patches/nobara/uinput.patch deleted file mode 100644 index c5666a8..0000000 --- a/patches/nobara/uinput.patch +++ 
/dev/null @@ -1,133 +0,0 @@ ---- - drivers/input/misc/uinput.c | 48 +++++++++++++++++++++++++------------ - include/uapi/linux/uinput.h | 5 ++++ - 2 files changed, 38 insertions(+), 15 deletions(-) - - -diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c -index 84051f20b18a..2c3180370a02 100644 ---- a/drivers/input/misc/uinput.c -+++ b/drivers/input/misc/uinput.c -@@ -20,6 +20,7 @@ - */ - #include - #include -+#include - #include - #include - #include -@@ -280,7 +281,7 @@ static int uinput_dev_flush(struct input_dev *dev, struct file *file) - - static void uinput_destroy_device(struct uinput_device *udev) - { -- const char *name, *phys; -+ const char *name, *phys, *uniq; - struct input_dev *dev = udev->dev; - enum uinput_state old_state = udev->state; - -@@ -289,6 +290,7 @@ static void uinput_destroy_device(struct uinput_device *udev) - if (dev) { - name = dev->name; - phys = dev->phys; -+ uniq = dev->uniq; - if (old_state == UIST_CREATED) { - uinput_flush_requests(udev); - input_unregister_device(dev); -@@ -297,6 +299,7 @@ static void uinput_destroy_device(struct uinput_device *udev) - } - kfree(name); - kfree(phys); -+ kfree(uniq); - udev->dev = NULL; - } - } -@@ -831,6 +834,24 @@ static int uinput_str_to_user(void __user *dest, const char *str, - return ret ? 
-EFAULT : len; - } - -+static int uinput_get_user_str(struct uinput_device *udev, const char **kptr, -+ const char *uptr, unsigned int size) -+{ -+ char *tmp; -+ -+ if (udev->state == UIST_CREATED) -+ return -EINVAL; -+ -+ tmp = strndup_user(uptr, size); -+ if (IS_ERR(tmp)) -+ return PTR_ERR(tmp); -+ -+ kfree(*kptr); -+ *kptr = tmp; -+ -+ return 0; -+} -+ - static long uinput_ioctl_handler(struct file *file, unsigned int cmd, - unsigned long arg, void __user *p) - { -@@ -839,7 +860,6 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd, - struct uinput_ff_upload ff_up; - struct uinput_ff_erase ff_erase; - struct uinput_request *req; -- char *phys; - const char *name; - unsigned int size; - -@@ -916,19 +936,8 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd, - goto out; - - case UI_SET_PHYS: -- if (udev->state == UIST_CREATED) { -- retval = -EINVAL; -- goto out; -- } -- -- phys = strndup_user(p, 1024); -- if (IS_ERR(phys)) { -- retval = PTR_ERR(phys); -- goto out; -- } -- -- kfree(udev->dev->phys); -- udev->dev->phys = phys; -+ pr_warn_once("uinput: UI_SET_PHYS is deprecated. 
Use UI_SET_PHYS_STR"); -+ retval = uinput_get_user_str(udev, &udev->dev->phys, p, 1024); - goto out; - - case UI_BEGIN_FF_UPLOAD: -@@ -1023,6 +1032,15 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd, - case UI_ABS_SETUP & ~IOCSIZE_MASK: - retval = uinput_abs_setup(udev, p, size); - goto out; -+ -+ case UI_SET_PHYS_STR(0): -+ retval = uinput_get_user_str(udev, &udev->dev->phys, p, size); -+ goto out; -+ -+ case UI_SET_UNIQ_STR(0): -+ retval = uinput_get_user_str(udev, &udev->dev->uniq, p, size); -+ goto out; -+ - } - - retval = -EINVAL; -diff --git a/include/uapi/linux/uinput.h b/include/uapi/linux/uinput.h -index c9e677e3af1d..84d4fa142830 100644 ---- a/include/uapi/linux/uinput.h -+++ b/include/uapi/linux/uinput.h -@@ -142,9 +142,14 @@ struct uinput_abs_setup { - #define UI_SET_LEDBIT _IOW(UINPUT_IOCTL_BASE, 105, int) - #define UI_SET_SNDBIT _IOW(UINPUT_IOCTL_BASE, 106, int) - #define UI_SET_FFBIT _IOW(UINPUT_IOCTL_BASE, 107, int) -+ -+/* DEPRECATED: Data size is ambiguous. Use UI_SET_PHYS_STR instead. 
*/ - #define UI_SET_PHYS _IOW(UINPUT_IOCTL_BASE, 108, char*) -+ - #define UI_SET_SWBIT _IOW(UINPUT_IOCTL_BASE, 109, int) - #define UI_SET_PROPBIT _IOW(UINPUT_IOCTL_BASE, 110, int) -+#define UI_SET_PHYS_STR(len) _IOC(_IOC_WRITE, UINPUT_IOCTL_BASE, 111, len) -+#define UI_SET_UNIQ_STR(len) _IOC(_IOC_WRITE, UINPUT_IOCTL_BASE, 112, len) - - #define UI_BEGIN_FF_UPLOAD _IOWR(UINPUT_IOCTL_BASE, 200, struct uinput_ff_upload) - #define UI_END_FF_UPLOAD _IOW(UINPUT_IOCTL_BASE, 201, struct uinput_ff_upload) diff --git a/patches/series b/patches/series deleted file mode 100644 index 6d09e36..0000000 --- a/patches/series +++ /dev/null @@ -1,15 +0,0 @@ -cachyos/0001-cachyos-base-all.patch -cachyos/0001-bore-cachy.patch -# nobara/0001-Allow-to-set-custom-USB-pollrate-for-specific-device.patch -# nobara/0001-Revert-PCI-Add-a-REBAR-size-quirk-for-Sapphire-RX-56.patch -# nobara/0001-Revert-nvme-pci-drop-redundant-pci_enable_pcie_error.patch -# nobara/0001-Set-amdgpu.ppfeaturemask-0xffffffff-as-default.patch -# nobara/0001-acpi-proc-idle-skip-dummy-wait.patch -# nobara/0001-add-acpi_call.patch -# nobara/amdgpu-si-cik-default.patch -# nobara/lenovo-legion-laptop.patch -# asuslinux/0001-platform-x86-asus-wmi-add-support-for-2024-ROG-Mini-.patch -# asuslinux/0002-platform-x86-asus-wmi-add-support-for-Vivobook-GPU-M.patch -# asuslinux/0003-platform-x86-asus-wmi-add-support-variant-of-TUF-RGB.patch -# asuslinux/0004-platform-x86-asus-wmi-support-toggling-POST-sound.patch -# asuslinux/0005-platform-x86-asus-wmi-store-a-min-default-for-ppt-op.patch diff --git a/release.sh b/release.sh index f38fc8e..1e45a35 100755 --- a/release.sh +++ b/release.sh @@ -1,2 +1,2 @@ # send debs to server -rsync -azP --include './' --include '*.deb' --exclude '*' ./output/ ferreo@direct.pika-os.com:/srv/www/cockatiel-incoming/ \ No newline at end of file +rsync -azP --include './' --include '*.deb' --exclude '*' ./output/ ferreo@direct.pika-os.com:/srv/www/cockatiel-incoming/ diff
--git a/scripts/build.sh b/scripts-v3/build.sh similarity index 100% rename from scripts/build.sh rename to scripts-v3/build.sh diff --git a/config b/scripts-v3/config similarity index 100% rename from config rename to scripts-v3/config diff --git a/scripts-v3/config.sh b/scripts-v3/config.sh new file mode 100755 index 0000000..31aec28 --- /dev/null +++ b/scripts-v3/config.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +echo "Pika Kernel - Applying configuration" + +scripts-v3/config -k -d CONFIG_GENERIC_CPU +scripts-v3/config -k -e CONFIG_GENERIC_CPU3 +scripts-v3/config -e CACHY +scripts-v3/config -e SCHED_BORE + +scripts-v3/config -e HZ_300 --set-val HZ 750 +scripts-v3/config -d HZ_PERIODIC -d NO_HZ_IDLE -d CONTEXT_TRACKING_FORCE -e NO_HZ_FULL_NODEF -e NO_HZ_FULL -e NO_HZ -e NO_HZ_COMMON -e CONTEXT_TRACKING +scripts-v3/config -e PREEMPT_BUILD -d PREEMPT_NONE -d PREEMPT_VOLUNTARY -e PREEMPT -e PREEMPT_COUNT -e PREEMPTION -e PREEMPT_DYNAMIC + +scripts-v3/config -d CC_OPTIMIZE_FOR_PERFORMANCE \ + -e CC_OPTIMIZE_FOR_PERFORMANCE_O3 + +scripts-v3/config -m TCP_CONG_CUBIC \ + -d DEFAULT_CUBIC \ + -e TCP_CONG_BBR \ + -e DEFAULT_BBR \ + --set-str DEFAULT_TCP_CONG bbr + +scripts-v3/config -m NET_SCH_FQ_CODEL \ + -e NET_SCH_FQ \ + -d DEFAULT_FQ_CODEL \ + -e DEFAULT_FQ \ + --set-str DEFAULT_NET_SCH fq + +scripts-v3/config -e LRU_GEN -e LRU_GEN_ENABLED -d LRU_GEN_STATS + +scripts-v3/config -d TRANSPARENT_HUGEPAGE_MADVISE -e TRANSPARENT_HUGEPAGE_ALWAYS + +scripts-v3/config -e PER_VMA_LOCK -d PER_VMA_LOCK_STATS + +scripts-v3/config -e DAMON \ + -e DAMON_VADDR \ + -e DAMON_DBGFS \ + -e DAMON_SYSFS \ + -e DAMON_PADDR \ + -e DAMON_RECLAIM \ + -e DAMON_LRU_SORT + +scripts-v3/config --set-val MODULE_COMPRESS_ZSTD_LEVEL 19 -e MODULE_COMPRESS_ZSTD_ULTRA --set-val MODULE_COMPRESS_ZSTD_LEVEL_ULTRA 22 --set-val ZSTD_COMP_VAL 22 + +scripts-v3/config -e EFI_HANDOVER_PROTOCOL + +scripts-v3/config -e USER_NS + +make prepare diff --git a/scripts/output.sh b/scripts-v3/output.sh similarity index 100% 
rename from scripts/output.sh rename to scripts-v3/output.sh diff --git a/scripts-v3/patch.sh b/scripts-v3/patch.sh new file mode 100755 index 0000000..fdfa7af --- /dev/null +++ b/scripts-v3/patch.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +echo "Pika Kernel - Applying patches" + +# Apply each patch named in ../patches/series (lines starting with '#' are skipped); stop at the first failure. +if [ -f ../patches/series ] +then + for i in $(grep -v '^#' ../patches/series) ; do echo "Applying Patch: $i"; patch -Np1 -i "../patches/$i" || { echo "Applying Patch $i Failed!" >&2; exit 2; }; done +fi \ No newline at end of file diff --git a/scripts/source.sh b/scripts-v3/source.sh similarity index 100% rename from scripts/source.sh rename to scripts-v3/source.sh diff --git a/scripts/config.sh b/scripts/config.sh deleted file mode 100755 index b1719da..0000000 --- a/scripts/config.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/bash - -echo "Pika Kernel - Applying configuration" - -cp ../config .config - -scripts/config -k -d CONFIG_GENERIC_CPU -scripts/config -k -e CONFIG_GENERIC_CPU3 -scripts/config -e CACHY -scripts/config -e SCHED_BORE - -scripts/config -e HZ_300 --set-val HZ 750 -scripts/config -d HZ_PERIODIC -d NO_HZ_IDLE -d CONTEXT_TRACKING_FORCE -e NO_HZ_FULL_NODEF -e NO_HZ_FULL -e NO_HZ -e NO_HZ_COMMON -e CONTEXT_TRACKING -scripts/config -e PREEMPT_BUILD -d PREEMPT_NONE -d PREEMPT_VOLUNTARY -e PREEMPT -e PREEMPT_COUNT -e PREEMPTION -e PREEMPT_DYNAMIC - -scripts/config -d CC_OPTIMIZE_FOR_PERFORMANCE \ - -e CC_OPTIMIZE_FOR_PERFORMANCE_O3 - -scripts/config -m TCP_CONG_CUBIC \ - -d DEFAULT_CUBIC \ - -e TCP_CONG_BBR \ - -e DEFAULT_BBR \ - --set-str DEFAULT_TCP_CONG bbr - -scripts/config -m NET_SCH_FQ_CODEL \ - -e NET_SCH_FQ \ - -d DEFAULT_FQ_CODEL \ - -e DEFAULT_FQ \ - --set-str DEFAULT_NET_SCH fq - -scripts/config -e LRU_GEN -e LRU_GEN_ENABLED -d LRU_GEN_STATS - -scripts/config -d TRANSPARENT_HUGEPAGE_MADVISE -e TRANSPARENT_HUGEPAGE_ALWAYS - -scripts/config -e PER_VMA_LOCK -d PER_VMA_LOCK_STATS - -scripts/config -e DAMON \ - -e DAMON_VADDR \ - -e DAMON_DBGFS \ - -e DAMON_SYSFS \ - -e
DAMON_PADDR \ - -e DAMON_RECLAIM \ - -e DAMON_LRU_SORT - -scripts/config --set-val MODULE_COMPRESS_ZSTD_LEVEL 19 -e MODULE_COMPRESS_ZSTD_ULTRA --set-val MODULE_COMPRESS_ZSTD_LEVEL_ULTRA 22 --set-val ZSTD_COMP_VAL 22 - -scripts/config -e EFI_HANDOVER_PROTOCOL - -scripts/config -e USER_NS - -make prepare diff --git a/scripts/patch.sh b/scripts/patch.sh deleted file mode 100755 index a54d3e4..0000000 --- a/scripts/patch.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -echo "Pika Kernel - Applying patches" - -for i in $(cat ../patches/series | grep -v '^#') ; do echo "Applying Patch: $i" && patch -Np1 -i ../patches/$i || bash -c "echo "Applying Patch $i Failed!" && exit 2"; done